Diffstat (limited to 'target')
446 files changed, 34722 insertions, 18102 deletions
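A pattern that recurs throughout this series (see the alpha hunks directly below) is replacing each target's inline cpu_get_tb_cpu_state() helper with a get_tb_cpu_state hook in TCGCPUOps that returns a TCGTBCPUState by value; in the user-only path the hook sets the unaligned-access TB flag branchlessly, multiplying the flag by a bool rather than branching on it. A minimal stand-alone sketch of that idiom follows; the flag value and the harness are illustrative, not QEMU's actual definitions:

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TB_FLAG_UNALIGN (1u << 0)   /* illustrative bit position, not QEMU's */

/* Same shape as the alpha hook below:
 *   flags |= TB_FLAG_UNALIGN * !cs->prctl_unalign_sigbus;
 * A bool is 0 or 1, so the multiply contributes either the whole
 * flag or nothing, with no branch. */
static uint32_t tb_flags(uint32_t flags, bool prctl_unalign_sigbus)
{
    flags |= TB_FLAG_UNALIGN * !prctl_unalign_sigbus;
    return flags;
}

int main(void)
{
    /* No SIGBUS requested: flag set, translator tolerates unaligned access. */
    printf("%#" PRIx32 "\n", tb_flags(0, false));   /* 0x1 */
    /* SIGBUS on unaligned access requested: flag stays clear. */
    printf("%#" PRIx32 "\n", tb_flags(0, true));    /* 0 */
    return 0;
}

Because the result is a pure data dependency, the flag computation costs the same on both paths and feeds directly into the TB flags word the translator keys on.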
diff --git a/target/alpha/cpu-param.h b/target/alpha/cpu-param.h index ff06e41..a799f42 100644 --- a/target/alpha/cpu-param.h +++ b/target/alpha/cpu-param.h @@ -18,14 +18,12 @@ * a 4k minimum to match x86 host, which can minimize emulation issues. */ # define TARGET_PAGE_BITS_VARY -# define TARGET_PAGE_BITS_MIN 12 # define TARGET_VIRT_ADDR_SPACE_BITS 63 #else # define TARGET_PAGE_BITS 13 # define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) #endif -/* Alpha processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) +#define TARGET_INSN_START_EXTRA_WORDS 0 #endif diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 584c2aa..932cdda 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -23,8 +23,9 @@ #include "qapi/error.h" #include "qemu/qemu-print.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ops.h" #include "fpu/softfloat.h" @@ -40,6 +41,18 @@ static vaddr alpha_cpu_get_pc(CPUState *cs) return env->pc; } +static TCGTBCPUState alpha_get_tb_cpu_state(CPUState *cs) +{ + CPUAlphaState *env = cpu_env(cs); + uint32_t flags = env->flags & ENV_FLAG_TB_MASK; + +#ifdef CONFIG_USER_ONLY + flags |= TB_FLAG_UNALIGN * !cs->prctl_unalign_sigbus; +#endif + + return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; +} + static void alpha_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -73,10 +86,10 @@ static bool alpha_cpu_has_work(CPUState *cs) assume that if a CPU really wants to stay asleep, it will mask interrupts at the chipset level, which will prevent these bits from being set in the first place. */ - return cs->interrupt_request & (CPU_INTERRUPT_HARD - | CPU_INTERRUPT_TIMER - | CPU_INTERRUPT_SMP - | CPU_INTERRUPT_MCHK); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD + | CPU_INTERRUPT_TIMER + | CPU_INTERRUPT_SMP + | CPU_INTERRUPT_MCHK); } #endif /* !CONFIG_USER_ONLY */ @@ -231,28 +244,34 @@ static const struct SysemuCPUOps alpha_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps alpha_tcg_ops = { + /* Alpha processors have a weak memory model */ + .guest_default_memory_order = 0, + .mttcg_supported = true, + .initialize = alpha_translate_init, .translate_code = alpha_translate_code, + .get_tb_cpu_state = alpha_get_tb_cpu_state, .synchronize_from_tb = alpha_cpu_synchronize_from_tb, .restore_state_to_opc = alpha_restore_state_to_opc, + .mmu_index = alpha_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = alpha_cpu_record_sigsegv, .record_sigbus = alpha_cpu_record_sigbus, #else .tlb_fill = alpha_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = alpha_cpu_exec_interrupt, .cpu_exec_halt = alpha_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = alpha_cpu_do_interrupt, .do_transaction_failed = alpha_cpu_do_transaction_failed, .do_unaligned_access = alpha_cpu_do_unaligned_access, #endif /* !CONFIG_USER_ONLY */ }; -static void alpha_cpu_class_init(ObjectClass *oc, void *data) +static void alpha_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -262,12 +281,12 @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data) &acc->parent_realize); cc->class_by_name = alpha_cpu_class_by_name; - cc->mmu_index = alpha_cpu_mmu_index; cc->dump_state = alpha_cpu_dump_state; cc->set_pc = alpha_cpu_set_pc; cc->get_pc = alpha_cpu_get_pc; cc->gdb_read_register = alpha_cpu_gdb_read_register; cc->gdb_write_register = 
alpha_cpu_gdb_write_register; + cc->gdb_core_xml_file = "alpha-core.xml"; #ifndef CONFIG_USER_ONLY dc->vmsd = &vmstate_alpha_cpu; cc->sysemu_ops = &alpha_sysemu_ops; diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index 80562ad..45944e4 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -21,7 +21,9 @@ #define ALPHA_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" #define ICACHE_LINE_SIZE 32 @@ -287,8 +289,6 @@ void alpha_cpu_dump_state(CPUState *cs, FILE *f, int flags); int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -#include "exec/cpu-all.h" - enum { FEATURE_ASN = 0x00000001, FEATURE_SPS = 0x00000002, @@ -464,17 +464,6 @@ void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MemTxResult response, uintptr_t retaddr); #endif -static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - *pc = env->pc; - *cs_base = 0; - *pflags = env->flags & ENV_FLAG_TB_MASK; -#ifdef CONFIG_USER_ONLY - *pflags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus; -#endif -} - #ifdef CONFIG_USER_ONLY /* Copied from linux ieee_swcr_to_fpcr. */ static inline uint64_t alpha_ieee_swcr_to_fpcr(uint64_t swcr) diff --git a/target/alpha/fpu_helper.c b/target/alpha/fpu_helper.c index 6aefb9b..30f3c7f 100644 --- a/target/alpha/fpu_helper.c +++ b/target/alpha/fpu_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" diff --git a/target/alpha/helper.c b/target/alpha/helper.c index 57cefcb..096eac3 100644 --- a/target/alpha/helper.c +++ b/target/alpha/helper.c @@ -22,9 +22,11 @@ #include "cpu.h" #include "exec/cputlb.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "fpu/softfloat-types.h" #include "exec/helper-proto.h" #include "qemu/qemu-print.h" +#include "system/memory.h" #define CONVERT_BIT(X, SRC, DST) \ diff --git a/target/alpha/helper.h b/target/alpha/helper.h index d60f208..788d2fb 100644 --- a/target/alpha/helper.h +++ b/target/alpha/helper.h @@ -90,7 +90,6 @@ DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64) #if !defined (CONFIG_USER_ONLY) DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64) -DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env) DEF_HELPER_1(halt, void, i64) diff --git a/target/alpha/int_helper.c b/target/alpha/int_helper.c index 5672696..6bfe635 100644 --- a/target/alpha/int_helper.c +++ b/target/alpha/int_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" diff --git a/target/alpha/machine.c b/target/alpha/machine.c index f09834f..5f302b1 100644 --- a/target/alpha/machine.c +++ b/target/alpha/machine.c @@ -74,7 +74,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, AlphaCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, AlphaCPU, 1, vmstate_env, CPUAlphaState), VMSTATE_END_OF_LIST() }; diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c index 872955f..2113fe3 100644 --- a/target/alpha/mem_helper.c +++ b/target/alpha/mem_helper.c @@ -20,8 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" 
#include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retaddr) { diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c index 51e3254..87e3760 100644 --- a/target/alpha/sys_helper.c +++ b/target/alpha/sys_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/cputlb.h" -#include "exec/tb-flush.h" #include "exec/helper-proto.h" #include "system/runstate.h" #include "system/system.h" @@ -38,11 +37,6 @@ void helper_tbis(CPUAlphaState *env, uint64_t p) tlb_flush_page(env_cpu(env), p); } -void helper_tb_flush(CPUAlphaState *env) -{ - tb_flush(env_cpu(env)); -} - void helper_halt(uint64_t restart) { if (restart) { diff --git a/target/alpha/translate.c b/target/alpha/translate.c index 2156c02..f11b382 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -21,12 +21,12 @@ #include "cpu.h" #include "system/cpus.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/translator.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "exec/log.h" #define HELPER_H "helper.h" @@ -48,8 +48,6 @@ struct DisasContext { #ifdef CONFIG_USER_ONLY MemOp unalign; -#else - uint64_t palbr; #endif uint32_t tbflags; int mem_idx; @@ -1155,7 +1153,6 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) #else { TCGv tmp = tcg_temp_new(); - uint64_t entry; gen_pc_disp(ctx, tmp, 0); if (ctx->tbflags & ENV_FLAG_PAL_MODE) { @@ -1165,12 +1162,11 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) } tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUAlphaState, exc_addr)); - entry = ctx->palbr; - entry += (palcode & 0x80 - ? 0x2000 + (palcode - 0x80) * 64 - : 0x1000 + palcode * 64); - - tcg_gen_movi_i64(cpu_pc, entry); + tcg_gen_ld_i64(cpu_pc, tcg_env, offsetof(CPUAlphaState, palbr)); + tcg_gen_addi_i64(cpu_pc, cpu_pc, + palcode & 0x80 + ? 0x2000 + (palcode - 0x80) * 64 + : 0x1000 + palcode * 64); return DISAS_PC_UPDATED; } #endif @@ -1292,11 +1288,7 @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno) case 7: /* PALBR */ tcg_gen_st_i64(vb, tcg_env, offsetof(CPUAlphaState, palbr)); - /* Changing the PAL base register implies un-chaining all of the TBs - that ended with a CALL_PAL. Since the base register usually only - changes during boot, flushing everything works well. */ - gen_helper_tb_flush(tcg_env); - return DISAS_PC_STALE; + break; case 32 ... 39: /* Accessing the "non-shadow" general registers. */ @@ -2874,7 +2866,6 @@ static void alpha_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) ctx->ir = cpu_std_ir; ctx->unalign = (ctx->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN); #else - ctx->palbr = env->palbr; ctx->ir = (ctx->tbflags & ENV_FLAG_PAL_MODE ? 
cpu_pal_ir : cpu_std_ir); #endif diff --git a/target/alpha/vax_helper.c b/target/alpha/vax_helper.c index f94fb51..c1d201e 100644 --- a/target/alpha/vax_helper.c +++ b/target/alpha/vax_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c index c40df4e..1dd7984 100644 --- a/target/arm/arch_dump.c +++ b/target/arm/arch_dump.c @@ -143,7 +143,6 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, return 0; } -#ifdef TARGET_AARCH64 static off_t sve_zreg_offset(uint32_t vq, int n) { off_t off = sizeof(struct aarch64_user_sve_header); @@ -231,7 +230,6 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, return 0; } -#endif int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, DumpState *s) @@ -273,11 +271,9 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, return ret; } -#ifdef TARGET_AARCH64 if (cpu_isar_feature(aa64_sve, cpu)) { ret = aarch64_write_elf64_sve(f, env, cpuid, s); } -#endif return ret; } @@ -451,11 +447,9 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) if (class == ELFCLASS64) { note_size = AARCH64_PRSTATUS_NOTE_SIZE; note_size += AARCH64_PRFPREG_NOTE_SIZE; -#ifdef TARGET_AARCH64 if (cpu_isar_feature(aa64_sve, cpu)) { note_size += AARCH64_SVE_NOTE_SIZE(&cpu->env); } -#endif } else { note_size = ARM_PRSTATUS_NOTE_SIZE; if (cpu_isar_feature(aa32_vfp_simd, cpu)) { diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index 20c70c7..a788376 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -17,24 +17,12 @@ #include "qemu/main-loop.h" #include "system/tcg.h" #include "target/arm/multiprocessing.h" - -#ifndef DEBUG_ARM_POWERCTL -#define DEBUG_ARM_POWERCTL 0 -#endif - -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_ARM_POWERCTL) { \ - fprintf(stderr, "[ARM]%s: " fmt , __func__, ##args); \ - } \ - } while (0) +#include "trace.h" CPUState *arm_get_cpu_by_id(uint64_t id) { CPUState *cpu; - DPRINTF("cpu %" PRId64 "\n", id); - CPU_FOREACH(cpu) { ARMCPU *armcpu = ARM_CPU(cpu); @@ -102,9 +90,9 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, assert(bql_locked()); - DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 - "\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry, - context_id); + trace_arm_powerctl_set_cpu_on(cpuid, target_el, + target_aa64 ? 
"aarch64" : "aarch32", + entry, context_id); /* requested EL level need to be in the 1 to 3 range */ assert((target_el > 0) && (target_el < 4)); @@ -208,6 +196,8 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid) assert(bql_locked()); + trace_arm_powerctl_set_cpu_on_and_reset(cpuid); + /* Retrieve the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); if (!target_cpu_state) { @@ -261,7 +251,7 @@ int arm_set_cpu_off(uint64_t cpuid) assert(bql_locked()); - DPRINTF("cpu %" PRId64 "\n", cpuid); + trace_arm_powerctl_set_cpu_off(cpuid); /* change to the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); @@ -297,7 +287,7 @@ int arm_reset_cpu(uint64_t cpuid) assert(bql_locked()); - DPRINTF("cpu %" PRId64 "\n", cpuid); + trace_arm_powerctl_set_cpu_off(cpuid); /* change to the cpu we are resetting */ target_cpu_state = arm_get_cpu_by_id(cpuid); diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index 883c0a0..d292c97 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -21,15 +21,17 @@ */ #include "qemu/osdep.h" +#include "qemu/target-info.h" #include "hw/boards.h" #include "kvm_arm.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-commands-misc-arm.h" #include "qobject/qdict.h" #include "qom/qom-qobject.h" +#include "cpu.h" static GICCapability *gic_cap_new(int version) { @@ -46,7 +48,7 @@ static inline void gic_cap_kvm_probe(GICCapability *v2, GICCapability *v3) #ifdef CONFIG_KVM int fdarray[3]; - if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, NULL)) { + if (!kvm_arm_create_scratch_host_vcpu(fdarray, NULL)) { return; } @@ -240,7 +242,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_ARM_CPU, false); + list = object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, arm_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.c index da51f2d..2b77ce9 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.c @@ -7,12 +7,12 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef TARGET_ARM_COMMON_SEMI_TARGET_H -#define TARGET_ARM_COMMON_SEMI_TARGET_H - +#include "qemu/osdep.h" +#include "cpu.h" +#include "semihosting/common-semi.h" #include "target/arm/cpu-qom.h" -static inline target_ulong common_semi_arg(CPUState *cs, int argno) +uint64_t common_semi_arg(CPUState *cs, int argno) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; @@ -23,7 +23,7 @@ static inline target_ulong common_semi_arg(CPUState *cs, int argno) } } -static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +void common_semi_set_ret(CPUState *cs, uint64_t ret) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; @@ -34,27 +34,25 @@ static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) } } -static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +bool common_semi_sys_exit_is_extended(CPUState *cs) { - return nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cpu_env(cs)); + return is_a64(cpu_env(cs)); } -static inline bool is_64bit_semihosting(CPUArchState *env) +bool is_64bit_semihosting(CPUArchState *env) { return is_a64(env); } -static inline target_ulong common_semi_stack_bottom(CPUState 
*cs) +uint64_t common_semi_stack_bottom(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; return is_a64(env) ? env->xregs[31] : env->regs[13]; } -static inline bool common_semi_has_synccache(CPUArchState *env) +bool common_semi_has_synccache(CPUArchState *env) { /* Ok for A64, invalid for A32/T32 */ return is_a64(env); } - -#endif diff --git a/target/arm/cpregs-gcs.c b/target/arm/cpregs-gcs.c new file mode 100644 index 0000000..1ed52a2 --- /dev/null +++ b/target/arm/cpregs-gcs.c @@ -0,0 +1,156 @@ +/* + * QEMU ARM CP Register GCS regiters and instructions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "exec/icount.h" +#include "hw/irq.h" +#include "cpu.h" +#include "cpu-features.h" +#include "cpregs.h" +#include "internals.h" + + +static CPAccessResult access_gcs(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_GCSEN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcs_el0(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 0 && !(env->cp15.gcscr_el[0] & GCSCRE0_NTR)) { + return CP_ACCESS_TRAP_EL1; + } + return access_gcs(env, ri, isread); +} + +static void gcspr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Bits [2:0] are RES0, so we might as well clear them now, + * rather than upon each usage a-la GetCurrentGCSPointer. + */ + raw_write(env, ri, value & ~7); +} + +static CPAccessResult access_gcspushm(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + if (!(env->cp15.gcscr_el[el] & GCSCR_PUSHMEN)) { + return CP_ACCESS_TRAP_BIT | (el ? el : 1); + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcspushx(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Trap if lock taken, and enabled. */ + if (!(env->pstate & PSTATE_EXLOCK)) { + int el = arm_current_el(env); + if (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN) { + return CP_ACCESS_EXLOCK; + } + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcspopcx(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Trap if lock not taken, and enabled. 
*/ + if (env->pstate & PSTATE_EXLOCK) { + int el = arm_current_el(env); + if (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN) { + return CP_ACCESS_EXLOCK; + } + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo gcs_reginfo[] = { + { .name = "GCSCRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 2, + .access = PL1_RW, .accessfn = access_gcs, .fgt = FGT_NGCS_EL0, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[0]) }, + { .name = "GCSCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL1_RW, .accessfn = access_gcs, .fgt = FGT_NGCS_EL1, + .nv2_redirect_offset = 0x8d0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 5, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 5, 0), + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[1]) }, + { .name = "GCSCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL2_RW, .accessfn = access_gcs, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[2]) }, + { .name = "GCSCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[3]) }, + + { .name = "GCSPR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL0_R | PL1_W, .accessfn = access_gcs_el0, + .fgt = FGT_NGCS_EL0, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[0]) }, + { .name = "GCSPR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL1_RW, .accessfn = access_gcs, + .fgt = FGT_NGCS_EL1, .writefn = gcspr_write, + .nv2_redirect_offset = 0x8c0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 5, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 5, 1), + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[1]) }, + { .name = "GCSPR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL2_RW, .accessfn = access_gcs, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[2]) }, + { .name = "GCSPR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL3_RW, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[2]) }, + + { .name = "GCSPUSHM", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 0, + .access = PL0_W, .accessfn = access_gcspushm, + .fgt = FGT_NGCSPUSHM_EL1, .type = ARM_CP_GCSPUSHM }, + { .name = "GCSPOPM", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 1, + .access = PL0_R, .type = ARM_CP_GCSPOPM }, + { .name = "GCSSS1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 2, + .access = PL0_W, .type = ARM_CP_GCSSS1 }, + { .name = "GCSSS2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 3, + .access = PL0_R, .type = ARM_CP_GCSSS2 }, + { .name = "GCSPUSHX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 7, .opc2 = 4, + .access = PL1_W, .accessfn = access_gcspushx, .fgt = FGT_NGCSEPP, + .type = ARM_CP_GCSPUSHX }, + { .name = "GCSPOPCX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 7, .opc2 = 5, + .access = PL1_W, .accessfn = access_gcspopcx, .fgt = FGT_NGCSEPP, + .type = ARM_CP_GCSPOPCX }, + { .name = "GCSPOPX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 
7, .crm = 7, .opc2 = 6, + .access = PL1_W, .type = ARM_CP_GCSPOPX }, +}; + +void define_gcs_cpregs(ARMCPU *cpu) +{ + if (cpu_isar_feature(aa64_gcs, cpu)) { + define_arm_cp_regs(cpu, gcs_reginfo); + } +} diff --git a/target/arm/cpregs-pmu.c b/target/arm/cpregs-pmu.c new file mode 100644 index 0000000..31c01ed --- /dev/null +++ b/target/arm/cpregs-pmu.c @@ -0,0 +1,1346 @@ +/* + * QEMU ARM CP Register PMU insns + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "exec/icount.h" +#include "hw/irq.h" +#include "cpu.h" +#include "cpu-features.h" +#include "cpregs.h" +#include "internals.h" + + +#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ + +/* + * Check for traps to performance monitor registers, which are controlled + * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. + */ +static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); + + if (el < 2 && (mdcr_el2 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +typedef struct pm_event { + uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */ + /* If the event is supported on this CPU (used to generate PMCEID[01]) */ + bool (*supported)(CPUARMState *); + /* + * Retrieve the current count of the underlying event. The programmed + * counters hold a difference from the return value from this function + */ + uint64_t (*get_count)(CPUARMState *); + /* + * Return how many nanoseconds it will take (at a minimum) for count events + * to occur. A negative value indicates the counter will never overflow, or + * that the counter has otherwise arranged for the overflow bit to be set + * and the PMU interrupt to be raised on overflow. + */ + int64_t (*ns_per_count)(uint64_t); +} pm_event; + +static bool event_always_supported(CPUARMState *env) +{ + return true; +} + +static uint64_t swinc_get_count(CPUARMState *env) +{ + /* + * SW_INCR events are written directly to the pmevcntr's by writes to + * PMSWINC, so there is no underlying count maintained by the PMU itself + */ + return 0; +} + +static int64_t swinc_ns_per(uint64_t ignored) +{ + return -1; +} + +/* + * Return the underlying cycle count for the PMU cycle counters. If we're in + * usermode, simply return 0. 
+ */ +static uint64_t cycles_get_count(CPUARMState *env) +{ +#ifndef CONFIG_USER_ONLY + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + ARM_CPU_FREQ, NANOSECONDS_PER_SECOND); +#else + return cpu_get_host_ticks(); +#endif +} + +#ifndef CONFIG_USER_ONLY +static int64_t cycles_ns_per(uint64_t cycles) +{ + return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles; +} + +static bool instructions_supported(CPUARMState *env) +{ + /* Precise instruction counting */ + return icount_enabled() == ICOUNT_PRECISE; +} + +static uint64_t instructions_get_count(CPUARMState *env) +{ + assert(icount_enabled() == ICOUNT_PRECISE); + return (uint64_t)icount_get_raw(); +} + +static int64_t instructions_ns_per(uint64_t icount) +{ + assert(icount_enabled() == ICOUNT_PRECISE); + return icount_to_ns((int64_t)icount); +} +#endif + +static bool pmuv3p1_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmuv3p1, env_archcpu(env)); +} + +static bool pmuv3p4_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmuv3p4, env_archcpu(env)); +} + +static uint64_t zero_event_get_count(CPUARMState *env) +{ + /* For events which on QEMU never fire, so their count is always zero */ + return 0; +} + +static int64_t zero_event_ns_per(uint64_t cycles) +{ + /* An event which never fires can never overflow */ + return -1; +} + +static const pm_event pm_events[] = { + { .number = 0x000, /* SW_INCR */ + .supported = event_always_supported, + .get_count = swinc_get_count, + .ns_per_count = swinc_ns_per, + }, +#ifndef CONFIG_USER_ONLY + { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */ + .supported = instructions_supported, + .get_count = instructions_get_count, + .ns_per_count = instructions_ns_per, + }, + { .number = 0x011, /* CPU_CYCLES, Cycle */ + .supported = event_always_supported, + .get_count = cycles_get_count, + .ns_per_count = cycles_ns_per, + }, +#endif + { .number = 0x023, /* STALL_FRONTEND */ + .supported = pmuv3p1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x024, /* STALL_BACKEND */ + .supported = pmuv3p1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x03c, /* STALL */ + .supported = pmuv3p4_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, +}; + +/* + * Note: Before increasing MAX_EVENT_ID beyond 0x3f into the 0x40xx range of + * events (i.e. the statistical profiling extension), this implementation + * should first be updated to something sparse instead of the current + * supported_event_map[] array. + */ +#define MAX_EVENT_ID 0x3c +#define UNSUPPORTED_EVENT UINT16_MAX +static uint16_t supported_event_map[MAX_EVENT_ID + 1]; + +/* + * Called upon CPU initialization to initialize PMCEID[01]_EL0 and build a map + * of ARM event numbers to indices in our pm_events array. + * + * Note: Events in the 0x40XX range are not currently supported. 
+ */ +void pmu_init(ARMCPU *cpu) +{ + unsigned int i; + + /* + * Empty supported_event_map and cpu->pmceid[01] before adding supported + * events to them + */ + for (i = 0; i < ARRAY_SIZE(supported_event_map); i++) { + supported_event_map[i] = UNSUPPORTED_EVENT; + } + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; + + for (i = 0; i < ARRAY_SIZE(pm_events); i++) { + const pm_event *cnt = &pm_events[i]; + assert(cnt->number <= MAX_EVENT_ID); + /* We do not currently support events in the 0x40xx range */ + assert(cnt->number <= 0x3f); + + if (cnt->supported(&cpu->env)) { + supported_event_map[cnt->number] = i; + uint64_t event_mask = 1ULL << (cnt->number & 0x1f); + if (cnt->number & 0x20) { + cpu->pmceid1 |= event_mask; + } else { + cpu->pmceid0 |= event_mask; + } + } + } +} + +/* + * Check at runtime whether a PMU event is supported for the current machine + */ +static bool event_supported(uint16_t number) +{ + if (number > MAX_EVENT_ID) { + return false; + } + return supported_event_map[number] != UNSUPPORTED_EVENT; +} + +static CPAccessResult do_pmreg_access(CPUARMState *env, bool is_pmcr) +{ + /* + * Performance monitor registers user accessibility is controlled + * by PMUSERENR. MDCR_EL2.TPM/TPMCR and MDCR_EL3.TPM allow configurable + * trapping to EL2 or EL3 for other accesses. + */ + int el = arm_current_el(env); + + if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) { + return CP_ACCESS_TRAP_EL1; + } + if (el < 2) { + uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); + + if (mdcr_el2 & MDCR_TPM) { + return CP_ACCESS_TRAP_EL2; + } + if (is_pmcr && (mdcr_el2 & MDCR_TPMCR)) { + return CP_ACCESS_TRAP_EL2; + } + } + if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL3; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_pmreg_access(env, false); +} + +static CPAccessResult pmreg_access_pmcr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + return do_pmreg_access(env, true); +} + +static CPAccessResult pmreg_access_xevcntr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* ER: event counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 3)) != 0 + && isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_swinc(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* SW: software increment write trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 1)) != 0 + && !isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_selr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* ER: event counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 3)) != 0) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_ccntr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* CR: cycle counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 2)) != 0 + && isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +/* + * Returns true if the counter (pass 31 for PMCCNTR) should count events using + * the current EL, security state, and 
register configuration. + */ +static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) +{ + uint64_t filter; + bool e, p, u, nsk, nsu, nsh, m; + bool enabled, prohibited = false, filtered; + bool secure = arm_is_secure(env); + int el = arm_current_el(env); + uint64_t mdcr_el2; + uint8_t hpmn; + + /* + * We might be called for M-profile cores where MDCR_EL2 doesn't + * exist and arm_mdcr_el2_eff() will assert, so this early-exit check + * must be before we read that value. + */ + if (!arm_feature(env, ARM_FEATURE_PMU)) { + return false; + } + + mdcr_el2 = arm_mdcr_el2_eff(env); + hpmn = mdcr_el2 & MDCR_HPMN; + + if (!arm_feature(env, ARM_FEATURE_EL2) || + (counter < hpmn || counter == 31)) { + e = env->cp15.c9_pmcr & PMCRE; + } else { + e = mdcr_el2 & MDCR_HPME; + } + enabled = e && (env->cp15.c9_pmcnten & (1 << counter)); + + /* Is event counting prohibited? */ + if (el == 2 && (counter < hpmn || counter == 31)) { + prohibited = mdcr_el2 & MDCR_HPMD; + } + if (secure) { + prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME); + } + + if (counter == 31) { + /* + * The cycle counter defaults to running. PMCR.DP says "disable + * the cycle counter when event counting is prohibited". + * Some MDCR bits disable the cycle counter specifically. + */ + prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP; + if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + if (secure) { + prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD); + } + if (el == 2) { + prohibited = prohibited || (mdcr_el2 & MDCR_HCCD); + } + } + } + + if (counter == 31) { + filter = env->cp15.pmccfiltr_el0; + } else { + filter = env->cp15.c14_pmevtyper[counter]; + } + + p = filter & PMXEVTYPER_P; + u = filter & PMXEVTYPER_U; + nsk = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSK); + nsu = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSU); + nsh = arm_feature(env, ARM_FEATURE_EL2) && (filter & PMXEVTYPER_NSH); + m = arm_el_is_aa64(env, 1) && + arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_M); + + if (el == 0) { + filtered = secure ? u : u != nsu; + } else if (el == 1) { + filtered = secure ? p : p != nsk; + } else if (el == 2) { + filtered = !nsh; + } else { /* EL3 */ + filtered = m != p; + } + + if (counter != 31) { + /* + * If not checking PMCCNTR, ensure the counter is setup to an event we + * support + */ + uint16_t event = filter & PMXEVTYPER_EVTCOUNT; + if (!event_supported(event)) { + return false; + } + } + + return enabled && !prohibited && !filtered; +} + +static void pmu_update_irq(CPUARMState *env) +{ + ARMCPU *cpu = env_archcpu(env); + qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) && + (env->cp15.c9_pminten & env->cp15.c9_pmovsr)); +} + +static bool pmccntr_clockdiv_enabled(CPUARMState *env) +{ + /* + * Return true if the clock divider is enabled and the cycle counter + * is supposed to tick only once every 64 clock cycles. This is + * controlled by PMCR.D, but if PMCR.LC is set to enable the long + * (64-bit) cycle counter PMCR.D has no effect. 
+ */ + return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD; +} + +static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) +{ + /* Return true if the specified event counter is configured to be 64 bit */ + + /* This isn't intended to be used with the cycle counter */ + assert(counter < 31); + + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + return false; + } + + if (arm_feature(env, ARM_FEATURE_EL2)) { + /* + * MDCR_EL2.HLP still applies even when EL2 is disabled in the + * current security state, so we don't use arm_mdcr_el2_eff() here. + */ + bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; + int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; + + if (counter >= hpmn) { + return hlp; + } + } + return env->cp15.c9_pmcr & PMCRLP; +} + +/* + * Ensure c15_ccnt is the guest-visible count so that operations such as + * enabling/disabling the counter or filtering, modifying the count itself, + * etc. can be done logically. This is essentially a no-op if the counter is + * not enabled at the time of the call. + */ +static void pmccntr_op_start(CPUARMState *env) +{ + uint64_t cycles = cycles_get_count(env); + + if (pmu_counter_enabled(env, 31)) { + uint64_t eff_cycles = cycles; + if (pmccntr_clockdiv_enabled(env)) { + eff_cycles /= 64; + } + + uint64_t new_pmccntr = eff_cycles - env->cp15.c15_ccnt_delta; + + uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \ + 1ull << 63 : 1ull << 31; + if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) { + env->cp15.c9_pmovsr |= (1ULL << 31); + pmu_update_irq(env); + } + + env->cp15.c15_ccnt = new_pmccntr; + } + env->cp15.c15_ccnt_delta = cycles; +} + +/* + * If PMCCNTR is enabled, recalculate the delta between the clock and the + * guest-visible count. A call to pmccntr_op_finish should follow every call to + * pmccntr_op_start. + */ +static void pmccntr_op_finish(CPUARMState *env) +{ + if (pmu_counter_enabled(env, 31)) { +#ifndef CONFIG_USER_ONLY + /* Calculate when the counter will next overflow */ + uint64_t remaining_cycles = -env->cp15.c15_ccnt; + if (!(env->cp15.c9_pmcr & PMCRLC)) { + remaining_cycles = (uint32_t)remaining_cycles; + } + int64_t overflow_in = cycles_ns_per(remaining_cycles); + + if (overflow_in > 0) { + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } + } +#endif + + uint64_t prev_cycles = env->cp15.c15_ccnt_delta; + if (pmccntr_clockdiv_enabled(env)) { + prev_cycles /= 64; + } + env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt; + } +} + +static void pmevcntr_op_start(CPUARMState *env, uint8_t counter) +{ + + uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; + uint64_t count = 0; + if (event_supported(event)) { + uint16_t event_idx = supported_event_map[event]; + count = pm_events[event_idx].get_count(env); + } + + if (pmu_counter_enabled(env, counter)) { + uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; + uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ? 
+ 1ULL << 63 : 1ULL << 31; + + if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) { + env->cp15.c9_pmovsr |= (1 << counter); + pmu_update_irq(env); + } + env->cp15.c14_pmevcntr[counter] = new_pmevcntr; + } + env->cp15.c14_pmevcntr_delta[counter] = count; +} + +static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter) +{ + if (pmu_counter_enabled(env, counter)) { +#ifndef CONFIG_USER_ONLY + uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; + uint16_t event_idx = supported_event_map[event]; + uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1); + int64_t overflow_in; + + if (!pmevcntr_is_64_bit(env, counter)) { + delta = (uint32_t)delta; + } + overflow_in = pm_events[event_idx].ns_per_count(delta); + + if (overflow_in > 0) { + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } + } +#endif + + env->cp15.c14_pmevcntr_delta[counter] -= + env->cp15.c14_pmevcntr[counter]; + } +} + +void pmu_op_start(CPUARMState *env) +{ + unsigned int i; + pmccntr_op_start(env); + for (i = 0; i < pmu_num_counters(env); i++) { + pmevcntr_op_start(env, i); + } +} + +void pmu_op_finish(CPUARMState *env) +{ + unsigned int i; + pmccntr_op_finish(env); + for (i = 0; i < pmu_num_counters(env); i++) { + pmevcntr_op_finish(env, i); + } +} + +void pmu_pre_el_change(ARMCPU *cpu, void *ignored) +{ + pmu_op_start(&cpu->env); +} + +void pmu_post_el_change(ARMCPU *cpu, void *ignored) +{ + pmu_op_finish(&cpu->env); +} + +void arm_pmu_timer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + /* + * Update all the counter values based on the current underlying counts, + * triggering interrupts to be raised, if necessary. pmu_op_finish() also + * has the effect of setting the cpu->pmu_timer to the next earliest time a + * counter may expire. + */ + pmu_op_start(&cpu->env); + pmu_op_finish(&cpu->env); +} + +static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + + if (value & PMCRC) { + /* The counter has been reset */ + env->cp15.c15_ccnt = 0; + } + + if (value & PMCRP) { + unsigned int i; + for (i = 0; i < pmu_num_counters(env); i++) { + env->cp15.c14_pmevcntr[i] = 0; + } + } + + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; + env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); + + pmu_op_finish(env); +} + +static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t pmcr = env->cp15.c9_pmcr; + + /* + * If EL2 is implemented and enabled for the current security state, reads + * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN. 
+ */ + if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) { + pmcr &= ~PMCRN_MASK; + pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT; + } + + return pmcr; +} + +static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned int i; + uint64_t overflow_mask, new_pmswinc; + + for (i = 0; i < pmu_num_counters(env); i++) { + /* Increment a counter's count iff: */ + if ((value & (1 << i)) && /* counter's bit is set */ + /* counter is enabled and not filtered */ + pmu_counter_enabled(env, i) && + /* counter is SW_INCR */ + (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) { + pmevcntr_op_start(env, i); + + /* + * Detect if this write causes an overflow since we can't predict + * PMSWINC overflows like we can for other events + */ + new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; + + overflow_mask = pmevcntr_is_64_bit(env, i) ? + 1ULL << 63 : 1ULL << 31; + + if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) { + env->cp15.c9_pmovsr |= (1 << i); + pmu_update_irq(env); + } + + env->cp15.c14_pmevcntr[i] = new_pmswinc; + + pmevcntr_op_finish(env, i); + } + } +} + +static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t ret; + pmccntr_op_start(env); + ret = env->cp15.c15_ccnt; + pmccntr_op_finish(env); + return ret; +} + +static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and + * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the + * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are + * accessed. + */ + env->cp15.c9_pmselr = value & 0x1f; +} + +static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + env->cp15.c15_ccnt = value; + pmccntr_op_finish(env); +} + +static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t cur_val = pmccntr_read(env, NULL); + + pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value)); +} + +static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0; + pmccntr_op_finish(env); +} + +static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + /* M is not accessible from AArch32 */ + env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) | + (value & PMCCFILTR); + pmccntr_op_finish(env); +} + +static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* M is not visible in AArch32 */ + return env->cp15.pmccfiltr_el0 & PMCCFILTR; +} + +static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + value &= pmu_counter_mask(env); + env->cp15.c9_pmcnten |= value; + pmu_op_finish(env); +} + +static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + value &= pmu_counter_mask(env); + env->cp15.c9_pmcnten &= ~value; + pmu_op_finish(env); +} + +static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pmovsr &= ~value; + pmu_update_irq(env); +} + +static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pmovsr |= value; + pmu_update_irq(env); +} + +static void pmevtyper_write(CPUARMState *env, 
const ARMCPRegInfo *ri, + uint64_t value, const uint8_t counter) +{ + if (counter == 31) { + pmccfiltr_write(env, ri, value); + } else if (counter < pmu_num_counters(env)) { + pmevcntr_op_start(env, counter); + + /* + * If this counter's event type is changing, store the current + * underlying count for the new type in c14_pmevcntr_delta[counter] so + * pmevcntr_op_finish has the correct baseline when it converts back to + * a delta. + */ + uint16_t old_event = env->cp15.c14_pmevtyper[counter] & + PMXEVTYPER_EVTCOUNT; + uint16_t new_event = value & PMXEVTYPER_EVTCOUNT; + if (old_event != new_event) { + uint64_t count = 0; + if (event_supported(new_event)) { + uint16_t event_idx = supported_event_map[new_event]; + count = pm_events[event_idx].get_count(env); + } + env->cp15.c14_pmevcntr_delta[counter] = count; + } + + env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; + pmevcntr_op_finish(env, counter); + } + /* + * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when + * PMSELR value is equal to or greater than the number of implemented + * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. + */ +} + +static uint64_t pmevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri, + const uint8_t counter) +{ + if (counter == 31) { + return env->cp15.pmccfiltr_el0; + } else if (counter < pmu_num_counters(env)) { + return env->cp15.c14_pmevtyper[counter]; + } else { + /* + * We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER + * are CONSTRAINED UNPREDICTABLE. See comments in pmevtyper_write(). + */ + return 0; + } +} + +static void pmevtyper_writefn(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + pmevtyper_write(env, ri, value, counter); +} + +static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + env->cp15.c14_pmevtyper[counter] = value; + + /* + * pmevtyper_rawwrite is called between a pair of pmu_op_start and + * pmu_op_finish calls when loading saved state for a migration. Because + * we're potentially updating the type of event here, the value written to + * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a + * different counter type. Therefore, we need to set this value to the + * current count for the counter type we're writing so that pmu_op_finish + * has the correct count for its calculation. 
+ */ + uint16_t event = value & PMXEVTYPER_EVTCOUNT; + if (event_supported(event)) { + uint16_t event_idx = supported_event_map[event]; + env->cp15.c14_pmevcntr_delta[counter] = + pm_events[event_idx].get_count(env); + } +} + +static uint64_t pmevtyper_readfn(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + return pmevtyper_read(env, ri, counter); +} + +static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmevtyper_write(env, ri, value, env->cp15.c9_pmselr & 31); +} + +static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return pmevtyper_read(env, ri, env->cp15.c9_pmselr & 31); +} + +static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value, uint8_t counter) +{ + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + value &= MAKE_64BIT_MASK(0, 32); + } + if (counter < pmu_num_counters(env)) { + pmevcntr_op_start(env, counter); + env->cp15.c14_pmevcntr[counter] = value; + pmevcntr_op_finish(env, counter); + } + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. + */ +} + +static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, + uint8_t counter) +{ + if (counter < pmu_num_counters(env)) { + uint64_t ret; + pmevcntr_op_start(env, counter); + ret = env->cp15.c14_pmevcntr[counter]; + pmevcntr_op_finish(env, counter); + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + ret &= MAKE_64BIT_MASK(0, 32); + } + return ret; + } else { + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. 
+ */ + return 0; + } +} + +static void pmevcntr_writefn(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + pmevcntr_write(env, ri, value, counter); +} + +static uint64_t pmevcntr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + return pmevcntr_read(env, ri, counter); +} + +static void pmevcntr_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + assert(counter < pmu_num_counters(env)); + env->cp15.c14_pmevcntr[counter] = value; + pmevcntr_write(env, ri, value, counter); +} + +static uint64_t pmevcntr_rawread(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + assert(counter < pmu_num_counters(env)); + return env->cp15.c14_pmevcntr[counter]; +} + +static void pmxevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmevcntr_write(env, ri, value, env->cp15.c9_pmselr & 31); +} + +static uint64_t pmxevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return pmevcntr_read(env, ri, env->cp15.c9_pmselr & 31); +} + +static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + if (arm_feature(env, ARM_FEATURE_V8)) { + env->cp15.c9_pmuserenr = value & 0xf; + } else { + env->cp15.c9_pmuserenr = value & 1; + } +} + +static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* We have no event counters so only the C bit can be changed */ + value &= pmu_counter_mask(env); + env->cp15.c9_pminten |= value; + pmu_update_irq(env); +} + +static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pminten &= ~value; + pmu_update_irq(env); +} + +static const ARMCPRegInfo v7_pm_reginfo[] = { + /* + * Performance monitors are implementation defined in v7, + * but with an ARM recommended set of registers, which we + * follow. + * + * Performance registers fall into three categories: + * (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR) + * (b) RO in PL0 (ie UNDEF on write), RW in PL1 (PMUSERENR) + * (c) UNDEF in PL0 if PMUSERENR.EN==0, otherwise accessible (all others) + * For the cases controlled by PMUSERENR we must set .access to PL0_RW + * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. 
+ */ + { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenset_write, + .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .raw_writefn = raw_write }, + { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, + .writefn = pmcntenset_write, .raw_writefn = raw_write }, + { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .writefn = pmcntenclr_write, .raw_writefn = raw_write, + .type = ARM_CP_ALIAS | ARM_CP_IO }, + { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenclr_write, .raw_writefn = raw_write }, + { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, + .access = PL0_RW, .type = ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), + .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, + .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, + .writefn = pmswinc_write }, + { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4, + .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, + .writefn = pmswinc_write }, + { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, + .access = PL0_RW, .type = ARM_CP_ALIAS, + .fgt = FGT_PMSELR_EL0, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr), + .accessfn = pmreg_access_selr, .writefn = pmselr_write, + .raw_writefn = raw_write}, + { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5, + .access = PL0_RW, .accessfn = pmreg_access_selr, + .fgt = FGT_PMSELR_EL0, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr), + .writefn = pmselr_write, .raw_writefn = raw_write, }, + { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, + .fgt = FGT_PMCCNTR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt), + .readfn = pmccntr_read, .writefn = pmccntr_write, + .raw_readfn = raw_read, .raw_writefn = raw_write, }, + { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7, + .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, + .type = 
ARM_CP_ALIAS | ARM_CP_IO, + .resetvalue = 0, }, + { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, + .writefn = pmccfiltr_write, .raw_writefn = raw_write, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), + .resetvalue = 0, }, + { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, + { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, + { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, + .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, + { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, + .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, + { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, + .access = PL0_R | PL1_RW, .accessfn = access_tpm, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmuserenr), + .resetvalue = 0, + .writefn = pmuserenr_write, .raw_writefn = raw_write }, + { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 0, + .access = PL0_R | PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), + .resetvalue = 0, + .writefn = pmuserenr_write, .raw_writefn = raw_write }, + { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten), + .resetvalue = 0, + .writefn = pmintenset_write, .raw_writefn = raw_write }, + { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenset_write, .raw_writefn = raw_write, + .resetvalue = 0x0 }, + { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenclr_write, .raw_writefn = raw_write }, + { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenclr_write, .raw_writefn = raw_write }, +}; + +static const ARMCPRegInfo pmovsset_cp_reginfo[] = { + /* PMOVSSET is not implemented in v7 before v7ve */ + { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3, + 
.access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsset_write, + .raw_writefn = raw_write }, + { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 3, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsset_write, + .raw_writefn = raw_write }, +}; + +void define_pm_cpregs(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_V7)) { + /* + * v7 performance monitor control register: same implementor + * field as main ID register, and we implement four counters in + * addition to the cycle count register. + */ + static const ARMCPRegInfo pmcr = { + .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, + .fgt = FGT_PMCR_EL0, + .type = ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), + .accessfn = pmreg_access_pmcr, + .readfn = pmcr_read, .raw_readfn = raw_read, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + const ARMCPRegInfo pmcr64 = { + .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_pmcr, + .fgt = FGT_PMCR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), + .resetvalue = cpu->isar.reset_pmcr_el0, + .readfn = pmcr_read, .raw_readfn = raw_read, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + + define_one_arm_cp_reg(cpu, &pmcr); + define_one_arm_cp_reg(cpu, &pmcr64); + define_arm_cp_regs(cpu, v7_pm_reginfo); + /* + * 32-bit AArch32 PMCCNTR. We don't expose this to GDB if the + * new-in-v8 PMUv3 64-bit AArch32 PMCCNTR register is implemented + * (as that will provide the GDB user's view of "PMCCNTR"). 
+ */ + ARMCPRegInfo pmccntr = { + .name = "PMCCNTR", + .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, + .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO, + .fgt = FGT_PMCCNTR_EL0, + .readfn = pmccntr_read, .writefn = pmccntr_write32, + }; + if (arm_feature(env, ARM_FEATURE_V8)) { + pmccntr.type |= ARM_CP_NO_GDB; + } + define_one_arm_cp_reg(cpu, &pmccntr); + + for (unsigned i = 0, pmcrn = pmu_num_counters(env); i < pmcrn; i++) { + g_autofree char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); + g_autofree char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); + g_autofree char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); + g_autofree char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); + + ARMCPRegInfo pmev_regs[] = { + { .name = pmevcntr_name, .cp = 15, .crn = 14, + .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVCNTRN_EL0, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .accessfn = pmreg_access_xevcntr }, + { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr, + .type = ARM_CP_IO, + .fgt = FGT_PMEVCNTRN_EL0, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .raw_readfn = pmevcntr_rawread, + .raw_writefn = pmevcntr_rawwrite }, + { .name = pmevtyper_name, .cp = 15, .crn = 14, + .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVTYPERN_EL0, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .accessfn = pmreg_access }, + { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .type = ARM_CP_IO, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .raw_writefn = pmevtyper_rawwrite }, + }; + define_arm_cp_regs(cpu, pmev_regs); + } + } + if (arm_feature(env, ARM_FEATURE_V7VE)) { + define_arm_cp_regs(cpu, pmovsset_cp_reginfo); + } + + if (arm_feature(env, ARM_FEATURE_V8)) { + const ARMCPRegInfo v8_pm_reginfo[] = { + { .name = "PMCEID0", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid0, 0, 32) }, + { .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = cpu->pmceid0 }, + { .name = "PMCEID1", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid1, 0, 32) }, + { .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = cpu->pmceid1 }, + /* AArch32 64-bit PMCCNTR view: added in PMUv3 with Armv8 */ + { .name = "PMCCNTR", .state = ARM_CP_STATE_AA32, + .cp = 15, .crm = 9, .opc1 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, .resetvalue = 0, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_64BIT, + 
.fgt = FGT_PMCCNTR_EL0, .readfn = pmccntr_read, + .writefn = pmccntr_write, }, + }; + define_arm_cp_regs(cpu, v8_pm_reginfo); + } + + if (cpu_isar_feature(aa32_pmuv3p1, cpu)) { + ARMCPRegInfo v81_pmu_regs[] = { + { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid0, 32, 32) }, + { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid1, 32, 32) }, + }; + define_arm_cp_regs(cpu, v81_pmu_regs); + } + + if (cpu_isar_feature(any_pmuv3p4, cpu)) { + static const ARMCPRegInfo v84_pmmir = { + .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, + .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMMIR_EL1, + .resetvalue = 0 + }; + define_one_arm_cp_reg(cpu, &v84_pmmir); + } +} diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h index 2183de8..763de5e 100644 --- a/target/arm/cpregs.h +++ b/target/arm/cpregs.h @@ -22,7 +22,9 @@ #define TARGET_ARM_CPREGS_H #include "hw/registerfields.h" +#include "exec/memop.h" #include "target/arm/kvm-consts.h" +#include "cpu.h" /* * ARMCPRegInfo type field bits: @@ -45,6 +47,14 @@ enum { ARM_CP_DC_ZVA = 0x0005, ARM_CP_DC_GVA = 0x0006, ARM_CP_DC_GZVA = 0x0007, + /* Special: gcs instructions */ + ARM_CP_GCSPUSHM = 0x0008, + ARM_CP_GCSPOPM = 0x0009, + ARM_CP_GCSPUSHX = 0x000a, + ARM_CP_GCSPOPX = 0x000b, + ARM_CP_GCSPOPCX = 0x000c, + ARM_CP_GCSSS1 = 0x000d, + ARM_CP_GCSSS2 = 0x000e, /* Flag: reads produce resetvalue; writes ignored. */ ARM_CP_CONST = 1 << 4, @@ -134,6 +144,11 @@ enum { * identically to the normal one, other than FGT trapping handling.) */ ARM_CP_ADD_TLBI_NXS = 1 << 21, + /* + * Flag: even though this sysreg has opc1 == 4 or 5, it + * should not trap to EL2 when HCR_EL2.NV is set. + */ + ARM_CP_NV_NO_TRAP = 1 << 22, }; /* @@ -173,16 +188,20 @@ enum { * add a bit to distinguish between secure and non-secure cpregs in the * hashtable. */ -#define CP_REG_NS_SHIFT 29 -#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT) +#define CP_REG_AA32_NS_SHIFT 29 +#define CP_REG_AA32_NS_MASK (1 << CP_REG_AA32_NS_SHIFT) + +/* Distinguish 32-bit and 64-bit views of AArch32 system registers. 
*/ +#define CP_REG_AA32_64BIT_SHIFT 15 +#define CP_REG_AA32_64BIT_MASK (1 << CP_REG_AA32_64BIT_SHIFT) #define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \ - ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \ - ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) + (((ns) << CP_REG_AA32_NS_SHIFT) | \ + ((is64) << CP_REG_AA32_64BIT_SHIFT) | \ + ((cp) << 16) | ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) -#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \ - (CP_REG_AA64_MASK | \ - ((cp) << CP_REG_ARM_COPROC_SHIFT) | \ +#define ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2) \ + (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \ ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ @@ -200,14 +219,14 @@ static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid) cpregid |= CP_REG_AA64_MASK; } else { if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) { - cpregid |= (1 << 15); + cpregid |= CP_REG_AA32_64BIT_MASK; } /* * KVM is always non-secure so add the NS flag on AArch32 register * entries. */ - cpregid |= 1 << CP_REG_NS_SHIFT; + cpregid |= CP_REG_AA32_NS_MASK; } return cpregid; } @@ -224,8 +243,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) kvmid = cpregid & ~CP_REG_AA64_MASK; kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64; } else { - kvmid = cpregid & ~(1 << 15); - if (cpregid & (1 << 15)) { + kvmid = cpregid & ~CP_REG_AA32_64BIT_MASK; + if (cpregid & CP_REG_AA32_64BIT_MASK) { kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM; } else { kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM; @@ -345,6 +364,14 @@ typedef enum CPAccessResult { * specified target EL. */ CP_ACCESS_UNDEFINED = (2 << 2), + + /* + * Access fails with EXLOCK, a GCS exception syndrome. + * These traps are always to the current execution EL, + * which is the same as the usual target EL because + * they cannot occur from EL0. 
+ */ + CP_ACCESS_EXLOCK = (3 << 2), } CPAccessResult; /* Indexes into fgt_read[] */ @@ -407,10 +434,19 @@ FIELD(HFGRTR_EL2, ERXPFGCTL_EL1, 47, 1) FIELD(HFGRTR_EL2, ERXPFGCDN_EL1, 48, 1) FIELD(HFGRTR_EL2, ERXADDR_EL1, 49, 1) FIELD(HFGRTR_EL2, NACCDATA_EL1, 50, 1) -/* 51-53: RES0 */ +/* 51: RES0 */ +FIELD(HFGRTR_EL2, NGCS_EL0, 52, 1) +FIELD(HFGRTR_EL2, NGCS_EL1, 53, 1) FIELD(HFGRTR_EL2, NSMPRI_EL1, 54, 1) FIELD(HFGRTR_EL2, NTPIDR2_EL0, 55, 1) -/* 56-63: RES0 */ +FIELD(HFGRTR_EL2, NRCWMASK_EL1, 56, 1) +FIELD(HFGRTR_EL2, NPIRE0_EL1, 57, 1) +FIELD(HFGRTR_EL2, NPIR_EL1, 58, 1) +FIELD(HFGRTR_EL2, NPOR_EL0, 59, 1) +FIELD(HFGRTR_EL2, NPOR_EL1, 60, 1) +FIELD(HFGRTR_EL2, NS2POR_EL1, 61, 1) +FIELD(HFGRTR_EL2, NMAIR2_EL1, 62, 1) +FIELD(HFGRTR_EL2, NAMAIR2_EL1, 63, 1) /* These match HFGRTR but bits for RO registers are RES0 */ FIELD(HFGWTR_EL2, AFSR0_EL1, 0, 1) @@ -451,8 +487,18 @@ FIELD(HFGWTR_EL2, ERXPFGCTL_EL1, 47, 1) FIELD(HFGWTR_EL2, ERXPFGCDN_EL1, 48, 1) FIELD(HFGWTR_EL2, ERXADDR_EL1, 49, 1) FIELD(HFGWTR_EL2, NACCDATA_EL1, 50, 1) +FIELD(HFGWTR_EL2, NGCS_EL0, 52, 1) +FIELD(HFGWTR_EL2, NGCS_EL1, 53, 1) FIELD(HFGWTR_EL2, NSMPRI_EL1, 54, 1) FIELD(HFGWTR_EL2, NTPIDR2_EL0, 55, 1) +FIELD(HFGWTR_EL2, NRCWMASK_EL1, 56, 1) +FIELD(HFGWTR_EL2, NPIRE0_EL1, 57, 1) +FIELD(HFGWTR_EL2, NPIR_EL1, 58, 1) +FIELD(HFGWTR_EL2, NPOR_EL0, 59, 1) +FIELD(HFGWTR_EL2, NPOR_EL1, 60, 1) +FIELD(HFGWTR_EL2, NS2POR_EL1, 61, 1) +FIELD(HFGWTR_EL2, NMAIR2_EL1, 62, 1) +FIELD(HFGWTR_EL2, NAMAIR2_EL1, 63, 1) FIELD(HFGITR_EL2, ICIALLUIS, 0, 1) FIELD(HFGITR_EL2, ICIALLU, 1, 1) @@ -511,6 +557,11 @@ FIELD(HFGITR_EL2, SVC_EL1, 53, 1) FIELD(HFGITR_EL2, DCCVAC, 54, 1) FIELD(HFGITR_EL2, NBRBINJ, 55, 1) FIELD(HFGITR_EL2, NBRBIALL, 56, 1) +FIELD(HFGITR_EL2, NGCSPUSHM_EL1, 57, 1) +FIELD(HFGITR_EL2, NGCSSTR_EL1, 58, 1) +FIELD(HFGITR_EL2, NGCSEPP, 59, 1) +FIELD(HFGITR_EL2, COSPRCTX, 60, 1) +FIELD(HFGITR_EL2, ATS1E1A, 62, 1) FIELD(HDFGRTR_EL2, DBGBCRN_EL1, 0, 1) FIELD(HDFGRTR_EL2, DBGBVRN_EL1, 1, 1) @@ -749,8 +800,12 @@ typedef enum FGTBit { DO_BIT(HFGRTR, VBAR_EL1), DO_BIT(HFGRTR, ICC_IGRPENN_EL1), DO_BIT(HFGRTR, ERRIDR_EL1), + DO_REV_BIT(HFGRTR, NGCS_EL0), + DO_REV_BIT(HFGRTR, NGCS_EL1), DO_REV_BIT(HFGRTR, NSMPRI_EL1), DO_REV_BIT(HFGRTR, NTPIDR2_EL0), + DO_REV_BIT(HFGRTR, NPIRE0_EL1), + DO_REV_BIT(HFGRTR, NPIR_EL1), /* Trap bits in HDFGRTR_EL2 / HDFGWTR_EL2, starting from bit 0. */ DO_BIT(HDFGRTR, DBGBCRN_EL1), @@ -829,6 +884,9 @@ typedef enum FGTBit { DO_BIT(HFGITR, DVPRCTX), DO_BIT(HFGITR, CPPRCTX), DO_BIT(HFGITR, DCCVAC), + DO_REV_BIT(HFGITR, NGCSPUSHM_EL1), + DO_REV_BIT(HFGITR, NGCSEPP), + DO_BIT(HFGITR, ATS1E1A), } FGTBit; #undef DO_BIT @@ -840,15 +898,15 @@ typedef struct ARMCPRegInfo ARMCPRegInfo; * Access functions for coprocessor registers. These cannot fail and * may not raise exceptions. */ -typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque); -typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, +typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *ri); +typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); /* Access permission check functions for coprocessor registers. 
*/ typedef CPAccessResult CPAccessFn(CPUARMState *env, - const ARMCPRegInfo *opaque, + const ARMCPRegInfo *ri, bool isread); /* Hook function for register reset */ -typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque); +typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *ri); #define CP_ANY 0xff @@ -906,11 +964,19 @@ struct ARMCPRegInfo { uint32_t nv2_redirect_offset; /* - * The opaque pointer passed to define_arm_cp_regs_with_opaque() when - * this register was defined: can be used to hand data through to the - * register read/write functions, since they are passed the ARMCPRegInfo*. + * With VHE, with E2H, at EL2, access to this EL0/EL1 reg redirects + * to the EL2 reg with the specified key. + */ + uint32_t vhe_redir_to_el2; + + /* + * For VHE. Before registration, this field holds the key for an + * EL02/EL12 reg to be created to point back to this EL0/EL1 reg. + * After registration, this field is set only on the EL02/EL12 reg + * and points back to the EL02/EL12 reg for redirection with E2H. */ - void *opaque; + uint32_t vhe_redir_to_el01; + /* * Value of this register, if it is ARM_CP_CONST. Otherwise, if * fieldoffset is non-zero, the reset value of the register. @@ -978,52 +1044,17 @@ struct ARMCPRegInfo { * fieldoffset is 0 then no reset will be done. */ CPResetFn *resetfn; - - /* - * "Original" readfn, writefn, accessfn. - * For ARMv8.1-VHE register aliases, we overwrite the read/write - * accessor functions of various EL1/EL0 to perform the runtime - * check for which sysreg should actually be modified, and then - * forwards the operation. Before overwriting the accessors, - * the original function is copied here, so that accesses that - * really do go to the EL1/EL0 version proceed normally. - * (The corresponding EL2 register is linked via opaque.) - */ - CPReadFn *orig_readfn; - CPWriteFn *orig_writefn; - CPAccessFn *orig_accessfn; }; -/* - * Macros which are lvalues for the field in CPUARMState for the - * ARMCPRegInfo *ri. 
- */ -#define CPREG_FIELD32(env, ri) \ - (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) -#define CPREG_FIELD64(env, ri) \ - (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) - -void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *reg, - void *opaque); +void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs); +void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len); -static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) -{ - define_one_arm_cp_reg_with_opaque(cpu, regs, NULL); -} - -void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, - void *opaque, size_t len); - -#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE) \ - do { \ - QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \ - define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE, \ - ARRAY_SIZE(REGS)); \ +#define define_arm_cp_regs(CPU, REGS) \ + do { \ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \ + define_arm_cp_regs_len(CPU, REGS, ARRAY_SIZE(REGS)); \ } while (0) -#define define_arm_cp_regs(CPU, REGS) \ - define_arm_cp_regs_with_opaque(CPU, REGS, NULL) - const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); /* @@ -1064,6 +1095,9 @@ void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, /* CPReadFn that can be used for read-as-zero behaviour */ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri); +/* CPReadFn that just reads the value from ri->fieldoffset */ +uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri); + /* CPWriteFn that just writes the value to ri->fieldoffset */ void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); @@ -1071,15 +1105,16 @@ void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); * CPResetFn that does nothing, for use if no reset is required even * if fieldoffset is non zero. */ -void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque); +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri); /* - * Return true if this reginfo struct's field in the cpu state struct - * is 64 bits wide. + * Return MO_32 if the field in CPUARMState is uint32_t or + * MO_64 if the field in CPUARMState is uint64_t. */ -static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri) +static inline MemOp cpreg_field_type(const ARMCPRegInfo *ri) { - return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT); + return (ri->state == ARM_CP_STATE_AA64 || (ri->type & ARM_CP_64BIT) + ? MO_64 : MO_32); } static inline bool cp_access_ok(int current_el, @@ -1139,7 +1174,7 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri) * means that the right set of registers is exactly those where * the opc1 field is 4 or 5. (You can see this also in the assert * we do that the opc1 field and the permissions mask line up in - * define_one_arm_cp_reg_with_opaque().) + * define_one_arm_cp_reg().) * Checking the opc1 field is easier for us and avoids the problem * that we do not consistently use the right architectural names * for all sysregs, since we treat the name field as largely for debug. @@ -1148,12 +1183,17 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri) * fragile to future new sysregs, but this seems the least likely * to break. 
* - * In particular, note that the released sysreg XML defines that - * the FEAT_MEC sysregs and instructions do not follow this FEAT_NV - * trapping rule, so we will need to add an ARM_CP_* flag to indicate - * "register does not trap on NV" to handle those if/when we implement - * FEAT_MEC. + * In particular, note that the FEAT_MEC sysregs and instructions + * are exceptions to this trapping rule, so they are marked as + * ARM_CP_NV_NO_TRAP to indicate that they should not be trapped + * to EL2. (They are an exception because the FEAT_MEC sysregs UNDEF + * unless in Realm, and Realm is not expected to be virtualized.) */ + + if (ri->type & ARM_CP_NV_NO_TRAP) { + return false; + } + return ri->opc1 == 4 || ri->opc1 == 5; } diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index 525e4ce..37f1eca 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -22,6 +22,423 @@ #include "hw/registerfields.h" #include "qemu/host-utils.h" +#include "cpu.h" +#include "cpu-sysregs.h" + +/* + * System register ID fields. + */ +FIELD(CLIDR_EL1, CTYPE1, 0, 3) +FIELD(CLIDR_EL1, CTYPE2, 3, 3) +FIELD(CLIDR_EL1, CTYPE3, 6, 3) +FIELD(CLIDR_EL1, CTYPE4, 9, 3) +FIELD(CLIDR_EL1, CTYPE5, 12, 3) +FIELD(CLIDR_EL1, CTYPE6, 15, 3) +FIELD(CLIDR_EL1, CTYPE7, 18, 3) +FIELD(CLIDR_EL1, LOUIS, 21, 3) +FIELD(CLIDR_EL1, LOC, 24, 3) +FIELD(CLIDR_EL1, LOUU, 27, 3) +FIELD(CLIDR_EL1, ICB, 30, 3) + +/* When FEAT_CCIDX is implemented */ +FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3) +FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21) +FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24) + +/* When FEAT_CCIDX is not implemented */ +FIELD(CCSIDR_EL1, LINESIZE, 0, 3) +FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10) +FIELD(CCSIDR_EL1, NUMSETS, 13, 15) + +FIELD(CTR_EL0, IMINLINE, 0, 4) +FIELD(CTR_EL0, L1IP, 14, 2) +FIELD(CTR_EL0, DMINLINE, 16, 4) +FIELD(CTR_EL0, ERG, 20, 4) +FIELD(CTR_EL0, CWG, 24, 4) +FIELD(CTR_EL0, IDC, 28, 1) +FIELD(CTR_EL0, DIC, 29, 1) +FIELD(CTR_EL0, TMINLINE, 32, 6) + +FIELD(MIDR_EL1, REVISION, 0, 4) +FIELD(MIDR_EL1, PARTNUM, 4, 12) +FIELD(MIDR_EL1, ARCHITECTURE, 16, 4) +FIELD(MIDR_EL1, VARIANT, 20, 4) +FIELD(MIDR_EL1, IMPLEMENTER, 24, 8) + +FIELD(ID_ISAR0, SWAP, 0, 4) +FIELD(ID_ISAR0, BITCOUNT, 4, 4) +FIELD(ID_ISAR0, BITFIELD, 8, 4) +FIELD(ID_ISAR0, CMPBRANCH, 12, 4) +FIELD(ID_ISAR0, COPROC, 16, 4) +FIELD(ID_ISAR0, DEBUG, 20, 4) +FIELD(ID_ISAR0, DIVIDE, 24, 4) + +FIELD(ID_ISAR1, ENDIAN, 0, 4) +FIELD(ID_ISAR1, EXCEPT, 4, 4) +FIELD(ID_ISAR1, EXCEPT_AR, 8, 4) +FIELD(ID_ISAR1, EXTEND, 12, 4) +FIELD(ID_ISAR1, IFTHEN, 16, 4) +FIELD(ID_ISAR1, IMMEDIATE, 20, 4) +FIELD(ID_ISAR1, INTERWORK, 24, 4) +FIELD(ID_ISAR1, JAZELLE, 28, 4) + +FIELD(ID_ISAR2, LOADSTORE, 0, 4) +FIELD(ID_ISAR2, MEMHINT, 4, 4) +FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4) +FIELD(ID_ISAR2, MULT, 12, 4) +FIELD(ID_ISAR2, MULTS, 16, 4) +FIELD(ID_ISAR2, MULTU, 20, 4) +FIELD(ID_ISAR2, PSR_AR, 24, 4) +FIELD(ID_ISAR2, REVERSAL, 28, 4) + +FIELD(ID_ISAR3, SATURATE, 0, 4) +FIELD(ID_ISAR3, SIMD, 4, 4) +FIELD(ID_ISAR3, SVC, 8, 4) +FIELD(ID_ISAR3, SYNCHPRIM, 12, 4) +FIELD(ID_ISAR3, TABBRANCH, 16, 4) +FIELD(ID_ISAR3, T32COPY, 20, 4) +FIELD(ID_ISAR3, TRUENOP, 24, 4) +FIELD(ID_ISAR3, T32EE, 28, 4) + +FIELD(ID_ISAR4, UNPRIV, 0, 4) +FIELD(ID_ISAR4, WITHSHIFTS, 4, 4) +FIELD(ID_ISAR4, WRITEBACK, 8, 4) +FIELD(ID_ISAR4, SMC, 12, 4) +FIELD(ID_ISAR4, BARRIER, 16, 4) +FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4) +FIELD(ID_ISAR4, PSR_M, 24, 4) +FIELD(ID_ISAR4, SWP_FRAC, 28, 4) + +FIELD(ID_ISAR5, SEVL, 0, 4) +FIELD(ID_ISAR5, AES, 4, 4) +FIELD(ID_ISAR5, SHA1, 8, 4) +FIELD(ID_ISAR5, SHA2, 12, 
4) +FIELD(ID_ISAR5, CRC32, 16, 4) +FIELD(ID_ISAR5, RDM, 24, 4) +FIELD(ID_ISAR5, VCMA, 28, 4) + +FIELD(ID_ISAR6, JSCVT, 0, 4) +FIELD(ID_ISAR6, DP, 4, 4) +FIELD(ID_ISAR6, FHM, 8, 4) +FIELD(ID_ISAR6, SB, 12, 4) +FIELD(ID_ISAR6, SPECRES, 16, 4) +FIELD(ID_ISAR6, BF16, 20, 4) +FIELD(ID_ISAR6, I8MM, 24, 4) + +FIELD(ID_MMFR0, VMSA, 0, 4) +FIELD(ID_MMFR0, PMSA, 4, 4) +FIELD(ID_MMFR0, OUTERSHR, 8, 4) +FIELD(ID_MMFR0, SHARELVL, 12, 4) +FIELD(ID_MMFR0, TCM, 16, 4) +FIELD(ID_MMFR0, AUXREG, 20, 4) +FIELD(ID_MMFR0, FCSE, 24, 4) +FIELD(ID_MMFR0, INNERSHR, 28, 4) + +FIELD(ID_MMFR1, L1HVDVA, 0, 4) +FIELD(ID_MMFR1, L1UNIVA, 4, 4) +FIELD(ID_MMFR1, L1HVDSW, 8, 4) +FIELD(ID_MMFR1, L1UNISW, 12, 4) +FIELD(ID_MMFR1, L1HVD, 16, 4) +FIELD(ID_MMFR1, L1UNI, 20, 4) +FIELD(ID_MMFR1, L1TSTCLN, 24, 4) +FIELD(ID_MMFR1, BPRED, 28, 4) + +FIELD(ID_MMFR2, L1HVDFG, 0, 4) +FIELD(ID_MMFR2, L1HVDBG, 4, 4) +FIELD(ID_MMFR2, L1HVDRNG, 8, 4) +FIELD(ID_MMFR2, HVDTLB, 12, 4) +FIELD(ID_MMFR2, UNITLB, 16, 4) +FIELD(ID_MMFR2, MEMBARR, 20, 4) +FIELD(ID_MMFR2, WFISTALL, 24, 4) +FIELD(ID_MMFR2, HWACCFLG, 28, 4) + +FIELD(ID_MMFR3, CMAINTVA, 0, 4) +FIELD(ID_MMFR3, CMAINTSW, 4, 4) +FIELD(ID_MMFR3, BPMAINT, 8, 4) +FIELD(ID_MMFR3, MAINTBCST, 12, 4) +FIELD(ID_MMFR3, PAN, 16, 4) +FIELD(ID_MMFR3, COHWALK, 20, 4) +FIELD(ID_MMFR3, CMEMSZ, 24, 4) +FIELD(ID_MMFR3, SUPERSEC, 28, 4) + +FIELD(ID_MMFR4, SPECSEI, 0, 4) +FIELD(ID_MMFR4, AC2, 4, 4) +FIELD(ID_MMFR4, XNX, 8, 4) +FIELD(ID_MMFR4, CNP, 12, 4) +FIELD(ID_MMFR4, HPDS, 16, 4) +FIELD(ID_MMFR4, LSM, 20, 4) +FIELD(ID_MMFR4, CCIDX, 24, 4) +FIELD(ID_MMFR4, EVT, 28, 4) + +FIELD(ID_MMFR5, ETS, 0, 4) +FIELD(ID_MMFR5, NTLBPA, 4, 4) + +FIELD(ID_PFR0, STATE0, 0, 4) +FIELD(ID_PFR0, STATE1, 4, 4) +FIELD(ID_PFR0, STATE2, 8, 4) +FIELD(ID_PFR0, STATE3, 12, 4) +FIELD(ID_PFR0, CSV2, 16, 4) +FIELD(ID_PFR0, AMU, 20, 4) +FIELD(ID_PFR0, DIT, 24, 4) +FIELD(ID_PFR0, RAS, 28, 4) + +FIELD(ID_PFR1, PROGMOD, 0, 4) +FIELD(ID_PFR1, SECURITY, 4, 4) +FIELD(ID_PFR1, MPROGMOD, 8, 4) +FIELD(ID_PFR1, VIRTUALIZATION, 12, 4) +FIELD(ID_PFR1, GENTIMER, 16, 4) +FIELD(ID_PFR1, SEC_FRAC, 20, 4) +FIELD(ID_PFR1, VIRT_FRAC, 24, 4) +FIELD(ID_PFR1, GIC, 28, 4) + +FIELD(ID_PFR2, CSV3, 0, 4) +FIELD(ID_PFR2, SSBS, 4, 4) +FIELD(ID_PFR2, RAS_FRAC, 8, 4) + +FIELD(ID_AA64ISAR0, AES, 4, 4) +FIELD(ID_AA64ISAR0, SHA1, 8, 4) +FIELD(ID_AA64ISAR0, SHA2, 12, 4) +FIELD(ID_AA64ISAR0, CRC32, 16, 4) +FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) +FIELD(ID_AA64ISAR0, TME, 24, 4) +FIELD(ID_AA64ISAR0, RDM, 28, 4) +FIELD(ID_AA64ISAR0, SHA3, 32, 4) +FIELD(ID_AA64ISAR0, SM3, 36, 4) +FIELD(ID_AA64ISAR0, SM4, 40, 4) +FIELD(ID_AA64ISAR0, DP, 44, 4) +FIELD(ID_AA64ISAR0, FHM, 48, 4) +FIELD(ID_AA64ISAR0, TS, 52, 4) +FIELD(ID_AA64ISAR0, TLB, 56, 4) +FIELD(ID_AA64ISAR0, RNDR, 60, 4) + +FIELD(ID_AA64ISAR1, DPB, 0, 4) +FIELD(ID_AA64ISAR1, APA, 4, 4) +FIELD(ID_AA64ISAR1, API, 8, 4) +FIELD(ID_AA64ISAR1, JSCVT, 12, 4) +FIELD(ID_AA64ISAR1, FCMA, 16, 4) +FIELD(ID_AA64ISAR1, LRCPC, 20, 4) +FIELD(ID_AA64ISAR1, GPA, 24, 4) +FIELD(ID_AA64ISAR1, GPI, 28, 4) +FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) +FIELD(ID_AA64ISAR1, SB, 36, 4) +FIELD(ID_AA64ISAR1, SPECRES, 40, 4) +FIELD(ID_AA64ISAR1, BF16, 44, 4) +FIELD(ID_AA64ISAR1, DGH, 48, 4) +FIELD(ID_AA64ISAR1, I8MM, 52, 4) +FIELD(ID_AA64ISAR1, XS, 56, 4) +FIELD(ID_AA64ISAR1, LS64, 60, 4) + +FIELD(ID_AA64ISAR2, WFXT, 0, 4) +FIELD(ID_AA64ISAR2, RPRES, 4, 4) +FIELD(ID_AA64ISAR2, GPA3, 8, 4) +FIELD(ID_AA64ISAR2, APA3, 12, 4) +FIELD(ID_AA64ISAR2, MOPS, 16, 4) +FIELD(ID_AA64ISAR2, BC, 20, 4) +FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) +FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) 
+FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) +FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) +FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) +FIELD(ID_AA64ISAR2, RPRFM, 48, 4) +FIELD(ID_AA64ISAR2, CSSC, 52, 4) +FIELD(ID_AA64ISAR2, LUT, 56, 4) +FIELD(ID_AA64ISAR2, ATS1A, 60, 4) + +FIELD(ID_AA64PFR0, EL0, 0, 4) +FIELD(ID_AA64PFR0, EL1, 4, 4) +FIELD(ID_AA64PFR0, EL2, 8, 4) +FIELD(ID_AA64PFR0, EL3, 12, 4) +FIELD(ID_AA64PFR0, FP, 16, 4) +FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) +FIELD(ID_AA64PFR0, GIC, 24, 4) +FIELD(ID_AA64PFR0, RAS, 28, 4) +FIELD(ID_AA64PFR0, SVE, 32, 4) +FIELD(ID_AA64PFR0, SEL2, 36, 4) +FIELD(ID_AA64PFR0, MPAM, 40, 4) +FIELD(ID_AA64PFR0, AMU, 44, 4) +FIELD(ID_AA64PFR0, DIT, 48, 4) +FIELD(ID_AA64PFR0, RME, 52, 4) +FIELD(ID_AA64PFR0, CSV2, 56, 4) +FIELD(ID_AA64PFR0, CSV3, 60, 4) + +FIELD(ID_AA64PFR1, BT, 0, 4) +FIELD(ID_AA64PFR1, SSBS, 4, 4) +FIELD(ID_AA64PFR1, MTE, 8, 4) +FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) +FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) +FIELD(ID_AA64PFR1, SME, 24, 4) +FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) +FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) +FIELD(ID_AA64PFR1, NMI, 36, 4) +FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) +FIELD(ID_AA64PFR1, GCS, 44, 4) +FIELD(ID_AA64PFR1, THE, 48, 4) +FIELD(ID_AA64PFR1, MTEX, 52, 4) +FIELD(ID_AA64PFR1, DF2, 56, 4) +FIELD(ID_AA64PFR1, PFAR, 60, 4) + +FIELD(ID_AA64PFR2, MTEPERM, 0, 4) +FIELD(ID_AA64PFR2, MTESTOREONLY, 4, 4) +FIELD(ID_AA64PFR2, MTEFAR, 8, 4) +FIELD(ID_AA64PFR2, FPMR, 32, 4) + +FIELD(ID_AA64MMFR0, PARANGE, 0, 4) +FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) +FIELD(ID_AA64MMFR0, BIGEND, 8, 4) +FIELD(ID_AA64MMFR0, SNSMEM, 12, 4) +FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4) +FIELD(ID_AA64MMFR0, TGRAN16, 20, 4) +FIELD(ID_AA64MMFR0, TGRAN64, 24, 4) +FIELD(ID_AA64MMFR0, TGRAN4, 28, 4) +FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) +FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) +FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) +FIELD(ID_AA64MMFR0, EXS, 44, 4) +FIELD(ID_AA64MMFR0, FGT, 56, 4) +FIELD(ID_AA64MMFR0, ECV, 60, 4) + +FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) +FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) +FIELD(ID_AA64MMFR1, VH, 8, 4) +FIELD(ID_AA64MMFR1, HPDS, 12, 4) +FIELD(ID_AA64MMFR1, LO, 16, 4) +FIELD(ID_AA64MMFR1, PAN, 20, 4) +FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) +FIELD(ID_AA64MMFR1, XNX, 28, 4) +FIELD(ID_AA64MMFR1, TWED, 32, 4) +FIELD(ID_AA64MMFR1, ETS, 36, 4) +FIELD(ID_AA64MMFR1, HCX, 40, 4) +FIELD(ID_AA64MMFR1, AFP, 44, 4) +FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) +FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) +FIELD(ID_AA64MMFR1, CMOW, 56, 4) +FIELD(ID_AA64MMFR1, ECBHB, 60, 4) + +FIELD(ID_AA64MMFR2, CNP, 0, 4) +FIELD(ID_AA64MMFR2, UAO, 4, 4) +FIELD(ID_AA64MMFR2, LSM, 8, 4) +FIELD(ID_AA64MMFR2, IESB, 12, 4) +FIELD(ID_AA64MMFR2, VARANGE, 16, 4) +FIELD(ID_AA64MMFR2, CCIDX, 20, 4) +FIELD(ID_AA64MMFR2, NV, 24, 4) +FIELD(ID_AA64MMFR2, ST, 28, 4) +FIELD(ID_AA64MMFR2, AT, 32, 4) +FIELD(ID_AA64MMFR2, IDS, 36, 4) +FIELD(ID_AA64MMFR2, FWB, 40, 4) +FIELD(ID_AA64MMFR2, TTL, 48, 4) +FIELD(ID_AA64MMFR2, BBM, 52, 4) +FIELD(ID_AA64MMFR2, EVT, 56, 4) +FIELD(ID_AA64MMFR2, E0PD, 60, 4) + +FIELD(ID_AA64MMFR3, TCRX, 0, 4) +FIELD(ID_AA64MMFR3, SCTLRX, 4, 4) +FIELD(ID_AA64MMFR3, S1PIE, 8, 4) +FIELD(ID_AA64MMFR3, S2PIE, 12, 4) +FIELD(ID_AA64MMFR3, S1POE, 16, 4) +FIELD(ID_AA64MMFR3, S2POE, 20, 4) +FIELD(ID_AA64MMFR3, AIE, 24, 4) +FIELD(ID_AA64MMFR3, MEC, 28, 4) +FIELD(ID_AA64MMFR3, D128, 32, 4) +FIELD(ID_AA64MMFR3, D128_2, 36, 4) +FIELD(ID_AA64MMFR3, SNERR, 40, 4) +FIELD(ID_AA64MMFR3, ANERR, 44, 4) +FIELD(ID_AA64MMFR3, SDERR, 52, 4) +FIELD(ID_AA64MMFR3, ADERR, 56, 4) +FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4) + +FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) 
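These FIELD() declarations only name bit positions; reads go through the FIELD_EX32()/FIELD_EX64() helpers from hw/registerfields.h (and, in this series, the FIELD_EX64_IDREG() wrapper over the new cpu-sysregs.h storage). As a minimal sketch of what such an extraction reduces to, here is the open-coded equivalent for the 4-bit CLRBHB field at bits [31:28] of ID_AA64ISAR2 declared just above; the helper name is illustrative, not a QEMU API: #include <stdint.h> /* Open-coded equivalent of FIELD_EX64(val, ID_AA64ISAR2, CLRBHB): * the FIELD() macro fixes the start bit (28) and width (4), and * extraction is a right shift followed by a mask. */ static inline uint64_t id_aa64isar2_clrbhb(uint64_t val) { return (val >> 28) & 0xf; }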
+FIELD(ID_AA64DFR0, TRACEVER, 4, 4) +FIELD(ID_AA64DFR0, PMUVER, 8, 4) +FIELD(ID_AA64DFR0, BRPS, 12, 4) +FIELD(ID_AA64DFR0, PMSS, 16, 4) +FIELD(ID_AA64DFR0, WRPS, 20, 4) +FIELD(ID_AA64DFR0, SEBEP, 24, 4) +FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) +FIELD(ID_AA64DFR0, PMSVER, 32, 4) +FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) +FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) +FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) +FIELD(ID_AA64DFR0, MTPMU, 48, 4) +FIELD(ID_AA64DFR0, BRBE, 52, 4) +FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) +FIELD(ID_AA64DFR0, HPMN0, 60, 4) + +FIELD(ID_AA64ZFR0, SVEVER, 0, 4) +FIELD(ID_AA64ZFR0, AES, 4, 4) +FIELD(ID_AA64ZFR0, BITPERM, 16, 4) +FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) +FIELD(ID_AA64ZFR0, B16B16, 24, 4) +FIELD(ID_AA64ZFR0, SHA3, 32, 4) +FIELD(ID_AA64ZFR0, SM4, 40, 4) +FIELD(ID_AA64ZFR0, I8MM, 44, 4) +FIELD(ID_AA64ZFR0, F32MM, 52, 4) +FIELD(ID_AA64ZFR0, F64MM, 56, 4) + +FIELD(ID_AA64SMFR0, F32F32, 32, 1) +FIELD(ID_AA64SMFR0, BI32I32, 33, 1) +FIELD(ID_AA64SMFR0, B16F32, 34, 1) +FIELD(ID_AA64SMFR0, F16F32, 35, 1) +FIELD(ID_AA64SMFR0, I8I32, 36, 4) +FIELD(ID_AA64SMFR0, F16F16, 42, 1) +FIELD(ID_AA64SMFR0, B16B16, 43, 1) +FIELD(ID_AA64SMFR0, I16I32, 44, 4) +FIELD(ID_AA64SMFR0, F64F64, 48, 1) +FIELD(ID_AA64SMFR0, I16I64, 52, 4) +FIELD(ID_AA64SMFR0, SMEVER, 56, 4) +FIELD(ID_AA64SMFR0, FA64, 63, 1) + +FIELD(ID_DFR0, COPDBG, 0, 4) +FIELD(ID_DFR0, COPSDBG, 4, 4) +FIELD(ID_DFR0, MMAPDBG, 8, 4) +FIELD(ID_DFR0, COPTRC, 12, 4) +FIELD(ID_DFR0, MMAPTRC, 16, 4) +FIELD(ID_DFR0, MPROFDBG, 20, 4) +FIELD(ID_DFR0, PERFMON, 24, 4) +FIELD(ID_DFR0, TRACEFILT, 28, 4) + +FIELD(ID_DFR1, MTPMU, 0, 4) +FIELD(ID_DFR1, HPMN0, 4, 4) + +FIELD(DBGDIDR, SE_IMP, 12, 1) +FIELD(DBGDIDR, NSUHD_IMP, 14, 1) +FIELD(DBGDIDR, VERSION, 16, 4) +FIELD(DBGDIDR, CTX_CMPS, 20, 4) +FIELD(DBGDIDR, BRPS, 24, 4) +FIELD(DBGDIDR, WRPS, 28, 4) + +FIELD(DBGDEVID, PCSAMPLE, 0, 4) +FIELD(DBGDEVID, WPADDRMASK, 4, 4) +FIELD(DBGDEVID, BPADDRMASK, 8, 4) +FIELD(DBGDEVID, VECTORCATCH, 12, 4) +FIELD(DBGDEVID, VIRTEXTNS, 16, 4) +FIELD(DBGDEVID, DOUBLELOCK, 20, 4) +FIELD(DBGDEVID, AUXREGS, 24, 4) +FIELD(DBGDEVID, CIDMASK, 28, 4) + +FIELD(DBGDEVID1, PCSROFFSET, 0, 4) + +FIELD(MVFR0, SIMDREG, 0, 4) +FIELD(MVFR0, FPSP, 4, 4) +FIELD(MVFR0, FPDP, 8, 4) +FIELD(MVFR0, FPTRAP, 12, 4) +FIELD(MVFR0, FPDIVIDE, 16, 4) +FIELD(MVFR0, FPSQRT, 20, 4) +FIELD(MVFR0, FPSHVEC, 24, 4) +FIELD(MVFR0, FPROUND, 28, 4) + +FIELD(MVFR1, FPFTZ, 0, 4) +FIELD(MVFR1, FPDNAN, 4, 4) +FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */ +FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */ +FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */ +FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */ +FIELD(MVFR1, MVE, 8, 4) /* M-profile only */ +FIELD(MVFR1, FP16, 20, 4) /* M-profile only */ +FIELD(MVFR1, FPHP, 24, 4) +FIELD(MVFR1, SIMDFMAC, 28, 4) + +FIELD(MVFR2, SIMDMISC, 0, 4) +FIELD(MVFR2, FPMISC, 4, 4) /* * Naming convention for isar_feature functions: @@ -44,103 +461,103 @@ */ static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) != 0; } static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) > 1; } static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) { /* (M-profile) low-overhead loops and branch future */ - return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; + return FIELD_EX32_IDREG(id, ID_ISAR0, CMPBRANCH) >= 3; } static inline bool 
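One subtlety in the predicates further down: fields such as ID_AA64MMFR0.TGRAN4 and ID_AA64DFR0.DOUBLELOCK are read with the signed FIELD_SEX64_IDREG() variant because the value 0xf in those fields means "not implemented". Sign-extending the nibble maps 0xf to -1, so a plain >= 0 comparison accepts both the base (0) and enhanced (1) encodings. A minimal sketch of the trick, with illustrative helper names (and assuming arithmetic right shift, as QEMU does): #include <stdint.h> /* Sign-extend a 4-bit ID register field starting at bit 'start': * 0x0..0x7 stay non-negative, 0x8..0xf go negative, so the * "not implemented" encoding 0xf reads back as -1. */ static inline int64_t sextract_id_nibble(uint64_t reg, unsigned start) { return (int64_t)(reg << (64 - start - 4)) >> 60; } /* e.g. FEAT_TGRAN4 (4KB granule): ID_AA64MMFR0 bits [31:28] */ static inline int have_tgran4(uint64_t id_aa64mmfr0) { return sextract_id_nibble(id_aa64mmfr0, 28) >= 0; }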
isar_feature_aa32_jazelle(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR1, JAZELLE) != 0; } static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) != 0; } static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) > 1; } static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA1) != 0; } static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA2) != 0; } static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, CRC32) != 0; } static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, RDM) != 0; } static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, VCMA) != 0; } static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, JSCVT) != 0; } static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, DP) != 0; } static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, FHM) != 0; } static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SB) != 0; } static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SPECRES) != 0; } static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, BF16) != 0; } static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, I8MM) != 0; } static inline bool isar_feature_aa32_ras(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, RAS) != 0; } static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0; + return FIELD_EX32_IDREG(id, ID_PFR1, MPROGMOD) != 0; } static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) @@ -149,7 +566,7 @@ static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) * Return true if M-profile state handling insns * (VSCCLRM, CLRM, FPCTX access insns) are implemented */ - return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3; + return FIELD_EX32_IDREG(id, ID_PFR1, SECURITY) >= 3; } static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) @@ -282,88 +699,88 @@ 
static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4; + return FIELD_EX32_IDREG(id, ID_MMFR0, VMSA) >= 4; } static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) != 0; } static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) >= 2; } static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 4 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 5 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 6 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, HPDS) != 0; } static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, AC2) != 0; } static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, CCIDX) != 0; } static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, XNX) != 0; } static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 1; } static inline bool isar_feature_aa32_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 2; } static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, DIT) != 0; } static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR2, SSBS) != 0; } static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 5; } static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 8; } static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id) @@ -376,107 +793,112 @@ static inline bool 
isar_feature_aa32_doublelock(const ARMISARegisters *id) */ static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) != 0; } static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) > 1; } static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA1) != 0; } static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) != 0; } static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) > 1; } static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, CRC32) != 0; } -static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +static inline bool isar_feature_aa64_lse(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) >= 2; +} + +static inline bool isar_feature_aa64_lse128(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) >= 3; } static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RDM) != 0; } static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA3) != 0; } static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM3) != 0; } static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM4) != 0; } static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, DP) != 0; } static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, FHM) != 0; } static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) != 0; } static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) >= 2; } static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RNDR) != 0; } static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) == 2; } static inline 
bool isar_feature_aa64_tlbios(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) != 0; } static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, JSCVT) != 0; } static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FCMA) != 0; } static inline bool isar_feature_aa64_xs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, XS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, XS) != 0; } /* @@ -500,9 +922,9 @@ isar_feature_pauth_feature(const ARMISARegisters *id) * Architecturally, only one of {APA,API,APA3} may be active (non-zero) * and the other two must be zero. Thus we may avoid conditionals. */ - return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) | - FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) | - FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3)); + return (FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) | + FIELD_EX64_IDREG(id, ID_AA64ISAR1, API) | + FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3)); } static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) @@ -520,7 +942,7 @@ static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA5 algorithm. * QEMU will always enable or disable both APA and GPA. */ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) != 0; } static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) @@ -529,144 +951,164 @@ static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA3 algorithm. * QEMU will always enable or disable both APA3 and GPA3. 
*/ - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3) != 0; } static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SB) != 0; } static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SPECRES) != 0; } static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FRINTTS) != 0; } static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) != 0; } static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) >= 2; } static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) != 0; } static inline bool isar_feature_aa64_ebf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) > 1; } static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) != 0; } static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) >= 2; } static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, I8MM) != 0; } static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, WFXT) >= 2; } static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, BC) != 0; } static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, MOPS); } static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES); +} + +static inline bool isar_feature_aa64_cssc(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) != 0; +} + +static inline bool isar_feature_aa64_lut(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT); +} + +static inline bool isar_feature_aa64_ats1a(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, ATS1A); } static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically. 
*/ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) != 0xf; } static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically wrt FP16. */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) == 1; } static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL0) >= 2; } static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL1) >= 2; } static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL2) >= 2; } static inline bool isar_feature_aa64_ras(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) != 0; } static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) >= 2; } static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SVE) != 0; } static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SEL2) != 0; } static inline bool isar_feature_aa64_rme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) != 0; +} + +static inline bool isar_feature_aa64_rme_gpc2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) >= 2; } static inline bool isar_feature_aa64_dit(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, DIT) != 0; } static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) { - int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2); + int key = FIELD_EX64_IDREG(id, ID_AA64PFR0, CSV2); if (key >= 2) { return true; /* FEAT_CSV2_2 */ } if (key == 1) { - key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC); + key = FIELD_EX64_IDREG(id, ID_AA64PFR1, CSV2_FRAC); return key >= 2; /* FEAT_CSV2_1p2 */ } return false; @@ -674,320 +1116,408 @@ static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SSBS) != 0; } static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, BT) != 0; } static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) != 0; } static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 2; } static inline bool isar_feature_aa64_mte3(const 
ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 3; } static inline bool isar_feature_aa64_sme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SME) != 0; } static inline bool isar_feature_aa64_nmi(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, NMI) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, NMI) != 0; +} + +static inline bool isar_feature_aa64_gcs(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64PFR1, GCS) != 0; } static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 1; } static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id)); } static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 2; } static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id)); } static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 0; } static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 1; } static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN64) >= 0; } static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id)); } static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id)); } static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN64_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id)); } static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, FGT) != 0; } static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 0; } static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1; + return 
FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 1; } static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, VH) != 0; } static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, LO) != 0; } static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) != 0; } static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 2; } static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 3; } static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HCX) != 0; } static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, AFP) != 0; } static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, TIDCP1) != 0; } static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, CMOW) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, CMOW) != 0; } static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) != 0; } static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) >= 2; } static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, XNX) != 0; } static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, UAO) != 0; } static inline bool isar_feature_aa64_st(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, ST) != 0; } static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, AT) != 0; } static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, FWB) != 0; } static inline bool isar_feature_aa64_ids(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, IDS) != 0; } static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 1; } static inline bool isar_feature_aa64_evt(const 
ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 2; } static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, CCIDX) != 0; } static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, VARANGE) != 0; } static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, E0PD) != 0; } static inline bool isar_feature_aa64_nv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) != 0; } static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) >= 2; +} + +static inline bool isar_feature_aa64_tcr2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, TCRX) != 0; +} + +static inline bool isar_feature_aa64_sctlr2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, SCTLRX) != 0; +} + +static inline bool isar_feature_aa64_s1pie(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, S1PIE) != 0; +} + +static inline bool isar_feature_aa64_s2pie(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, S2PIE) != 0; +} + +static inline bool isar_feature_aa64_mec(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, MEC) != 0; } static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 4 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 5 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 6 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, DEBUGVER) >= 8; } static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64DFR0, DOUBLELOCK) >= 0; } static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) != 0; +} + +static inline bool isar_feature_aa64_sve2p1(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) >= 2; } static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id) { - return
FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) != 0; } static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) >= 2; } static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BITPERM) != 0; } static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BFLOAT16) != 0; } static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SHA3) != 0; } static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SM4) != 0; } static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, I8MM) != 0; } static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F32MM) != 0; } static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F64MM) != 0; +} + +static inline bool isar_feature_aa64_sve_b16b16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, B16B16); +} + +static inline bool isar_feature_aa64_sme_b16b16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, B16B16); +} + +static inline bool isar_feature_aa64_sme_f16f16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F16F16); } static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F64F64); } static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf; + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, I16I64) == 0xf; } static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, FA64); +} + +static inline bool isar_feature_aa64_sme2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) != 0; +} + +static inline bool isar_feature_aa64_sme2p1(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) >= 2; +} + +/* + * Combinations of feature tests, for ease of use with TRANS_FEAT. 
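+ * (TRANS_FEAT binds a single feature predicate to each decode entry,
+ * so an OR of two tests needs its own named wrapper. A sketch of the
+ * resulting usage, with hypothetical insn and helper names:
+ *
+ *   TRANS_FEAT(FOO, aa64_sme2_or_sve2p1, trans_foo_impl, a)
+ *
+ * where FOO and trans_foo_impl stand in for a real instruction and its
+ * translation helper.)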
+ */ +static inline bool isar_feature_aa64_sme_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2p1_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2p1(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2_i16i64(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) && isar_feature_aa64_sme_i16i64(id); +} + +static inline bool isar_feature_aa64_sme2_f64f64(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) && isar_feature_aa64_sme_f64f64(id); } /* diff --git a/target/arm/cpu-irq.c b/target/arm/cpu-irq.c new file mode 100644 index 0000000..fe514cc --- /dev/null +++ b/target/arm/cpu-irq.c @@ -0,0 +1,381 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * QEMU ARM CPU - interrupt_request handling + * + * Copyright (c) 2003-2025 QEMU contributors + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "accel/tcg/cpu-ops.h" +#include "internals.h" + +#ifdef CONFIG_TCG +static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, + unsigned int target_el, + unsigned int cur_el, bool secure, + uint64_t hcr_el2) +{ + CPUARMState *env = cpu_env(cs); + bool pstate_unmasked; + bool unmasked = false; + bool allIntMask = false; + + /* + * Don't take exceptions if they target a lower EL. + * This check should catch any exceptions that would not be taken + * but left pending. + */ + if (cur_el > target_el) { + return false; + } + + if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && + env->cp15.sctlr_el[target_el] & SCTLR_NMI && cur_el == target_el) { + allIntMask = env->pstate & PSTATE_ALLINT || + ((env->cp15.sctlr_el[target_el] & SCTLR_SPINTMASK) && + (env->pstate & PSTATE_SP)); + } + + switch (excp_idx) { + case EXCP_NMI: + pstate_unmasked = !allIntMask; + break; + + case EXCP_VINMI: + if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { + /* VINMIs are only taken when hypervized. */ + return false; + } + return !allIntMask; + case EXCP_VFNMI: + if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { + /* VFNMIs are only taken when hypervized. */ + return false; + } + return !allIntMask; + case EXCP_FIQ: + pstate_unmasked = (!(env->daif & PSTATE_F)) && (!allIntMask); + break; + + case EXCP_IRQ: + pstate_unmasked = (!(env->daif & PSTATE_I)) && (!allIntMask); + break; + + case EXCP_VFIQ: + if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { + /* VFIQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_F) && (!allIntMask); + case EXCP_VIRQ: + if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { + /* VIRQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_I) && (!allIntMask); + case EXCP_VSERR: + if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) { + /* VSErrs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_A); + default: + g_assert_not_reached(); + } + + /* + * Use the target EL, current execution state and SCR/HCR settings to + * determine whether the corresponding CPSR bit is used to mask the + * interrupt. 
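+ * For example, with HCR_EL2.{E2H,TGE} == {1,1} an IRQ taken to EL2
+ * remains maskable by PSTATE, while an interrupt routed to EL3 is
+ * never maskable, as implemented below.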
+ */ + if ((target_el > cur_el) && (target_el != 1)) { + /* Exceptions targeting a higher EL may not be maskable */ + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + switch (target_el) { + case 2: + /* + * According to ARM DDI 0487H.a, an interrupt can be masked + * when HCR_E2H and HCR_TGE are both set regardless of the + * current Security state. Note that we need to revisit this + * part again once we need to support NMI. + */ + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + unmasked = true; + } + break; + case 3: + /* Interrupt cannot be masked when the target EL is 3 */ + unmasked = true; + break; + default: + g_assert_not_reached(); + } + } else { + /* + * The old 32-bit-only environment has a more complicated + * masking setup. HCR and SCR bits not only affect interrupt + * routing but also change the behaviour of masking. + */ + bool hcr, scr; + + switch (excp_idx) { + case EXCP_FIQ: + /* + * If FIQs are routed to EL3 or EL2 then there are cases where + * we override the CPSR.F in determining if the exception is + * masked or not. If neither of these are set then we fall back + * to the CPSR.F setting otherwise we further assess the state + * below. + */ + hcr = hcr_el2 & HCR_FMO; + scr = (env->cp15.scr_el3 & SCR_FIQ); + + /* + * When EL3 is 32-bit, the SCR.FW bit controls whether the + * CPSR.F bit masks FIQ interrupts when taken in non-secure + * state. If SCR.FW is set then FIQs can be masked by CPSR.F + * when non-secure but only when FIQs are only routed to EL3. + */ + scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr); + break; + case EXCP_IRQ: + /* + * When EL3 execution state is 32-bit, if HCR.IMO is set then + * we may override the CPSR.I masking when in non-secure state. + * The SCR.IRQ setting has already been taken into consideration + * when setting the target EL, so it does not have a further + * effect here. + */ + hcr = hcr_el2 & HCR_IMO; + scr = false; + break; + default: + g_assert_not_reached(); + } + + if ((scr || hcr) && !secure) { + unmasked = true; + } + } + } + + /* + * The PSTATE bits only mask the interrupt if we have not overridden the + * ability above. + */ + return unmasked || pstate_unmasked; +} + +bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUARMState *env = cpu_env(cs); + uint32_t cur_el = arm_current_el(env); + bool secure = arm_is_secure(env); + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + uint32_t target_el; + uint32_t excp_idx; + + /* The prioritization of interrupts is IMPLEMENTATION DEFINED. 
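+ * The order checked below is: NMI, VINMI, VFNMI, FIQ, IRQ, VIRQ,
+ * VFIQ, then VSERR.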
*/ + + if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && + (arm_sctlr(env, cur_el) & SCTLR_NMI)) { + if (interrupt_request & CPU_INTERRUPT_NMI) { + excp_idx = EXCP_NMI; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VINMI) { + excp_idx = EXCP_VINMI; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VFNMI) { + excp_idx = EXCP_VFNMI; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + } else { + /* + * NMI disabled: interrupts with superpriority are handled + * as if they didn't have it + */ + if (interrupt_request & CPU_INTERRUPT_NMI) { + interrupt_request |= CPU_INTERRUPT_HARD; + } + if (interrupt_request & CPU_INTERRUPT_VINMI) { + interrupt_request |= CPU_INTERRUPT_VIRQ; + } + if (interrupt_request & CPU_INTERRUPT_VFNMI) { + interrupt_request |= CPU_INTERRUPT_VFIQ; + } + } + + if (interrupt_request & CPU_INTERRUPT_FIQ) { + excp_idx = EXCP_FIQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_HARD) { + excp_idx = EXCP_IRQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VIRQ) { + excp_idx = EXCP_VIRQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VFIQ) { + excp_idx = EXCP_VFIQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VSERR) { + excp_idx = EXCP_VSERR; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + /* Taking a virtual abort clears HCR_EL2.VSE */ + env->cp15.hcr_el2 &= ~HCR_VSE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + goto found; + } + } + return false; + + found: + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cs->cc->tcg_ops->do_interrupt(cs); + return true; +} +#endif /* CONFIG_TCG */ + +void arm_cpu_update_virq(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VIRQ, which is the logical OR of + * the HCR_EL2.VI bit and the input line level from the GIC. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && + !(arm_hcrx_el2_eff(env) & HCRX_VINMI)) || + (env->irq_line_state & CPU_INTERRUPT_VIRQ); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VIRQ)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VIRQ); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); + } + } +} + +void arm_cpu_update_vfiq(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VFIQ, which is the logical OR of + * the HCR_EL2.VF bit and the input line level from the GIC. 
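+ * When HCRX_EL2.VFNMI gives the VF bit superpriority it is reported
+ * as VFNMI instead, so it is excluded from the VFIQ level computed
+ * below.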
+ */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VF) && + !(arm_hcrx_el2_eff(env) & HCRX_VFNMI)) || + (env->irq_line_state & CPU_INTERRUPT_VFIQ); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VFIQ)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VFIQ); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VFIQ); + } + } +} + +void arm_cpu_update_vinmi(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VINMI, which is the logical OR of + * the HCRX_EL2.VINMI bit and the input line level from the GIC. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && + (arm_hcrx_el2_eff(env) & HCRX_VINMI)) || + (env->irq_line_state & CPU_INTERRUPT_VINMI); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VINMI)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VINMI); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VINMI); + } + } +} + +void arm_cpu_update_vfnmi(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VFNMI, which is the HCRX_EL2.VFNMI bit. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = (arm_hcr_el2_eff(env) & HCR_VF) && + (arm_hcrx_el2_eff(env) & HCRX_VFNMI); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VFNMI)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VFNMI); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VFNMI); + } + } +} + +void arm_cpu_update_vserr(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = env->cp15.hcr_el2 & HCR_VSE; + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VSERR)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VSERR); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + } + } +} + diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index 896b35b..8b46c7c 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -17,15 +17,9 @@ #endif #ifdef CONFIG_USER_ONLY -# ifdef TARGET_AARCH64 -# define TARGET_TAGGED_ADDRESSES -# ifdef __FreeBSD__ -# define TARGET_PAGE_BITS 12 -# else +# if defined(TARGET_AARCH64) && defined(CONFIG_LINUX) /* Allow user-only to vary page size from 4k */ # define TARGET_PAGE_BITS_VARY -# define TARGET_PAGE_BITS_MIN 12 -# endif # else # define TARGET_PAGE_BITS 12 # endif @@ -35,10 +29,14 @@ * have to support 1K tiny pages. 
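 * (A tiny page is 2^10 bytes, hence the TARGET_PAGE_BITS_LEGACY
 * value of 10 kept below.)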
*/ # define TARGET_PAGE_BITS_VARY -# define TARGET_PAGE_BITS_MIN 10 +# define TARGET_PAGE_BITS_LEGACY 10 #endif /* !CONFIG_USER_ONLY */ -/* ARM processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) +/* + * ARM-specific extra insn start words: + * 1: Conditional execution bits + * 2: Partial exception syndrome for data aborts + */ +#define TARGET_INSN_START_EXTRA_WORDS 2 #endif diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index b497667..2fcb0e1 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -28,11 +28,6 @@ OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU) #define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -#define TYPE_AARCH64_CPU "aarch64-cpu" -typedef struct AArch64CPUClass AArch64CPUClass; -DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU, - TYPE_AARCH64_CPU) - #define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) diff --git a/target/arm/cpu-sysregs.h b/target/arm/cpu-sysregs.h new file mode 100644 index 0000000..7877a3b --- /dev/null +++ b/target/arm/cpu-sysregs.h @@ -0,0 +1,42 @@ +/* + * Definitions for Arm ID system registers + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef ARM_CPU_SYSREGS_H +#define ARM_CPU_SYSREGS_H + +/* + * Following is similar to the coprocessor regs encodings, but with an argument + * ordering that matches the ARM ARM. We also reuse the various CP_REG_ defines + * that actually are the same as the equivalent KVM_REG_ values. + */ +#define ENCODE_ID_REG(op0, op1, crn, crm, op2) \ + (((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ + ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ + ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ + ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ + ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) + +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) NAME##_IDX, + +typedef enum ARMIDRegisterIdx { +#include "cpu-sysregs.h.inc" + NUM_ID_IDX, +} ARMIDRegisterIdx; + +#undef DEF +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + SYS_##NAME = ENCODE_ID_REG(OP0, OP1, CRN, CRM, OP2), + +typedef enum ARMSysRegs { +#include "cpu-sysregs.h.inc" +} ARMSysRegs; + +#undef DEF + +extern const uint32_t id_register_sysreg[NUM_ID_IDX]; + +int get_sysreg_idx(ARMSysRegs sysreg); + +#endif /* ARM_CPU_SYSREGS_H */ diff --git a/target/arm/cpu-sysregs.h.inc b/target/arm/cpu-sysregs.h.inc new file mode 100644 index 0000000..2bb2861 --- /dev/null +++ b/target/arm/cpu-sysregs.h.inc @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +DEF(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0) +DEF(ID_AA64PFR1_EL1, 3, 0, 0, 4, 1) +DEF(ID_AA64PFR2_EL1, 3, 0, 0, 4, 2) +DEF(ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5) +DEF(ID_AA64DFR0_EL1, 3, 0, 0, 5, 0) +DEF(ID_AA64DFR1_EL1, 3, 0, 0, 5, 1) +DEF(ID_AA64AFR0_EL1, 3, 0, 0, 5, 4) +DEF(ID_AA64AFR1_EL1, 3, 0, 0, 5, 5) +DEF(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0) +DEF(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1) +DEF(ID_AA64ISAR2_EL1, 3, 0, 0, 6, 2) +DEF(ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0) +DEF(ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1) +DEF(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2) +DEF(ID_AA64MMFR3_EL1, 3, 0, 0, 7, 3) +DEF(ID_PFR0_EL1, 3, 0, 0, 1, 0) +DEF(ID_PFR1_EL1, 3, 0, 0, 1, 1) +DEF(ID_DFR0_EL1, 3, 0, 0, 1, 2) +DEF(ID_AFR0_EL1, 3, 0, 0, 1, 3) +DEF(ID_MMFR0_EL1, 3, 0, 0, 1, 4) +DEF(ID_MMFR1_EL1, 3, 0, 0, 1, 5) +DEF(ID_MMFR2_EL1, 3, 0, 0, 1, 6) +DEF(ID_MMFR3_EL1, 3, 0, 0, 1, 7) +DEF(ID_ISAR0_EL1, 3, 0, 0, 2, 0) +DEF(ID_ISAR1_EL1, 3, 0, 0, 2, 1) +DEF(ID_ISAR2_EL1, 3, 0, 0, 2, 2) +DEF(ID_ISAR3_EL1, 3, 0, 0, 2, 3) +DEF(ID_ISAR4_EL1, 3, 0, 0, 2, 4) +DEF(ID_ISAR5_EL1, 3, 0, 0, 2, 5) 
+DEF(ID_MMFR4_EL1, 3, 0, 0, 2, 6) +DEF(ID_ISAR6_EL1, 3, 0, 0, 2, 7) +DEF(MVFR0_EL1, 3, 0, 0, 3, 0) +DEF(MVFR1_EL1, 3, 0, 0, 3, 1) +DEF(MVFR2_EL1, 3, 0, 0, 3, 2) +DEF(ID_PFR2_EL1, 3, 0, 0, 3, 4) +DEF(ID_DFR1_EL1, 3, 0, 0, 3, 5) +DEF(ID_MMFR5_EL1, 3, 0, 0, 3, 6) +DEF(CLIDR_EL1, 3, 1, 0, 0, 1) +DEF(ID_AA64ZFR0_EL1, 3, 0, 0, 4, 4) +DEF(CTR_EL0, 3, 3, 0, 0, 1) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 01786ac..3b556f1 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -33,7 +33,7 @@ #endif /* CONFIG_TCG */ #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "hw/qdev-properties.h" #if !defined(CONFIG_USER_ONLY) #include "hw/loader.h" @@ -52,6 +52,8 @@ #include "target/arm/cpu-qom.h" #include "target/arm/gtimer.h" +#include "trace.h" + static void arm_cpu_set_pc(CPUState *cs, vaddr value) { ARMCPU *cpu = ARM_CPU(cs); @@ -121,6 +123,12 @@ void arm_restore_state_to_opc(CPUState *cs, env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; } } + +int arm_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return arm_env_mmu_index(cpu_env(cs)); +} + #endif /* CONFIG_TCG */ #ifndef CONFIG_USER_ONLY @@ -136,19 +144,14 @@ static bool arm_cpu_has_work(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); return (cpu->power_state != PSCI_OFF) - && cs->interrupt_request & - (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD - | CPU_INTERRUPT_NMI | CPU_INTERRUPT_VINMI | CPU_INTERRUPT_VFNMI - | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR - | CPU_INTERRUPT_EXITTB); + && cpu_test_interrupt(cs, + CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD + | CPU_INTERRUPT_NMI | CPU_INTERRUPT_VINMI | CPU_INTERRUPT_VFNMI + | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR + | CPU_INTERRUPT_EXITTB); } #endif /* !CONFIG_USER_ONLY */ -static int arm_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - return arm_env_mmu_index(cpu_env(cs)); -} - void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void *opaque) { @@ -191,14 +194,8 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) * This is basically only used for fields in non-core coprocessors * (like the pxa2xx ones). */ - if (!ri->fieldoffset) { - return; - } - - if (cpreg_field_is_64bit(ri)) { - CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue; - } else { - CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue; + if (ri->fieldoffset) { + raw_write(&cpu->env, ri, ri->resetvalue); } } @@ -230,6 +227,8 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj); CPUARMState *env = &cpu->env; + trace_arm_cpu_reset(arm_cpu_mp_affinity(cpu)); + if (acc->parent_phases.hold) { acc->parent_phases.hold(obj, type); } @@ -246,10 +245,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) cpu->power_state = cs->start_powered_off ? PSCI_OFF : PSCI_ON; - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; - } - if (arm_feature(env, ARM_FEATURE_AARCH64)) { /* 64 bit CPUs always start in 64 bit mode */ env->aarch64 = true; @@ -316,6 +311,10 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) env->cp15.mdscr_el1 |= 1 << 12; /* Enable FEAT_MOPS */ env->cp15.sctlr_el[1] |= SCTLR_MSCEN; + /* For Linux, GCSPR_EL0 is always readable. 
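+ * GCSCRE0_EL1.nTR leaves EL0 reads of GCSPR_EL0 untrapped.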
*/ + if (cpu_isar_feature(aa64_gcs, cpu)) { + env->cp15.gcscr_el[0] = GCSCRE0_NTR; + } #else /* Reset into the highest available EL */ if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -348,11 +347,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) env->uncached_cpsr = ARM_CPU_MODE_USR; /* For user mode we must enable access to coprocessors */ env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30; - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->cp15.c15_cpar = 3; - } else if (arm_feature(env, ARM_FEATURE_XSCALE)) { - env->cp15.c15_cpar = 1; - } #else /* @@ -552,11 +546,15 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); @@ -584,6 +582,8 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) bool have_el3 = arm_feature(env, ARM_FEATURE_EL3); bool have_el2 = arm_feature(env, ARM_FEATURE_EL2); + trace_arm_emulate_firmware_reset(arm_cpu_mp_affinity(cpu), target_el); + /* * Check we have the EL we're aiming for. If that is the * highest implemented EL, then cpu_reset has already done @@ -629,6 +629,9 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; env->cp15.scr_el3 |= SCR_ENTP2; env->vfp.smcr_el[3] = 0xf; + if (cpu_isar_feature(aa64_sme2, cpu)) { + env->vfp.smcr_el[3] |= R_SMCR_EZT0_MASK; + } } if (cpu_isar_feature(aa64_hcx, cpu)) { env->cp15.scr_el3 |= SCR_HXEN; @@ -636,6 +639,22 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) if (cpu_isar_feature(aa64_fgt, cpu)) { env->cp15.scr_el3 |= SCR_FGTEN; } + if (cpu_isar_feature(aa64_gcs, cpu)) { + env->cp15.scr_el3 |= SCR_GCSEN; + } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + env->cp15.scr_el3 |= SCR_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + env->cp15.scr_el3 |= SCR_SCTLR2EN; + } + if (cpu_isar_feature(aa64_s1pie, cpu) || + cpu_isar_feature(aa64_s2pie, cpu)) { + env->cp15.scr_el3 |= SCR_PIEN; + } + if (cpu_isar_feature(aa64_mec, cpu)) { + env->cp15.scr_el3 |= SCR_MECEN; + } } if (target_el == 2) { @@ -672,376 +691,6 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) } -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - -static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, - unsigned int target_el, - unsigned int cur_el, bool secure, - uint64_t hcr_el2) -{ - CPUARMState *env = cpu_env(cs); - bool pstate_unmasked; - bool unmasked = false; - bool allIntMask = false; - - /* - * Don't take exceptions if they target a lower EL. - * This check should catch any exceptions that would not be taken - * but left pending. 
- */ - if (cur_el > target_el) { - return false; - } - - if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && - env->cp15.sctlr_el[target_el] & SCTLR_NMI && cur_el == target_el) { - allIntMask = env->pstate & PSTATE_ALLINT || - ((env->cp15.sctlr_el[target_el] & SCTLR_SPINTMASK) && - (env->pstate & PSTATE_SP)); - } - - switch (excp_idx) { - case EXCP_NMI: - pstate_unmasked = !allIntMask; - break; - - case EXCP_VINMI: - if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { - /* VINMIs are only taken when hypervized. */ - return false; - } - return !allIntMask; - case EXCP_VFNMI: - if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { - /* VFNMIs are only taken when hypervized. */ - return false; - } - return !allIntMask; - case EXCP_FIQ: - pstate_unmasked = (!(env->daif & PSTATE_F)) && (!allIntMask); - break; - - case EXCP_IRQ: - pstate_unmasked = (!(env->daif & PSTATE_I)) && (!allIntMask); - break; - - case EXCP_VFIQ: - if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { - /* VFIQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_F) && (!allIntMask); - case EXCP_VIRQ: - if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { - /* VIRQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_I) && (!allIntMask); - case EXCP_VSERR: - if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) { - /* VIRQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_A); - default: - g_assert_not_reached(); - } - - /* - * Use the target EL, current execution state and SCR/HCR settings to - * determine whether the corresponding CPSR bit is used to mask the - * interrupt. - */ - if ((target_el > cur_el) && (target_el != 1)) { - /* Exceptions targeting a higher EL may not be maskable */ - if (arm_feature(env, ARM_FEATURE_AARCH64)) { - switch (target_el) { - case 2: - /* - * According to ARM DDI 0487H.a, an interrupt can be masked - * when HCR_E2H and HCR_TGE are both set regardless of the - * current Security state. Note that we need to revisit this - * part again once we need to support NMI. - */ - if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - unmasked = true; - } - break; - case 3: - /* Interrupt cannot be masked when the target EL is 3 */ - unmasked = true; - break; - default: - g_assert_not_reached(); - } - } else { - /* - * The old 32-bit-only environment has a more complicated - * masking setup. HCR and SCR bits not only affect interrupt - * routing but also change the behaviour of masking. - */ - bool hcr, scr; - - switch (excp_idx) { - case EXCP_FIQ: - /* - * If FIQs are routed to EL3 or EL2 then there are cases where - * we override the CPSR.F in determining if the exception is - * masked or not. If neither of these are set then we fall back - * to the CPSR.F setting otherwise we further assess the state - * below. - */ - hcr = hcr_el2 & HCR_FMO; - scr = (env->cp15.scr_el3 & SCR_FIQ); - - /* - * When EL3 is 32-bit, the SCR.FW bit controls whether the - * CPSR.F bit masks FIQ interrupts when taken in non-secure - * state. If SCR.FW is set then FIQs can be masked by CPSR.F - * when non-secure but only when FIQs are only routed to EL3. - */ - scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr); - break; - case EXCP_IRQ: - /* - * When EL3 execution state is 32-bit, if HCR.IMO is set then - * we may override the CPSR.I masking when in non-secure state. - * The SCR.IRQ setting has already been taken into consideration - * when setting the target EL, so it does not have a further - * affect here. 
- */ - hcr = hcr_el2 & HCR_IMO; - scr = false; - break; - default: - g_assert_not_reached(); - } - - if ((scr || hcr) && !secure) { - unmasked = true; - } - } - } - - /* - * The PSTATE bits only mask the interrupt if we have not overridden the - * ability above. - */ - return unmasked || pstate_unmasked; -} - -static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) -{ - CPUARMState *env = cpu_env(cs); - uint32_t cur_el = arm_current_el(env); - bool secure = arm_is_secure(env); - uint64_t hcr_el2 = arm_hcr_el2_eff(env); - uint32_t target_el; - uint32_t excp_idx; - - /* The prioritization of interrupts is IMPLEMENTATION DEFINED. */ - - if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && - (arm_sctlr(env, cur_el) & SCTLR_NMI)) { - if (interrupt_request & CPU_INTERRUPT_NMI) { - excp_idx = EXCP_NMI; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VINMI) { - excp_idx = EXCP_VINMI; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VFNMI) { - excp_idx = EXCP_VFNMI; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - } else { - /* - * NMI disabled: interrupts with superpriority are handled - * as if they didn't have it - */ - if (interrupt_request & CPU_INTERRUPT_NMI) { - interrupt_request |= CPU_INTERRUPT_HARD; - } - if (interrupt_request & CPU_INTERRUPT_VINMI) { - interrupt_request |= CPU_INTERRUPT_VIRQ; - } - if (interrupt_request & CPU_INTERRUPT_VFNMI) { - interrupt_request |= CPU_INTERRUPT_VFIQ; - } - } - - if (interrupt_request & CPU_INTERRUPT_FIQ) { - excp_idx = EXCP_FIQ; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_HARD) { - excp_idx = EXCP_IRQ; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VIRQ) { - excp_idx = EXCP_VIRQ; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VFIQ) { - excp_idx = EXCP_VFIQ; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VSERR) { - excp_idx = EXCP_VSERR; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - /* Taking a virtual abort clears HCR_EL2.VSE */ - env->cp15.hcr_el2 &= ~HCR_VSE; - cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); - goto found; - } - } - return false; - - found: - cs->exception_index = excp_idx; - env->exception.target_el = target_el; - cs->cc->tcg_ops->do_interrupt(cs); - return true; -} - -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ - -void arm_cpu_update_virq(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VIRQ, which is the logical OR of - * the HCR_EL2.VI bit and the input line level from the GIC. 
- */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && - !(arm_hcrx_el2_eff(env) & HCRX_VINMI)) || - (env->irq_line_state & CPU_INTERRUPT_VIRQ); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VIRQ) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VIRQ); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); - } - } -} - -void arm_cpu_update_vfiq(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VFIQ, which is the logical OR of - * the HCR_EL2.VF bit and the input line level from the GIC. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VF) && - !(arm_hcrx_el2_eff(env) & HCRX_VFNMI)) || - (env->irq_line_state & CPU_INTERRUPT_VFIQ); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VFIQ) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VFIQ); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VFIQ); - } - } -} - -void arm_cpu_update_vinmi(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VINMI, which is the logical OR of - * the HCRX_EL2.VINMI bit and the input line level from the GIC. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && - (arm_hcrx_el2_eff(env) & HCRX_VINMI)) || - (env->irq_line_state & CPU_INTERRUPT_VINMI); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VINMI) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VINMI); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VINMI); - } - } -} - -void arm_cpu_update_vfnmi(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VFNMI, which is the HCRX_EL2.VFNMI bit. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = (arm_hcr_el2_eff(env) & HCR_VF) && - (arm_hcrx_el2_eff(env) & HCRX_VFNMI); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VFNMI) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VFNMI); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VFNMI); - } - } -} - -void arm_cpu_update_vserr(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit. 
- */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = env->cp15.hcr_el2 & HCR_VSE; - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VSERR) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VSERR); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); - } - } -} - #ifndef CONFIG_USER_ONLY static void arm_cpu_set_irq(void *opaque, int irq, int level) { @@ -1097,37 +746,6 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level) } } -static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) -{ -#ifdef CONFIG_KVM - ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - uint32_t linestate_bit; - int irq_id; - - switch (irq) { - case ARM_CPU_IRQ: - irq_id = KVM_ARM_IRQ_CPU_IRQ; - linestate_bit = CPU_INTERRUPT_HARD; - break; - case ARM_CPU_FIQ: - irq_id = KVM_ARM_IRQ_CPU_FIQ; - linestate_bit = CPU_INTERRUPT_FIQ; - break; - default: - g_assert_not_reached(); - } - - if (level) { - env->irq_line_state |= linestate_bit; - } else { - env->irq_line_state &= ~linestate_bit; - } - kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level); -#endif -} - static bool arm_cpu_virtio_is_big_endian(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); @@ -1201,7 +819,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) info->endian = BFD_ENDIAN_LITTLE; if (bswap_code(sctlr_b)) { - info->endian = TARGET_BIG_ENDIAN ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG; + info->endian = target_big_endian() ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG; } info->flags &= ~INSN_ARM_BE32; #ifndef CONFIG_USER_ONLY @@ -1211,13 +829,11 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) #endif } -#ifdef TARGET_AARCH64 - static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - uint32_t psr = pstate_read(env); + uint64_t psr = pstate_read(env); int i, j; int el = arm_current_el(env); uint64_t hcr = arm_hcr_el2_eff(env); @@ -1239,7 +855,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } else { ns_status = ""; } - qemu_fprintf(f, "PSTATE=%08x %c%c%c%c %sEL%d%c", + qemu_fprintf(f, "PSTATE=%016" PRIx64 " %c%c%c%c %sEL%d%c", psr, psr & PSTATE_N ? 'N' : '-', psr & PSTATE_Z ? 'Z' : '-', @@ -1256,7 +872,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-')); } if (cpu_isar_feature(aa64_bti, cpu)) { - qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10); + qemu_fprintf(f, " BTYPE=%d", (int)(psr & PSTATE_BTYPE) >> 10); } qemu_fprintf(f, "%s%s%s", (hcr & HCR_NV) ? " NV" : "", @@ -1362,23 +978,14 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i); for (j = zcr_len; j >= 0; --j) { qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c", - env->zarray[i].d[2 * j + 1], - env->zarray[i].d[2 * j], + env->za_state.za[i].d[2 * j + 1], + env->za_state.za[i].d[2 * j], j ? ':' : '\n'); } } } } -#else - -static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) -{ - g_assert_not_reached(); -} - -#endif - static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) { ARMCPU *cpu = ARM_CPU(cs); @@ -1540,6 +1147,7 @@ static void arm_cpu_initfn(Object *obj) * 0 means "unset, use the default value". That default might vary depending * on the CPU type, and is set in the realize fn. 
*/ +#ifndef CONFIG_USER_ONLY static const Property arm_cpu_gt_cntfrq_property = DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0); @@ -1549,7 +1157,6 @@ static const Property arm_cpu_reset_cbar_property = static const Property arm_cpu_reset_hivecs_property = DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false); -#ifndef CONFIG_USER_ONLY static const Property arm_cpu_has_el2_property = DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true); @@ -1572,6 +1179,7 @@ static const Property arm_cpu_has_neon_property = static const Property arm_cpu_has_dsp_property = DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true); +#ifndef CONFIG_USER_ONLY static const Property arm_cpu_has_mpu_property = DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true); @@ -1584,6 +1192,7 @@ static const Property arm_cpu_pmsav7_dregion_property = DEFINE_PROP_UNSIGNED_NODEFAULT("pmsav7-dregion", ARMCPU, pmsav7_dregion, qdev_prop_uint32, uint32_t); +#endif static bool arm_get_pmu(Object *obj, Error **errp) { @@ -1608,6 +1217,35 @@ static void arm_set_pmu(Object *obj, bool value, Error **errp) cpu->has_pmu = value; } +static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); +} + +static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + /* + * At this time, this property is only allowed if KVM is enabled. This + * restriction allows us to avoid fixing up functionality that assumes a + * uniform execution state like do_interrupt. + */ + if (value == false) { + if (!kvm_enabled() || !kvm_arm_aarch32_supported()) { + error_setg(errp, "'aarch64' feature cannot be disabled " + "unless KVM is enabled and 32-bit EL1 " + "is supported"); + return; + } + unset_feature(&cpu->env, ARM_FEATURE_AARCH64); + } else { + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + } +} + unsigned int gt_cntfrq_period_ns(ARMCPU *cpu) { /* @@ -1724,7 +1362,7 @@ static void arm_cpu_propagate_feature_implications(ARMCPU *cpu) } } -void arm_cpu_post_init(Object *obj) +static void arm_cpu_post_init(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1735,6 +1373,14 @@ void arm_cpu_post_init(Object *obj) */ arm_cpu_propagate_feature_implications(cpu); + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + object_property_add_bool(obj, "aarch64", aarch64_cpu_get_aarch64, + aarch64_cpu_set_aarch64); + object_property_set_description(obj, "aarch64", + "Set on/off to enable/disable aarch64 " + "execution state "); + } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) || arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property); @@ -1750,7 +1396,6 @@ void arm_cpu_post_init(Object *obj) OBJ_PROP_FLAG_READWRITE); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) { /* Add the has_el3 state CPU property only if EL3 is allowed. This will * prevent "has_el3" from existing on CPUs which cannot support EL3. 
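The hunks that follow rewrite open-coded cpu->isar.<reg> field updates in terms of the GET_IDREG()/SET_IDREG()/FIELD_DP64_IDREG()/FIELD_DP32_IDREG() helpers over the ID-register index space introduced in cpu-sysregs.h. A minimal sketch of how this helper family could be layered, assuming ARMISARegisters gains a uint64_t idregs[NUM_ID_IDX] array indexed by ARMIDRegisterIdx (the actual definitions may differ):

    /* Hypothetical layering over the idregs[] array; mirrors the call sites. */
    #define GET_IDREG(ISAR, NAME)    ((ISAR)->idregs[NAME ## _EL1_IDX])
    #define SET_IDREG(ISAR, NAME, V) ((ISAR)->idregs[NAME ## _EL1_IDX] = (V))
    #define FIELD_DP64_IDREG(ISAR, NAME, FIELD, V) \
        SET_IDREG(ISAR, NAME, \
                  FIELD_DP64(GET_IDREG(ISAR, NAME), NAME, FIELD, V))
    #define FIELD_DP32_IDREG(ISAR, NAME, FIELD, V) \
        SET_IDREG(ISAR, NAME, \
                  FIELD_DP32((uint32_t)GET_IDREG(ISAR, NAME), NAME, FIELD, V))

With that layering, a former cpu->isar.id_isar6 read-modify-write becomes GET_IDREG(isar, ID_ISAR6) / SET_IDREG(isar, ID_ISAR6, u) with unchanged semantics, and a single-field update collapses to one FIELD_DP32_IDREG() call.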
@@ -1822,6 +1467,7 @@ void arm_cpu_post_init(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property); } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property); if (arm_feature(&cpu->env, ARM_FEATURE_V7)) { @@ -1858,8 +1504,6 @@ void arm_cpu_post_init(Object *obj) &cpu->psci_conduit, OBJ_PROP_FLAG_READWRITE); - qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); - if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property); } @@ -1868,7 +1512,6 @@ void arm_cpu_post_init(Object *obj) kvm_arm_add_vcpu_properties(cpu); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && cpu_isar_feature(aa64_mte, cpu)) { object_property_add_link(obj, "tag-memory", @@ -1886,6 +1529,7 @@ void arm_cpu_post_init(Object *obj) } } #endif + qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); } static void arm_cpu_finalizefn(Object *obj) @@ -1917,7 +1561,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp) { Error *local_err = NULL; -#ifdef TARGET_AARCH64 if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { arm_cpu_sve_finalize(cpu, &local_err); if (local_err != NULL) { @@ -1953,7 +1596,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp) return; } } -#endif if (kvm_enabled()) { kvm_arm_steal_time_finalize(cpu, &local_err); @@ -1968,6 +1610,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); ARMCPU *cpu = ARM_CPU(dev); + ARMISARegisters *isar = &cpu->isar; ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; Error *local_err = NULL; @@ -2125,21 +1768,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, JSCVT, 0); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, FP, 0xf); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); - cpu->isar.id_isar6 = u; + SET_IDREG(isar, ID_ISAR6, u); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, FPSP, 0); @@ -2173,7 +1811,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_NEON); - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0); @@ -2181,32 +1819,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0); t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0); - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0); t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0); t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0); - cpu->isar.id_aa64isar1 = t; + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, ADVSIMD, 0xf); - u = cpu->isar.id_isar5; + u = GET_IDREG(isar, ID_ISAR5); u = FIELD_DP32(u, ID_ISAR5, AES, 0); u = FIELD_DP32(u, ID_ISAR5, 
SHA1, 0); u = FIELD_DP32(u, ID_ISAR5, SHA2, 0); u = FIELD_DP32(u, ID_ISAR5, RDM, 0); u = FIELD_DP32(u, ID_ISAR5, VCMA, 0); - cpu->isar.id_isar5 = u; + SET_IDREG(isar, ID_ISAR5, u); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, DP, 0); u = FIELD_DP32(u, ID_ISAR6, FHM, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); u = FIELD_DP32(u, ID_ISAR6, I8MM, 0); - cpu->isar.id_isar6 = u; + SET_IDREG(isar, ID_ISAR6, u); if (!arm_feature(env, ARM_FEATURE_M)) { u = cpu->isar.mvfr1; @@ -2223,16 +1859,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_neon && !cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar0; - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0); - cpu->isar.id_aa64isar0 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR0, FHM, 0); - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, FRINTTS, 0); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, SIMDREG, 0); @@ -2249,30 +1880,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_THUMB_DSP); - u = cpu->isar.id_isar1; - u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1); - cpu->isar.id_isar1 = u; + FIELD_DP32_IDREG(isar, ID_ISAR1, EXTEND, 1); - u = cpu->isar.id_isar2; + u = GET_IDREG(isar, ID_ISAR2); u = FIELD_DP32(u, ID_ISAR2, MULTU, 1); u = FIELD_DP32(u, ID_ISAR2, MULTS, 1); - cpu->isar.id_isar2 = u; + SET_IDREG(isar, ID_ISAR2, u); - u = cpu->isar.id_isar3; + u = GET_IDREG(isar, ID_ISAR3); u = FIELD_DP32(u, ID_ISAR3, SIMD, 1); u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0); - cpu->isar.id_isar3 = u; + SET_IDREG(isar, ID_ISAR3, u); } - /* - * We rely on no XScale CPU having VFP so we can use the same bits in the - * TB flags field for VECSTRIDE and XSCALE_CPAR. - */ - assert(arm_feature(env, ARM_FEATURE_AARCH64) || - !cpu_isar_feature(aa32_vfp_simd, cpu) || - !arm_feature(env, ARM_FEATURE_XSCALE)); - #ifndef CONFIG_USER_ONLY { int pagebits; @@ -2336,14 +1957,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the security extension feature bits in the processor * feature registers as well. */ - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0); - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL3, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, SECURITY, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPSDBG, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL3, 0); /* Disable the realm management extension, which requires EL3. */ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, RME, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, RME, 0); } if (!cpu->has_el2) { @@ -2366,9 +1985,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu); #endif } else { - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMUVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, PERFMON, 0); cpu->pmceid0 = 0; cpu->pmceid1 = 0; } @@ -2378,10 +1996,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the hypervisor feature bits in the processor feature * registers if we don't have EL2. 
*/ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL2, 0); - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, - ID_PFR1, VIRTUALIZATION, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL2, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, VIRTUALIZATION, 0); } if (cpu_isar_feature(aa64_mte, cpu)) { @@ -2400,8 +2016,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * This matches Cortex-A710 BROADCASTMTE input being LOW. */ if (tcg_enabled() && cpu->tag_memory == NULL) { - cpu->isar.id_aa64pfr1 = - FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 1); } /* @@ -2409,7 +2024,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * enabled on the guest (i.e mte=off), clear guest's MTE bits." */ if (kvm_enabled() && !cpu->kvm_mte) { - FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 0); } #endif } @@ -2429,32 +2044,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * try to access the non-existent system registers for them. */ /* FEAT_SPE (Statistical Profiling Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMSVER, 0); /* FEAT_TRBE (Trace Buffer Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEBUFFER, 0); /* FEAT_TRF (Self-hosted Trace Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEFILT, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, TRACEFILT, 0); /* Trace Macrocell system register access */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPTRC, 0); /* Memory mapped trace */ - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, MMAPTRC, 0); /* FEAT_AMU (Activity Monitors Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0); - cpu->isar.id_pfr0 = - FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, AMU, 0); + FIELD_DP32_IDREG(isar, ID_PFR0, AMU, 0); /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, MPAM, 0); } /* MPU can be configured out of a PMSA CPU either by setting has-mpu @@ -2645,15 +2250,52 @@ static const Property arm_cpu_properties[] = { static const gchar *arm_gdb_arch_name(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - return "iwmmxt"; + if (arm_gdbstub_is_aarch64(cpu)) { + return "aarch64"; } return "arm"; } -#ifndef CONFIG_USER_ONLY +static const char *arm_gdb_get_core_xml_file(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + if (arm_gdbstub_is_aarch64(cpu)) { + return "aarch64-core.xml"; + } + if (arm_feature(env, ARM_FEATURE_M)) { + return "arm-m-profile.xml"; + } + return "arm-core.xml"; +} + +#ifdef CONFIG_USER_ONLY +/** + * aarch64_untagged_addr: + * + * Remove any address tag from @x. 
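+ * (When enabled, the tag occupies bits [63:56]; the sextract64()
+ * below clears those bits only when bit 55 is 0, i.e. for userspace
+ * addresses.)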
This is explicitly related to the + * linux syscall TIF_TAGGED_ADDR setting, not TBI in general. + * + * There should be a better place to put this, but we need this in + * include/accel/tcg/cpu-ldst.h, and not some place linux-user specific. + * + * Note that arm-*-user will never set tagged_addr_enable. + */ +static vaddr aarch64_untagged_addr(CPUState *cs, vaddr x) +{ + CPUARMState *env = cpu_env(cs); + if (env->tagged_addr_enable) { + /* + * TBI is enabled for userspace but not kernelspace addresses. + * Only clear the tag if bit 55 is clear. + */ + x &= sextract64(x, 0, 56); + } + return x; +} +#else #include "hw/core/sysemu-cpu-ops.h" static const struct SysemuCPUOps arm_sysemu_ops = { @@ -2668,20 +2310,52 @@ static const struct SysemuCPUOps arm_sysemu_ops = { #endif #ifdef CONFIG_TCG +#ifndef CONFIG_USER_ONLY +static vaddr aprofile_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + /* + * The Stage2 and Phys indexes are only used for ptw on arm32, + * and all pte's are aligned, so we never produce a wrap for these. + * Double check that we're not truncating a 40-bit physical address. + */ + assert((unsigned)mmu_idx < (ARMMMUIdx_Stage2_S & ARM_MMU_IDX_COREIDX_MASK)); + + if (!is_a64(cpu_env(cs))) { + return (uint32_t)result; + } + + /* + * TODO: For FEAT_CPA2, decide how we want to resolve + * Unpredictable_CPACHECK in AddressIncrement. + */ + return result; +} +#endif /* !CONFIG_USER_ONLY */ + static const TCGCPUOps arm_tcg_ops = { + .mttcg_supported = true, + /* ARM processors have a weak memory model */ + .guest_default_memory_order = 0, + .initialize = arm_translate_init, .translate_code = arm_translate_code, + .get_tb_cpu_state = arm_get_tb_cpu_state, .synchronize_from_tb = arm_cpu_synchronize_from_tb, .debug_excp_handler = arm_debug_excp_handler, .restore_state_to_opc = arm_restore_state_to_opc, + .mmu_index = arm_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = arm_cpu_record_sigsegv, .record_sigbus = arm_cpu_record_sigbus, + .untagged_addr = aarch64_untagged_addr, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = aprofile_pointer_wrap, .cpu_exec_interrupt = arm_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, + .cpu_exec_reset = cpu_reset, .do_interrupt = arm_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, @@ -2692,7 +2366,7 @@ static const TCGCPUOps arm_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void arm_cpu_class_init(ObjectClass *oc, void *data) +static void arm_cpu_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(acc); @@ -2708,7 +2382,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->class_by_name = arm_cpu_class_by_name; - cc->mmu_index = arm_cpu_mmu_index; cc->dump_state = arm_cpu_dump_state; cc->set_pc = arm_cpu_set_pc; cc->get_pc = arm_cpu_get_pc; @@ -2718,6 +2391,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->sysemu_ops = &arm_sysemu_ops; #endif cc->gdb_arch_name = arm_gdb_arch_name; + cc->gdb_get_core_xml_file = arm_gdb_get_core_xml_file; cc->gdb_stop_before_watchpoint = true; cc->disas_set_info = arm_disas_set_info; @@ -2734,13 +2408,12 @@ static void arm_cpu_instance_init(Object *obj) arm_cpu_post_init(obj); } -static void cpu_register_class_init(ObjectClass *oc, void *data) +static void cpu_register_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = 
CPU_CLASS(acc); acc->info = data; - cc->gdb_core_xml_file = "arm-core.xml"; if (acc->info->deprecation_note) { cc->deprecation_note = acc->info->deprecation_note; } @@ -2752,7 +2425,7 @@ void arm_cpu_register(const ARMCPUInfo *info) .parent = TYPE_ARM_CPU, .instance_init = arm_cpu_instance_init, .class_init = info->class_init ?: cpu_register_class_init, - .class_data = (void *)info, + .class_data = info, }; type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a8177c6..bf221e6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -24,16 +24,16 @@ #include "qemu/cpu-float.h" #include "hw/registerfields.h" #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "exec/gdbstub.h" #include "exec/page-protection.h" #include "qapi/qapi-types-common.h" #include "target/arm/multiprocessing.h" #include "target/arm/gtimer.h" - -#ifdef TARGET_AARCH64 -#define KVM_HAVE_MCE_INJECTION 1 -#endif +#include "target/arm/cpu-sysregs.h" +#include "target/arm/mmuidx.h" #define EXCP_UDEF 1 /* undefined instruction */ #define EXCP_SWI 2 /* software interrupt */ @@ -100,12 +100,6 @@ #define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) #endif -/* ARM-specific extra insn start words: - * 1: Conditional execution bits - * 2: Partial exception syndrome for data aborts - */ -#define TARGET_INSN_START_EXTRA_WORDS 2 - /* The 2nd extra word holding syndrome info for data aborts does not use * the upper 6 bits nor the lower 13 bits. We mask and shift it down to * help the sleb128 encoder do a better job. @@ -171,17 +165,12 @@ typedef struct ARMGenericTimer { * Align the data for use with TCG host vector operations. */ -#ifdef TARGET_AARCH64 -# define ARM_MAX_VQ 16 -#else -# define ARM_MAX_VQ 1 -#endif +#define ARM_MAX_VQ 16 typedef struct ARMVectorReg { uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16); } ARMVectorReg; -#ifdef TARGET_AARCH64 /* In AArch32 mode, predicate registers do not exist at all. */ typedef struct ARMPredicateReg { uint64_t p[DIV_ROUND_UP(2 * ARM_MAX_VQ, 8)] QEMU_ALIGNED(16); @@ -191,12 +180,11 @@ typedef struct ARMPredicateReg { typedef struct ARMPACKey { uint64_t lo, hi; } ARMPACKey; -#endif /* See the commentary above the TBFLAG field definitions. */ typedef struct CPUARMTBFlags { uint32_t flags; - target_ulong flags2; + uint64_t flags2; } CPUARMTBFlags; typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; @@ -220,6 +208,8 @@ typedef struct NVICState NVICState; * when FPCR.AH == 1 (bfloat16 conversions and multiplies, * and the reciprocal and square root estimate/step insns); * for half-precision + * ZA: the "streaming sve" fp status. + * ZA_F16: likewise for half-precision. * * Half-precision operations are governed by a separate * flush-to-zero control bit in FPSCR:FZ16. We pass a separate @@ -240,6 +230,12 @@ typedef struct NVICState NVICState; * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, * which means we need an FPST_AH_F16 as well. * + * The "ZA" float_status are for Streaming SVE operations which use + * default-NaN and do not generate fp exceptions, which means that they + * do not accumulate exception bits back into FPCR. + * See e.g. FPAdd vs FPAdd_ZA pseudocode functions, and the setting + * of fpcr.DN and fpexec parameters. + * * To avoid having to transfer exception bits around, we simply * say that the FPSCR cumulative exception flags are the logical * OR of the flags in the four fp statuses. 
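The accumulation scheme in the comment above can be sketched in a few lines. The struct and flag width below are toy stand-ins, not QEMU's float_status, and the participating-status set is per the comment (the ZA statuses are excluded).

#include <stdint.h>

/* Toy status with softfloat-style sticky exception flags. */
struct mini_fp_status { uint8_t exception_flags; };

/*
 * The architectural cumulative FPSR flags are simply the OR of the
 * participating statuses, so individual operations never need to copy
 * flags between flavours.
 */
static uint8_t cumulative_fp_flags(const struct mini_fp_status *st, int n)
{
    uint8_t acc = 0;
    for (int i = 0; i < n; i++) {
        acc |= st[i].exception_flags;
    }
    return acc;
}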
This relies on the @@ -253,10 +249,12 @@ typedef enum ARMFPStatusFlavour { FPST_A64_F16, FPST_AH, FPST_AH_F16, + FPST_ZA, + FPST_ZA_F16, FPST_STD, FPST_STD_F16, } ARMFPStatusFlavour; -#define FPST_COUNT 8 +#define FPST_COUNT 10 typedef struct CPUArchState { /* Regs for current mode. */ @@ -270,7 +268,7 @@ typedef struct CPUArchState { uint64_t xregs[32]; uint64_t pc; /* PSTATE isn't an architectural register for ARMv8. However, it is - * convenient for us to assemble the underlying state into a 32 bit format + * convenient for us to assemble the underlying state into a 64 bit format * identical to the architectural format used for the SPSR. (This is also * what the Linux kernel's 'pstate' field in signal handlers and KVM's * 'pstate' register are.) Of the PSTATE bits: @@ -282,7 +280,7 @@ typedef struct CPUArchState { * SM and ZA are kept in env->svcr * all other bits are stored in their correct places in env->pstate */ - uint32_t pstate; + uint64_t pstate; bool aarch64; /* True if CPU is in aarch64 state; inverse of PSTATE.nRW */ bool thumb; /* True if CPU is in thumb mode; cpsr[5] */ @@ -340,10 +338,10 @@ typedef struct CPUArchState { }; uint64_t sctlr_el[4]; }; + uint64_t sctlr2_el[4]; /* Extension to System control register. */ uint64_t vsctlr; /* Virtualization System control register. */ uint64_t cpacr_el1; /* Architectural feature access control register */ uint64_t cptr_el[4]; /* ARMv8 feature trap registers */ - uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */ uint64_t sder; /* Secure debug enable register. */ uint32_t nsacr; /* Non-secure access control register. */ union { /* MMU translation table base 0. */ @@ -368,8 +366,12 @@ typedef struct CPUArchState { uint64_t vsttbr_el2; /* Secure Virtualization Translation Table. */ /* MMU translation table base control. */ uint64_t tcr_el[4]; + uint64_t tcr2_el[3]; uint64_t vtcr_el2; /* Virtualization Translation Control. */ uint64_t vstcr_el2; /* Secure Virtualization Translation Control. */ + uint64_t pir_el[4]; /* PIRE0_EL1, PIR_EL1, PIR_EL2, PIR_EL3 */ + uint64_t pire0_el2; + uint64_t s2pir_el2; uint32_t c2_data; /* MPU data cacheable bits. */ uint32_t c2_insn; /* MPU instruction cacheable bits. */ union { /* MMU domain access control register @@ -514,7 +516,6 @@ typedef struct CPUArchState { uint64_t cntvoff_el2; /* Counter Virtual Offset register */ uint64_t cntpoff_el2; /* Counter Physical Offset register */ ARMGenericTimer c14_timer[NUM_GTIMERS]; - uint32_t c15_cpar; /* XScale Coprocessor Access Register */ uint32_t c15_ticonfig; /* TI925T configuration byte. */ uint32_t c15_i_max; /* Maximum D-cache dirty line index. */ uint32_t c15_i_min; /* Minimum D-cache dirty line index. */ @@ -579,6 +580,18 @@ typedef struct CPUArchState { /* NV2 register */ uint64_t vncr_el2; + + uint64_t gcscr_el[4]; /* GCSCRE0_EL1, GCSCR_EL[123] */ + uint64_t gcspr_el[4]; /* GCSPR_EL[0123] */ + + /* MEC registers */ + uint64_t mecid_p0_el2; + uint64_t mecid_a0_el2; + uint64_t mecid_p1_el2; + uint64_t mecid_a1_el2; + uint64_t mecid_rl_a_el3; + uint64_t vmecid_p_el2; + uint64_t vmecid_a_el2; } cp15; struct { @@ -633,13 +646,10 @@ typedef struct CPUArchState { * entry process. 
*/ struct { - uint32_t syndrome; /* AArch64 format syndrome register */ - uint32_t fsr; /* AArch32 format fault status register info */ + uint64_t syndrome; /* AArch64 format syndrome register */ uint64_t vaddress; /* virtual addr associated with exception, if any */ + uint32_t fsr; /* AArch32 format fault status register info */ uint32_t target_el; /* EL the exception should be targeted for */ - /* If we implement EL2 we will also need to store information - * about the intermediate physical address for stage 2 faults. - */ } exception; /* Information associated with an SError */ @@ -662,13 +672,11 @@ typedef struct CPUArchState { struct { ARMVectorReg zregs[32]; -#ifdef TARGET_AARCH64 /* Store FFR as pregs[16] to make it easier to treat as any other. */ #define FFR_PRED_NUM 16 ARMPredicateReg pregs[17]; /* Scratch space for aa64 sve predicate temporary. */ ARMPredicateReg preg_tmp; -#endif /* We store these fpcsr fields separately for convenience. */ uint32_t qc[4] QEMU_ALIGNED(16); @@ -684,9 +692,6 @@ typedef struct CPUArchState { uint32_t xregs[16]; - /* Scratch space for aa32 neon expansion. */ - uint32_t scratch[8]; - /* There are a number of distinct float control structures. */ float_status fp_status[FPST_COUNT]; @@ -705,15 +710,6 @@ typedef struct CPUArchState { */ uint64_t exclusive_high; - /* iwMMXt coprocessor state. */ - struct { - uint64_t regs[16]; - uint64_t val; - - uint32_t cregs[16]; - } iwmmxt; - -#ifdef TARGET_AARCH64 struct { ARMPACKey apia; ARMPACKey apib; @@ -724,28 +720,36 @@ typedef struct CPUArchState { uint64_t scxtnum_el[4]; - /* - * SME ZA storage -- 256 x 256 byte array, with bytes in host word order, - * as we do with vfp.zregs[]. This corresponds to the architectural ZA - * array, where ZA[N] is in the least-significant bytes of env->zarray[N]. - * When SVL is less than the architectural maximum, the accessible - * storage is restricted, such that if the SVL is X bytes the guest can - * see only the bottom X elements of zarray[], and only the least - * significant X bytes of each element of the array. (In other words, - * the observable part is always square.) - * - * The ZA storage can also be considered as a set of square tiles of - * elements of different sizes. The mapping from tiles to the ZA array - * is architecturally defined, such that for tiles of elements of esz - * bytes, the Nth row (or "horizontal slice") of tile T is in - * ZA[T + N * esz]. Note that this means that each tile is not contiguous - * in the ZA storage, because its rows are striped through the ZA array. - * - * Because this is so large, keep this toward the end of the reset area, - * to keep the offsets into the rest of the structure smaller. - */ - ARMVectorReg zarray[ARM_MAX_VQ * 16]; -#endif + struct { + /* SME2 ZT0 -- 512 bit array, with data ordered like ARMVectorReg. */ + uint64_t zt0[512 / 64] QEMU_ALIGNED(16); + + /* + * SME ZA storage -- 256 x 256 byte array, with bytes in host + * word order, as we do with vfp.zregs[]. This corresponds to + * the architectural ZA array, where ZA[N] is in the least + * significant bytes of env->za_state.za[N]. + * + * When SVL is less than the architectural maximum, the accessible + * storage is restricted, such that if the SVL is X bytes the guest + * can see only the bottom X elements of zarray[], and only the least + * significant X bytes of each element of the array. (In other words, + * the observable part is always square.) + * + * The ZA storage can also be considered as a set of square tiles of + * elements of different sizes. 
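The striping that this comment goes on to define (row N of tile T lives at ZA[T + N * esz]) is easy to misread, so here is a stand-alone sketch of the row addressing, using the architectural maximum of a 256-byte SVL. Names are local to the illustration.

#include <stdint.h>

enum { ZA_MAX_BYTES = 256 };    /* architectural maximum SVL in bytes */

/*
 * Row N ("horizontal slice") of tile T, for esz-byte elements, starts
 * at za[T + N * esz]: consecutive rows of one tile are esz array
 * entries apart, so a tile is striped through the ZA array rather
 * than stored contiguously.
 */
static uint8_t *za_tile_row(uint8_t za[ZA_MAX_BYTES][ZA_MAX_BYTES],
                            unsigned T, unsigned N, unsigned esz)
{
    /* There are esz tiles of esz-byte elements, each with SVL/esz rows. */
    return za[T + N * esz];
}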
The mapping from tiles to the ZA array + * is architecturally defined, such that for tiles of elements of esz + * bytes, the Nth row (or "horizontal slice") of tile T is in + * ZA[T + N * esz]. Note that this means that each tile is not + * contiguous in the ZA storage, because its rows are striped through + * the ZA array. + * + * Because this is so large, keep this toward the end of the + * reset area, to keep the offsets into the rest of the structure + * smaller. + */ + ARMVectorReg za[ARM_MAX_VQ * 16]; + } za_state; struct CPUBreakpoint *cpu_breakpoint[16]; struct CPUWatchpoint *cpu_watchpoint[16]; @@ -801,12 +805,9 @@ typedef struct CPUArchState { #else /* CONFIG_USER_ONLY */ /* For usermode syscall translation. */ bool eabi; -#endif /* CONFIG_USER_ONLY */ - -#ifdef TARGET_TAGGED_ADDRESSES /* Linux syscall tagged address support */ bool tagged_addr_enable; -#endif +#endif /* CONFIG_USER_ONLY */ } CPUARMState; static inline void set_feature(CPUARMState *env, int feature) @@ -855,6 +856,53 @@ typedef struct { uint32_t map, init, supported; } ARMVQMap; +/* REG is ID_XXX */ +#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP64(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_DP32_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP32(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_EX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_EX32_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX32(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_SEX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_SEX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define SET_IDREG(ISAR, REG, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX] = VALUE; \ + }) + +#define GET_IDREG(ISAR, REG) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX]; \ + }) + /** * ARMCPU: * @env: #CPUARMState @@ -890,6 +938,7 @@ struct ArchCPU { DynamicGDBFeatureInfo dyn_sysreg_feature; DynamicGDBFeatureInfo dyn_svereg_feature; + DynamicGDBFeatureInfo dyn_smereg_feature; DynamicGDBFeatureInfo dyn_m_systemreg_feature; DynamicGDBFeatureInfo dyn_m_secextreg_feature; @@ -973,7 +1022,6 @@ struct ArchCPU { */ uint32_t kvm_target; -#ifdef CONFIG_KVM /* KVM init features for this CPU */ uint32_t kvm_init_features[7]; @@ -986,7 +1034,6 @@ struct ArchCPU { /* KVM steal time */ OnOffAuto kvm_steal_time; -#endif /* CONFIG_KVM */ /* Uniprocessor system with MP extensions */ bool mp_is_up; @@ -1025,44 +1072,14 @@ struct ArchCPU { * field by reading the value from the KVM vCPU. 
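The FIELD_DP64_IDREG/GET_IDREG family defined above reduces to a read-modify-write on the idregs[] array. A stand-alone sketch of what one invocation expands to, with a local deposit helper; the index constant is illustrative, while the EL2 field position [11:8] matches the FIELD(ID_AA64PFR0, EL2, 8, 4) definition elsewhere in this patch.

#include <stdint.h>

/* Stand-in for QEMU's deposit64(). */
static inline uint64_t deposit64_local(uint64_t v, int start, int len,
                                       uint64_t field)
{
    uint64_t mask = (~0ULL >> (64 - len)) << start;
    return (v & ~mask) | ((field << start) & mask);
}

enum { ID_AA64PFR0_EL1_IDX_SKETCH = 0, NUM_IDX_SKETCH = 1 };
static uint64_t idregs[NUM_IDX_SKETCH];

/* FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL2, 0) boils down to this: */
static void clear_el2(void)
{
    uint64_t regval = idregs[ID_AA64PFR0_EL1_IDX_SKETCH];
    regval = deposit64_local(regval, 8, 4, 0);  /* ID_AA64PFR0.EL2 = 0 */
    idregs[ID_AA64PFR0_EL1_IDX_SKETCH] = regval;
}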
*/ struct ARMISARegisters { - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint32_t id_mmfr0; - uint32_t id_mmfr1; - uint32_t id_mmfr2; - uint32_t id_mmfr3; - uint32_t id_mmfr4; - uint32_t id_mmfr5; - uint32_t id_pfr0; - uint32_t id_pfr1; - uint32_t id_pfr2; uint32_t mvfr0; uint32_t mvfr1; uint32_t mvfr2; - uint32_t id_dfr0; - uint32_t id_dfr1; uint32_t dbgdidr; uint32_t dbgdevid; uint32_t dbgdevid1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; - uint64_t id_aa64isar2; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; - uint64_t id_aa64mmfr0; - uint64_t id_aa64mmfr1; - uint64_t id_aa64mmfr2; - uint64_t id_aa64mmfr3; - uint64_t id_aa64dfr0; - uint64_t id_aa64dfr1; - uint64_t id_aa64zfr0; - uint64_t id_aa64smfr0; uint64_t reset_pmcr_el0; + uint64_t idregs[NUM_ID_IDX]; } isar; uint64_t midr; uint32_t revidr; @@ -1071,10 +1088,6 @@ struct ArchCPU { uint32_t reset_sctlr; uint64_t pmceid0; uint64_t pmceid1; - uint32_t id_afr0; - uint64_t id_aa64afr0; - uint64_t id_aa64afr1; - uint64_t clidr; uint64_t mp_affinity; /* MP ID without feature bits */ /* The elements of this array are the CCSIDR values for each cache, * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc. @@ -1125,6 +1138,7 @@ struct ArchCPU { /* Used to set the maximum vector length the cpu will support. */ uint32_t sve_max_vq; + uint32_t sme_max_vq; #ifdef CONFIG_USER_ONLY /* Used to set the default vector length at process start. */ @@ -1143,7 +1157,7 @@ typedef struct ARMCPUInfo { const char *name; const char *deprecation_note; void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); + void (*class_init)(ObjectClass *oc, const void *data); } ARMCPUInfo; /** @@ -1161,10 +1175,6 @@ struct ARMCPUClass { ResettablePhases parent_phases; }; -struct AArch64CPUClass { - ARMCPUClass parent_class; -}; - /* Callback functions for the generic timer's timers. 
*/ void arm_gt_ptimer_cb(void *opaque); void arm_gt_vtimer_cb(void *opaque); @@ -1177,8 +1187,6 @@ void arm_gt_sel2vtimer_cb(void *opaque); unsigned int gt_cntfrq_period_ns(ARMCPU *cpu); void gt_rme_post_el_change(ARMCPU *cpu, void *opaque); -void arm_cpu_post_init(Object *obj); - #define ARM_AFF0_SHIFT 0 #define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT) #define ARM_AFF1_SHIFT 8 @@ -1236,7 +1244,6 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, */ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el); -#ifdef TARGET_AARCH64 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); @@ -1268,13 +1275,6 @@ static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) #endif } -#else -static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } -static inline void aarch64_sve_change_el(CPUARMState *env, int o, - int n, bool a) -{ } -#endif - void aarch64_sync_32_to_64(CPUARMState *env); void aarch64_sync_64_to_32(CPUARMState *env); @@ -1426,6 +1426,19 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */ #define SCTLR_TIDCP (1ULL << 63) /* FEAT_TIDCP1 */ +#define SCTLR2_EMEC (1ULL << 1) /* FEAT_MEC */ +#define SCTLR2_NMEA (1ULL << 2) /* FEAT_DoubleFault2 */ +#define SCTLR2_ENADERR (1ULL << 3) /* FEAT_ADERR */ +#define SCTLR2_ENANERR (1ULL << 4) /* FEAT_ANERR */ +#define SCTLR2_EASE (1ULL << 5) /* FEAT_DoubleFault2 */ +#define SCTLR2_ENIDCP128 (1ULL << 6) /* FEAT_SYSREG128 */ +#define SCTLR2_ENPACM (1ULL << 7) /* FEAT_PAuth_LR */ +#define SCTLR2_ENPACM0 (1ULL << 8) /* FEAT_PAuth_LR */ +#define SCTLR2_CPTA (1ULL << 9) /* FEAT_CPA2 */ +#define SCTLR2_CPTA0 (1ULL << 10) /* FEAT_CPA2 */ +#define SCTLR2_CPTM (1ULL << 11) /* FEAT_CPA2 */ +#define SCTLR2_CPTM0 (1ULL << 12) /* FEAT_CAP2 */ + #define CPSR_M (0x1fU) #define CPSR_T (1U << 5) #define CPSR_F (1U << 6) @@ -1498,6 +1511,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) #define PSTATE_N (1U << 31) +#define PSTATE_EXLOCK (1ULL << 34) #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V) #define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F) #define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF | PSTATE_BTYPE) @@ -1516,6 +1530,7 @@ FIELD(SVCR, ZA, 1, 1) /* Fields for SMCR_ELx. */ FIELD(SMCR, LEN, 0, 4) +FIELD(SMCR, EZT0, 30, 1) FIELD(SMCR, FA64, 31, 1) /* Write a new value to v7m.exception, thus transitioning into or out @@ -1533,7 +1548,7 @@ static inline unsigned int aarch64_pstate_mode(unsigned int el, bool handler) * interprocessing, so we don't attempt to sync with the cpsr state used by * the 32 bit decoder. 
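PSTATE_EXLOCK above is, as far as this patch shows, the first PSTATE bit placed above bit 31, which is consistent with env->pstate and the pstate_read/pstate_write accessors below being widened to 64 bits. A two-line check for the new bit:

#include <stdint.h>

#define PSTATE_EXLOCK_SKETCH (1ULL << 34)   /* must be ULL: bit 34 > 31 */

static int pstate_exlock(uint64_t pstate)
{
    return (pstate & PSTATE_EXLOCK_SKETCH) != 0;
}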
*/ -static inline uint32_t pstate_read(CPUARMState *env) +static inline uint64_t pstate_read(CPUARMState *env) { int ZF; @@ -1543,7 +1558,7 @@ static inline uint32_t pstate_read(CPUARMState *env) | env->pstate | env->daif | (env->btype << 10); } -static inline void pstate_write(CPUARMState *env, uint32_t val) +static inline void pstate_write(CPUARMState *env, uint64_t val) { env->ZF = (~val) & PSTATE_Z; env->NF = val; @@ -1715,11 +1730,24 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define SCR_ENAS0 (1ULL << 36) #define SCR_ADEN (1ULL << 37) #define SCR_HXEN (1ULL << 38) +#define SCR_GCSEN (1ULL << 39) #define SCR_TRNDR (1ULL << 40) #define SCR_ENTP2 (1ULL << 41) +#define SCR_TCR2EN (1ULL << 43) +#define SCR_SCTLR2EN (1ULL << 44) +#define SCR_PIEN (1ULL << 45) #define SCR_GPF (1ULL << 48) +#define SCR_MECEN (1ULL << 49) #define SCR_NSE (1ULL << 62) +/* GCSCR_ELx fields */ +#define GCSCR_PCRSEL (1ULL << 0) +#define GCSCR_RVCHKEN (1ULL << 5) +#define GCSCR_EXLOCKEN (1ULL << 6) +#define GCSCR_PUSHMEN (1ULL << 8) +#define GCSCR_STREN (1ULL << 9) +#define GCSCRE0_NTR (1ULL << 10) + /* Return the current FPSCR value. */ uint32_t vfp_get_fpscr(CPUARMState *env); void vfp_set_fpscr(CPUARMState *env, uint32_t val); @@ -1853,16 +1881,6 @@ enum arm_cpu_mode { /* QEMU-internal value meaning "FPSCR, but we care only about NZCV" */ #define QEMU_VFP_FPSCR_NZCV 0xffff -/* iwMMXt coprocessor control registers. */ -#define ARM_IWMMXT_wCID 0 -#define ARM_IWMMXT_wCon 1 -#define ARM_IWMMXT_wCSSF 2 -#define ARM_IWMMXT_wCASF 3 -#define ARM_IWMMXT_wCGR0 8 -#define ARM_IWMMXT_wCGR1 9 -#define ARM_IWMMXT_wCGR2 10 -#define ARM_IWMMXT_wCGR3 11 - /* V7M CCR bits */ FIELD(V7M_CCR, NONBASETHRDENA, 0, 1) FIELD(V7M_CCR, USERSETMPEND, 1, 1) @@ -2001,423 +2019,20 @@ FIELD(V7M_VPR, P0, 0, 16) FIELD(V7M_VPR, MASK01, 16, 4) FIELD(V7M_VPR, MASK23, 20, 4) -/* - * System register ID fields. 
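pstate_read() above assembles the architectural NZCV nibble from QEMU's cached representation: N is the sign bit of NF, Z is true when ZF is zero (note pstate_write storing "(~val) & PSTATE_Z" into ZF), C is a 0/1 value, and V is the sign bit of VF. A sketch of that assembly with local names:

#include <stdint.h>

struct cached_flags { uint32_t NF, ZF, CF, VF; };

static uint64_t nzcv_read(const struct cached_flags *f)
{
    uint64_t r = f->NF & 0x80000000u;   /* N = sign bit of NF */
    if (f->ZF == 0) {
        r |= 1u << 30;                  /* Z: set iff ZF == 0 */
    }
    r |= (f->CF & 1u) << 29;            /* C: stored as 0 or 1 */
    r |= (f->VF & 0x80000000u) >> 3;    /* V: sign bit of VF -> bit 28 */
    return r;
}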
- */ -FIELD(CLIDR_EL1, CTYPE1, 0, 3) -FIELD(CLIDR_EL1, CTYPE2, 3, 3) -FIELD(CLIDR_EL1, CTYPE3, 6, 3) -FIELD(CLIDR_EL1, CTYPE4, 9, 3) -FIELD(CLIDR_EL1, CTYPE5, 12, 3) -FIELD(CLIDR_EL1, CTYPE6, 15, 3) -FIELD(CLIDR_EL1, CTYPE7, 18, 3) -FIELD(CLIDR_EL1, LOUIS, 21, 3) -FIELD(CLIDR_EL1, LOC, 24, 3) -FIELD(CLIDR_EL1, LOUU, 27, 3) -FIELD(CLIDR_EL1, ICB, 30, 3) - -/* When FEAT_CCIDX is implemented */ -FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3) -FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21) -FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24) - -/* When FEAT_CCIDX is not implemented */ -FIELD(CCSIDR_EL1, LINESIZE, 0, 3) -FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10) -FIELD(CCSIDR_EL1, NUMSETS, 13, 15) - -FIELD(CTR_EL0, IMINLINE, 0, 4) -FIELD(CTR_EL0, L1IP, 14, 2) -FIELD(CTR_EL0, DMINLINE, 16, 4) -FIELD(CTR_EL0, ERG, 20, 4) -FIELD(CTR_EL0, CWG, 24, 4) -FIELD(CTR_EL0, IDC, 28, 1) -FIELD(CTR_EL0, DIC, 29, 1) -FIELD(CTR_EL0, TMINLINE, 32, 6) - -FIELD(MIDR_EL1, REVISION, 0, 4) -FIELD(MIDR_EL1, PARTNUM, 4, 12) -FIELD(MIDR_EL1, ARCHITECTURE, 16, 4) -FIELD(MIDR_EL1, VARIANT, 20, 4) -FIELD(MIDR_EL1, IMPLEMENTER, 24, 8) - -FIELD(ID_ISAR0, SWAP, 0, 4) -FIELD(ID_ISAR0, BITCOUNT, 4, 4) -FIELD(ID_ISAR0, BITFIELD, 8, 4) -FIELD(ID_ISAR0, CMPBRANCH, 12, 4) -FIELD(ID_ISAR0, COPROC, 16, 4) -FIELD(ID_ISAR0, DEBUG, 20, 4) -FIELD(ID_ISAR0, DIVIDE, 24, 4) - -FIELD(ID_ISAR1, ENDIAN, 0, 4) -FIELD(ID_ISAR1, EXCEPT, 4, 4) -FIELD(ID_ISAR1, EXCEPT_AR, 8, 4) -FIELD(ID_ISAR1, EXTEND, 12, 4) -FIELD(ID_ISAR1, IFTHEN, 16, 4) -FIELD(ID_ISAR1, IMMEDIATE, 20, 4) -FIELD(ID_ISAR1, INTERWORK, 24, 4) -FIELD(ID_ISAR1, JAZELLE, 28, 4) - -FIELD(ID_ISAR2, LOADSTORE, 0, 4) -FIELD(ID_ISAR2, MEMHINT, 4, 4) -FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4) -FIELD(ID_ISAR2, MULT, 12, 4) -FIELD(ID_ISAR2, MULTS, 16, 4) -FIELD(ID_ISAR2, MULTU, 20, 4) -FIELD(ID_ISAR2, PSR_AR, 24, 4) -FIELD(ID_ISAR2, REVERSAL, 28, 4) - -FIELD(ID_ISAR3, SATURATE, 0, 4) -FIELD(ID_ISAR3, SIMD, 4, 4) -FIELD(ID_ISAR3, SVC, 8, 4) -FIELD(ID_ISAR3, SYNCHPRIM, 12, 4) -FIELD(ID_ISAR3, TABBRANCH, 16, 4) -FIELD(ID_ISAR3, T32COPY, 20, 4) -FIELD(ID_ISAR3, TRUENOP, 24, 4) -FIELD(ID_ISAR3, T32EE, 28, 4) - -FIELD(ID_ISAR4, UNPRIV, 0, 4) -FIELD(ID_ISAR4, WITHSHIFTS, 4, 4) -FIELD(ID_ISAR4, WRITEBACK, 8, 4) -FIELD(ID_ISAR4, SMC, 12, 4) -FIELD(ID_ISAR4, BARRIER, 16, 4) -FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4) -FIELD(ID_ISAR4, PSR_M, 24, 4) -FIELD(ID_ISAR4, SWP_FRAC, 28, 4) - -FIELD(ID_ISAR5, SEVL, 0, 4) -FIELD(ID_ISAR5, AES, 4, 4) -FIELD(ID_ISAR5, SHA1, 8, 4) -FIELD(ID_ISAR5, SHA2, 12, 4) -FIELD(ID_ISAR5, CRC32, 16, 4) -FIELD(ID_ISAR5, RDM, 24, 4) -FIELD(ID_ISAR5, VCMA, 28, 4) - -FIELD(ID_ISAR6, JSCVT, 0, 4) -FIELD(ID_ISAR6, DP, 4, 4) -FIELD(ID_ISAR6, FHM, 8, 4) -FIELD(ID_ISAR6, SB, 12, 4) -FIELD(ID_ISAR6, SPECRES, 16, 4) -FIELD(ID_ISAR6, BF16, 20, 4) -FIELD(ID_ISAR6, I8MM, 24, 4) - -FIELD(ID_MMFR0, VMSA, 0, 4) -FIELD(ID_MMFR0, PMSA, 4, 4) -FIELD(ID_MMFR0, OUTERSHR, 8, 4) -FIELD(ID_MMFR0, SHARELVL, 12, 4) -FIELD(ID_MMFR0, TCM, 16, 4) -FIELD(ID_MMFR0, AUXREG, 20, 4) -FIELD(ID_MMFR0, FCSE, 24, 4) -FIELD(ID_MMFR0, INNERSHR, 28, 4) - -FIELD(ID_MMFR1, L1HVDVA, 0, 4) -FIELD(ID_MMFR1, L1UNIVA, 4, 4) -FIELD(ID_MMFR1, L1HVDSW, 8, 4) -FIELD(ID_MMFR1, L1UNISW, 12, 4) -FIELD(ID_MMFR1, L1HVD, 16, 4) -FIELD(ID_MMFR1, L1UNI, 20, 4) -FIELD(ID_MMFR1, L1TSTCLN, 24, 4) -FIELD(ID_MMFR1, BPRED, 28, 4) - -FIELD(ID_MMFR2, L1HVDFG, 0, 4) -FIELD(ID_MMFR2, L1HVDBG, 4, 4) -FIELD(ID_MMFR2, L1HVDRNG, 8, 4) -FIELD(ID_MMFR2, HVDTLB, 12, 4) -FIELD(ID_MMFR2, UNITLB, 16, 4) -FIELD(ID_MMFR2, MEMBARR, 20, 4) -FIELD(ID_MMFR2, WFISTALL, 24, 4) 
-FIELD(ID_MMFR2, HWACCFLG, 28, 4) - -FIELD(ID_MMFR3, CMAINTVA, 0, 4) -FIELD(ID_MMFR3, CMAINTSW, 4, 4) -FIELD(ID_MMFR3, BPMAINT, 8, 4) -FIELD(ID_MMFR3, MAINTBCST, 12, 4) -FIELD(ID_MMFR3, PAN, 16, 4) -FIELD(ID_MMFR3, COHWALK, 20, 4) -FIELD(ID_MMFR3, CMEMSZ, 24, 4) -FIELD(ID_MMFR3, SUPERSEC, 28, 4) - -FIELD(ID_MMFR4, SPECSEI, 0, 4) -FIELD(ID_MMFR4, AC2, 4, 4) -FIELD(ID_MMFR4, XNX, 8, 4) -FIELD(ID_MMFR4, CNP, 12, 4) -FIELD(ID_MMFR4, HPDS, 16, 4) -FIELD(ID_MMFR4, LSM, 20, 4) -FIELD(ID_MMFR4, CCIDX, 24, 4) -FIELD(ID_MMFR4, EVT, 28, 4) - -FIELD(ID_MMFR5, ETS, 0, 4) -FIELD(ID_MMFR5, NTLBPA, 4, 4) - -FIELD(ID_PFR0, STATE0, 0, 4) -FIELD(ID_PFR0, STATE1, 4, 4) -FIELD(ID_PFR0, STATE2, 8, 4) -FIELD(ID_PFR0, STATE3, 12, 4) -FIELD(ID_PFR0, CSV2, 16, 4) -FIELD(ID_PFR0, AMU, 20, 4) -FIELD(ID_PFR0, DIT, 24, 4) -FIELD(ID_PFR0, RAS, 28, 4) - -FIELD(ID_PFR1, PROGMOD, 0, 4) -FIELD(ID_PFR1, SECURITY, 4, 4) -FIELD(ID_PFR1, MPROGMOD, 8, 4) -FIELD(ID_PFR1, VIRTUALIZATION, 12, 4) -FIELD(ID_PFR1, GENTIMER, 16, 4) -FIELD(ID_PFR1, SEC_FRAC, 20, 4) -FIELD(ID_PFR1, VIRT_FRAC, 24, 4) -FIELD(ID_PFR1, GIC, 28, 4) - -FIELD(ID_PFR2, CSV3, 0, 4) -FIELD(ID_PFR2, SSBS, 4, 4) -FIELD(ID_PFR2, RAS_FRAC, 8, 4) - -FIELD(ID_AA64ISAR0, AES, 4, 4) -FIELD(ID_AA64ISAR0, SHA1, 8, 4) -FIELD(ID_AA64ISAR0, SHA2, 12, 4) -FIELD(ID_AA64ISAR0, CRC32, 16, 4) -FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) -FIELD(ID_AA64ISAR0, TME, 24, 4) -FIELD(ID_AA64ISAR0, RDM, 28, 4) -FIELD(ID_AA64ISAR0, SHA3, 32, 4) -FIELD(ID_AA64ISAR0, SM3, 36, 4) -FIELD(ID_AA64ISAR0, SM4, 40, 4) -FIELD(ID_AA64ISAR0, DP, 44, 4) -FIELD(ID_AA64ISAR0, FHM, 48, 4) -FIELD(ID_AA64ISAR0, TS, 52, 4) -FIELD(ID_AA64ISAR0, TLB, 56, 4) -FIELD(ID_AA64ISAR0, RNDR, 60, 4) - -FIELD(ID_AA64ISAR1, DPB, 0, 4) -FIELD(ID_AA64ISAR1, APA, 4, 4) -FIELD(ID_AA64ISAR1, API, 8, 4) -FIELD(ID_AA64ISAR1, JSCVT, 12, 4) -FIELD(ID_AA64ISAR1, FCMA, 16, 4) -FIELD(ID_AA64ISAR1, LRCPC, 20, 4) -FIELD(ID_AA64ISAR1, GPA, 24, 4) -FIELD(ID_AA64ISAR1, GPI, 28, 4) -FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) -FIELD(ID_AA64ISAR1, SB, 36, 4) -FIELD(ID_AA64ISAR1, SPECRES, 40, 4) -FIELD(ID_AA64ISAR1, BF16, 44, 4) -FIELD(ID_AA64ISAR1, DGH, 48, 4) -FIELD(ID_AA64ISAR1, I8MM, 52, 4) -FIELD(ID_AA64ISAR1, XS, 56, 4) -FIELD(ID_AA64ISAR1, LS64, 60, 4) - -FIELD(ID_AA64ISAR2, WFXT, 0, 4) -FIELD(ID_AA64ISAR2, RPRES, 4, 4) -FIELD(ID_AA64ISAR2, GPA3, 8, 4) -FIELD(ID_AA64ISAR2, APA3, 12, 4) -FIELD(ID_AA64ISAR2, MOPS, 16, 4) -FIELD(ID_AA64ISAR2, BC, 20, 4) -FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) -FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) -FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) -FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) -FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) -FIELD(ID_AA64ISAR2, RPRFM, 48, 4) -FIELD(ID_AA64ISAR2, CSSC, 52, 4) -FIELD(ID_AA64ISAR2, ATS1A, 60, 4) - -FIELD(ID_AA64PFR0, EL0, 0, 4) -FIELD(ID_AA64PFR0, EL1, 4, 4) -FIELD(ID_AA64PFR0, EL2, 8, 4) -FIELD(ID_AA64PFR0, EL3, 12, 4) -FIELD(ID_AA64PFR0, FP, 16, 4) -FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) -FIELD(ID_AA64PFR0, GIC, 24, 4) -FIELD(ID_AA64PFR0, RAS, 28, 4) -FIELD(ID_AA64PFR0, SVE, 32, 4) -FIELD(ID_AA64PFR0, SEL2, 36, 4) -FIELD(ID_AA64PFR0, MPAM, 40, 4) -FIELD(ID_AA64PFR0, AMU, 44, 4) -FIELD(ID_AA64PFR0, DIT, 48, 4) -FIELD(ID_AA64PFR0, RME, 52, 4) -FIELD(ID_AA64PFR0, CSV2, 56, 4) -FIELD(ID_AA64PFR0, CSV3, 60, 4) - -FIELD(ID_AA64PFR1, BT, 0, 4) -FIELD(ID_AA64PFR1, SSBS, 4, 4) -FIELD(ID_AA64PFR1, MTE, 8, 4) -FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) -FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) -FIELD(ID_AA64PFR1, SME, 24, 4) -FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) -FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) -FIELD(ID_AA64PFR1, NMI, 36, 
4) -FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) -FIELD(ID_AA64PFR1, GCS, 44, 4) -FIELD(ID_AA64PFR1, THE, 48, 4) -FIELD(ID_AA64PFR1, MTEX, 52, 4) -FIELD(ID_AA64PFR1, DF2, 56, 4) -FIELD(ID_AA64PFR1, PFAR, 60, 4) - -FIELD(ID_AA64MMFR0, PARANGE, 0, 4) -FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) -FIELD(ID_AA64MMFR0, BIGEND, 8, 4) -FIELD(ID_AA64MMFR0, SNSMEM, 12, 4) -FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4) -FIELD(ID_AA64MMFR0, TGRAN16, 20, 4) -FIELD(ID_AA64MMFR0, TGRAN64, 24, 4) -FIELD(ID_AA64MMFR0, TGRAN4, 28, 4) -FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) -FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) -FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) -FIELD(ID_AA64MMFR0, EXS, 44, 4) -FIELD(ID_AA64MMFR0, FGT, 56, 4) -FIELD(ID_AA64MMFR0, ECV, 60, 4) - -FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) -FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) -FIELD(ID_AA64MMFR1, VH, 8, 4) -FIELD(ID_AA64MMFR1, HPDS, 12, 4) -FIELD(ID_AA64MMFR1, LO, 16, 4) -FIELD(ID_AA64MMFR1, PAN, 20, 4) -FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) -FIELD(ID_AA64MMFR1, XNX, 28, 4) -FIELD(ID_AA64MMFR1, TWED, 32, 4) -FIELD(ID_AA64MMFR1, ETS, 36, 4) -FIELD(ID_AA64MMFR1, HCX, 40, 4) -FIELD(ID_AA64MMFR1, AFP, 44, 4) -FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) -FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) -FIELD(ID_AA64MMFR1, CMOW, 56, 4) -FIELD(ID_AA64MMFR1, ECBHB, 60, 4) - -FIELD(ID_AA64MMFR2, CNP, 0, 4) -FIELD(ID_AA64MMFR2, UAO, 4, 4) -FIELD(ID_AA64MMFR2, LSM, 8, 4) -FIELD(ID_AA64MMFR2, IESB, 12, 4) -FIELD(ID_AA64MMFR2, VARANGE, 16, 4) -FIELD(ID_AA64MMFR2, CCIDX, 20, 4) -FIELD(ID_AA64MMFR2, NV, 24, 4) -FIELD(ID_AA64MMFR2, ST, 28, 4) -FIELD(ID_AA64MMFR2, AT, 32, 4) -FIELD(ID_AA64MMFR2, IDS, 36, 4) -FIELD(ID_AA64MMFR2, FWB, 40, 4) -FIELD(ID_AA64MMFR2, TTL, 48, 4) -FIELD(ID_AA64MMFR2, BBM, 52, 4) -FIELD(ID_AA64MMFR2, EVT, 56, 4) -FIELD(ID_AA64MMFR2, E0PD, 60, 4) - -FIELD(ID_AA64MMFR3, TCRX, 0, 4) -FIELD(ID_AA64MMFR3, SCTLRX, 4, 4) -FIELD(ID_AA64MMFR3, S1PIE, 8, 4) -FIELD(ID_AA64MMFR3, S2PIE, 12, 4) -FIELD(ID_AA64MMFR3, S1POE, 16, 4) -FIELD(ID_AA64MMFR3, S2POE, 20, 4) -FIELD(ID_AA64MMFR3, AIE, 24, 4) -FIELD(ID_AA64MMFR3, MEC, 28, 4) -FIELD(ID_AA64MMFR3, D128, 32, 4) -FIELD(ID_AA64MMFR3, D128_2, 36, 4) -FIELD(ID_AA64MMFR3, SNERR, 40, 4) -FIELD(ID_AA64MMFR3, ANERR, 44, 4) -FIELD(ID_AA64MMFR3, SDERR, 52, 4) -FIELD(ID_AA64MMFR3, ADERR, 56, 4) -FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4) - -FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) -FIELD(ID_AA64DFR0, TRACEVER, 4, 4) -FIELD(ID_AA64DFR0, PMUVER, 8, 4) -FIELD(ID_AA64DFR0, BRPS, 12, 4) -FIELD(ID_AA64DFR0, PMSS, 16, 4) -FIELD(ID_AA64DFR0, WRPS, 20, 4) -FIELD(ID_AA64DFR0, SEBEP, 24, 4) -FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) -FIELD(ID_AA64DFR0, PMSVER, 32, 4) -FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) -FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) -FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) -FIELD(ID_AA64DFR0, MTPMU, 48, 4) -FIELD(ID_AA64DFR0, BRBE, 52, 4) -FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) -FIELD(ID_AA64DFR0, HPMN0, 60, 4) - -FIELD(ID_AA64ZFR0, SVEVER, 0, 4) -FIELD(ID_AA64ZFR0, AES, 4, 4) -FIELD(ID_AA64ZFR0, BITPERM, 16, 4) -FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) -FIELD(ID_AA64ZFR0, B16B16, 24, 4) -FIELD(ID_AA64ZFR0, SHA3, 32, 4) -FIELD(ID_AA64ZFR0, SM4, 40, 4) -FIELD(ID_AA64ZFR0, I8MM, 44, 4) -FIELD(ID_AA64ZFR0, F32MM, 52, 4) -FIELD(ID_AA64ZFR0, F64MM, 56, 4) - -FIELD(ID_AA64SMFR0, F32F32, 32, 1) -FIELD(ID_AA64SMFR0, BI32I32, 33, 1) -FIELD(ID_AA64SMFR0, B16F32, 34, 1) -FIELD(ID_AA64SMFR0, F16F32, 35, 1) -FIELD(ID_AA64SMFR0, I8I32, 36, 4) -FIELD(ID_AA64SMFR0, F16F16, 42, 1) -FIELD(ID_AA64SMFR0, B16B16, 43, 1) -FIELD(ID_AA64SMFR0, I16I32, 44, 4) -FIELD(ID_AA64SMFR0, F64F64, 48, 1) -FIELD(ID_AA64SMFR0, I16I64, 52, 
4) -FIELD(ID_AA64SMFR0, SMEVER, 56, 4) -FIELD(ID_AA64SMFR0, FA64, 63, 1) - -FIELD(ID_DFR0, COPDBG, 0, 4) -FIELD(ID_DFR0, COPSDBG, 4, 4) -FIELD(ID_DFR0, MMAPDBG, 8, 4) -FIELD(ID_DFR0, COPTRC, 12, 4) -FIELD(ID_DFR0, MMAPTRC, 16, 4) -FIELD(ID_DFR0, MPROFDBG, 20, 4) -FIELD(ID_DFR0, PERFMON, 24, 4) -FIELD(ID_DFR0, TRACEFILT, 28, 4) - -FIELD(ID_DFR1, MTPMU, 0, 4) -FIELD(ID_DFR1, HPMN0, 4, 4) - -FIELD(DBGDIDR, SE_IMP, 12, 1) -FIELD(DBGDIDR, NSUHD_IMP, 14, 1) -FIELD(DBGDIDR, VERSION, 16, 4) -FIELD(DBGDIDR, CTX_CMPS, 20, 4) -FIELD(DBGDIDR, BRPS, 24, 4) -FIELD(DBGDIDR, WRPS, 28, 4) - -FIELD(DBGDEVID, PCSAMPLE, 0, 4) -FIELD(DBGDEVID, WPADDRMASK, 4, 4) -FIELD(DBGDEVID, BPADDRMASK, 8, 4) -FIELD(DBGDEVID, VECTORCATCH, 12, 4) -FIELD(DBGDEVID, VIRTEXTNS, 16, 4) -FIELD(DBGDEVID, DOUBLELOCK, 20, 4) -FIELD(DBGDEVID, AUXREGS, 24, 4) -FIELD(DBGDEVID, CIDMASK, 28, 4) - -FIELD(DBGDEVID1, PCSROFFSET, 0, 4) - -FIELD(MVFR0, SIMDREG, 0, 4) -FIELD(MVFR0, FPSP, 4, 4) -FIELD(MVFR0, FPDP, 8, 4) -FIELD(MVFR0, FPTRAP, 12, 4) -FIELD(MVFR0, FPDIVIDE, 16, 4) -FIELD(MVFR0, FPSQRT, 20, 4) -FIELD(MVFR0, FPSHVEC, 24, 4) -FIELD(MVFR0, FPROUND, 28, 4) - -FIELD(MVFR1, FPFTZ, 0, 4) -FIELD(MVFR1, FPDNAN, 4, 4) -FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */ -FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */ -FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */ -FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */ -FIELD(MVFR1, MVE, 8, 4) /* M-profile only */ -FIELD(MVFR1, FP16, 20, 4) /* M-profile only */ -FIELD(MVFR1, FPHP, 24, 4) -FIELD(MVFR1, SIMDFMAC, 28, 4) - -FIELD(MVFR2, SIMDMISC, 0, 4) -FIELD(MVFR2, FPMISC, 4, 4) - FIELD(GPCCR, PPS, 0, 3) +FIELD(GPCCR, RLPAD, 5, 1) +FIELD(GPCCR, NSPAD, 6, 1) +FIELD(GPCCR, SPAD, 7, 1) FIELD(GPCCR, IRGN, 8, 2) FIELD(GPCCR, ORGN, 10, 2) FIELD(GPCCR, SH, 12, 2) FIELD(GPCCR, PGS, 14, 2) FIELD(GPCCR, GPC, 16, 1) FIELD(GPCCR, GPCP, 17, 1) +FIELD(GPCCR, TBGPCD, 18, 1) +FIELD(GPCCR, NSO, 19, 1) FIELD(GPCCR, L0GPTSZ, 20, 4) +FIELD(GPCCR, APPSAA, 24, 1) FIELD(MFAR, FPA, 12, 40) FIELD(MFAR, NSE, 62, 1) @@ -2431,8 +2046,6 @@ QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); */ enum arm_features { ARM_FEATURE_AUXCR, /* ARM1026 Auxiliary control register. */ - ARM_FEATURE_XSCALE, /* Intel XScale extensions. */ - ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension. */ ARM_FEATURE_V6, ARM_FEATURE_V6K, ARM_FEATURE_V7, @@ -2633,6 +2246,7 @@ static inline bool arm_is_el2_enabled(CPUARMState *env) */ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space); uint64_t arm_hcr_el2_eff(CPUARMState *env); +uint64_t arm_hcr_el2_nvx_eff(CPUARMState *env); uint64_t arm_hcrx_el2_eff(CPUARMState *env); /* @@ -2712,212 +2326,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU -/* ARM has the following "translation regimes" (as the ARM ARM calls them): - * - * If EL3 is 64-bit: - * + NonSecure EL1 & 0 stage 1 - * + NonSecure EL1 & 0 stage 2 - * + NonSecure EL2 - * + NonSecure EL2 & 0 (ARMv8.1-VHE) - * + Secure EL1 & 0 stage 1 - * + Secure EL1 & 0 stage 2 (FEAT_SEL2) - * + Secure EL2 (FEAT_SEL2) - * + Secure EL2 & 0 (FEAT_SEL2) - * + Realm EL1 & 0 stage 1 (FEAT_RME) - * + Realm EL1 & 0 stage 2 (FEAT_RME) - * + Realm EL2 (FEAT_RME) - * + EL3 - * If EL3 is 32-bit: - * + NonSecure PL1 & 0 stage 1 - * + NonSecure PL1 & 0 stage 2 - * + NonSecure PL2 - * + Secure PL1 & 0 - * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) - * - * For QEMU, an mmu_idx is not quite the same as a translation regime because: - * 1. 
we need to split the "EL1 & 0" and "EL2 & 0" regimes into two mmu_idxes, - * because they may differ in access permissions even if the VA->PA map is - * the same - * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 - * translation, which means that we have one mmu_idx that deals with two - * concatenated translation regimes [this sort of combined s1+2 TLB is - * architecturally permitted] - * 3. we don't need to allocate an mmu_idx to translations that we won't be - * handling via the TLB. The only way to do a stage 1 translation without - * the immediate stage 2 translation is via the ATS or AT system insns, - * which can be slow-pathed and always do a page table walk. - * The only use of stage 2 translations is either as part of an s1+2 - * lookup or when loading the descriptors during a stage 1 page table walk, - * and in both those cases we don't use the TLB. - * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" - * translation regimes, because they map reasonably well to each other - * and they can't both be active at the same time. - * 5. we want to be able to use the TLB for accesses done as part of a - * stage1 page table walk, rather than having to walk the stage2 page - * table over and over. - * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access - * Never (PAN) bit within PSTATE. - * 7. we fold together most secure and non-secure regimes for A-profile, - * because there are no banked system registers for aarch64, so the - * process of switching between secure and non-secure is - * already heavyweight. - * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, - * because both are in use simultaneously for Secure EL2. - * - * This gives us the following list of cases: - * - * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN) - * EL0 EL2&0 - * EL2 EL2&0 - * EL2 EL2&0 +PAN - * EL2 (aka NS PL2) - * EL3 (aka AArch32 S PL1 PL1&0) - * AArch32 S PL0 PL1&0 (we call this EL30_0) - * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN) - * Stage2 Secure - * Stage2 NonSecure - * plus one TLB per Physical address space: S, NS, Realm, Root - * - * for a total of 16 different mmu_idx. - * - * R profile CPUs have an MPU, but can use the same set of MMU indexes - * as A profile. They only need to distinguish EL0 and EL1 (and - * EL2 for cores like the Cortex-R52). - * - * M profile CPUs are rather different as they do not have a true MMU. - * They have the following different MMU indexes: - * User - * Privileged - * User, execution priority negative (ie the MPU HFNMIENA bit may apply) - * Privileged, execution priority negative (ditto) - * If the CPU supports the v8M Security Extension then there are also: - * Secure User - * Secure Privileged - * Secure User, execution priority negative - * Secure Privileged, execution priority negative - * - * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code - * are not quite the same -- different CPU types (most notably M profile - * vs A/R profile) would like to use MMU indexes with different semantics, - * but since we don't ever need to use all of those in a single CPU we - * can avoid having to set NB_MMU_MODES to "total number of A profile MMU - * modes + total number of M profile MMU modes". The lower bits of - * ARMMMUIdx are the core TLB mmu index, and the higher bits are always - * the same for any particular CPU. 
- * Variables of type ARMMUIdx are always full values, and the core - * index values are in variables of type 'int'. - * - * Our enumeration includes at the end some entries which are not "true" - * mmu_idx values in that they don't have corresponding TLBs and are only - * valid for doing slow path page table walks. - * - * The constant names here are patterned after the general style of the names - * of the AT/ATS operations. - * The values used are carefully arranged to make mmu_idx => EL lookup easy. - * For M profile we arrange them to have a bit for priv, a bit for negpri - * and a bit for secure. - */ -#define ARM_MMU_IDX_A 0x10 /* A profile */ -#define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */ -#define ARM_MMU_IDX_M 0x40 /* M profile */ - -/* Meanings of the bits for M profile mmu idx values */ -#define ARM_MMU_IDX_M_PRIV 0x1 -#define ARM_MMU_IDX_M_NEGPRI 0x2 -#define ARM_MMU_IDX_M_S 0x4 /* Secure */ - -#define ARM_MMU_IDX_TYPE_MASK \ - (ARM_MMU_IDX_A | ARM_MMU_IDX_M | ARM_MMU_IDX_NOTLB) -#define ARM_MMU_IDX_COREIDX_MASK 0xf - -typedef enum ARMMMUIdx { - /* - * A-profile. - */ - ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_0 = 1 | ARM_MMU_IDX_A, - ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_2 = 3 | ARM_MMU_IDX_A, - ARMMMUIdx_E10_1_PAN = 4 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, - ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, - ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, - ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A, - ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A, - - /* - * Used for second stage of an S12 page table walk, or for descriptor - * loads during first stage of an S1 page table walk. Note that both - * are in use simultaneously for SecureEL2: the security state for - * the S2 ptw is selected by the NS bit from the S1 ptw. - */ - ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - - /* TLBs with 1-1 mapping to the physical address spaces. */ - ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A, - - /* - * These are not allocated TLBs and are used only for AT system - * instructions or for the first stage of an S12 page table walk. - */ - ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, - - /* - * M-profile. - */ - ARMMMUIdx_MUser = ARM_MMU_IDX_M, - ARMMMUIdx_MPriv = ARM_MMU_IDX_M | ARM_MMU_IDX_M_PRIV, - ARMMMUIdx_MUserNegPri = ARMMMUIdx_MUser | ARM_MMU_IDX_M_NEGPRI, - ARMMMUIdx_MPrivNegPri = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_NEGPRI, - ARMMMUIdx_MSUser = ARMMMUIdx_MUser | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSPriv = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSUserNegPri = ARMMMUIdx_MUserNegPri | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSPrivNegPri = ARMMMUIdx_MPrivNegPri | ARM_MMU_IDX_M_S, -} ARMMMUIdx; - -/* - * Bit macros for the core-mmu-index values for each index, - * for use when calling tlb_flush_by_mmuidx() and friends. 
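The encoding spelled out above (low nibble is the core TLB index, the higher bits tag the index family) is what makes the TO_CORE_BIT macro just below a one-liner. Sketched stand-alone with the constants from this header:

#define ARM_MMU_IDX_A_SKETCH            0x10
#define ARM_MMU_IDX_COREIDX_MASK_SKETCH 0x0f

enum { ARMMMUIdx_E2_SKETCH = 6 | ARM_MMU_IDX_A_SKETCH };

static inline int core_index(int mmu_idx)   /* what the core TLB code sees */
{
    return mmu_idx & ARM_MMU_IDX_COREIDX_MASK_SKETCH;
}

static inline int core_bit(int mmu_idx)     /* for tlb_flush_by_mmuidx() */
{
    return 1 << core_index(mmu_idx);
}

/* core_index(ARMMMUIdx_E2_SKETCH) == 6, so core_bit() == 0x40 */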
- */ -#define TO_CORE_BIT(NAME) \ - ARMMMUIdxBit_##NAME = 1 << (ARMMMUIdx_##NAME & ARM_MMU_IDX_COREIDX_MASK) - -typedef enum ARMMMUIdxBit { - TO_CORE_BIT(E10_0), - TO_CORE_BIT(E20_0), - TO_CORE_BIT(E10_1), - TO_CORE_BIT(E10_1_PAN), - TO_CORE_BIT(E2), - TO_CORE_BIT(E20_2), - TO_CORE_BIT(E20_2_PAN), - TO_CORE_BIT(E3), - TO_CORE_BIT(E30_0), - TO_CORE_BIT(E30_3_PAN), - TO_CORE_BIT(Stage2), - TO_CORE_BIT(Stage2_S), - - TO_CORE_BIT(MUser), - TO_CORE_BIT(MPriv), - TO_CORE_BIT(MUserNegPri), - TO_CORE_BIT(MPrivNegPri), - TO_CORE_BIT(MSUser), - TO_CORE_BIT(MSPriv), - TO_CORE_BIT(MSUserNegPri), - TO_CORE_BIT(MSPrivNegPri), -} ARMMMUIdxBit; - -#undef TO_CORE_BIT - -#define MMU_USER_IDX 0 - /* Indexes used when registering address spaces with cpu_address_space_init */ typedef enum ARMASIdx { ARMASIdx_NS = 0, @@ -2948,7 +2356,7 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu) /* If all the CLIDR.Ctypem bits are 0 there are no caches, and * CSSELR is RAZ/WI. */ - return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0; + return (GET_IDREG(&cpu->isar, CLIDR) & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0; } static inline bool arm_sctlr_b(CPUARMState *env) @@ -2966,19 +2374,15 @@ static inline bool arm_sctlr_b(CPUARMState *env) uint64_t arm_sctlr(CPUARMState *env, int el); -#include "exec/cpu-all.h" - /* * We have more than 32-bits worth of state per TB, so we split the data * between tb->flags and tb->cs_base, which is otherwise unused for ARM. * We collect these two parts in CPUARMTBFlags where they are named * flags and flags2 respectively. * - * The flags that are shared between all execution modes, TBFLAG_ANY, - * are stored in flags. The flags that are specific to a given mode - * are stores in flags2. Since cs_base is sized on the configured - * address size, flags2 always has 64-bits for A64, and a minimum of - * 32-bits for A32 and M32. + * The flags that are shared between all execution modes, TBFLAG_ANY, are stored + * in flags. The flags that are specific to a given mode are stored in flags2. + * flags2 always has 64-bits, even though only 32-bits are used for A32 and M32. * * The bits for 32-bit A-profile and M-profile partially overlap: * @@ -3016,13 +2420,6 @@ FIELD(TBFLAG_AM32, THUMB, 23, 1) /* Not cached. */ */ FIELD(TBFLAG_A32, VECLEN, 0, 3) /* Not cached. */ FIELD(TBFLAG_A32, VECSTRIDE, 3, 2) /* Not cached. */ -/* - * We store the bottom two bits of the CPAR as TB flags and handle - * checks on the other bits at runtime. This shares the same bits as - * VECSTRIDE, which is OK as no XScale CPU has VFP. - * Not cached, because VECLEN+VECSTRIDE are not cached. - */ -FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2) FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Partially cached, minus FPEXC. */ FIELD(TBFLAG_A32, SCTLR__B, 8, 1) /* Cannot overlap with SCTLR_B */ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) @@ -3084,12 +2481,15 @@ FIELD(TBFLAG_A64, ATA0, 31, 1) FIELD(TBFLAG_A64, NV, 32, 1) FIELD(TBFLAG_A64, NV1, 33, 1) FIELD(TBFLAG_A64, NV2, 34, 1) -/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */ -FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) +FIELD(TBFLAG_A64, E2H, 35, 1) /* Set if FEAT_NV2 RAM accesses are big-endian */ FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ +FIELD(TBFLAG_A64, ZT0EXC_EL, 39, 2) +FIELD(TBFLAG_A64, GCS_EN, 41, 1) +FIELD(TBFLAG_A64, GCS_RVCEN, 42, 1) +FIELD(TBFLAG_A64, GCSSTR_EL, 43, 2) /* * Helpers for using the above. 
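The two-word TB-flag container described above (flags in tb->flags, flags2 carried in tb->cs_base, now always 64 bits) can be sketched directly. The bit position below is taken from the FIELD(TBFLAG_A64, GCS_EN, 41, 1) definition above; the struct name is local to the illustration.

#include <stdint.h>

typedef struct {
    uint32_t flags;     /* TBFLAG_ANY: shared across execution modes */
    uint64_t flags2;    /* mode-specific bits; A64 uses all 64 */
} CPUARMTBFlags_sketch;

#define TBFLAG_A64_GCS_EN_BIT 41

static int tb_gcs_enabled(CPUARMTBFlags_sketch f)
{
    return (f.flags2 >> TBFLAG_A64_GCS_EN_BIT) & 1;
}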
Note that only the A64 accessors use @@ -3151,9 +2551,6 @@ static inline bool bswap_code(bool sctlr_b) #endif } -void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags); - enum { QEMU_PSCI_CONDUIT_DISABLED = 0, QEMU_PSCI_CONDUIT_SMC = 1, @@ -3245,41 +2642,9 @@ extern const uint64_t pred_esz_masks[5]; */ #define PAGE_BTI PAGE_TARGET_1 #define PAGE_MTE PAGE_TARGET_2 -#define PAGE_TARGET_STICKY PAGE_MTE /* We associate one allocation tag per 16 bytes, the minimum. */ #define LOG2_TAG_GRANULE 4 #define TAG_GRANULE (1 << LOG2_TAG_GRANULE) -#ifdef CONFIG_USER_ONLY - -#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1)) - -#ifdef TARGET_TAGGED_ADDRESSES -/** - * cpu_untagged_addr: - * @cs: CPU context - * @x: tagged address - * - * Remove any address tag from @x. This is explicitly related to the - * linux syscall TIF_TAGGED_ADDR setting, not TBI in general. - * - * There should be a better place to put this, but we need this in - * include/exec/cpu_ldst.h, and not some place linux-user specific. - */ -static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x) -{ - CPUARMState *env = cpu_env(cs); - if (env->tagged_addr_enable) { - /* - * TBI is enabled for userspace but not kernelspace addresses. - * Only clear the tag if bit 55 is clear. - */ - x &= sextract64(x, 0, 56); - } - return x; -} -#endif /* TARGET_TAGGED_ADDRESSES */ -#endif /* CONFIG_USER_ONLY */ - #endif diff --git a/target/arm/cpu32-stubs.c b/target/arm/cpu32-stubs.c new file mode 100644 index 0000000..81be44d --- /dev/null +++ b/target/arm/cpu32-stubs.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "target/arm/cpu.h" +#include "target/arm/internals.h" +#include <glib.h> + +void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 8188ede..26cf7e6 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -36,6 +36,28 @@ #include "cpu-features.h" #include "cpregs.h" +/* convert between <register>_IDX and SYS_<register> */ +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + [NAME##_IDX] = SYS_##NAME, + +const uint32_t id_register_sysreg[NUM_ID_IDX] = { +#include "cpu-sysregs.h.inc" +}; + +#undef DEF +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + case SYS_##NAME: return NAME##_IDX; + +int get_sysreg_idx(ARMSysRegs sysreg) +{ + switch (sysreg) { +#include "cpu-sysregs.h.inc" + } + g_assert_not_reached(); +} + +#undef DEF + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { /* @@ -114,7 +136,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * SVE is disabled and so are all vector lengths. Good. * Disable all SVE extensions as well. */ - cpu->isar.id_aa64zfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64ZFR0, 0); return; } @@ -237,6 +259,13 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) /* From now on sve_max_vq is the actual maximum supported length. */ cpu->sve_max_vq = max_vq; cpu->sve_vq.map = vq_map; + + /* FEAT_F64MM requires the existence of a 256-bit vector size. 
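The DEF()/#include pattern added to cpu64.c above is a classic X-macro: one list of entries expands once into the forward lookup table and once into the reverse switch. A compressed, hypothetical version (the ENC() shift amounts are invented for the illustration; QEMU's real encoding lives in cpu-sysregs.h.inc):

#define SYSREG_DEFS_SKETCH \
    DEF(ID_AA64PFR0,  3, 0, 0, 4, 0) \
    DEF(ID_AA64ISAR0, 3, 0, 0, 6, 0)

#define ENC(op0, op1, crn, crm, op2) \
    (((op0) << 14) | ((op1) << 11) | ((crn) << 7) | ((crm) << 3) | (op2))

#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) NAME##_IDX,
enum { SYSREG_DEFS_SKETCH NUM_ID_IDX_SKETCH };
#undef DEF

#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
    [NAME##_IDX] = ENC(OP0, OP1, CRN, CRM, OP2),
static const unsigned id_register_sysreg_sketch[NUM_ID_IDX_SKETCH] = {
    SYSREG_DEFS_SKETCH
};
#undef DEF

#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
    case ENC(OP0, OP1, CRN, CRM, OP2): return NAME##_IDX;
static int get_sysreg_idx_sketch(unsigned sysreg)
{
    switch (sysreg) {
    SYSREG_DEFS_SKETCH
    }
    return -1;
}
#undef DEF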
*/ + if (max_vq < 2) { + uint64_t t = GET_IDREG(&cpu->isar, ID_AA64ZFR0); + t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 0); + SET_IDREG(&cpu->isar, ID_AA64ZFR0, t); + } } /* @@ -288,16 +317,13 @@ static bool cpu_arm_get_sve(Object *obj, Error **errp) static void cpu_arm_set_sve(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; if (value && kvm_enabled() && !kvm_arm_sve_supported()) { error_setg(errp, "'sve' feature not supported by KVM on this host"); return; } - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, value); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, SVE, value); } void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) @@ -309,7 +335,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) if (vq_map == 0) { if (!cpu_isar_feature(aa64_sme, cpu)) { - cpu->isar.id_aa64smfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64SMFR0, 0); return; } @@ -337,6 +363,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) } cpu->sme_vq.map = vq_map; + cpu->sme_max_vq = 32 - clz32(vq_map); } static bool cpu_arm_get_sme(Object *obj, Error **errp) @@ -348,11 +375,8 @@ static bool cpu_arm_get_sme(Object *obj, Error **errp) static void cpu_arm_set_sme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr1; - t = FIELD_DP64(t, ID_AA64PFR1, SME, value); - cpu->isar.id_aa64pfr1 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR1, SME, value); } static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) @@ -365,11 +389,8 @@ static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64smfr0; - t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value); - cpu->isar.id_aa64smfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64SMFR0, FA64, value); } #ifdef CONFIG_USER_ONLY @@ -480,6 +501,7 @@ void aarch64_add_sme_properties(Object *obj) void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu); + ARMISARegisters *isar = &cpu->isar; uint64_t isar1, isar2; /* @@ -490,13 +512,13 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) * * Begin by disabling all fields. 
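The vq bitmaps used here, and by the new sme_max_vq assignment above, follow the convention (inferred from the 32 - clz32(vq_map) expression) that bit (vq - 1) is set when a vector length of vq quadwords, i.e. vq * 128 bits, is supported; 32 - clz32(map) then recovers the maximum. A sketch:

#include <stdint.h>

static inline unsigned clz32_local(uint32_t v)  /* stand-in for clz32() */
{
    return v ? __builtin_clz(v) : 32;
}

static unsigned max_vq_from_map(uint32_t vq_map)
{
    /* e.g. map 0x3 -> max vq 2 -> 256-bit vectors (so F64MM can stay) */
    return 32 - clz32_local(vq_map);
}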
*/ - isar1 = cpu->isar.id_aa64isar1; + isar1 = GET_IDREG(isar, ID_AA64ISAR1); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0); - isar2 = cpu->isar.id_aa64isar2; + isar2 = GET_IDREG(isar, ID_AA64ISAR2); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0); @@ -558,8 +580,8 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) } } - cpu->isar.id_aa64isar1 = isar1; - cpu->isar.id_aa64isar2 = isar2; + SET_IDREG(isar, ID_AA64ISAR1, isar1); + SET_IDREG(isar, ID_AA64ISAR2, isar2); } static const Property arm_cpu_pauth_property = @@ -606,17 +628,18 @@ void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) return; } - t = cpu->isar.id_aa64mmfr0; + t = GET_IDREG(&cpu->isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(&cpu->isar, ID_AA64MMFR0, t); } static void aarch64_a57_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a57"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -637,30 +660,30 @@ static void aarch64_a57_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41013000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 48KB L1 icache */ @@ -678,6 +701,7 @@ static void aarch64_a57_initfn(Object *obj) static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a53"; 
set_feature(&cpu->env, ARM_FEATURE_V8); @@ -698,30 +722,30 @@ static void aarch64_a53_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001122); /* 40 bit physical addr */ cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x00110f13; cpu->isar.dbgdevid1 = 0x1; cpu->isar.reset_pmcr_el0 = 0x41033000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 32KB L1 icache */ @@ -781,104 +805,12 @@ static const ARMCPUInfo aarch64_cpus[] = { #endif }; -static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - - return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); -} - -static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - - /* At this time, this property is only allowed if KVM is enabled. This - * restriction allows us to avoid fixing up functionality that assumes a - * uniform execution state like do_interrupt. 
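As a cross-check on the "40 bit physical addr" comment in the Cortex-A53 table above: ID_AA64MMFR0.PARANGE occupies bits [3:0] (per the FIELD() definitions earlier in this patch), and the reset value 0x00001122 yields 2, the architectural code for a 40-bit physical address range. The helper below is a local stand-in for extract64().

#include <stdint.h>

static inline uint64_t extract64_local(uint64_t v, int start, int len)
{
    return (v >> start) & (~0ULL >> (64 - len));
}

static unsigned a53_parange(void)
{
    uint64_t id_aa64mmfr0 = 0x00001122;         /* A53 reset value above */
    return extract64_local(id_aa64mmfr0, 0, 4); /* == 2 => 40-bit PA */
}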
- */ - if (value == false) { - if (!kvm_enabled() || !kvm_arm_aarch32_supported()) { - error_setg(errp, "'aarch64' feature cannot be disabled " - "unless KVM is enabled and 32-bit EL1 " - "is supported"); - return; - } - unset_feature(&cpu->env, ARM_FEATURE_AARCH64); - } else { - set_feature(&cpu->env, ARM_FEATURE_AARCH64); - } -} - -static void aarch64_cpu_finalizefn(Object *obj) -{ -} - -static const gchar *aarch64_gdb_arch_name(CPUState *cs) -{ - return "aarch64"; -} - -static void aarch64_cpu_class_init(ObjectClass *oc, void *data) -{ - CPUClass *cc = CPU_CLASS(oc); - - cc->gdb_read_register = aarch64_cpu_gdb_read_register; - cc->gdb_write_register = aarch64_cpu_gdb_write_register; - cc->gdb_core_xml_file = "aarch64-core.xml"; - cc->gdb_arch_name = aarch64_gdb_arch_name; - - object_class_property_add_bool(oc, "aarch64", aarch64_cpu_get_aarch64, - aarch64_cpu_set_aarch64); - object_class_property_set_description(oc, "aarch64", - "Set on/off to enable/disable aarch64 " - "execution state "); -} - -static void aarch64_cpu_instance_init(Object *obj) -{ - ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj); - - acc->info->initfn(obj); - arm_cpu_post_init(obj); -} - -static void cpu_register_class_init(ObjectClass *oc, void *data) -{ - ARMCPUClass *acc = ARM_CPU_CLASS(oc); - - acc->info = data; -} - -void aarch64_cpu_register(const ARMCPUInfo *info) -{ - TypeInfo type_info = { - .parent = TYPE_AARCH64_CPU, - .instance_init = aarch64_cpu_instance_init, - .class_init = info->class_init ?: cpu_register_class_init, - .class_data = (void *)info, - }; - - type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); - type_register_static(&type_info); - g_free((void *)type_info.name); -} - -static const TypeInfo aarch64_cpu_type_info = { - .name = TYPE_AARCH64_CPU, - .parent = TYPE_ARM_CPU, - .instance_finalize = aarch64_cpu_finalizefn, - .abstract = true, - .class_init = aarch64_cpu_class_init, -}; - static void aarch64_cpu_register_types(void) { size_t i; - type_register_static(&aarch64_cpu_type_info); - for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { - aarch64_cpu_register(&aarch64_cpus[i]); + arm_cpu_register(&aarch64_cpus[i]); } } diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index a9a619b..579516e 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -11,10 +11,12 @@ #include "internals.h" #include "cpu-features.h" #include "cpregs.h" -#include "exec/exec-all.h" -#include "exec/helper-proto.h" +#include "exec/watchpoint.h" #include "system/tcg.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + #ifdef CONFIG_TCG /* Return the Exception Level targeted by debug exceptions. 
*/ static int arm_debug_target_el(CPUARMState *env) @@ -378,7 +380,7 @@ bool arm_debug_check_breakpoint(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - target_ulong pc; + vaddr pc; int n; /* @@ -938,6 +940,13 @@ static void dbgclaimclr_write(CPUARMState *env, const ARMCPRegInfo *ri, env->cp15.dbgclaim &= ~(value & 0xFF); } +static CPAccessResult access_bogus(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Always UNDEF, as if this cpreg didn't exist */ + return CP_ACCESS_UNDEFINED; +} + static const ARMCPRegInfo debug_cp_reginfo[] = { /* * DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped @@ -986,11 +995,42 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */ - { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14, + /* Architecturally DBGDTRTX is named DBGDTRRX when used for reads */ + { .name = "DBGDTRTX_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, .access = PL0_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DBGDTRTX", .state = ARM_CP_STATE_AA32, .cp = 14, + .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* This is AArch64-only and is a combination of DBGDTRTX and DBGDTRRX */ + { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 4, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* + * This is not a real AArch32 register. We used to incorrectly expose + * this due to a QEMU bug; to avoid breaking migration compatibility we + * need to continue to provide it so that we don't fail the inbound + * migration when it tells us about a sysreg that we don't have. + * We set an always-fails .accessfn, which means that the guest doesn't + * actually see this register (it will always UNDEF, identically to if + * there were no cpreg definition for it other than that we won't print + * a LOG_UNIMP message about it), and we set the ARM_CP_NO_GDB flag so the + * gdbstub won't see it either. + * (We can't just set .access = 0, because add_cpreg_to_hashtable() + * helpfully ignores cpregs which aren't accessible to the highest + * implemented EL.) + * + * TODO: implement a system for being able to describe "this register + * can be ignored if it appears in the inbound stream"; then we can + * remove this temporary hack. + */ + { .name = "BOGUS_DBGDTR_EL0", .state = ARM_CP_STATE_AA32, + .cp = 14, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_bogus, + .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 }, /* * OSECCR_EL1 provides a mechanism for an operating system * to access the contents of EDECCR. EDECCR is not implemented though, diff --git a/target/arm/el2-stubs.c b/target/arm/el2-stubs.c new file mode 100644 index 0000000..972023c --- /dev/null +++ b/target/arm/el2-stubs.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* QEMU ARM CPU - user-mode emulation stubs for EL2 interrupts + * + * These should not really be needed, but CP registers for EL2 + * are not elided by user-mode emulation and they call these + * functions. Leave them as stubs until it's cleaned up. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" + +void arm_cpu_update_virq(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vfiq(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vinmi(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vfnmi(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vserr(ARMCPU *cpu) +{ + g_assert_not_reached(); +} diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index 30068c2..8d2229f 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -44,6 +44,12 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; +#ifdef TARGET_AARCH64 + if (arm_gdbstub_is_aarch64(cpu)) { + return aarch64_cpu_gdb_read_register(cs, mem_buf, n); + } +#endif + if (n < 16) { /* Core integer register. */ return gdb_get_reg32(mem_buf, env->regs[n]); @@ -66,6 +72,12 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) CPUARMState *env = &cpu->env; uint32_t tmp; +#ifdef TARGET_AARCH64 + if (arm_gdbstub_is_aarch64(cpu)) { + return aarch64_cpu_gdb_write_register(cs, mem_buf, n); + } +#endif + tmp = ldl_p(mem_buf); /* @@ -235,10 +247,20 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg) key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg]; ri = get_arm_cp_reginfo(cpu->cp_regs, key); if (ri) { - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: + if (ri->vhe_redir_to_el2 && + (arm_hcr_el2_eff(env) & HCR_E2H) && + arm_current_el(env) == 2) { + ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2); + } else if (ri->vhe_redir_to_el01) { + ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01); + } return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri)); - } else { + case MO_32: return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri)); + default: + g_assert_not_reached(); } } return 0; @@ -515,7 +537,8 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) * registers so we don't need to include both. */ #ifdef TARGET_AARCH64 - if (isar_feature_aa64_sve(&cpu->isar)) { + if (isar_feature_aa64_sve(&cpu->isar) || + isar_feature_aa64_sme(&cpu->isar)) { GDBFeature *feature = arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs); gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg, aarch64_gdb_set_sve_reg, feature, 0); @@ -525,6 +548,13 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) gdb_find_static_feature("aarch64-fpu.xml"), 0); } + + if (isar_feature_aa64_sme(&cpu->isar)) { + GDBFeature *sme_feature = + arm_gen_dynamic_smereg_feature(cs, cs->gdb_num_regs); + gdb_register_coprocessor(cs, aarch64_gdb_get_sme_reg, + aarch64_gdb_set_sme_reg, sme_feature, 0); + } /* * Note that we report pauth information via the feature name * org.gnu.gdb.aarch64.pauth_v2, not org.gnu.gdb.aarch64.pauth. diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c index 1a4dbec..65d6bbe 100644 --- a/target/arm/gdbstub64.c +++ b/target/arm/gdbstub64.c @@ -27,6 +27,10 @@ #include <sys/prctl.h> #include "mte_user_helper.h" #endif +#ifdef CONFIG_TCG +#include "accel/tcg/cpu-mmu-index.h" +#include "exec/target_page.h" +#endif int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { @@ -43,6 +47,7 @@ int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) case 32: return gdb_get_reg64(mem_buf, env->pc); case 33: + /* pstate is now a 64-bit value; can we simply adjust the xml? 
*/ return gdb_get_reg32(mem_buf, pstate_read(env)); } /* Unknown register. */ @@ -71,6 +76,7 @@ int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 8; case 33: /* CPSR */ + /* pstate is now a 64-bit value; can we simply adjust the xml? */ pstate_write(env, tmp); return 4; } @@ -111,8 +117,22 @@ int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg) /* 128 bit FP register */ { uint64_t *q = aa64_vfp_qreg(env, reg); - q[0] = ldq_le_p(buf); - q[1] = ldq_le_p(buf + 8); + + /* + * On the wire these are target-endian 128 bit values. + * In the CPU state these are host-order uint64_t values + * with the least-significant one first. This means they're + * the other way around for target_big_endian() (which is + * only true for us for aarch64_be-linux-user). + */ + if (target_big_endian()) { + q[1] = ldq_p(buf); + q[0] = ldq_p(buf + 8); + } else { + q[0] = ldq_p(buf); + q[1] = ldq_p(buf + 8); + } + return 16; } case 32: @@ -188,10 +208,17 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) case 0 ... 31: { int vq, len = 0; - uint64_t *p = (uint64_t *) buf; for (vq = 0; vq < cpu->sve_max_vq; vq++) { - env->vfp.zregs[reg].d[vq * 2 + 1] = *p++; - env->vfp.zregs[reg].d[vq * 2] = *p++; + if (target_big_endian()) { + env->vfp.zregs[reg].d[vq * 2 + 1] = ldq_p(buf); + buf += 8; + env->vfp.zregs[reg].d[vq * 2] = ldq_p(buf); + } else { + env->vfp.zregs[reg].d[vq * 2] = ldq_p(buf); + buf += 8; + env->vfp.zregs[reg].d[vq * 2 + 1] = ldq_p(buf); + } + buf += 8; len += 16; } return len; @@ -206,9 +233,9 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) { int preg = reg - 34; int vq, len = 0; - uint64_t *p = (uint64_t *) buf; for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) { - env->vfp.pregs[preg].p[vq / 4] = *p++; + env->vfp.pregs[preg].p[vq / 4] = ldq_p(buf); + buf += 8; len += 8; } return len; @@ -224,6 +251,90 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) return 0; } +int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: /* svg register */ + { + int vq = 0; + if (FIELD_EX64(env->svcr, SVCR, SM)) { + vq = sve_vqm1_for_el_sm(env, arm_current_el(env), + FIELD_EX64(env->svcr, SVCR, SM)) + 1; + } + /* svg = vector granules (2 * vector quadwords) in streaming mode */ + return gdb_get_reg64(buf, vq * 2); + } + case 1: /* svcr register */ + return gdb_get_reg64(buf, env->svcr); + case 2: /* za register */ + { + int len = 0; + int vq = cpu->sme_max_vq; + int svl = vq * 16; + for (int i = 0; i < svl; i++) { + for (int q = 0; q < vq; q++) { + len += gdb_get_reg128(buf, + env->za_state.za[i].d[q * 2 + 1], + env->za_state.za[i].d[q * 2]); + } + } + return len; + } + default: + /* gdbstub asked for something out of range */ + qemu_log_mask(LOG_UNIMP, "%s: out of range register %d", __func__, reg); + break; + } + + return 0; +} + +int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: /* svg register */ + /* cannot set svg via gdbstub */ + return 8; + case 1: /* svcr register */ + aarch64_set_svcr(env, ldq_le_p(buf), + R_SVCR_SM_MASK | R_SVCR_ZA_MASK); + return 8; + case 2: /* za register */ + { + int len = 0; + int vq = cpu->sme_max_vq; + int svl = vq * 16; + for (int i = 0; i < svl; i++) { + for (int q = 0; q < vq; q++) { + if (target_big_endian()) { + env->za_state.za[i].d[q * 2 + 1] = ldq_p(buf); + buf += 8; + 
env->za_state.za[i].d[q * 2] = ldq_p(buf); + } else { + env->za_state.za[i].d[q * 2] = ldq_p(buf); + buf += 8; + env->za_state.za[i].d[q * 2 + 1] = ldq_p(buf); + } + buf += 8; + len += 16; + } + } + return len; + } + default: + /* gdbstub asked for something out of range */ + break; + } + + return 0; +} + int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg) { ARMCPU *cpu = ARM_CPU(cs); @@ -388,6 +499,41 @@ GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg) return &cpu->dyn_svereg_feature.desc; } +GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cs, int base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + int vq = cpu->sme_max_vq; + int svl = vq * 16; + GDBFeatureBuilder builder; + int reg = 0; + + gdb_feature_builder_init(&builder, &cpu->dyn_smereg_feature.desc, + "org.gnu.gdb.aarch64.sme", "sme-registers.xml", + base_reg); + + + /* Create the sme_bv vector type. */ + gdb_feature_builder_append_tag( + &builder, "<vector id=\"sme_bv\" type=\"uint8\" count=\"%d\"/>", + svl); + + /* Create the sme_bvv vector type. */ + gdb_feature_builder_append_tag( + &builder, "<vector id=\"sme_bvv\" type=\"sme_bv\" count=\"%d\"/>", + svl); + + /* Define the svg, svcr, and za registers. */ + + gdb_feature_builder_append_reg(&builder, "svg", 64, reg++, "int", NULL); + gdb_feature_builder_append_reg(&builder, "svcr", 64, reg++, "int", NULL); + gdb_feature_builder_append_reg(&builder, "za", svl * svl * 8, reg++, + "sme_bvv", NULL); + + gdb_feature_builder_end(&builder); + + return &cpu->dyn_smereg_feature.desc; +} + #ifdef CONFIG_USER_ONLY int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg) { diff --git a/target/arm/helper.c b/target/arm/helper.c index bb445e3..167f290 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12,51 +12,85 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/helper-proto.h" #include "exec/page-protection.h" +#include "exec/mmap-lock.h" #include "qemu/main-loop.h" #include "qemu/timer.h" #include "qemu/bitops.h" #include "qemu/qemu-print.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "hw/irq.h" #include "system/cpu-timers.h" +#include "exec/icount.h" #include "system/kvm.h" #include "system/tcg.h" #include "qapi/error.h" #include "qemu/guest-random.h" #ifdef CONFIG_TCG +#include "accel/tcg/probe.h" +#include "accel/tcg/getpc.h" #include "semihosting/common-semi.h" #endif #include "cpregs.h" #include "target/arm/gtimer.h" -#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" static void switch_mode(CPUARMState *env, int mode); -static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) +int compare_u64(const void *a, const void *b) +{ + if (*(uint64_t *)a > *(uint64_t *)b) { + return 1; + } + if (*(uint64_t *)a < *(uint64_t *)b) { + return -1; + } + return 0; +} + +/* + * Macros which are lvalues for the field in CPUARMState for the + * ARMCPRegInfo *ri. 
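/*
 * A worked example of the gdb XML geometry built by
 * arm_gen_dynamic_smereg_feature() above; the sme_max_vq value of 4 is
 * illustrative, not taken from the patch:
 */
static void sme_xml_geometry_example(void)
{
    int vq = 4;                   /* assumed cpu->sme_max_vq */
    int svl = vq * 16;            /* streaming vector length: 64 bytes */
    int za_bits = svl * svl * 8;  /* "za" bitsize: 64 * 64 * 8 = 32768 */

    (void)za_bits;
    /* sme_bv:  vector of svl (64) uint8 elements -- one ZA row.          */
    /* sme_bvv: vector of svl (64) sme_bv rows -- the svl x svl ZA array. */
    /* svg counts 8-byte granules: svl / 8 == vq * 2, matching the        */
    /* gdb_get_reg64(buf, vq * 2) in aarch64_gdb_get_sme_reg().           */
}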
+ */ +#define CPREG_FIELD32(env, ri) \ + (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) +#define CPREG_FIELD64(env, ri) \ + (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) + +uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) { assert(ri->fieldoffset); - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: return CPREG_FIELD64(env, ri); - } else { + case MO_32: return CPREG_FIELD32(env, ri); + default: + g_assert_not_reached(); } } void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { assert(ri->fieldoffset); - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: CPREG_FIELD64(env, ri) = value; - } else { + break; + case MO_32: CPREG_FIELD32(env, ri) = value; + break; + default: + g_assert_not_reached(); } } +#undef CPREG_FIELD32 +#undef CPREG_FIELD64 + static void *raw_ptr(CPUARMState *env, const ARMCPRegInfo *ri) { return (char *)env + ri->fieldoffset; @@ -195,11 +229,11 @@ bool write_list_to_cpustate(ARMCPU *cpu) return ok; } -static void add_cpreg_to_list(gpointer key, gpointer opaque) +static void add_cpreg_to_list(gpointer key, gpointer value, gpointer opaque) { ARMCPU *cpu = opaque; uint32_t regidx = (uintptr_t)key; - const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + const ARMCPRegInfo *ri = value; if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); @@ -208,64 +242,52 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque) } } -static void count_cpreg(gpointer key, gpointer opaque) +static void count_cpreg(gpointer key, gpointer value, gpointer opaque) { ARMCPU *cpu = opaque; - const ARMCPRegInfo *ri; - - ri = g_hash_table_lookup(cpu->cp_regs, key); + const ARMCPRegInfo *ri = value; if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_array_len++; } } -static gint cpreg_key_compare(gconstpointer a, gconstpointer b) -{ - uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a); - uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b); - - if (aidx > bidx) { - return 1; - } - if (aidx < bidx) { - return -1; - } - return 0; -} - void init_cpreg_list(ARMCPU *cpu) { /* * Initialise the cpreg_tuples[] array based on the cp_regs hash. * Note that we require cpreg_tuples[] to be sorted by key ID. 
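/*
 * Sketch of the cpreg_field_type() helper that the raw_read()/raw_write()
 * switches above depend on. Its definition is outside this hunk, so this
 * is an assumption modeled on the old cpreg_field_is_64bit() test
 * (MemOp/MO_32/MO_64 come from "exec/memop.h"):
 */
static inline MemOp cpreg_field_type(const ARMCPRegInfo *ri)
{
    return (ri->state == ARM_CP_STATE_AA64 || (ri->type & ARM_CP_64BIT))
           ? MO_64 : MO_32;
}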
*/ - GList *keys; int arraylen; - keys = g_hash_table_get_keys(cpu->cp_regs); - keys = g_list_sort(keys, cpreg_key_compare); - cpu->cpreg_array_len = 0; - - g_list_foreach(keys, count_cpreg, cpu); + g_hash_table_foreach(cpu->cp_regs, count_cpreg, cpu); arraylen = cpu->cpreg_array_len; - cpu->cpreg_indexes = g_new(uint64_t, arraylen); - cpu->cpreg_values = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len; + if (arraylen) { + cpu->cpreg_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_values = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen); + } else { + cpu->cpreg_indexes = NULL; + cpu->cpreg_values = NULL; + cpu->cpreg_vmstate_indexes = NULL; + cpu->cpreg_vmstate_values = NULL; + } + cpu->cpreg_vmstate_array_len = arraylen; cpu->cpreg_array_len = 0; - g_list_foreach(keys, add_cpreg_to_list, cpu); + g_hash_table_foreach(cpu->cp_regs, add_cpreg_to_list, cpu); assert(cpu->cpreg_array_len == arraylen); - g_list_free(keys); + if (arraylen) { + qsort(cpu->cpreg_indexes, arraylen, sizeof(uint64_t), compare_u64); + } } -static bool arm_pan_enabled(CPUARMState *env) +bool arm_pan_enabled(CPUARMState *env) { if (is_a64(env)) { if ((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1)) { @@ -314,25 +336,6 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env, return CP_ACCESS_UNDEFINED; } -/* - * Check for traps to performance monitor registers, which are controlled - * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. - */ -static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - - if (el < 2 && (mdcr_el2 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_OK; -} - /* Check for traps from EL1 due to HCR_EL2.TVM and HCR_EL2.TRVM. 
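/*
 * Minimal usage sketch for the new sort path in init_cpreg_list() above:
 * the indexes are gathered unordered by g_hash_table_foreach() and then
 * sorted ascending with qsort() and compare_u64() (example values are
 * arbitrary):
 */
static void cpreg_sort_example(void)
{
    uint64_t idx[3] = { 0x30003, 0x10001, 0x20002 };

    qsort(idx, ARRAY_SIZE(idx), sizeof(idx[0]), compare_u64);
    /* idx is now { 0x10001, 0x20002, 0x30003 } */
}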
*/ CPAccessResult access_tvm_trvm(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -417,7 +420,9 @@ int alle1_tlbmask(CPUARMState *env) */ return (ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_1_GCS | ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS | ARMMMUIdxBit_Stage2 | ARMMMUIdxBit_Stage2_S); } @@ -451,6 +456,8 @@ static const ARMCPRegInfo cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_CONTEXTIDR_EL1, .nv2_redirect_offset = 0x108 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 1), .secure = ARM_CP_SECSTATE_NS, .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]), .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, @@ -668,291 +675,16 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { */ { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, }, - { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, + { .name = "CPACR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access, .fgt = FGT_CPACR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 1, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 2), .nv2_redirect_offset = 0x100 | NV2_REDIR_NV1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1), .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read }, }; -typedef struct pm_event { - uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */ - /* If the event is supported on this CPU (used to generate PMCEID[01]) */ - bool (*supported)(CPUARMState *); - /* - * Retrieve the current count of the underlying event. The programmed - * counters hold a difference from the return value from this function - */ - uint64_t (*get_count)(CPUARMState *); - /* - * Return how many nanoseconds it will take (at a minimum) for count events - * to occur. A negative value indicates the counter will never overflow, or - * that the counter has otherwise arranged for the overflow bit to be set - * and the PMU interrupt to be raised on overflow. - */ - int64_t (*ns_per_count)(uint64_t); -} pm_event; - -static bool event_always_supported(CPUARMState *env) -{ - return true; -} - -static uint64_t swinc_get_count(CPUARMState *env) -{ - /* - * SW_INCR events are written directly to the pmevcntr's by writes to - * PMSWINC, so there is no underlying count maintained by the PMU itself - */ - return 0; -} - -static int64_t swinc_ns_per(uint64_t ignored) -{ - return -1; -} - -/* - * Return the underlying cycle count for the PMU cycle counters. If we're in - * usermode, simply return 0. 
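/*
 * A worked example for the vhe_redir_to_el2/vhe_redir_to_el01 fields added
 * to cp_reginfo[] above, reading ENCODE_AA64_CP_REG()'s five arguments as
 * (op0, op1, crn, crm, op2) -- an assumption, since the macro body is not
 * in this hunk. For CONTEXTIDR_EL1:
 *   ENCODE_AA64_CP_REG(3, 4, 13, 0, 1) -> S3_4_C13_C0_1, CONTEXTIDR_EL2
 *   ENCODE_AA64_CP_REG(3, 5, 13, 0, 1) -> S3_5_C13_C0_1, CONTEXTIDR_EL12
 * so with HCR_EL2.E2H set, an EL2 access to CONTEXTIDR_EL1 can be redirected
 * to the EL2 register via the first key, while the _EL12 alias reaches the
 * real EL1 register via the second, e.g.:
 *   ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2);
 * as the arm_gdb_get_sysreg() hunk earlier in this patch does.
 */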
- */ -static uint64_t cycles_get_count(CPUARMState *env) -{ -#ifndef CONFIG_USER_ONLY - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - ARM_CPU_FREQ, NANOSECONDS_PER_SECOND); -#else - return cpu_get_host_ticks(); -#endif -} - -#ifndef CONFIG_USER_ONLY -static int64_t cycles_ns_per(uint64_t cycles) -{ - return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles; -} - -static bool instructions_supported(CPUARMState *env) -{ - /* Precise instruction counting */ - return icount_enabled() == ICOUNT_PRECISE; -} - -static uint64_t instructions_get_count(CPUARMState *env) -{ - assert(icount_enabled() == ICOUNT_PRECISE); - return (uint64_t)icount_get_raw(); -} - -static int64_t instructions_ns_per(uint64_t icount) -{ - assert(icount_enabled() == ICOUNT_PRECISE); - return icount_to_ns((int64_t)icount); -} -#endif - -static bool pmuv3p1_events_supported(CPUARMState *env) -{ - /* For events which are supported in any v8.1 PMU */ - return cpu_isar_feature(any_pmuv3p1, env_archcpu(env)); -} - -static bool pmuv3p4_events_supported(CPUARMState *env) -{ - /* For events which are supported in any v8.1 PMU */ - return cpu_isar_feature(any_pmuv3p4, env_archcpu(env)); -} - -static uint64_t zero_event_get_count(CPUARMState *env) -{ - /* For events which on QEMU never fire, so their count is always zero */ - return 0; -} - -static int64_t zero_event_ns_per(uint64_t cycles) -{ - /* An event which never fires can never overflow */ - return -1; -} - -static const pm_event pm_events[] = { - { .number = 0x000, /* SW_INCR */ - .supported = event_always_supported, - .get_count = swinc_get_count, - .ns_per_count = swinc_ns_per, - }, -#ifndef CONFIG_USER_ONLY - { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */ - .supported = instructions_supported, - .get_count = instructions_get_count, - .ns_per_count = instructions_ns_per, - }, - { .number = 0x011, /* CPU_CYCLES, Cycle */ - .supported = event_always_supported, - .get_count = cycles_get_count, - .ns_per_count = cycles_ns_per, - }, -#endif - { .number = 0x023, /* STALL_FRONTEND */ - .supported = pmuv3p1_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, - { .number = 0x024, /* STALL_BACKEND */ - .supported = pmuv3p1_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, - { .number = 0x03c, /* STALL */ - .supported = pmuv3p4_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, -}; - -/* - * Note: Before increasing MAX_EVENT_ID beyond 0x3f into the 0x40xx range of - * events (i.e. the statistical profiling extension), this implementation - * should first be updated to something sparse instead of the current - * supported_event_map[] array. - */ -#define MAX_EVENT_ID 0x3c -#define UNSUPPORTED_EVENT UINT16_MAX -static uint16_t supported_event_map[MAX_EVENT_ID + 1]; - -/* - * Called upon CPU initialization to initialize PMCEID[01]_EL0 and build a map - * of ARM event numbers to indices in our pm_events array. - * - * Note: Events in the 0x40XX range are not currently supported. 
- */ -void pmu_init(ARMCPU *cpu) -{ - unsigned int i; - - /* - * Empty supported_event_map and cpu->pmceid[01] before adding supported - * events to them - */ - for (i = 0; i < ARRAY_SIZE(supported_event_map); i++) { - supported_event_map[i] = UNSUPPORTED_EVENT; - } - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; - - for (i = 0; i < ARRAY_SIZE(pm_events); i++) { - const pm_event *cnt = &pm_events[i]; - assert(cnt->number <= MAX_EVENT_ID); - /* We do not currently support events in the 0x40xx range */ - assert(cnt->number <= 0x3f); - - if (cnt->supported(&cpu->env)) { - supported_event_map[cnt->number] = i; - uint64_t event_mask = 1ULL << (cnt->number & 0x1f); - if (cnt->number & 0x20) { - cpu->pmceid1 |= event_mask; - } else { - cpu->pmceid0 |= event_mask; - } - } - } -} - -/* - * Check at runtime whether a PMU event is supported for the current machine - */ -static bool event_supported(uint16_t number) -{ - if (number > MAX_EVENT_ID) { - return false; - } - return supported_event_map[number] != UNSUPPORTED_EVENT; -} - -static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - /* - * Performance monitor registers user accessibility is controlled - * by PMUSERENR. MDCR_EL2.TPM and MDCR_EL3.TPM allow configurable - * trapping to EL2 or EL3 for other accesses. - */ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - - if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) { - return CP_ACCESS_TRAP_EL1; - } - if (el < 2 && (mdcr_el2 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL3; - } - - return CP_ACCESS_OK; -} - -static CPAccessResult pmreg_access_xevcntr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* ER: event counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 3)) != 0 - && isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_swinc(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* SW: software increment write trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 1)) != 0 - && !isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_selr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* ER: event counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 3)) != 0) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_ccntr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* CR: cycle counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 2)) != 0 - && isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - /* * Bits in MDCR_EL2 and MDCR_EL3 which pmu_counter_enabled() looks at. * We use these to decide whether we need to wrap a write to MDCR_EL2 @@ -962,684 +694,6 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState *env, (MDCR_HPME | MDCR_HPMD | MDCR_HPMN | MDCR_HCCD | MDCR_HLP) #define MDCR_EL3_PMU_ENABLE_BITS (MDCR_SPME | MDCR_SCCD) -/* - * Returns true if the counter (pass 31 for PMCCNTR) should count events using - * the current EL, security state, and register configuration. 
- */ -static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) -{ - uint64_t filter; - bool e, p, u, nsk, nsu, nsh, m; - bool enabled, prohibited = false, filtered; - bool secure = arm_is_secure(env); - int el = arm_current_el(env); - uint64_t mdcr_el2; - uint8_t hpmn; - - /* - * We might be called for M-profile cores where MDCR_EL2 doesn't - * exist and arm_mdcr_el2_eff() will assert, so this early-exit check - * must be before we read that value. - */ - if (!arm_feature(env, ARM_FEATURE_PMU)) { - return false; - } - - mdcr_el2 = arm_mdcr_el2_eff(env); - hpmn = mdcr_el2 & MDCR_HPMN; - - if (!arm_feature(env, ARM_FEATURE_EL2) || - (counter < hpmn || counter == 31)) { - e = env->cp15.c9_pmcr & PMCRE; - } else { - e = mdcr_el2 & MDCR_HPME; - } - enabled = e && (env->cp15.c9_pmcnten & (1 << counter)); - - /* Is event counting prohibited? */ - if (el == 2 && (counter < hpmn || counter == 31)) { - prohibited = mdcr_el2 & MDCR_HPMD; - } - if (secure) { - prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME); - } - - if (counter == 31) { - /* - * The cycle counter defaults to running. PMCR.DP says "disable - * the cycle counter when event counting is prohibited". - * Some MDCR bits disable the cycle counter specifically. - */ - prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP; - if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - if (secure) { - prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD); - } - if (el == 2) { - prohibited = prohibited || (mdcr_el2 & MDCR_HCCD); - } - } - } - - if (counter == 31) { - filter = env->cp15.pmccfiltr_el0; - } else { - filter = env->cp15.c14_pmevtyper[counter]; - } - - p = filter & PMXEVTYPER_P; - u = filter & PMXEVTYPER_U; - nsk = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSK); - nsu = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSU); - nsh = arm_feature(env, ARM_FEATURE_EL2) && (filter & PMXEVTYPER_NSH); - m = arm_el_is_aa64(env, 1) && - arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_M); - - if (el == 0) { - filtered = secure ? u : u != nsu; - } else if (el == 1) { - filtered = secure ? p : p != nsk; - } else if (el == 2) { - filtered = !nsh; - } else { /* EL3 */ - filtered = m != p; - } - - if (counter != 31) { - /* - * If not checking PMCCNTR, ensure the counter is setup to an event we - * support - */ - uint16_t event = filter & PMXEVTYPER_EVTCOUNT; - if (!event_supported(event)) { - return false; - } - } - - return enabled && !prohibited && !filtered; -} - -static void pmu_update_irq(CPUARMState *env) -{ - ARMCPU *cpu = env_archcpu(env); - qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) && - (env->cp15.c9_pminten & env->cp15.c9_pmovsr)); -} - -static bool pmccntr_clockdiv_enabled(CPUARMState *env) -{ - /* - * Return true if the clock divider is enabled and the cycle counter - * is supposed to tick only once every 64 clock cycles. This is - * controlled by PMCR.D, but if PMCR.LC is set to enable the long - * (64-bit) cycle counter PMCR.D has no effect. 
- */ - return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD; -} - -static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) -{ - /* Return true if the specified event counter is configured to be 64 bit */ - - /* This isn't intended to be used with the cycle counter */ - assert(counter < 31); - - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - return false; - } - - if (arm_feature(env, ARM_FEATURE_EL2)) { - /* - * MDCR_EL2.HLP still applies even when EL2 is disabled in the - * current security state, so we don't use arm_mdcr_el2_eff() here. - */ - bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; - int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; - - if (counter >= hpmn) { - return hlp; - } - } - return env->cp15.c9_pmcr & PMCRLP; -} - -/* - * Ensure c15_ccnt is the guest-visible count so that operations such as - * enabling/disabling the counter or filtering, modifying the count itself, - * etc. can be done logically. This is essentially a no-op if the counter is - * not enabled at the time of the call. - */ -static void pmccntr_op_start(CPUARMState *env) -{ - uint64_t cycles = cycles_get_count(env); - - if (pmu_counter_enabled(env, 31)) { - uint64_t eff_cycles = cycles; - if (pmccntr_clockdiv_enabled(env)) { - eff_cycles /= 64; - } - - uint64_t new_pmccntr = eff_cycles - env->cp15.c15_ccnt_delta; - - uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \ - 1ull << 63 : 1ull << 31; - if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) { - env->cp15.c9_pmovsr |= (1ULL << 31); - pmu_update_irq(env); - } - - env->cp15.c15_ccnt = new_pmccntr; - } - env->cp15.c15_ccnt_delta = cycles; -} - -/* - * If PMCCNTR is enabled, recalculate the delta between the clock and the - * guest-visible count. A call to pmccntr_op_finish should follow every call to - * pmccntr_op_start. - */ -static void pmccntr_op_finish(CPUARMState *env) -{ - if (pmu_counter_enabled(env, 31)) { -#ifndef CONFIG_USER_ONLY - /* Calculate when the counter will next overflow */ - uint64_t remaining_cycles = -env->cp15.c15_ccnt; - if (!(env->cp15.c9_pmcr & PMCRLC)) { - remaining_cycles = (uint32_t)remaining_cycles; - } - int64_t overflow_in = cycles_ns_per(remaining_cycles); - - if (overflow_in > 0) { - int64_t overflow_at; - - if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - overflow_in, &overflow_at)) { - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); - } - } -#endif - - uint64_t prev_cycles = env->cp15.c15_ccnt_delta; - if (pmccntr_clockdiv_enabled(env)) { - prev_cycles /= 64; - } - env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt; - } -} - -static void pmevcntr_op_start(CPUARMState *env, uint8_t counter) -{ - - uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; - uint64_t count = 0; - if (event_supported(event)) { - uint16_t event_idx = supported_event_map[event]; - count = pm_events[event_idx].get_count(env); - } - - if (pmu_counter_enabled(env, counter)) { - uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; - uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ? 
- 1ULL << 63 : 1ULL << 31; - - if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) { - env->cp15.c9_pmovsr |= (1 << counter); - pmu_update_irq(env); - } - env->cp15.c14_pmevcntr[counter] = new_pmevcntr; - } - env->cp15.c14_pmevcntr_delta[counter] = count; -} - -static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter) -{ - if (pmu_counter_enabled(env, counter)) { -#ifndef CONFIG_USER_ONLY - uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; - uint16_t event_idx = supported_event_map[event]; - uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1); - int64_t overflow_in; - - if (!pmevcntr_is_64_bit(env, counter)) { - delta = (uint32_t)delta; - } - overflow_in = pm_events[event_idx].ns_per_count(delta); - - if (overflow_in > 0) { - int64_t overflow_at; - - if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - overflow_in, &overflow_at)) { - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); - } - } -#endif - - env->cp15.c14_pmevcntr_delta[counter] -= - env->cp15.c14_pmevcntr[counter]; - } -} - -void pmu_op_start(CPUARMState *env) -{ - unsigned int i; - pmccntr_op_start(env); - for (i = 0; i < pmu_num_counters(env); i++) { - pmevcntr_op_start(env, i); - } -} - -void pmu_op_finish(CPUARMState *env) -{ - unsigned int i; - pmccntr_op_finish(env); - for (i = 0; i < pmu_num_counters(env); i++) { - pmevcntr_op_finish(env, i); - } -} - -void pmu_pre_el_change(ARMCPU *cpu, void *ignored) -{ - pmu_op_start(&cpu->env); -} - -void pmu_post_el_change(ARMCPU *cpu, void *ignored) -{ - pmu_op_finish(&cpu->env); -} - -void arm_pmu_timer_cb(void *opaque) -{ - ARMCPU *cpu = opaque; - - /* - * Update all the counter values based on the current underlying counts, - * triggering interrupts to be raised, if necessary. pmu_op_finish() also - * has the effect of setting the cpu->pmu_timer to the next earliest time a - * counter may expire. - */ - pmu_op_start(&cpu->env); - pmu_op_finish(&cpu->env); -} - -static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - - if (value & PMCRC) { - /* The counter has been reset */ - env->cp15.c15_ccnt = 0; - } - - if (value & PMCRP) { - unsigned int i; - for (i = 0; i < pmu_num_counters(env); i++) { - env->cp15.c14_pmevcntr[i] = 0; - } - } - - env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; - env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); - - pmu_op_finish(env); -} - -static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint64_t pmcr = env->cp15.c9_pmcr; - - /* - * If EL2 is implemented and enabled for the current security state, reads - * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN. 
- */ - if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) { - pmcr &= ~PMCRN_MASK; - pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT; - } - - return pmcr; -} - -static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - unsigned int i; - uint64_t overflow_mask, new_pmswinc; - - for (i = 0; i < pmu_num_counters(env); i++) { - /* Increment a counter's count iff: */ - if ((value & (1 << i)) && /* counter's bit is set */ - /* counter is enabled and not filtered */ - pmu_counter_enabled(env, i) && - /* counter is SW_INCR */ - (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) { - pmevcntr_op_start(env, i); - - /* - * Detect if this write causes an overflow since we can't predict - * PMSWINC overflows like we can for other events - */ - new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; - - overflow_mask = pmevcntr_is_64_bit(env, i) ? - 1ULL << 63 : 1ULL << 31; - - if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) { - env->cp15.c9_pmovsr |= (1 << i); - pmu_update_irq(env); - } - - env->cp15.c14_pmevcntr[i] = new_pmswinc; - - pmevcntr_op_finish(env, i); - } - } -} - -static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint64_t ret; - pmccntr_op_start(env); - ret = env->cp15.c15_ccnt; - pmccntr_op_finish(env); - return ret; -} - -static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and - * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the - * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are - * accessed. - */ - env->cp15.c9_pmselr = value & 0x1f; -} - -static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - env->cp15.c15_ccnt = value; - pmccntr_op_finish(env); -} - -static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint64_t cur_val = pmccntr_read(env, NULL); - - pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value)); -} - -static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0; - pmccntr_op_finish(env); -} - -static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - /* M is not accessible from AArch32 */ - env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) | - (value & PMCCFILTR); - pmccntr_op_finish(env); -} - -static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri) -{ - /* M is not visible in AArch32 */ - return env->cp15.pmccfiltr_el0 & PMCCFILTR; -} - -static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - value &= pmu_counter_mask(env); - env->cp15.c9_pmcnten |= value; - pmu_op_finish(env); -} - -static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - value &= pmu_counter_mask(env); - env->cp15.c9_pmcnten &= ~value; - pmu_op_finish(env); -} - -static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pmovsr &= ~value; - pmu_update_irq(env); -} - -static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pmovsr |= value; - pmu_update_irq(env); -} - -static void pmevtyper_write(CPUARMState *env, 
const ARMCPRegInfo *ri, - uint64_t value, const uint8_t counter) -{ - if (counter == 31) { - pmccfiltr_write(env, ri, value); - } else if (counter < pmu_num_counters(env)) { - pmevcntr_op_start(env, counter); - - /* - * If this counter's event type is changing, store the current - * underlying count for the new type in c14_pmevcntr_delta[counter] so - * pmevcntr_op_finish has the correct baseline when it converts back to - * a delta. - */ - uint16_t old_event = env->cp15.c14_pmevtyper[counter] & - PMXEVTYPER_EVTCOUNT; - uint16_t new_event = value & PMXEVTYPER_EVTCOUNT; - if (old_event != new_event) { - uint64_t count = 0; - if (event_supported(new_event)) { - uint16_t event_idx = supported_event_map[new_event]; - count = pm_events[event_idx].get_count(env); - } - env->cp15.c14_pmevcntr_delta[counter] = count; - } - - env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; - pmevcntr_op_finish(env, counter); - } - /* - * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when - * PMSELR value is equal to or greater than the number of implemented - * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. - */ -} - -static uint64_t pmevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri, - const uint8_t counter) -{ - if (counter == 31) { - return env->cp15.pmccfiltr_el0; - } else if (counter < pmu_num_counters(env)) { - return env->cp15.c14_pmevtyper[counter]; - } else { - /* - * We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER - * are CONSTRAINED UNPREDICTABLE. See comments in pmevtyper_write(). - */ - return 0; - } -} - -static void pmevtyper_writefn(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - pmevtyper_write(env, ri, value, counter); -} - -static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - env->cp15.c14_pmevtyper[counter] = value; - - /* - * pmevtyper_rawwrite is called between a pair of pmu_op_start and - * pmu_op_finish calls when loading saved state for a migration. Because - * we're potentially updating the type of event here, the value written to - * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a - * different counter type. Therefore, we need to set this value to the - * current count for the counter type we're writing so that pmu_op_finish - * has the correct count for its calculation. 
- */ - uint16_t event = value & PMXEVTYPER_EVTCOUNT; - if (event_supported(event)) { - uint16_t event_idx = supported_event_map[event]; - env->cp15.c14_pmevcntr_delta[counter] = - pm_events[event_idx].get_count(env); - } -} - -static uint64_t pmevtyper_readfn(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - return pmevtyper_read(env, ri, counter); -} - -static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmevtyper_write(env, ri, value, env->cp15.c9_pmselr & 31); -} - -static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - return pmevtyper_read(env, ri, env->cp15.c9_pmselr & 31); -} - -static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value, uint8_t counter) -{ - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ - value &= MAKE_64BIT_MASK(0, 32); - } - if (counter < pmu_num_counters(env)) { - pmevcntr_op_start(env, counter); - env->cp15.c14_pmevcntr[counter] = value; - pmevcntr_op_finish(env, counter); - } - /* - * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. - */ -} - -static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint8_t counter) -{ - if (counter < pmu_num_counters(env)) { - uint64_t ret; - pmevcntr_op_start(env, counter); - ret = env->cp15.c14_pmevcntr[counter]; - pmevcntr_op_finish(env, counter); - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ - ret &= MAKE_64BIT_MASK(0, 32); - } - return ret; - } else { - /* - * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. 
- */ - return 0; - } -} - -static void pmevcntr_writefn(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - pmevcntr_write(env, ri, value, counter); -} - -static uint64_t pmevcntr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - return pmevcntr_read(env, ri, counter); -} - -static void pmevcntr_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - assert(counter < pmu_num_counters(env)); - env->cp15.c14_pmevcntr[counter] = value; - pmevcntr_write(env, ri, value, counter); -} - -static uint64_t pmevcntr_rawread(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - assert(counter < pmu_num_counters(env)); - return env->cp15.c14_pmevcntr[counter]; -} - -static void pmxevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmevcntr_write(env, ri, value, env->cp15.c9_pmselr & 31); -} - -static uint64_t pmxevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - return pmevcntr_read(env, ri, env->cp15.c9_pmselr & 31); -} - -static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - if (arm_feature(env, ARM_FEATURE_V8)) { - env->cp15.c9_pmuserenr = value & 0xf; - } else { - env->cp15.c9_pmuserenr = value & 1; - } -} - -static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* We have no event counters so only the C bit can be changed */ - value &= pmu_counter_mask(env); - env->cp15.c9_pminten |= value; - pmu_update_irq(env); -} - -static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pminten &= ~value; - pmu_update_irq(env); -} - static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -1712,6 +766,22 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (cpu_isar_feature(aa64_ecv, cpu)) { valid_mask |= SCR_ECVEN; } + if (cpu_isar_feature(aa64_gcs, cpu)) { + valid_mask |= SCR_GCSEN; + } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + valid_mask |= SCR_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + valid_mask |= SCR_SCTLR2EN; + } + if (cpu_isar_feature(aa64_s1pie, cpu) || + cpu_isar_feature(aa64_s2pie, cpu)) { + valid_mask |= SCR_PIEN; + } + if (cpu_isar_feature(aa64_mec, cpu)) { + valid_mask |= SCR_MECEN; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); if (cpu_isar_feature(aa32_ras, cpu)) { @@ -1746,12 +816,17 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) */ if (changed & (SCR_NS | SCR_NSE)) { tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS | ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS | ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_1_GCS | + ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2)); + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS)); } } @@ -1804,40 +879,40 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) uint64_t ret = 0; if (hcr_el2 & HCR_IMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VIRQ)) { ret |= CPSR_I; } - if (cs->interrupt_request & CPU_INTERRUPT_VINMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VINMI)) { ret |= ISR_IS; ret |= CPSR_I; } } else { 
- if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { ret |= CPSR_I; } - if (cs->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { ret |= ISR_IS; ret |= CPSR_I; } } if (hcr_el2 & HCR_FMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VFIQ)) { ret |= CPSR_F; } - if (cs->interrupt_request & CPU_INTERRUPT_VFNMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VFNMI)) { ret |= ISR_FS; ret |= CPSR_F; } } else { - if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_FIQ)) { ret |= CPSR_F; } } if (hcr_el2 & HCR_AMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VSERR) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VSERR)) { ret |= CPSR_A; } } @@ -1869,171 +944,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */ { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_NOP }, - /* - * Performance monitors are implementation defined in v7, - * but with an ARM recommended set of registers, which we - * follow. - * - * Performance registers fall into three categories: - * (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR) - * (b) RO in PL0 (ie UNDEF on write), RW in PL1 (PMUSERENR) - * (c) UNDEF in PL0 if PMUSERENR.EN==0, otherwise accessible (all others) - * For the cases controlled by PMUSERENR we must set .access to PL0_RW - * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. - */ - { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenset_write, - .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .raw_writefn = raw_write }, - { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, - .writefn = pmcntenset_write, .raw_writefn = raw_write }, - { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, - .access = PL0_RW, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), - .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .writefn = pmcntenclr_write, - .type = ARM_CP_ALIAS | ARM_CP_IO }, - { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenclr_write }, - { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, - .access = PL0_RW, .type = ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), - .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .writefn = pmovsr_write, - .raw_writefn = raw_write }, - { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsr_write, - .raw_writefn = raw_write }, - { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, - .access = PL0_W, .accessfn = 
pmreg_access_swinc, - .fgt = FGT_PMSWINC_EL0, - .type = ARM_CP_NO_RAW | ARM_CP_IO, - .writefn = pmswinc_write }, - { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4, - .access = PL0_W, .accessfn = pmreg_access_swinc, - .fgt = FGT_PMSWINC_EL0, - .type = ARM_CP_NO_RAW | ARM_CP_IO, - .writefn = pmswinc_write }, - { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, - .access = PL0_RW, .type = ARM_CP_ALIAS, - .fgt = FGT_PMSELR_EL0, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr), - .accessfn = pmreg_access_selr, .writefn = pmselr_write, - .raw_writefn = raw_write}, - { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5, - .access = PL0_RW, .accessfn = pmreg_access_selr, - .fgt = FGT_PMSELR_EL0, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr), - .writefn = pmselr_write, .raw_writefn = raw_write, }, - { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO, - .fgt = FGT_PMCCNTR_EL0, - .readfn = pmccntr_read, .writefn = pmccntr_write32, - .accessfn = pmreg_access_ccntr }, - { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, - .access = PL0_RW, .accessfn = pmreg_access_ccntr, - .fgt = FGT_PMCCNTR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt), - .readfn = pmccntr_read, .writefn = pmccntr_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, }, - { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7, - .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCCFILTR_EL0, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .resetvalue = 0, }, - { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, - .writefn = pmccfiltr_write, .raw_writefn = raw_write, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCCFILTR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), - .resetvalue = 0, }, - { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, - { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, - { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access_xevcntr, - .fgt = FGT_PMEVCNTRN_EL0, - .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, - { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access_xevcntr, - .fgt = FGT_PMEVCNTRN_EL0, - .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, - { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, - .access = PL0_R | PL1_RW, .accessfn = access_tpm, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmuserenr), - .resetvalue = 0, - .writefn = pmuserenr_write, .raw_writefn = 
raw_write }, - { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 0, - .access = PL0_R | PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), - .resetvalue = 0, - .writefn = pmuserenr_write, .raw_writefn = raw_write }, - { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten), - .resetvalue = 0, - .writefn = pmintenset_write, .raw_writefn = raw_write }, - { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenset_write, .raw_writefn = raw_write, - .resetvalue = 0x0 }, - { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write, }, - { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, @@ -2067,12 +977,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AFSR0_EL1, .nv2_redirect_offset = 0x128 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 0), .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AFSR1_EL1, .nv2_redirect_offset = 0x130 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 1), .type = ARM_CP_CONST, .resetvalue = 0 }, /* * MAIR can just read-as-written because we don't implement caches @@ -2083,6 +997,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_MAIR_EL1, .nv2_redirect_offset = 0x140 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 0), .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]), .resetvalue = 0 }, { .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64, @@ -2116,25 +1032,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, }; -static const ARMCPRegInfo pmovsset_cp_reginfo[] = { - /* PMOVSSET is not implemented in v7 before v7ve */ - { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsset_write, - .raw_writefn = raw_write }, - { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn 
= 9, .crm = 14, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsset_write, - .raw_writefn = raw_write }, -}; - static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -2211,7 +1108,7 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { .resetvalue = 0 }, }; -static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) +static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); @@ -3148,9 +2045,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .resetfn = arm_gt_cntfrq_reset, }, /* overall control: mostly access permissions */ - { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH, + { .name = "CNTKCTL_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0, .access = PL1_RW, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 14, 1, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 14, 1, 0), .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl), .resetvalue = 0, }, @@ -3443,402 +2342,6 @@ static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) } } -#ifndef CONFIG_USER_ONLY -/* get_phys_addr() isn't present for user-mode-only targets */ - -static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (ri->opc2 & 4) { - /* - * The ATS12NSO* operations must trap to EL3 or EL2 if executed in - * Secure EL1 (which can only happen if EL3 is AArch64). - * They are simply UNDEF if executed from NS EL1. - * They function normally from EL2 or EL3. - */ - if (arm_current_el(env) == 1) { - if (arm_is_secure_below_el3(env)) { - if (env->cp15.scr_el3 & SCR_EEL2) { - return CP_ACCESS_TRAP_EL2; - } - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_UNDEFINED; - } - } - return CP_ACCESS_OK; -} - -#ifdef CONFIG_TCG -static int par_el1_shareability(GetPhysAddrResult *res) -{ - /* - * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC - * memory -- see pseudocode PAREncodeShareability(). - */ - if (((res->cacheattrs.attrs & 0xf0) == 0) || - res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { - return 2; - } - return res->cacheattrs.shareability; -} - -static uint64_t do_ats_write(CPUARMState *env, uint64_t value, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - ARMSecuritySpace ss) -{ - bool ret; - uint64_t par64; - bool format64 = false; - ARMMMUFaultInfo fi = {}; - GetPhysAddrResult res = {}; - - /* - * I_MXTJT: Granule protection checks are not performed on the final - * address of a successful translation. This is a translation not a - * memory reference, so "memop = none = 0". - */ - ret = get_phys_addr_with_space_nogpc(env, value, access_type, 0, - mmu_idx, ss, &res, &fi); - - /* - * ATS operations only do S1 or S1+S2 translations, so we never - * have to deal with the ARMCacheAttrs format for S2 only. - */ - assert(!res.cacheattrs.is_s2_format); - - if (ret) { - /* - * Some kinds of translation fault must cause exceptions rather - * than being reported in the PAR. - */ - int current_el = arm_current_el(env); - int target_el; - uint32_t syn, fsr, fsc; - bool take_exc = false; - - if (fi.s1ptw && current_el == 1 - && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { - /* - * Synchronous stage 2 fault on an access made as part of the - * translation table walk for AT S1E0* or AT S1E1* insn - * executed from NS EL1. 
If this is a synchronous external abort - * and SCR_EL3.EA == 1, then we take a synchronous external abort - * to EL3. Otherwise the fault is taken as an exception to EL2, - * and HPFAR_EL2 holds the faulting IPA. - */ - if (fi.type == ARMFault_SyncExternalOnWalk && - (env->cp15.scr_el3 & SCR_EA)) { - target_el = 3; - } else { - env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4; - if (arm_is_secure_below_el3(env) && fi.s1ns) { - env->cp15.hpfar_el2 |= HPFAR_NS; - } - target_el = 2; - } - take_exc = true; - } else if (fi.type == ARMFault_SyncExternalOnWalk) { - /* - * Synchronous external aborts during a translation table walk - * are taken as Data Abort exceptions. - */ - if (fi.stage2) { - if (current_el == 3) { - target_el = 3; - } else { - target_el = 2; - } - } else { - target_el = exception_target_el(env); - } - take_exc = true; - } - - if (take_exc) { - /* Construct FSR and FSC using same logic as arm_deliver_fault() */ - if (target_el == 2 || arm_el_is_aa64(env, target_el) || - arm_s1_regime_using_lpae_format(env, mmu_idx)) { - fsr = arm_fi_to_lfsc(&fi); - fsc = extract32(fsr, 0, 6); - } else { - fsr = arm_fi_to_sfsc(&fi); - fsc = 0x3f; - } - /* - * Report exception with ESR indicating a fault due to a - * translation table walk for a cache maintenance instruction. - */ - syn = syn_data_abort_no_iss(current_el == target_el, 0, - fi.ea, 1, fi.s1ptw, 1, fsc); - env->exception.vaddress = value; - env->exception.fsr = fsr; - raise_exception(env, EXCP_DATA_ABORT, syn, target_el); - } - } - - if (is_a64(env)) { - format64 = true; - } else if (arm_feature(env, ARM_FEATURE_LPAE)) { - /* - * ATS1Cxx: - * * TTBCR.EAE determines whether the result is returned using the - * 32-bit or the 64-bit PAR format - * * Instructions executed in Hyp mode always use the 64bit format - * - * ATS1S2NSOxx uses the 64bit format if any of the following is true: - * * The Non-secure TTBCR.EAE bit is set to 1 - * * The implementation includes EL2, and the value of HCR.VM is 1 - * - * (Note that HCR.DC makes HCR.VM behave as if it is 1.) - * - * ATS1Hx always uses the 64bit format. - */ - format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); - - if (arm_feature(env, ARM_FEATURE_EL2)) { - if (mmu_idx == ARMMMUIdx_E10_0 || - mmu_idx == ARMMMUIdx_E10_1 || - mmu_idx == ARMMMUIdx_E10_1_PAN) { - format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); - } else { - format64 |= arm_current_el(env) == 2; - } - } - } - - if (format64) { - /* Create a 64-bit PAR */ - par64 = (1 << 11); /* LPAE bit always set */ - if (!ret) { - par64 |= res.f.phys_addr & ~0xfffULL; - if (!res.f.attrs.secure) { - par64 |= (1 << 9); /* NS */ - } - par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ - par64 |= par_el1_shareability(&res) << 7; /* SH */ - } else { - uint32_t fsr = arm_fi_to_lfsc(&fi); - - par64 |= 1; /* F */ - par64 |= (fsr & 0x3f) << 1; /* FS */ - if (fi.stage2) { - par64 |= (1 << 9); /* S */ - } - if (fi.s1ptw) { - par64 |= (1 << 8); /* PTW */ - } - } - } else { - /* - * fsr is a DFSR/IFSR value for the short descriptor - * translation table format (with WnR always clear). - * Convert it to a 32-bit PAR. 
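/*
 * Standalone illustration (not from this patch): the 64-bit PAR packing
 * that do_ats_write() performs above for a successful translation.
 * Field positions are taken from the code being removed here: bit 11
 * (LPAE format), bit 9 (NS), bits 8:7 (SH), bits 63:56 (ATTR).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_par64(uint64_t phys_addr, int nonsecure,
                           unsigned attrs, unsigned sh)
{
    uint64_t par64 = 1 << 11;           /* LPAE bit, always set */

    par64 |= phys_addr & ~0xfffULL;     /* output address */
    if (nonsecure) {
        par64 |= 1 << 9;                /* NS */
    }
    par64 |= (uint64_t)attrs << 56;     /* MAIR-format attributes */
    par64 |= (uint64_t)sh << 7;         /* shareability */
    return par64;
}

int main(void)
{
    /* e.g. Normal WB cacheable (0xff), Inner Shareable (3) */
    printf("PAR = 0x%016" PRIx64 "\n",
           pack_par64(0x40001000ULL, 1, 0xff, 3));
    return 0;
}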
- */ - if (!ret) { - /* We do not set any attribute bits in the PAR */ - if (res.f.lg_page_size == 24 - && arm_feature(env, ARM_FEATURE_V7)) { - par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); - } else { - par64 = res.f.phys_addr & 0xfffff000; - } - if (!res.f.attrs.secure) { - par64 |= (1 << 9); /* NS */ - } - } else { - uint32_t fsr = arm_fi_to_sfsc(&fi); - - par64 = ((fsr & (1 << 10)) >> 5) | ((fsr & (1 << 12)) >> 6) | - ((fsr & 0xf) << 1) | 1; - } - } - return par64; -} -#endif /* CONFIG_TCG */ - -static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - uint64_t par64; - ARMMMUIdx mmu_idx; - int el = arm_current_el(env); - ARMSecuritySpace ss = arm_security_space(env); - - switch (ri->opc2 & 6) { - case 0: - /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ - switch (el) { - case 3: - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = ARMMMUIdx_E30_3_PAN; - } else { - mmu_idx = ARMMMUIdx_E3; - } - break; - case 2: - g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ - /* fall through */ - case 1: - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = ARMMMUIdx_Stage1_E1_PAN; - } else { - mmu_idx = ARMMMUIdx_Stage1_E1; - } - break; - default: - g_assert_not_reached(); - } - break; - case 2: - /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ - switch (el) { - case 3: - mmu_idx = ARMMMUIdx_E30_0; - break; - case 2: - g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ - mmu_idx = ARMMMUIdx_Stage1_E0; - break; - case 1: - mmu_idx = ARMMMUIdx_Stage1_E0; - break; - default: - g_assert_not_reached(); - } - break; - case 4: - /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ - mmu_idx = ARMMMUIdx_E10_1; - ss = ARMSS_NonSecure; - break; - case 6: - /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ - mmu_idx = ARMMMUIdx_E10_0; - ss = ARMSS_NonSecure; - break; - default: - g_assert_not_reached(); - } - - par64 = do_ats_write(env, value, access_type, mmu_idx, ss); - - A32_BANKED_CURRENT_REG_SET(env, par, par64); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} - -static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - uint64_t par64; - - /* There is no SecureEL2 for AArch32. */ - par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, - ARMSS_NonSecure); - - A32_BANKED_CURRENT_REG_SET(env, par, par64); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} - -static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - /* - * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level - * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can - * only happen when executing at EL3 because that combination also causes an - * illegal exception return. We don't need to check FEAT_RME either, because - * scr_write() ensures that the NSE bit is not set otherwise. 
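/*
 * For reference (illustrative, not QEMU code): the SCR_EL3.{NSE,NS}
 * combinations that at_e012_access() below is guarding. Only the
 * {NSE=1, NS=0} pairing is reserved and rejected.
 */
#include <stdio.h>

static const char *space_below_el3(int nse, int ns)
{
    if (!nse) {
        return ns ? "Non-secure" : "Secure";
    }
    return ns ? "Realm" : "reserved";
}

int main(void)
{
    for (int nse = 0; nse < 2; nse++) {
        for (int ns = 0; ns < 2; ns++) {
            printf("NSE=%d NS=%d -> %s\n", nse, ns,
                   space_below_el3(nse, ns));
        }
    }
    return 0;
}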
- */ - if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { - return CP_ACCESS_UNDEFINED; - } - return CP_ACCESS_OK; -} - -static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 3 && - !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { - return CP_ACCESS_UNDEFINED; - } - return at_e012_access(env, ri, isread); -} - -static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) { - return CP_ACCESS_TRAP_EL2; - } - return at_e012_access(env, ri, isread); -} - -static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - ARMMMUIdx mmu_idx; - uint64_t hcr_el2 = arm_hcr_el2_eff(env); - bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); - bool for_el3 = false; - ARMSecuritySpace ss; - - switch (ri->opc2 & 6) { - case 0: - switch (ri->opc1) { - case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = regime_e20 ? - ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN; - } else { - mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; - } - break; - case 4: /* AT S1E2R, AT S1E2W */ - mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; - break; - case 6: /* AT S1E3R, AT S1E3W */ - mmu_idx = ARMMMUIdx_E3; - for_el3 = true; - break; - default: - g_assert_not_reached(); - } - break; - case 2: /* AT S1E0R, AT S1E0W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0; - break; - case 4: /* AT S12E1R, AT S12E1W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1; - break; - case 6: /* AT S12E0R, AT S12E0W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0; - break; - default: - g_assert_not_reached(); - } - - ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env); - env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx, ss); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} -#endif - /* Return basic MPU access permission bits. */ static uint32_t simple_mpu_ap_bits(uint32_t val) { @@ -4276,7 +2779,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* If the ASID changes (with a 64-bit write), we must flush the TLB. 
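/*
 * Standalone sketch (not from this patch) of the ASID-change test used
 * by vmsa_ttbr_write() below: the ASID lives in TTBRn_EL1[63:48], and
 * only a change in those bits forces the TLB flush. extract64() is
 * reimplemented locally so the example compiles on its own.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ULL >> (64 - length));
}

static bool asid_changed(uint64_t old_ttbr, uint64_t new_ttbr)
{
    return extract64(old_ttbr ^ new_ttbr, 48, 16) != 0;
}

int main(void)
{
    printf("%d\n", asid_changed(0x0001000000080000ULL,
                                0x0002000000080000ULL));  /* 1: flush */
    printf("%d\n", asid_changed(0x0001000000080000ULL,
                                0x0001000000090000ULL));  /* 0: no flush */
    return 0;
}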
*/ - if (cpreg_field_is_64bit(ri) && + if (cpreg_field_type(ri) == MO_64 && extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { ARMCPU *cpu = env_archcpu(env); tlb_flush(CPU(cpu)); @@ -4297,7 +2800,9 @@ static void vmsa_tcr_ttbr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, (arm_hcr_el2_eff(env) & HCR_E2H)) { uint16_t mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; tlb_flush_by_mmuidx(env_cpu(env), mask); } raw_write(env, ri, value); @@ -4337,6 +2842,8 @@ static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_FAR_EL1, .nv2_redirect_offset = 0x220 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 6, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 6, 0, 0), .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]), .resetvalue = 0, }, }; @@ -4347,12 +2854,16 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_ESR_EL1, .nv2_redirect_offset = 0x138 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 2, 0), .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, }, { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR0_EL1, .nv2_redirect_offset = 0x200 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 0), .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) } }, @@ -4361,6 +2872,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR1_EL1, .nv2_redirect_offset = 0x210 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 1), .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) } }, @@ -4369,6 +2882,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TCR_EL1, .nv2_redirect_offset = 0x120 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 2), .writefn = vmsa_tcr_el12_write, .raw_writefn = raw_write, .resetvalue = 0, @@ -4413,8 +2928,12 @@ static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri, static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else /* Wait-for-interrupt (deprecated) */ cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HALT); +#endif } static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4468,39 +2987,6 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, }; -static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - env->cp15.c15_cpar = value & 0x3fff; -} - -static const ARMCPRegInfo xscale_cp_reginfo[] = { - { .name = "XSCALE_CPAR", - .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c15_cpar), 
.resetvalue = 0, - .writefn = xscale_cpar_write, }, - { .name = "XSCALE_AUXCR", - .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr), - .resetvalue = 0, }, - /* - * XScale specific cache-lockdown: since we have no cache we NOP these - * and hope the guest does not really rely on cache behaviour. - */ - { .name = "XSCALE_LOCK_ICACHE_LINE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NOP }, - { .name = "XSCALE_UNLOCK_ICACHE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NOP }, - { .name = "XSCALE_DCACHE_LOCK", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_NOP }, - { .name = "XSCALE_UNLOCK_DCACHE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NOP }, -}; - static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { /* * RAZ/WI the whole crn=15 space, when we don't have a more specific @@ -4603,12 +3089,14 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) } static const ARMCPRegInfo lpae_cp_reginfo[] = { - /* NOP AMAIR0/1 */ - { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, + /* AMAIR0 is mapped to AMAIR_EL1[31:0] */ + { .name = "AMAIR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AMAIR_EL1, .nv2_redirect_offset = 0x148 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 3, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 3, 0), .type = ARM_CP_CONST, .resetvalue = 0 }, /* AMAIR1 is mapped to AMAIR_EL1[63:32] */ { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1, @@ -4891,16 +3379,6 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(CPU(cpu)); - - if (tcg_enabled() && ri->type & ARM_CP_SUPPRESS_TB_END) { - /* - * Normally we would always end the TB on an SCTLR write; see the - * comment in ARMCPRegInfo sctlr initialization below for why Xscale - * is special. Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild - * of hflags from the translator, so do it here. - */ - arm_rebuild_hflags(env); - } } static void mdcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4948,15 +3426,71 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static CPAccessResult access_nv1_with_nvx(uint64_t hcr_nv) +{ + return hcr_nv == (HCR_NV | HCR_NV1) ? CP_ACCESS_TRAP_EL2 : CP_ACCESS_OK; +} + static CPAccessResult access_nv1(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (arm_current_el(env) == 1) { - uint64_t hcr_nv = arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1 | HCR_NV2); + return access_nv1_with_nvx(arm_hcr_el2_nvx_eff(env)); + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_nv1_or_exlock_el1(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + uint64_t nvx = arm_hcr_el2_nvx_eff(env); - if (hcr_nv == (HCR_NV | HCR_NV1)) { - return CP_ACCESS_TRAP_EL2; + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[1] & GCSCR_EXLOCKEN) && + !(nvx & HCR_NV1)) { + return CP_ACCESS_EXLOCK; } + return access_nv1_with_nvx(nvx); + } + + /* + * At EL2, since VHE redirection is done at translation time, + * el_is_in_host is always false here, so EXLOCK does not apply. 
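/*
 * Condensed sketch (illustrative bit positions, not the architectural
 * ones) of the FEAT_GCS EXLOCK test shared by the access_exlock_el*()
 * helpers that follow: a write to ELR_ELx/SPSR_ELx yields
 * CP_ACCESS_EXLOCK when PSTATE.EXLOCK is set and GCSCR_ELx.EXLOCKEN is
 * enabled for the EL that owns the register.
 */
#include <stdbool.h>
#include <stdint.h>

#define PSTATE_EXLOCK   (1ULL << 0)   /* illustrative */
#define GCSCR_EXLOCKEN  (1ULL << 1)   /* illustrative */

bool exlock_blocks_write(bool isread, uint64_t pstate, uint64_t gcscr_elx)
{
    return !isread &&
           (pstate & PSTATE_EXLOCK) &&
           (gcscr_elx & GCSCR_EXLOCKEN);
}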
+ */ + return CP_ACCESS_OK; +} + +static CPAccessResult access_exlock_el2(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) +{ + int el = arm_current_el(env); + + if (el == 3) { + return CP_ACCESS_OK; + } + + /* + * Access to the EL2 register from EL1 means NV is set, and + * EXLOCK has priority over an NV1 trap to EL2. + */ + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN)) { + return CP_ACCESS_EXLOCK; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_exlock_el3(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) +{ + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[3] & GCSCR_EXLOCKEN)) { + return CP_ACCESS_EXLOCK; } return CP_ACCESS_OK; } @@ -4985,7 +3519,7 @@ static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri, mmap_lock(); - tb_invalidate_phys_range(start_address, end_address); + tb_invalidate_phys_range(env_cpu(env), start_address, end_address); mmap_unlock(); } @@ -5089,53 +3623,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, .fgt = FGT_DCCISW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, -#ifndef CONFIG_USER_ONLY - /* 64 bit address translation operations */ - { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1R, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1W, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E0R, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E0W, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ - { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, - { .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1, - .access = 
PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, { .name = "PAR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 7, .crm = 4, .opc2 = 0, @@ -5143,7 +3630,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .fgt = FGT_PAR_EL1, .fieldoffset = offsetof(CPUARMState, cp15.par_el[1]), .writefn = par_write }, -#endif /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab }, @@ -5180,14 +3666,18 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL1_RW, .accessfn = access_nv1, + .access = PL1_RW, .accessfn = access_nv1_or_exlock_el1, .nv2_redirect_offset = 0x230 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 1), .fieldoffset = offsetof(CPUARMState, elr_el[1]) }, { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL1_RW, .accessfn = access_nv1, + .access = PL1_RW, .accessfn = access_nv1_or_exlock_el1, .nv2_redirect_offset = 0x160 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 0), .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) }, /* * We rely on the access checks not allowing the guest to write to the @@ -5327,7 +3817,8 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) value &= valid_mask; /* RW is RAO/WI if EL1 is AArch64 only */ - if (!cpu_isar_feature(aa64_aa32_el1, cpu)) { + if (arm_feature(env, ARM_FEATURE_AARCH64) && + !cpu_isar_feature(aa64_aa32_el1, cpu)) { value |= HCR_RW; } @@ -5472,6 +3963,16 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) return arm_hcr_el2_eff_secstate(env, arm_security_space_below_el3(env)); } +uint64_t arm_hcr_el2_nvx_eff(CPUARMState *env) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + + if (!(hcr & HCR_NV)) { + return 0; /* CONSTRAINED UNPREDICTABLE wrt NV1 */ + } + return hcr & (HCR_NV2 | HCR_NV1 | HCR_NV); +} + /* * Corresponds to ARM pseudocode function ELIsInHost(). */ @@ -5506,23 +4007,27 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); uint64_t valid_mask = 0; - /* FEAT_MOPS adds MSCEn and MCE2 */ if (cpu_isar_feature(aa64_mops, cpu)) { valid_mask |= HCRX_MSCEN | HCRX_MCE2; } - - /* FEAT_NMI adds TALLINT, VINMI and VFNMI */ if (cpu_isar_feature(aa64_nmi, cpu)) { valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI; } - /* FEAT_CMOW adds CMOW */ if (cpu_isar_feature(aa64_cmow, cpu)) { valid_mask |= HCRX_CMOW; } - /* FEAT_XS adds FGTnXS, FnXS */ if (cpu_isar_feature(aa64_xs, cpu)) { valid_mask |= HCRX_FGTNXS | HCRX_FNXS; } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + valid_mask |= HCRX_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + valid_mask |= HCRX_SCTLR2EN; + } + if (cpu_isar_feature(aa64_gcs, cpu)) { + valid_mask |= HCRX_GCSEN; + } /* Clear RES0 bits. */ env->cp15.hcrx_el2 = value & valid_mask; @@ -5580,11 +4085,22 @@ uint64_t arm_hcrx_el2_eff(CPUARMState *env) * This may need to be revisited for future bits. 
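/*
 * Standalone illustration (bit positions chosen arbitrarily) of how
 * arm_hcr_el2_nvx_eff() above folds NV1/NV2 under NV, and how
 * access_nv1() consumes the result: only the NV+NV1 combination
 * produces the trap to EL2.
 */
#include <stdint.h>
#include <stdio.h>

#define HCR_NV   (1ULL << 0)   /* illustrative */
#define HCR_NV1  (1ULL << 1)
#define HCR_NV2  (1ULL << 2)

static uint64_t nvx_eff(uint64_t hcr)
{
    if (!(hcr & HCR_NV)) {
        return 0;   /* NV1/NV2 are ignored when NV is clear */
    }
    return hcr & (HCR_NV2 | HCR_NV1 | HCR_NV);
}

int main(void)
{
    printf("%d\n", nvx_eff(HCR_NV | HCR_NV1) == (HCR_NV | HCR_NV1)); /* 1 */
    printf("%d\n", nvx_eff(HCR_NV1) == (HCR_NV | HCR_NV1));          /* 0 */
    return 0;
}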
*/ if (!arm_is_el2_enabled(env)) { + ARMCPU *cpu = env_archcpu(env); uint64_t hcrx = 0; - if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { - /* MSCEn behaves as 1 if EL2 is not enabled */ + + /* Bits whose effective value is 1 if EL2 is not enabled. */ + if (cpu_isar_feature(aa64_mops, cpu)) { hcrx |= HCRX_MSCEN; } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + hcrx |= HCRX_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + hcrx |= HCRX_SCTLR2EN; + } + if (cpu_isar_feature(aa64_gcs, cpu)) { + hcrx |= HCRX_GCSEN; + } return hcrx; } if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) { @@ -5642,7 +4158,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL2_RW, + .access = PL2_RW, .accessfn = access_exlock_el2, .fieldoffset = offsetof(CPUARMState, elr_el[2]) }, { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, .type = ARM_CP_NV2_REDIRECT, @@ -5660,7 +4176,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL2_RW, + .access = PL2_RW, .accessfn = access_exlock_el2, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) }, { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, @@ -5746,33 +4262,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, #ifndef CONFIG_USER_ONLY - /* - * Unlike the other EL2-related AT operations, these must - * UNDEF from EL3 if EL2 is not implemented, which is why we - * define them here rather than with the rest of the AT ops. - */ - { .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = ats_write64 }, - { .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = ats_write64 }, - /* - * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE - * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 - * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose - * to behave as if SCR.NS was 1.
- */ - { .name = "ATS1HR", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL2_W, - .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, - { .name = "ATS1HW", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL2_W, - .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0, /* @@ -5969,7 +4458,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL3_RW, + .access = PL3_RW, .accessfn = access_exlock_el3, .fieldoffset = offsetof(CPUARMState, elr_el[3]) }, { .name = "ESR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 2, .opc2 = 0, @@ -5980,7 +4469,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SPSR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL3_RW, + .access = PL3_RW, .accessfn = access_exlock_el3, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_MON]) }, { .name = "VBAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 0, @@ -6048,235 +4537,6 @@ static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri, return e2h_access(env, ri, isread); } -/* Test if system register redirection is to occur in the current state. */ -static bool redirect_for_e2h(CPUARMState *env) -{ - return arm_current_el(env) == 2 && (arm_hcr_el2_eff(env) & HCR_E2H); -} - -static uint64_t el2_e2h_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - CPReadFn *readfn; - - if (redirect_for_e2h(env)) { - /* Switch to the saved EL2 version of the register. */ - ri = ri->opaque; - readfn = ri->readfn; - } else { - readfn = ri->orig_readfn; - } - if (readfn == NULL) { - readfn = raw_read; - } - return readfn(env, ri); -} - -static void el2_e2h_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPWriteFn *writefn; - - if (redirect_for_e2h(env)) { - /* Switch to the saved EL2 version of the register. */ - ri = ri->opaque; - writefn = ri->writefn; - } else { - writefn = ri->orig_writefn; - } - if (writefn == NULL) { - writefn = raw_write; - } - writefn(env, ri, value); -} - -static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ - return ri->orig_readfn(env, ri->opaque); -} - -static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ - return ri->orig_writefn(env, ri->opaque, value); -} - -static CPAccessResult el2_e2h_e12_access(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1) { - /* - * This must be a FEAT_NV access (will either trap or redirect - * to memory). None of the registers with _EL12 aliases want to - * apply their trap controls for this kind of access, so don't - * call the orig_accessfn or do the "UNDEF when E2H is 0" check. 
- */ - return CP_ACCESS_OK; - } - /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */ - if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { - return CP_ACCESS_UNDEFINED; - } - if (ri->orig_accessfn) { - return ri->orig_accessfn(env, ri->opaque, isread); - } - return CP_ACCESS_OK; -} - -static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) -{ - struct E2HAlias { - uint32_t src_key, dst_key, new_key; - const char *src_name, *dst_name, *new_name; - bool (*feature)(const ARMISARegisters *id); - }; - -#define K(op0, op1, crn, crm, op2) \ - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) - - static const struct E2HAlias aliases[] = { - { K(3, 0, 1, 0, 0), K(3, 4, 1, 0, 0), K(3, 5, 1, 0, 0), - "SCTLR", "SCTLR_EL2", "SCTLR_EL12" }, - { K(3, 0, 1, 0, 2), K(3, 4, 1, 1, 2), K(3, 5, 1, 0, 2), - "CPACR", "CPTR_EL2", "CPACR_EL12" }, - { K(3, 0, 2, 0, 0), K(3, 4, 2, 0, 0), K(3, 5, 2, 0, 0), - "TTBR0_EL1", "TTBR0_EL2", "TTBR0_EL12" }, - { K(3, 0, 2, 0, 1), K(3, 4, 2, 0, 1), K(3, 5, 2, 0, 1), - "TTBR1_EL1", "TTBR1_EL2", "TTBR1_EL12" }, - { K(3, 0, 2, 0, 2), K(3, 4, 2, 0, 2), K(3, 5, 2, 0, 2), - "TCR_EL1", "TCR_EL2", "TCR_EL12" }, - { K(3, 0, 4, 0, 0), K(3, 4, 4, 0, 0), K(3, 5, 4, 0, 0), - "SPSR_EL1", "SPSR_EL2", "SPSR_EL12" }, - { K(3, 0, 4, 0, 1), K(3, 4, 4, 0, 1), K(3, 5, 4, 0, 1), - "ELR_EL1", "ELR_EL2", "ELR_EL12" }, - { K(3, 0, 5, 1, 0), K(3, 4, 5, 1, 0), K(3, 5, 5, 1, 0), - "AFSR0_EL1", "AFSR0_EL2", "AFSR0_EL12" }, - { K(3, 0, 5, 1, 1), K(3, 4, 5, 1, 1), K(3, 5, 5, 1, 1), - "AFSR1_EL1", "AFSR1_EL2", "AFSR1_EL12" }, - { K(3, 0, 5, 2, 0), K(3, 4, 5, 2, 0), K(3, 5, 5, 2, 0), - "ESR_EL1", "ESR_EL2", "ESR_EL12" }, - { K(3, 0, 6, 0, 0), K(3, 4, 6, 0, 0), K(3, 5, 6, 0, 0), - "FAR_EL1", "FAR_EL2", "FAR_EL12" }, - { K(3, 0, 10, 2, 0), K(3, 4, 10, 2, 0), K(3, 5, 10, 2, 0), - "MAIR_EL1", "MAIR_EL2", "MAIR_EL12" }, - { K(3, 0, 10, 3, 0), K(3, 4, 10, 3, 0), K(3, 5, 10, 3, 0), - "AMAIR0", "AMAIR_EL2", "AMAIR_EL12" }, - { K(3, 0, 12, 0, 0), K(3, 4, 12, 0, 0), K(3, 5, 12, 0, 0), - "VBAR", "VBAR_EL2", "VBAR_EL12" }, - { K(3, 0, 13, 0, 1), K(3, 4, 13, 0, 1), K(3, 5, 13, 0, 1), - "CONTEXTIDR_EL1", "CONTEXTIDR_EL2", "CONTEXTIDR_EL12" }, - { K(3, 0, 14, 1, 0), K(3, 4, 14, 1, 0), K(3, 5, 14, 1, 0), - "CNTKCTL", "CNTHCTL_EL2", "CNTKCTL_EL12" }, - - /* - * Note that redirection of ZCR is mentioned in the description - * of ZCR_EL2, and aliasing in the description of ZCR_EL1, but - * not in the summary table. 
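/*
 * Worked example of how the K() keys above are packed, assuming the
 * usual KVM sysreg field layout that ENCODE_AA64_CP_REG follows:
 * op0 at bit 14, op1 at bit 11, crn at bit 7, crm at bit 3, op2 at
 * bit 0.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t encode_sysreg(int op0, int op1, int crn, int crm, int op2)
{
    return (op0 << 14) | (op1 << 11) | (crn << 7) | (crm << 3) | op2;
}

int main(void)
{
    /* The SCTLR row of the alias table: SCTLR, SCTLR_EL2, SCTLR_EL12. */
    printf("src 0x%05x dst 0x%05x new 0x%05x\n",
           encode_sysreg(3, 0, 1, 0, 0),
           encode_sysreg(3, 4, 1, 0, 0),
           encode_sysreg(3, 5, 1, 0, 0));
    return 0;
}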
- */ - { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), - "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, - { K(3, 0, 1, 2, 6), K(3, 4, 1, 2, 6), K(3, 5, 1, 2, 6), - "SMCR_EL1", "SMCR_EL2", "SMCR_EL12", isar_feature_aa64_sme }, - - { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), - "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, - - { K(3, 0, 13, 0, 7), K(3, 4, 13, 0, 7), K(3, 5, 13, 0, 7), - "SCXTNUM_EL1", "SCXTNUM_EL2", "SCXTNUM_EL12", - isar_feature_aa64_scxtnum }, - - /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ - /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ - }; -#undef K - - size_t i; - - for (i = 0; i < ARRAY_SIZE(aliases); i++) { - const struct E2HAlias *a = &aliases[i]; - ARMCPRegInfo *src_reg, *dst_reg, *new_reg; - bool ok; - - if (a->feature && !a->feature(&cpu->isar)) { - continue; - } - - src_reg = g_hash_table_lookup(cpu->cp_regs, - (gpointer)(uintptr_t)a->src_key); - dst_reg = g_hash_table_lookup(cpu->cp_regs, - (gpointer)(uintptr_t)a->dst_key); - g_assert(src_reg != NULL); - g_assert(dst_reg != NULL); - - /* Cross-compare names to detect typos in the keys. */ - g_assert(strcmp(src_reg->name, a->src_name) == 0); - g_assert(strcmp(dst_reg->name, a->dst_name) == 0); - - /* None of the core system registers use opaque; we will. */ - g_assert(src_reg->opaque == NULL); - - /* Create alias before redirection so we dup the right data. */ - new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo)); - - new_reg->name = a->new_name; - new_reg->type |= ARM_CP_ALIAS; - /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */ - new_reg->access &= PL2_RW | PL3_RW; - /* The new_reg op fields are as per new_key, not the target reg */ - new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK) - >> CP_REG_ARM64_SYSREG_CRN_SHIFT; - new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK) - >> CP_REG_ARM64_SYSREG_CRM_SHIFT; - new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK) - >> CP_REG_ARM64_SYSREG_OP0_SHIFT; - new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK) - >> CP_REG_ARM64_SYSREG_OP1_SHIFT; - new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK) - >> CP_REG_ARM64_SYSREG_OP2_SHIFT; - new_reg->opaque = src_reg; - new_reg->orig_readfn = src_reg->readfn ?: raw_read; - new_reg->orig_writefn = src_reg->writefn ?: raw_write; - new_reg->orig_accessfn = src_reg->accessfn; - if (!new_reg->raw_readfn) { - new_reg->raw_readfn = raw_read; - } - if (!new_reg->raw_writefn) { - new_reg->raw_writefn = raw_write; - } - new_reg->readfn = el2_e2h_e12_read; - new_reg->writefn = el2_e2h_e12_write; - new_reg->accessfn = el2_e2h_e12_access; - - /* - * If the _EL1 register is redirected to memory by FEAT_NV2, - * then it shares the offset with the _EL12 register, - * and which one is redirected depends on HCR_EL2.NV1. 
- */ - if (new_reg->nv2_redirect_offset) { - assert(new_reg->nv2_redirect_offset & NV2_REDIR_NV1); - new_reg->nv2_redirect_offset &= ~NV2_REDIR_NV1; - new_reg->nv2_redirect_offset |= NV2_REDIR_NO_NV1; - } - - ok = g_hash_table_insert(cpu->cp_regs, - (gpointer)(uintptr_t)a->new_key, new_reg); - g_assert(ok); - - src_reg->opaque = dst_reg; - src_reg->orig_readfn = src_reg->readfn ?: raw_read; - src_reg->orig_writefn = src_reg->writefn ?: raw_write; - if (!src_reg->raw_readfn) { - src_reg->raw_readfn = raw_read; - } - if (!src_reg->raw_writefn) { - src_reg->raw_writefn = raw_write; - } - src_reg->readfn = el2_e2h_read; - src_reg->writefn = el2_e2h_write; - } -} #endif static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6569,6 +4829,8 @@ static const ARMCPRegInfo zcr_reginfo[] = { { .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0, .nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 0), .access = PL1_RW, .type = ARM_CP_SVE, .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]), .writefn = zcr_write, .raw_writefn = raw_write }, @@ -6584,7 +4846,6 @@ static const ARMCPRegInfo zcr_reginfo[] = { .writefn = zcr_write, .raw_writefn = raw_write }, }; -#ifdef TARGET_AARCH64 static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -6659,7 +4920,7 @@ void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask) * when disabled either. */ if (change & new & R_SVCR_ZA_MASK) { - memset(env->zarray, 0, sizeof(env->zarray)); + memset(&env->za_state, 0, sizeof(env->za_state)); } if (tcg_enabled()) { @@ -6678,10 +4939,14 @@ static void smcr_write(CPUARMState *env, const ARMCPRegInfo *ri, { int cur_el = arm_current_el(env); int old_len = sve_vqm1_for_el(env, cur_el); + uint64_t valid_mask = R_SMCR_LEN_MASK | R_SMCR_FA64_MASK; int new_len; QEMU_BUILD_BUG_ON(ARM_MAX_VQ > R_SMCR_LEN_MASK + 1); - value &= R_SMCR_LEN_MASK | R_SMCR_FA64_MASK; + if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) { + valid_mask |= R_SMCR_EZT0_MASK; + } + value &= valid_mask; raw_write(env, ri, value); /* @@ -6711,6 +4976,8 @@ static const ARMCPRegInfo sme_reginfo[] = { { .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6, .nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 6), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 6), .access = PL1_RW, .type = ARM_CP_SME, .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]), .writefn = smcr_write, .raw_writefn = raw_write }, @@ -6757,6 +5024,11 @@ static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri, R_GPCCR_ORGN_MASK | R_GPCCR_SH_MASK | R_GPCCR_PGS_MASK | R_GPCCR_GPC_MASK | R_GPCCR_GPCP_MASK; + if (cpu_isar_feature(aa64_rme_gpc2, env_archcpu(env))) { + rw_mask |= R_GPCCR_APPSAA_MASK | R_GPCCR_NSO_MASK | + R_GPCCR_SPAD_MASK | R_GPCCR_NSPAD_MASK | R_GPCCR_RLPAD_MASK; + } + env->cp15.gpccr_el3 = (value & rw_mask) | (env->cp15.gpccr_el3 & ~rw_mask); } @@ -6818,107 +5090,97 @@ static const ARMCPRegInfo nmi_reginfo[] = { .writefn = aa64_allint_write, .readfn = aa64_allint_read, .resetfn = arm_cp_reset_ignore }, }; -#endif /* TARGET_AARCH64 */ -static void define_pmu_regs(ARMCPU *cpu) +static CPAccessResult mecid_access(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) { - /* - * v7 performance monitor control register: same implementor - * field as main 
ID register, and we implement four counters in - * addition to the cycle count register. - */ - unsigned int i, pmcrn = pmu_num_counters(&cpu->env); - ARMCPRegInfo pmcr = { - .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, - .fgt = FGT_PMCR_EL0, - .type = ARM_CP_IO | ARM_CP_ALIAS, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), - .accessfn = pmreg_access, - .readfn = pmcr_read, .raw_readfn = raw_read, - .writefn = pmcr_write, .raw_writefn = raw_write, - }; - ARMCPRegInfo pmcr64 = { - .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), - .resetvalue = cpu->isar.reset_pmcr_el0, - .readfn = pmcr_read, .raw_readfn = raw_read, - .writefn = pmcr_write, .raw_writefn = raw_write, - }; + int el = arm_current_el(env); - define_one_arm_cp_reg(cpu, &pmcr); - define_one_arm_cp_reg(cpu, &pmcr64); - for (i = 0; i < pmcrn; i++) { - char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); - char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); - char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); - char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); - ARMCPRegInfo pmev_regs[] = { - { .name = pmevcntr_name, .cp = 15, .crn = 14, - .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .fgt = FGT_PMEVCNTRN_EL0, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .accessfn = pmreg_access_xevcntr }, - { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr, - .type = ARM_CP_IO, - .fgt = FGT_PMEVCNTRN_EL0, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .raw_readfn = pmevcntr_rawread, - .raw_writefn = pmevcntr_rawwrite }, - { .name = pmevtyper_name, .cp = 15, .crn = 14, - .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .fgt = FGT_PMEVTYPERN_EL0, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .accessfn = pmreg_access }, - { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .type = ARM_CP_IO, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .raw_writefn = pmevtyper_rawwrite }, - }; - define_arm_cp_regs(cpu, pmev_regs); - g_free(pmevcntr_name); - g_free(pmevcntr_el0_name); - g_free(pmevtyper_name); - g_free(pmevtyper_el0_name); - } - if (cpu_isar_feature(aa32_pmuv3p1, cpu)) { - ARMCPRegInfo v81_pmu_regs[] = { - { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid0, 32, 32) }, - { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid1, 32, 32) }, - }; - define_arm_cp_regs(cpu, v81_pmu_regs); - } - if (cpu_isar_feature(any_pmuv3p4, cpu)) { - static const ARMCPRegInfo v84_pmmir = { - .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, 
.opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, - .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMMIR_EL1, - .resetvalue = 0 - }; - define_one_arm_cp_reg(cpu, &v84_pmmir); + if (el == 2) { + if (arm_security_space(env) != ARMSS_Realm) { + return CP_ACCESS_UNDEFINED; + } + + if (!(env->cp15.scr_el3 & SCR_MECEN)) { + return CP_ACCESS_TRAP_EL3; + } + } + + return CP_ACCESS_OK; +} + +static void mecid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value = extract64(value, 0, MECID_WIDTH); + raw_write(env, ri, value); +} + +static CPAccessResult cipae_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + switch (arm_security_space(env)) { + case ARMSS_Root: /* EL3 */ + case ARMSS_Realm: /* Realm EL2 */ + return CP_ACCESS_OK; + default: + return CP_ACCESS_UNDEFINED; } } +static const ARMCPRegInfo mec_reginfo[] = { + { .name = "MECIDR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 7, .crn = 10, .crm = 8, + .access = PL2_R, .type = ARM_CP_CONST | ARM_CP_NV_NO_TRAP, + .resetvalue = MECID_WIDTH - 1 }, + { .name = "MECID_P0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 0, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_p0_el2) }, + { .name = "MECID_A0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 1, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_a0_el2) }, + { .name = "MECID_P1_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 2, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_p1_el2) }, + { .name = "MECID_A1_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_a1_el2) }, + { .name = "MECID_RL_A_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 1, .crn = 10, .crm = 10, + .access = PL3_RW, .accessfn = mecid_access, + .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_rl_a_el3) }, + { .name = "VMECID_P_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 0, .crn = 10, .crm = 9, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.vmecid_p_el2) }, + { .name = "VMECID_A_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 1, .crn = 10, .crm = 9, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.vmecid_a_el2) }, + { .name = "DC_CIPAE", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 14, .opc2 = 0, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_NV_NO_TRAP, + .accessfn = cipae_access }, +}; + +static const ARMCPRegInfo mec_mte_reginfo[] = { + { .name = "DC_CIGDPAE", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 14, .opc2 = 7, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_NV_NO_TRAP, + .accessfn = cipae_access }, +}; + #ifndef CONFIG_USER_ONLY /* * We don't know until after realize whether there's a GICv3 @@ -6929,10 +5191,10 @@ static void 
define_pmu_regs(ARMCPU *cpu) static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr1 = cpu->isar.id_pfr1; + uint64_t pfr1 = GET_IDREG(&cpu->isar, ID_PFR1); if (env->gicv3state) { - pfr1 |= 1 << 28; + pfr1 = FIELD_DP64(pfr1, ID_PFR1, GIC, 1); } return pfr1; } @@ -6940,10 +5202,10 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr0 = cpu->isar.id_aa64pfr0; + uint64_t pfr0 = GET_IDREG(&cpu->isar, ID_AA64PFR0); if (env->gicv3state) { - pfr0 |= 1 << 24; + pfr0 = FIELD_DP64(pfr0, ID_AA64PFR0, GIC, 1); } return pfr0; } @@ -7010,7 +5272,6 @@ static const ARMCPRegInfo lor_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, }; -#ifdef TARGET_AARCH64 static CPAccessResult access_pauth(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7120,7 +5381,7 @@ static const ARMCPRegInfo rndr_reginfo[] = { .access = PL0_R, .readfn = rndr_readfn }, }; -static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, +static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { #ifdef CONFIG_TCG @@ -7257,6 +5518,8 @@ static const ARMCPRegInfo mte_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, .access = PL1_RW, .accessfn = access_tfsr_el1, .nv2_redirect_offset = 0x190 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 6, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 6, 0), .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_NV2_REDIRECT, @@ -7432,6 +5695,8 @@ static const ARMCPRegInfo scxtnum_reginfo[] = { .access = PL1_RW, .accessfn = access_scxtnum_el1, .fgt = FGT_SCXTNUM_EL1, .nv2_redirect_offset = 0x188 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 7), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 7), .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) }, { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7, @@ -7503,8 +5768,6 @@ static const ARMCPRegInfo nv2_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) }, }; -#endif /* TARGET_AARCH64 */ - static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7700,32 +5963,6 @@ static const ARMCPRegInfo vhe_reginfo[] = { #endif }; -#ifndef CONFIG_USER_ONLY -static const ARMCPRegInfo ats1e1_reginfo[] = { - { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1RP, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1WP, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, -}; - -static const ARMCPRegInfo ats1cp_reginfo[] = { - { .name = "ATS1CPRP", - .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write }, - { .name = "ATS1CPWP", - .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write }, -}; -#endif - /* * ACTLR2 and HACTLR2 map to ACTLR_EL1[63:32] and * ACTLR_EL2[63:32]. 
They exist only if the ID_MMFR4.AC2 field @@ -7746,10 +5983,218 @@ static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = { .resetvalue = 0 }, }; +static CPAccessResult sctlr2_el2_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_SCTLR2EN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult sctlr2_el1_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret != CP_ACCESS_OK) { + return ret; + } + if (arm_current_el(env) < 2 && !(arm_hcrx_el2_eff(env) & HCRX_SCTLR2EN)) { + return CP_ACCESS_TRAP_EL2; + } + return sctlr2_el2_access(env, ri, isread); +} + +static void sctlr2_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + value &= valid_mask; + raw_write(env, ri, value); +} + +static void sctlr2_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= SCTLR2_EMEC; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static void sctlr2_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= SCTLR2_EMEC; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static const ARMCPRegInfo sctlr2_reginfo[] = { + { .name = "SCTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL1_RW, .accessfn = sctlr2_el1_access, + .writefn = sctlr2_el1_write, .fgt = FGT_SCTLR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 3), + .nv2_redirect_offset = 0x278 | NV2_REDIR_NV1, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[1]) }, + { .name = "SCTLR2_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL2_RW, .accessfn = sctlr2_el2_access, + .writefn = sctlr2_el2_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[2]) }, + { .name = "SCTLR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL3_RW, .writefn = sctlr2_el3_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[3]) }, +}; + +static CPAccessResult tcr2_el2_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_TCR2EN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult tcr2_el1_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret != CP_ACCESS_OK) { + return ret; + } + if (arm_current_el(env) < 2 && !(arm_hcrx_el2_eff(env) & HCRX_TCR2EN)) { + return CP_ACCESS_TRAP_EL2; + } + return tcr2_el2_access(env, ri, isread); +} + +static void tcr2_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_s1pie, cpu)) { + valid_mask |= TCR2_PIE; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static void tcr2_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint64_t valid_mask = 0; + + if 
(cpu_isar_feature(aa64_s1pie, cpu)) { + valid_mask |= TCR2_PIE; + } + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= TCR2_AMEC0 | TCR2_AMEC1; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static const ARMCPRegInfo tcr2_reginfo[] = { + { .name = "TCR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 2, .crm = 0, + .access = PL1_RW, .accessfn = tcr2_el1_access, + .writefn = tcr2_el1_write, .fgt = FGT_TCR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 3), + .nv2_redirect_offset = 0x270 | NV2_REDIR_NV1, + .fieldoffset = offsetof(CPUARMState, cp15.tcr2_el[1]) }, + { .name = "TCR2_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 2, .crm = 0, + .access = PL2_RW, .accessfn = tcr2_el2_access, + .writefn = tcr2_el2_write, + .fieldoffset = offsetof(CPUARMState, cp15.tcr2_el[2]) }, +}; + +static CPAccessResult pien_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_PIEN) + && arm_current_el(env) < 3) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult pien_el1_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret == CP_ACCESS_OK) { + ret = pien_access(env, ri, isread); + } + return ret; +} + +static const ARMCPRegInfo s1pie_reginfo[] = { + { .name = "PIR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL1_RW, .accessfn = pien_el1_access, + .fgt = FGT_NPIR_EL1, .nv2_redirect_offset = 0x2a0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 3), + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[1]) }, + { .name = "PIR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[2]) }, + { .name = "PIR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[3]) }, + { .name = "PIRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 10, .crm = 2, + .access = PL1_RW, .accessfn = pien_el1_access, + .fgt = FGT_NPIRE0_EL1, .nv2_redirect_offset = 0x290 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 2), + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[0]) }, + { .name = "PIRE0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 2, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .fieldoffset = offsetof(CPUARMState, cp15.pire0_el2) }, +}; + +static const ARMCPRegInfo s2pie_reginfo[] = { + { .name = "S2PIR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 5, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .nv2_redirect_offset = 0x2b0, + .fieldoffset = offsetof(CPUARMState, cp15.s2pir_el2) }, +}; + void register_cp_regs_for_features(ARMCPU *cpu) { /* Register all the coprocessor registers based on feature bits */ CPUARMState *env = &cpu->env; + ARMISARegisters *isar = &cpu->isar; + if (arm_feature(env, ARM_FEATURE_M)) { /* M profile has no coprocessor registers */ return; @@ -7764,7 +6209,12 @@ void 
register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, not_v8_cp_reginfo); } - define_tlb_insn_regs(cpu); +#ifndef CONFIG_USER_ONLY + if (tcg_enabled()) { + define_tlb_insn_regs(cpu); + define_at_insn_regs(cpu); + } +#endif if (arm_feature(env, ARM_FEATURE_V6)) { /* The ID registers all have impdef reset values */ @@ -7773,7 +6223,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_pfr0 }, + .resetvalue = GET_IDREG(isar, ID_PFR0)}, /* * ID_PFR1 is not a plain ARM_CP_CONST because we don't know * the value of the GIC field until after we define these regs. @@ -7784,7 +6234,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .accessfn = access_aa32_tid3, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_pfr1, + .resetvalue = GET_IDREG(isar, ID_PFR1), #else .type = ARM_CP_NO_RAW, .accessfn = access_aa32_tid3, @@ -7796,72 +6246,72 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_dfr0 }, + .resetvalue = GET_IDREG(isar, ID_DFR0)}, { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_afr0 }, + .resetvalue = GET_IDREG(isar, ID_AFR0)}, { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_MMFR0)}, { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_MMFR1)}, { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_MMFR2)}, { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_MMFR3)}, { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar0 }, + .resetvalue = GET_IDREG(isar, ID_ISAR0)}, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar1 }, + .resetvalue = GET_IDREG(isar, ID_ISAR1)}, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar2 }, + .resetvalue = GET_IDREG(isar, ID_ISAR2)}, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar3 }, + .resetvalue = GET_IDREG(isar, 
ID_ISAR3) }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar4 }, + .resetvalue = GET_IDREG(isar, ID_ISAR4) }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar5 }, + .resetvalue = GET_IDREG(isar, ID_ISAR5) }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr4 }, + .resetvalue = GET_IDREG(isar, ID_MMFR4)}, { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar6 }, + .resetvalue = GET_IDREG(isar, ID_ISAR6) }, }; define_arm_cp_regs(cpu, v6_idregs); define_arm_cp_regs(cpu, v6_cp_reginfo); @@ -7871,9 +6321,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_V6K)) { define_arm_cp_regs(cpu, v6k_cp_reginfo); } - if (arm_feature(env, ARM_FEATURE_V7VE)) { - define_arm_cp_regs(cpu, pmovsset_cp_reginfo); - } if (arm_feature(env, ARM_FEATURE_V7)) { ARMCPRegInfo clidr = { .name = "CLIDR", .state = ARM_CP_STATE_BOTH, @@ -7881,12 +6328,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_tid4, .fgt = FGT_CLIDR_EL1, - .resetvalue = cpu->clidr + .resetvalue = GET_IDREG(isar, CLIDR) }; define_one_arm_cp_reg(cpu, &clidr); define_arm_cp_regs(cpu, v7_cp_reginfo); define_debug_regs(cpu); - define_pmu_regs(cpu); } else { define_arm_cp_regs(cpu, not_v7_cp_reginfo); } @@ -7912,7 +6358,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_aa64pfr0 + .resetvalue = GET_IDREG(isar, ID_AA64PFR0) #else .type = ARM_CP_NO_RAW, .accessfn = access_aa64_tid3, @@ -7924,12 +6370,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64pfr1}, - { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .resetvalue = GET_IDREG(isar, ID_AA64PFR1)}, + { .name = "ID_AA64PFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = GET_IDREG(isar, ID_AA64PFR2)}, { .name = "ID_AA64PFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -7939,12 +6385,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64zfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ZFR0)}, { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64smfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64SMFR0)}, { .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 
0, .crm = 4, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -7959,12 +6405,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR0) }, { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR1) }, { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -7979,12 +6425,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64afr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64AFR0) }, { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64afr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64AFR1) }, { .name = "ID_AA64AFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -7999,17 +6445,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR0)}, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar1 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR1)}, { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar2 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR2)}, { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8039,22 +6485,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR0)}, { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR1) }, { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR2) }, { .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR3) }, { 
.name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, @@ -8126,42 +6572,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_pfr2 }, + .resetvalue = GET_IDREG(isar, ID_PFR2)}, { .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_dfr1 }, + .resetvalue = GET_IDREG(isar, ID_DFR1)}, { .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_mmfr5 }, + .resetvalue = GET_IDREG(isar, ID_MMFR5)}, { .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = 0 }, - { .name = "PMCEID0", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid0, 0, 32) }, - { .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = cpu->pmceid0 }, - { .name = "PMCEID1", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid1, 0, 32) }, - { .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = cpu->pmceid1 }, }; #ifdef CONFIG_USER_ONLY static const ARMCPRegUserSpaceInfo v8_user_idregs[] = { @@ -8177,6 +6603,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) R_ID_AA64PFR1_SSBS_MASK | R_ID_AA64PFR1_MTE_MASK | R_ID_AA64PFR1_SME_MASK }, + { .name = "ID_AA64PFR2_EL1", + .exported_bits = 0 }, { .name = "ID_AA64PFR*_EL1_RESERVED", .is_glob = true }, { .name = "ID_AA64ZFR0_EL1", @@ -8496,12 +6924,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s), offsetoflow32(CPUARMState, cp15.par_ns) }, .writefn = par_write}, -#ifndef CONFIG_USER_ONLY - /* This underdecoding is safe because the reginfo is NO_RAW. 
*/ - { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, - .access = PL1_W, .accessfn = ats_access, - .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, -#endif }; /* @@ -8528,9 +6950,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_STRONGARM)) { define_arm_cp_regs(cpu, strongarm_cp_reginfo); } - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - define_arm_cp_regs(cpu, xscale_cp_reginfo); - } if (arm_feature(env, ARM_FEATURE_DUMMY_C15_REGS)) { define_arm_cp_regs(cpu, dummy_c15_cp_reginfo); } @@ -8853,12 +7272,14 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_VBAR)) { static const ARMCPRegInfo vbar_cp_reginfo[] = { - { .name = "VBAR", .state = ARM_CP_STATE_BOTH, + { .name = "VBAR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .writefn = vbar_write, .accessfn = access_nv1, .fgt = FGT_VBAR_EL1, .nv2_redirect_offset = 0x250 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 12, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 12, 0, 0), .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s), offsetof(CPUARMState, cp15.vbar_ns) }, .resetvalue = 0 }, @@ -8869,24 +7290,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) /* Generic registers whose values depend on the implementation */ { ARMCPRegInfo sctlr = { - .name = "SCTLR", .state = ARM_CP_STATE_BOTH, + .name = "SCTLR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_SCTLR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 0), .nv2_redirect_offset = 0x110 | NV2_REDIR_NV1, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s), offsetof(CPUARMState, cp15.sctlr_ns) }, .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr, .raw_writefn = raw_write, }; - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - /* - * Normally we would always end the TB on an SCTLR write, but Linux - * arch/arm/mach-pxa/sleep.S expects two instructions following - * an MMU enable to execute from cache. Imitate this behaviour. 
- */ - sctlr.type |= ARM_CP_SUPPRESS_TB_END; - } define_one_arm_cp_reg(cpu, &sctlr); if (arm_feature(env, ARM_FEATURE_PMSA) && @@ -8907,14 +7322,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_pan, cpu)) { define_one_arm_cp_reg(cpu, &pan_reginfo); } -#ifndef CONFIG_USER_ONLY - if (cpu_isar_feature(aa64_ats1e1, cpu)) { - define_arm_cp_regs(cpu, ats1e1_reginfo); - } - if (cpu_isar_feature(aa32_ats1e1, cpu)) { - define_arm_cp_regs(cpu, ats1cp_reginfo); - } -#endif if (cpu_isar_feature(aa64_uao, cpu)) { define_one_arm_cp_reg(cpu, &uao_reginfo); } @@ -8945,7 +7352,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo); } -#ifdef TARGET_AARCH64 if (cpu_isar_feature(aa64_sme, cpu)) { define_arm_cp_regs(cpu, sme_reginfo); } @@ -9006,7 +7412,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_nmi, cpu)) { define_arm_cp_regs(cpu, nmi_reginfo); } -#endif + + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + define_arm_cp_regs(cpu, sctlr2_reginfo); + } + + if (cpu_isar_feature(aa64_tcr2, cpu)) { + define_arm_cp_regs(cpu, tcr2_reginfo); + } + + if (cpu_isar_feature(aa64_s1pie, cpu)) { + define_arm_cp_regs(cpu, s1pie_reginfo); + } + if (cpu_isar_feature(aa64_s2pie, cpu)) { + define_arm_cp_regs(cpu, s2pie_reginfo); + } + if (cpu_isar_feature(aa64_mec, cpu)) { + define_arm_cp_regs(cpu, mec_reginfo); + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mec_mte_reginfo); + } + } if (cpu_isar_feature(any_predinv, cpu)) { define_arm_cp_regs(cpu, predinv_reginfo); @@ -9016,60 +7442,42 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, ccsidr2_reginfo); } -#ifndef CONFIG_USER_ONLY - /* - * Register redirections and aliases must be done last, - * after the registers from the other extensions have been defined. - */ - if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) { - define_arm_vh_e2h_redirects_aliases(cpu); + define_pm_cpregs(cpu); + define_gcs_cpregs(cpu); +} + +/* + * Copy an ARMCPRegInfo structure, allocating it along with the name + * and an optional suffix to the name. + */ +static ARMCPRegInfo *alloc_cpreg(const ARMCPRegInfo *in, const char *suffix) +{ + const char *name = in->name; + size_t name_len = strlen(name); + size_t suff_len = suffix ? strlen(suffix) : 0; + ARMCPRegInfo *out = g_malloc(sizeof(*in) + name_len + suff_len + 1); + char *p = (char *)(out + 1); + + *out = *in; + out->name = p; + + memcpy(p, name, name_len + 1); + if (suffix) { + memcpy(p + name_len, suffix, suff_len + 1); } -#endif + return out; } /* - * Private utility function for define_one_arm_cp_reg_with_opaque(): + * Private utility function for define_one_arm_cp_reg(): * add a single reginfo struct to the hash table. */ -static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, - void *opaque, CPState state, - CPSecureState secstate, - int crm, int opc1, int opc2, - const char *name) +static void add_cpreg_to_hashtable(ARMCPU *cpu, ARMCPRegInfo *r, + CPState state, CPSecureState secstate, + uint32_t key) { CPUARMState *env = &cpu->env; - uint32_t key; - ARMCPRegInfo *r2; - bool is64 = r->type & ARM_CP_64BIT; bool ns = secstate & ARM_CP_SECSTATE_NS; - int cp = r->cp; - size_t name_len; - bool make_const; - - switch (state) { - case ARM_CP_STATE_AA32: - /* We assume it is a cp15 register if the .cp field is left unset. 
*/ - if (cp == 0 && r->state == ARM_CP_STATE_BOTH) { - cp = 15; - } - key = ENCODE_CP_REG(cp, is64, ns, r->crn, crm, opc1, opc2); - break; - case ARM_CP_STATE_AA64: - /* - * To allow abbreviation of ARMCPRegInfo definitions, we treat - * cp == 0 as equivalent to the value for "standard guest-visible - * sysreg". STATE_BOTH definitions are also always "standard sysreg" - * in their AArch64 view (the .cp value may be non-zero for the - * benefit of the AArch32 view). - */ - if (cp == 0 || r->state == ARM_CP_STATE_BOTH) { - cp = CP_REG_ARM64_SYSREG_CP; - } - key = ENCODE_AA64_CP_REG(cp, r->crn, crm, r->opc0, opc1, opc2); - break; - default: - g_assert_not_reached(); - } /* Overriding of an existing definition must be explicitly requested. */ if (!(r->type & ARM_CP_OVERRIDE)) { @@ -9079,84 +7487,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, } } - /* - * Eliminate registers that are not present because the EL is missing. - * Doing this here makes it easier to put all registers for a given - * feature into the same ARMCPRegInfo array and define them all at once. - */ - make_const = false; - if (arm_feature(env, ARM_FEATURE_EL3)) { - /* - * An EL2 register without EL2 but with EL3 is (usually) RES0. - * See rule RJFFP in section D1.1.3 of DDI0487H.a. - */ - int min_el = ctz32(r->access) / 2; - if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) { - if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) { - return; - } - make_const = !(r->type & ARM_CP_EL3_NO_EL2_KEEP); - } - } else { - CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2) - ? PL2_RW : PL1_RW); - if ((r->access & max_el) == 0) { - return; - } - } - - /* Combine cpreg and name into one allocation. */ - name_len = strlen(name) + 1; - r2 = g_malloc(sizeof(*r2) + name_len); - *r2 = *r; - r2->name = memcpy(r2 + 1, name, name_len); - - /* - * Update fields to match the instantiation, overwiting wildcards - * such as CP_ANY, ARM_CP_STATE_BOTH, or ARM_CP_SECSTATE_BOTH. - */ - r2->cp = cp; - r2->crm = crm; - r2->opc1 = opc1; - r2->opc2 = opc2; - r2->state = state; - r2->secure = secstate; - if (opaque) { - r2->opaque = opaque; - } - - if (make_const) { - /* This should not have been a very special register to begin. */ - int old_special = r2->type & ARM_CP_SPECIAL_MASK; - assert(old_special == 0 || old_special == ARM_CP_NOP); - /* - * Set the special function to CONST, retaining the other flags. - * This is important for e.g. ARM_CP_SVE so that we still - * take the SVE trap if CPTR_EL3.EZ == 0. - */ - r2->type = (r2->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST; - /* - * Usually, these registers become RES0, but there are a few - * special cases like VPIDR_EL2 which have a constant non-zero - * value with writes ignored. - */ - if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) { - r2->resetvalue = 0; - } - /* - * ARM_CP_CONST has precedence, so removing the callbacks and - * offsets are not strictly necessary, but it is potentially - * less confusing to debug later. - */ - r2->readfn = NULL; - r2->writefn = NULL; - r2->raw_readfn = NULL; - r2->raw_writefn = NULL; - r2->resetfn = NULL; - r2->fieldoffset = 0; - r2->bank_fieldoffsets[0] = 0; - r2->bank_fieldoffsets[1] = 0; - } else { + { bool isbanked = r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]; if (isbanked) { @@ -9165,7 +7496,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, * Overwriting fieldoffset as the array is only used to define * banked registers but later only fieldoffset is used. 
*/ - r2->fieldoffset = r->bank_fieldoffsets[ns]; + r->fieldoffset = r->bank_fieldoffsets[ns]; } if (state == ARM_CP_STATE_AA32) { if (isbanked) { @@ -9182,54 +7513,187 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, */ if ((r->state == ARM_CP_STATE_BOTH && ns) || (arm_feature(env, ARM_FEATURE_V8) && !ns)) { - r2->type |= ARM_CP_ALIAS; + r->type |= ARM_CP_ALIAS; } } else if ((secstate != r->secure) && !ns) { /* * The register is not banked so we only want to allow * migration of the non-secure instance. */ - r2->type |= ARM_CP_ALIAS; - } - - if (HOST_BIG_ENDIAN && - r->state == ARM_CP_STATE_BOTH && r2->fieldoffset) { - r2->fieldoffset += sizeof(uint32_t); + r->type |= ARM_CP_ALIAS; } } } /* - * By convention, for wildcarded registers only the first - * entry is used for migration; the others are marked as - * ALIAS so we don't try to transfer the register - * multiple times. Special registers (ie NOP/WFI) are - * never migratable and not even raw-accessible. + * For 32-bit AArch32 regs shared with 64-bit AArch64 regs, + * adjust the field offset for endianness. This had to be + * delayed until banked registers were resolved. */ - if (r2->type & ARM_CP_SPECIAL_MASK) { - r2->type |= ARM_CP_NO_RAW; + if (HOST_BIG_ENDIAN && + state == ARM_CP_STATE_AA32 && + r->state == ARM_CP_STATE_BOTH && + r->fieldoffset) { + r->fieldoffset += sizeof(uint32_t); } - if (((r->crm == CP_ANY) && crm != 0) || - ((r->opc1 == CP_ANY) && opc1 != 0) || - ((r->opc2 == CP_ANY) && opc2 != 0)) { - r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB; + + /* + * Special registers (ie NOP/WFI) are never migratable and + * are not even raw-accessible. + */ + if (r->type & ARM_CP_SPECIAL_MASK) { + r->type |= ARM_CP_NO_RAW; } /* + * Update fields to match the instantiation, overwriting wildcards + * such as ARM_CP_STATE_BOTH or ARM_CP_SECSTATE_BOTH. + */ + r->state = state; + r->secure = secstate; + + /* * Check that raw accesses are either forbidden or handled. Note that * we can't assert this earlier because the setup of fieldoffset for * banked registers has to be done first. */ - if (!(r2->type & ARM_CP_NO_RAW)) { - assert(!raw_accessors_invalid(r2)); + if (!(r->type & ARM_CP_NO_RAW)) { + assert(!raw_accessors_invalid(r)); } - g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r2); + g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r); +} + +static void add_cpreg_to_hashtable_aa32(ARMCPU *cpu, ARMCPRegInfo *r) +{ + /* + * Under AArch32 CP registers can be common + * (same for secure and non-secure world) or banked. 
+ */ + ARMCPRegInfo *r_s; + bool is64 = r->type & ARM_CP_64BIT; + uint32_t key = ENCODE_CP_REG(r->cp, is64, 0, r->crn, + r->crm, r->opc1, r->opc2); + + assert(!(r->type & ARM_CP_ADD_TLBI_NXS)); /* aa64 only */ + r->vhe_redir_to_el2 = 0; + r->vhe_redir_to_el01 = 0; + + switch (r->secure) { + case ARM_CP_SECSTATE_NS: + key |= CP_REG_AA32_NS_MASK; + /* fall through */ + case ARM_CP_SECSTATE_S: + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32, r->secure, key); + break; + case ARM_CP_SECSTATE_BOTH: + r_s = alloc_cpreg(r, "_S"); + add_cpreg_to_hashtable(cpu, r_s, ARM_CP_STATE_AA32, + ARM_CP_SECSTATE_S, key); + + key |= CP_REG_AA32_NS_MASK; + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32, + ARM_CP_SECSTATE_NS, key); + break; + default: + g_assert_not_reached(); + } } +static void add_cpreg_to_hashtable_aa64(ARMCPU *cpu, ARMCPRegInfo *r) +{ + uint32_t key = ENCODE_AA64_CP_REG(r->opc0, r->opc1, + r->crn, r->crm, r->opc2); + + if ((r->type & ARM_CP_ADD_TLBI_NXS) && + cpu_isar_feature(aa64_xs, cpu)) { + /* + * This is a TLBI insn which has an NXS variant. The + * NXS variant is at the same encoding except that + * crn is +1, and has the same behaviour except for + * fine-grained trapping. Add the NXS insn here and + * then fall through to add the normal register. + * alloc_cpreg() allocates a fresh copy of the cpreg + * struct together with the suffixed name, so the NXS + * variant can be adjusted and inserted independently. + */ + ARMCPRegInfo *nxs_ri = alloc_cpreg(r, "NXS"); + uint32_t nxs_key; + + assert(nxs_ri->crn < 0xf); + nxs_ri->crn++; + /* Also increment the CRN field inside the key value */ + nxs_key = key + (1 << CP_REG_ARM64_SYSREG_CRN_SHIFT); + if (nxs_ri->fgt) { + nxs_ri->fgt |= R_FGT_NXS_MASK; + } + + add_cpreg_to_hashtable(cpu, nxs_ri, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, nxs_key); + } + + if (!r->vhe_redir_to_el01) { + assert(!r->vhe_redir_to_el2); + } else if (!arm_feature(&cpu->env, ARM_FEATURE_EL2) || + !cpu_isar_feature(aa64_vh, cpu)) { + r->vhe_redir_to_el2 = 0; + r->vhe_redir_to_el01 = 0; + } else { + /* Create the FOO_EL12 alias. */ + ARMCPRegInfo *r2 = alloc_cpreg(r, "2"); + uint32_t key2 = r->vhe_redir_to_el01; + + /* + * Clear EL1 redirection on the FOO_EL1 reg; + * Clear EL2 redirection on the FOO_EL12 reg; + * Install redirection from FOO_EL12 back to FOO_EL1. + */ + r->vhe_redir_to_el01 = 0; + r2->vhe_redir_to_el2 = 0; + r2->vhe_redir_to_el01 = key; -void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, - const ARMCPRegInfo *r, void *opaque) + r2->type |= ARM_CP_ALIAS | ARM_CP_NO_RAW; + /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */ + r2->access &= PL2_RW | PL3_RW; + /* The r2 op fields are as per key2, not the target reg */ + r2->crn = (key2 & CP_REG_ARM64_SYSREG_CRN_MASK) + >> CP_REG_ARM64_SYSREG_CRN_SHIFT; + r2->crm = (key2 & CP_REG_ARM64_SYSREG_CRM_MASK) + >> CP_REG_ARM64_SYSREG_CRM_SHIFT; + r2->opc0 = (key2 & CP_REG_ARM64_SYSREG_OP0_MASK) + >> CP_REG_ARM64_SYSREG_OP0_SHIFT; + r2->opc1 = (key2 & CP_REG_ARM64_SYSREG_OP1_MASK) + >> CP_REG_ARM64_SYSREG_OP1_SHIFT; + r2->opc2 = (key2 & CP_REG_ARM64_SYSREG_OP2_MASK) + >> CP_REG_ARM64_SYSREG_OP2_SHIFT; + + /* Non-redirected access to this register will abort. */ + r2->readfn = NULL; + r2->writefn = NULL; + r2->raw_readfn = NULL; + r2->raw_writefn = NULL; + r2->accessfn = NULL; + r2->fieldoffset = 0; + + /* + * If the _EL1 register is redirected to memory by FEAT_NV2, + * then it shares the offset with the _EL12 register, + * and which one is redirected depends on HCR_EL2.NV1. 
+ */ + if (r2->nv2_redirect_offset) { + assert(r2->nv2_redirect_offset & NV2_REDIR_NV1); + r2->nv2_redirect_offset &= ~NV2_REDIR_NV1; + r2->nv2_redirect_offset |= NV2_REDIR_NO_NV1; + } + add_cpreg_to_hashtable(cpu, r2, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, key2); + } + + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, key); +} + +void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *r) { /* * Define implementations of coprocessor registers. @@ -9255,21 +7719,27 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, * bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of * the register, if any. */ - int crm, opc1, opc2; int crmmin = (r->crm == CP_ANY) ? 0 : r->crm; int crmmax = (r->crm == CP_ANY) ? 15 : r->crm; int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1; int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1; int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2; int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2; - CPState state; + int cp = r->cp; + ARMCPRegInfo r_const; + CPUARMState *env = &cpu->env; - /* 64 bit registers have only CRm and Opc1 fields */ - assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn))); + /* + * AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless. + * Moreover, the encoding test just following in general prevents + * shared encoding so ARM_CP_STATE_BOTH won't work either. + */ + assert(r->state == ARM_CP_STATE_AA32 || !(r->type & ARM_CP_64BIT)); + /* AArch32 64-bit registers have only CRm and Opc1 fields. */ + assert(!(r->type & ARM_CP_64BIT) || !(r->opc2 || r->crn)); /* op0 only exists in the AArch64 encodings */ - assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0)); - /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */ - assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT)); + assert(r->state != ARM_CP_STATE_AA32 || r->opc0 == 0); + /* * This API is only for Arm's system coprocessors (14 and 15) or * (M-profile or v7A-and-earlier only) for implementation defined @@ -9280,21 +7750,25 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, */ switch (r->state) { case ARM_CP_STATE_BOTH: - /* 0 has a special meaning, but otherwise the same rules as AA32. */ - if (r->cp == 0) { + /* + * If the cp field is left unset, assume cp15. + * Otherwise apply the same rules as AA32. + */ + if (cp == 0) { + cp = 15; break; } /* fall through */ case ARM_CP_STATE_AA32: if (arm_feature(&cpu->env, ARM_FEATURE_V8) && !arm_feature(&cpu->env, ARM_FEATURE_M)) { - assert(r->cp >= 14 && r->cp <= 15); + assert(cp >= 14 && cp <= 15); } else { - assert(r->cp < 8 || (r->cp >= 14 && r->cp <= 15)); + assert(cp < 8 || (cp >= 14 && cp <= 15)); } break; case ARM_CP_STATE_AA64: - assert(r->cp == 0 || r->cp == CP_REG_ARM64_SYSREG_CP); + assert(cp == 0); break; default: g_assert_not_reached(); @@ -9359,75 +7833,104 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, } } - for (crm = crmmin; crm <= crmmax; crm++) { - for (opc1 = opc1min; opc1 <= opc1max; opc1++) { - for (opc2 = opc2min; opc2 <= opc2max; opc2++) { - for (state = ARM_CP_STATE_AA32; - state <= ARM_CP_STATE_AA64; state++) { - if (r->state != state && r->state != ARM_CP_STATE_BOTH) { - continue; - } - if ((r->type & ARM_CP_ADD_TLBI_NXS) && - cpu_isar_feature(aa64_xs, cpu)) { - /* - * This is a TLBI insn which has an NXS variant. The - * NXS variant is at the same encoding except that - * crn is +1, and has the same behaviour except for - * fine-grained trapping. Add the NXS insn here and - * then fall through to add the normal register. 
- * add_cpreg_to_hashtable() copies the cpreg struct - * and name that it is passed, so it's OK to use - * a local struct here. - */ - ARMCPRegInfo nxs_ri = *r; - g_autofree char *name = g_strdup_printf("%sNXS", r->name); - - assert(state == ARM_CP_STATE_AA64); - assert(nxs_ri.crn < 0xf); - nxs_ri.crn++; - if (nxs_ri.fgt) { - nxs_ri.fgt |= R_FGT_NXS_MASK; - } - add_cpreg_to_hashtable(cpu, &nxs_ri, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, name); - } - if (state == ARM_CP_STATE_AA32) { - /* - * Under AArch32 CP registers can be common - * (same for secure and non-secure world) or banked. - */ - char *name; - - switch (r->secure) { - case ARM_CP_SECSTATE_S: - case ARM_CP_SECSTATE_NS: - add_cpreg_to_hashtable(cpu, r, opaque, state, - r->secure, crm, opc1, opc2, - r->name); - break; - case ARM_CP_SECSTATE_BOTH: - name = g_strdup_printf("%s_S", r->name); - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_S, - crm, opc1, opc2, name); - g_free(name); - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, r->name); - break; - default: - g_assert_not_reached(); - } - } else { - /* - * AArch64 registers get mapped to non-secure instance - * of AArch32 - */ - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, r->name); - } + /* + * Eliminate registers that are not present because the EL is missing. + * Doing this here makes it easier to put all registers for a given + * feature into the same ARMCPRegInfo array and define them all at once. + */ + if (arm_feature(env, ARM_FEATURE_EL3)) { + /* + * An EL2 register without EL2 but with EL3 is (usually) RES0. + * See rule RJFFP in section D1.1.3 of DDI0487H.a. + */ + int min_el = ctz32(r->access) / 2; + if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) { + if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) { + return; + } + if (!(r->type & ARM_CP_EL3_NO_EL2_KEEP)) { + /* This should not have been a very special register. */ + int old_special = r->type & ARM_CP_SPECIAL_MASK; + assert(old_special == 0 || old_special == ARM_CP_NOP); + + r_const = *r; + + /* + * Set the special function to CONST, retaining the other flags. + * This is important for e.g. ARM_CP_SVE so that we still + * take the SVE trap if CPTR_EL3.EZ == 0. + */ + r_const.type = (r->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST; + /* + * Usually, these registers become RES0, but there are a few + * special cases like VPIDR_EL2 which have a constant non-zero + * value with writes ignored. + */ + if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) { + r_const.resetvalue = 0; + } + /* + * ARM_CP_CONST has precedence, so removing the callbacks and + * offsets are not strictly necessary, but it is potentially + * less confusing to debug later. + */ + r_const.readfn = NULL; + r_const.writefn = NULL; + r_const.raw_readfn = NULL; + r_const.raw_writefn = NULL; + r_const.resetfn = NULL; + r_const.fieldoffset = 0; + r_const.bank_fieldoffsets[0] = 0; + r_const.bank_fieldoffsets[1] = 0; + + r = &r_const; + } + } + } else { + CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2) + ? 
PL2_RW : PL1_RW); + if ((r->access & max_el) == 0) { + return; + } + } + + for (int crm = crmmin; crm <= crmmax; crm++) { + for (int opc1 = opc1min; opc1 <= opc1max; opc1++) { + for (int opc2 = opc2min; opc2 <= opc2max; opc2++) { + ARMCPRegInfo *r2 = alloc_cpreg(r, NULL); + ARMCPRegInfo *r3; + + /* + * By convention, for wildcarded registers only the first + * entry is used for migration; the others are marked as + * ALIAS so we don't try to transfer the register + * multiple times. + */ + if (crm != crmmin || opc1 != opc1min || opc2 != opc2min) { + r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB; + } + + /* Overwrite CP_ANY with the instantiation. */ + r2->crm = crm; + r2->opc1 = opc1; + r2->opc2 = opc2; + + switch (r->state) { + case ARM_CP_STATE_AA32: + add_cpreg_to_hashtable_aa32(cpu, r2); + break; + case ARM_CP_STATE_AA64: + add_cpreg_to_hashtable_aa64(cpu, r2); + break; + case ARM_CP_STATE_BOTH: + r3 = alloc_cpreg(r2, NULL); + r2->cp = cp; + add_cpreg_to_hashtable_aa32(cpu, r2); + r3->cp = 0; + add_cpreg_to_hashtable_aa64(cpu, r3); + break; + default: + g_assert_not_reached(); } } } @@ -9435,12 +7938,10 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, } /* Define a whole list of registers */ -void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, - void *opaque, size_t len) +void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len) { - size_t i; - for (i = 0; i < len; ++i) { - define_one_arm_cp_reg_with_opaque(cpu, regs + i, opaque); + for (size_t i = 0; i < len; ++i) { + define_one_arm_cp_reg(cpu, regs + i); } } @@ -9502,7 +8003,7 @@ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri) return 0; } -void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque) +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri) { /* Helper coprocessor reset function for do-nothing-on-reset registers */ } @@ -10569,7 +9070,7 @@ static int aarch64_regnum(CPUARMState *env, int aarch32_reg) } } -static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) +uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) { uint32_t ret = cpsr_read(env); @@ -10584,6 +9085,24 @@ static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) return ret; } +void cpsr_write_from_spsr_elx(CPUARMState *env, uint32_t val) +{ + uint32_t mask; + + /* Save SPSR_ELx.SS into PSTATE. 
*/ + env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); + val &= ~PSTATE_SS; + + /* Move DIT to the correct location for CPSR */ + if (val & PSTATE_DIT) { + val &= ~PSTATE_DIT; + val |= CPSR_DIT; + } + + mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar); + cpsr_write(env, val, mask, CPSRWriteRaw); +} + static bool syndrome_is_sync_extabt(uint32_t syndrome) { /* Return true if this syndrome value is a synchronous external abort */ @@ -10615,9 +9134,9 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; unsigned int new_el = env->exception.target_el; - target_ulong addr = env->cp15.vbar_el[new_el]; - unsigned int new_mode = aarch64_pstate_mode(new_el, true); - unsigned int old_mode; + vaddr addr = env->cp15.vbar_el[new_el]; + uint64_t new_mode = aarch64_pstate_mode(new_el, true); + uint64_t old_mode; unsigned int cur_el = arm_current_el(env); int rt; @@ -10660,8 +9179,13 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } else { addr += 0x600; } - } else if (pstate_read(env) & PSTATE_SP) { - addr += 0x200; + } else { + if (pstate_read(env) & PSTATE_SP) { + addr += 0x200; + } + if (is_a64(env) && (env->cp15.gcscr_el[new_el] & GCSCR_EXLOCKEN)) { + new_mode |= PSTATE_EXLOCK; + } } switch (cs->exception_index) { @@ -10765,7 +9289,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) * If NV2 is disabled, change SPSR when NV,NV1 == 1,0 (I_ZJRNN) * If NV2 is enabled, change SPSR when NV is 1 (I_DBTLM) */ - old_mode = deposit32(old_mode, 2, 2, 2); + old_mode = deposit64(old_mode, 2, 2, 2); } } } else { @@ -10778,7 +9302,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } env->banked_spsr[aarch64_banked_spsr_index(new_el)] = old_mode; - qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%x\n", old_mode); + qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%" PRIx64 "\n", old_mode); qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n", env->elr_el[new_el]); @@ -10832,7 +9356,8 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) env->pc = addr; - qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 " PSTATE 0x%x\n", + qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 + " PSTATE 0x%" PRIx64 "\n", new_el, env->pc, pstate_read(env)); } @@ -10888,7 +9413,7 @@ void arm_cpu_do_interrupt(CPUState *cs) new_el); if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { - qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", + qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx64 "\n", syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -10930,7 +9455,7 @@ void arm_cpu_do_interrupt(CPUState *cs) arm_call_el_change_hook(cpu); if (!kvm_enabled()) { - cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + cpu_set_interrupt(cs, CPU_INTERRUPT_EXITTB); } } #endif /* !CONFIG_USER_ONLY */ @@ -11078,21 +9603,34 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, bool el1_is_aa32) { uint64_t tcr = regime_tcr(env, mmu_idx); - bool epd, hpd, tsz_oob, ds, ha, hd; + bool epd, hpd, tsz_oob, ds, ha, hd, pie = false; int select, tsz, tbi, max_tsz, min_tsz, ps, sh; ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); bool stage2 = regime_is_stage2(mmu_idx); + int r_el = regime_el(mmu_idx); if (!regime_has_2_ranges(mmu_idx)) { select = 0; tsz = extract32(tcr, 0, 6); gran = tg0_to_gran_size(extract32(tcr, 14, 2)); if (stage2) { - /* VTCR_EL2 */ - hpd = false; + /* + * Stage2 does not have hierarchical permissions. 
+ * Thus disabling them makes things easier during ptw. + */ + hpd = true; + pie = extract64(tcr, 36, 1) && cpu_isar_feature(aa64_s2pie, cpu); } else { hpd = extract32(tcr, 24, 1); + if (r_el == 3) { + pie = (extract64(tcr, 35, 1) + && cpu_isar_feature(aa64_s1pie, cpu)); + } else { + pie = ((env->cp15.tcr2_el[2] & TCR2_PIE) + && (!arm_feature(env, ARM_FEATURE_EL3) + || (env->cp15.scr_el3 & SCR_TCR2EN))); + } } epd = false; sh = extract32(tcr, 12, 2); @@ -11129,10 +9667,16 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ds = extract64(tcr, 59, 1); if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && - regime_is_user(env, mmu_idx)) { + regime_is_user(mmu_idx)) { epd = true; } + + pie = ((env->cp15.tcr2_el[r_el] & TCR2_PIE) + && (!arm_feature(env, ARM_FEATURE_EL3) + || (env->cp15.scr_el3 & SCR_TCR2EN)) + && (r_el == 2 || (arm_hcrx_el2_eff(env) & HCRX_TCR2EN))); } + hpd |= pie; gran = sanitize_gran_size(cpu, gran, stage2); @@ -11211,6 +9755,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, .ha = ha, .hd = ha && hd, .gran = gran, + .pie = pie, }; } @@ -11325,33 +9870,6 @@ int fp_exception_el(CPUARMState *env, int cur_el) return 0; } -/* Return the exception level we're running at if this is our mmu_idx */ -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) -{ - if (mmu_idx & ARM_MMU_IDX_M) { - return mmu_idx & ARM_MMU_IDX_M_PRIV; - } - - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E30_0: - return 0; - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - return 1; - case ARMMMUIdx_E2: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - return 2; - case ARMMMUIdx_E3: - case ARMMMUIdx_E30_3_PAN: - return 3; - default: - g_assert_not_reached(); - } -} - #ifndef CONFIG_TCG ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) { @@ -11417,116 +9935,6 @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env) return arm_mmu_idx_el(env, arm_current_el(env)); } -static bool mve_no_pred(CPUARMState *env) -{ - /* - * Return true if there is definitely no predication of MVE - * instructions by VPR or LTPSIZE. (Returning false even if there - * isn't any predication is OK; generated code will just be - * a little worse.) - * If the CPU does not implement MVE then this TB flag is always 0. - * - * NOTE: if you change this logic, the "recalculate s->mve_no_pred" - * logic in gen_update_fp_context() needs to be updated to match. - * - * We do not include the effect of the ECI bits here -- they are - * tracked in other TB flags. This simplifies the logic for - * "when did we emit code that changes the MVE_NO_PRED TB flag - * and thus need to end the TB?". 
- */ - if (cpu_isar_feature(aa32_mve, env_archcpu(env))) { - return false; - } - if (env->v7m.vpr) { - return false; - } - if (env->v7m.ltpsize < 4) { - return false; - } - return true; -} - -void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - CPUARMTBFlags flags; - - assert_hflags_rebuild_correctly(env); - flags = env->hflags; - - if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { - *pc = env->pc; - if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { - DP_TBFLAG_A64(flags, BTYPE, env->btype); - } - } else { - *pc = env->regs[15]; - - if (arm_feature(env, ARM_FEATURE_M)) { - if (arm_feature(env, ARM_FEATURE_M_SECURITY) && - FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) - != env->v7m.secure) { - DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1); - } - - if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) && - (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) || - (env->v7m.secure && - !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) { - /* - * ASPEN is set, but FPCA/SFPA indicate that there is no - * active FP context; we must create a new FP context before - * executing any FP insn. - */ - DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1); - } - - bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; - if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { - DP_TBFLAG_M32(flags, LSPACT, 1); - } - - if (mve_no_pred(env)) { - DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); - } - } else { - /* - * Note that XSCALE_CPAR shares bits with VECSTRIDE. - * Note that VECLEN+VECSTRIDE are RES0 for M-profile. - */ - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar); - } else { - DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); - DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); - } - if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { - DP_TBFLAG_A32(flags, VFPEN, 1); - } - } - - DP_TBFLAG_AM32(flags, THUMB, env->thumb); - DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits); - } - - /* - * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine - * states defined in the ARM ARM for software singlestep: - * SS_ACTIVE PSTATE.SS State - * 0 x Inactive (the TB flag for SS is always 0) - * 1 0 Active-pending - * 1 1 Active-not-pending - * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. 
- */ - if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { - DP_TBFLAG_ANY(flags, PSTATE__SS, 1); - } - - *pflags = flags.flags; - *cs_base = flags.flags2; -} - -#ifdef TARGET_AARCH64 /* * The manual says that when SVE is enabled and VQ is widened the * implementation is allowed to zero the previously inaccessible @@ -11641,7 +10049,6 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, aarch64_sve_narrow_vq(env, new_len + 1); } } -#endif #ifndef CONFIG_USER_ONLY ARMSecuritySpace arm_security_space(CPUARMState *env) diff --git a/target/arm/helper.h b/target/arm/helper.h index 0907505..f340a49 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1,1154 +1,6 @@ -DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) +/* SPDX-License-Identifier: GPL-2.0-or-later */ -DEF_HELPER_3(add_setq, i32, env, i32, i32) -DEF_HELPER_3(add_saturate, i32, env, i32, i32) -DEF_HELPER_3(sub_saturate, i32, env, i32, i32) -DEF_HELPER_3(add_usaturate, i32, env, i32, i32) -DEF_HELPER_3(sub_usaturate, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32) -DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32) - -#define PAS_OP(pfx) \ - DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr) - -PAS_OP(s) -PAS_OP(u) -#undef PAS_OP - -#define PAS_OP(pfx) \ - DEF_HELPER_2(pfx ## add8, i32, i32, i32) \ - DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \ - DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \ - DEF_HELPER_2(pfx ## add16, i32, i32, i32) \ - DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \ - DEF_HELPER_2(pfx ## subaddx, i32, i32, i32) -PAS_OP(q) -PAS_OP(sh) -PAS_OP(uq) -PAS_OP(uh) -#undef PAS_OP - -DEF_HELPER_3(ssat, i32, env, i32, i32) -DEF_HELPER_3(usat, i32, env, i32, i32) -DEF_HELPER_3(ssat16, i32, env, i32, i32) -DEF_HELPER_3(usat16, i32, env, i32, i32) - -DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) - -DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, - i32, i32, i32, i32) -DEF_HELPER_2(exception_internal, noreturn, env, i32) -DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32) -DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32) -DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32) -DEF_HELPER_2(exception_swstep, noreturn, env, i32) -DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl) -DEF_HELPER_1(setend, void, env) -DEF_HELPER_2(wfi, void, env, i32) -DEF_HELPER_1(wfe, void, env) -DEF_HELPER_2(wfit, void, env, i64) -DEF_HELPER_1(yield, void, env) -DEF_HELPER_1(pre_hvc, void, env) -DEF_HELPER_2(pre_smc, void, env, i32) -DEF_HELPER_1(vesb, void, env) - -DEF_HELPER_3(cpsr_write, void, env, i32, i32) -DEF_HELPER_2(cpsr_write_eret, void, env, i32) -DEF_HELPER_1(cpsr_read, i32, env) - -DEF_HELPER_3(v7m_msr, void, env, i32, i32) -DEF_HELPER_2(v7m_mrs, i32, env, i32) - -DEF_HELPER_2(v7m_bxns, void, env, i32) -DEF_HELPER_2(v7m_blxns, void, env, i32) - -DEF_HELPER_3(v7m_tt, i32, env, i32, i32) - -DEF_HELPER_1(v7m_preserve_fp_state, void, env) - -DEF_HELPER_2(v7m_vlstm, void, env, i32) -DEF_HELPER_2(v7m_vlldm, void, env, i32) - -DEF_HELPER_2(v8m_stackcheck, void, env, i32) - -DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) - 
-DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) -DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32) -DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32) -DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32) -DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) -DEF_HELPER_2(get_cp_reg, i32, env, cptr) -DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) -DEF_HELPER_2(get_cp_reg64, i64, env, cptr) - -DEF_HELPER_2(get_r13_banked, i32, env, i32) -DEF_HELPER_3(set_r13_banked, void, env, i32, i32) - -DEF_HELPER_3(mrs_banked, i32, env, i32, i32) -DEF_HELPER_4(msr_banked, void, env, i32, i32, i32) - -DEF_HELPER_2(get_user_reg, i32, env, i32) -DEF_HELPER_3(set_user_reg, void, env, i32, i32) - -DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env) -DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int) -DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) -DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) -DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) - -DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) - -DEF_HELPER_1(vfp_get_fpscr, i32, env) -DEF_HELPER_2(vfp_set_fpscr, void, env, i32) - -DEF_HELPER_3(vfp_addh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_adds, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_addd, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_subh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_subs, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_subd, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_mulh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_muls, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_muld, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_divh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_divs, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_divd, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_maxh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_maxs, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_maxd, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_minh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_mins, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_mind, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_maxnums, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, fpst) -DEF_HELPER_3(vfp_minnumh, f16, f16, f16, fpst) -DEF_HELPER_3(vfp_minnums, f32, f32, f32, fpst) -DEF_HELPER_3(vfp_minnumd, f64, f64, f64, fpst) -DEF_HELPER_2(vfp_sqrth, f16, f16, fpst) -DEF_HELPER_2(vfp_sqrts, f32, f32, fpst) -DEF_HELPER_2(vfp_sqrtd, f64, f64, fpst) -DEF_HELPER_3(vfp_cmph, void, f16, f16, env) -DEF_HELPER_3(vfp_cmps, void, f32, f32, env) -DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) -DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env) -DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) -DEF_HELPER_3(vfp_cmped, void, f64, f64, env) - -DEF_HELPER_2(vfp_fcvtds, f64, f32, fpst) -DEF_HELPER_2(vfp_fcvtsd, f32, f64, fpst) -DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, fpst) -DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, fpst) - -DEF_HELPER_2(vfp_uitoh, f16, i32, fpst) -DEF_HELPER_2(vfp_uitos, f32, i32, fpst) -DEF_HELPER_2(vfp_uitod, f64, i32, fpst) -DEF_HELPER_2(vfp_sitoh, f16, i32, fpst) -DEF_HELPER_2(vfp_sitos, f32, i32, fpst) -DEF_HELPER_2(vfp_sitod, f64, i32, fpst) - -DEF_HELPER_2(vfp_touih, i32, f16, fpst) -DEF_HELPER_2(vfp_touis, i32, f32, fpst) -DEF_HELPER_2(vfp_touid, i32, f64, fpst) -DEF_HELPER_2(vfp_touizh, i32, f16, fpst) -DEF_HELPER_2(vfp_touizs, i32, f32, fpst) -DEF_HELPER_2(vfp_touizd, i32, f64, fpst) -DEF_HELPER_2(vfp_tosih, s32, f16, fpst) -DEF_HELPER_2(vfp_tosis, s32, f32, 
fpst) -DEF_HELPER_2(vfp_tosid, s32, f64, fpst) -DEF_HELPER_2(vfp_tosizh, s32, f16, fpst) -DEF_HELPER_2(vfp_tosizs, s32, f32, fpst) -DEF_HELPER_2(vfp_tosizd, s32, f64, fpst) - -DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_touhh, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toshh, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toulh, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_toslh, i32, f16, i32, fpst) -DEF_HELPER_3(vfp_touqh, i64, f16, i32, fpst) -DEF_HELPER_3(vfp_tosqh, i64, f16, i32, fpst) -DEF_HELPER_3(vfp_toshs, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_tosls, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_tosqs, i64, f32, i32, fpst) -DEF_HELPER_3(vfp_touhs, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_touls, i32, f32, i32, fpst) -DEF_HELPER_3(vfp_touqs, i64, f32, i32, fpst) -DEF_HELPER_3(vfp_toshd, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tosld, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tosqd, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_touhd, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_tould, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_touqd, i64, f64, i32, fpst) -DEF_HELPER_3(vfp_shtos, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_sltos, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_sqtos, f32, i64, i32, fpst) -DEF_HELPER_3(vfp_uhtos, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_ultos, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_uqtos, f32, i64, i32, fpst) -DEF_HELPER_3(vfp_shtod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_sltod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_sqtod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_uhtod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_ultod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_uqtod, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_shtoh, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_sltoh, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_ultoh, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, fpst) -DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, fpst) - -DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, fpst) -DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, fpst) -DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, fpst) -DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, fpst) - -DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, fpst) - -DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, 
f32, f16, fpst, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, fpst, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, fpst, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, fpst, i32) - -DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, fpst) -DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, fpst) -DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) - -DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) -DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) -DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) -DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) -DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) -DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) -DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64) - -DEF_HELPER_3(shl_cc, i32, env, i32, i32) -DEF_HELPER_3(shr_cc, i32, env, i32, i32) -DEF_HELPER_3(sar_cc, i32, env, i32, i32) -DEF_HELPER_3(ror_cc, i32, env, i32, i32) - -DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, fpst) -DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, fpst) -DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, fpst) -DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, fpst) - -DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env) -DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, fpst) - -DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) - -/* neon_helper.c */ -DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) -DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) -DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) -DEF_HELPER_2(neon_pmin_s16, i32, i32, i32) -DEF_HELPER_2(neon_pmax_u8, i32, i32, i32) -DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) -DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) -DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) - -DEF_HELPER_2(neon_shl_u16, i32, i32, i32) -DEF_HELPER_2(neon_shl_s16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) -DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) -DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) 
-DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_2(neon_add_u8, i32, i32, i32) -DEF_HELPER_2(neon_add_u16, i32, i32, i32) -DEF_HELPER_2(neon_sub_u8, i32, i32, i32) -DEF_HELPER_2(neon_sub_u16, i32, i32, i32) -DEF_HELPER_2(neon_mul_u8, i32, i32, i32) -DEF_HELPER_2(neon_mul_u16, i32, i32, i32) - -DEF_HELPER_2(neon_tst_u8, i32, i32, i32) -DEF_HELPER_2(neon_tst_u16, i32, i32, i32) -DEF_HELPER_2(neon_tst_u32, i32, i32, i32) - -DEF_HELPER_1(neon_clz_u8, i32, i32) -DEF_HELPER_1(neon_clz_u16, i32, i32) -DEF_HELPER_1(neon_cls_s8, i32, i32) 
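/*
 * The neon_rshl_*/gvec_srshl/gvec_urshl helpers deleted above implement the
 * NEON "rounding shift" (VRSHL) semantics: the count is signed and taken
 * from the low byte of the second operand, and a negative count shifts
 * right after adding half of the discarded range.  A self-contained sketch
 * of the signed 32-bit case (widening to 64 bits keeps the rounding addend
 * exact); assumes <stdint.h>:
 */
static int32_t rshl_s32_sketch(int32_t val, int32_t shiftop)
{
    int8_t sh = (int8_t)shiftop;            /* only the low byte matters */

    if (sh >= 0) {
        return sh >= 32 ? 0 : (int32_t)((uint32_t)val << sh);
    } else if (sh >= -32) {
        /* add half of the discarded least-significant unit, then shift */
        return (int32_t)(((int64_t)val + (1LL << (-sh - 1))) >> -sh);
    }
    return 0;                               /* everything rounded away */
}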
-DEF_HELPER_1(neon_cls_s16, i32, i32) -DEF_HELPER_1(neon_cls_s32, i32, i32) -DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) -DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32) -DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32) -DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) -DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32) -DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32) - -DEF_HELPER_1(neon_narrow_u8, i64, i64) -DEF_HELPER_1(neon_narrow_u16, i64, i64) -DEF_HELPER_2(neon_unarrow_sat8, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_u8, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_s8, i64, env, i64) -DEF_HELPER_2(neon_unarrow_sat16, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_u16, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_s16, i64, env, i64) -DEF_HELPER_2(neon_unarrow_sat32, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_u32, i64, env, i64) -DEF_HELPER_2(neon_narrow_sat_s32, i64, env, i64) -DEF_HELPER_1(neon_narrow_high_u8, i32, i64) -DEF_HELPER_1(neon_narrow_high_u16, i32, i64) -DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) -DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64) -DEF_HELPER_1(neon_widen_u8, i64, i32) -DEF_HELPER_1(neon_widen_s8, i64, i32) -DEF_HELPER_1(neon_widen_u16, i64, i32) -DEF_HELPER_1(neon_widen_s16, i64, i32) - -DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) -DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) -DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s32, i64, i32, i32) -DEF_HELPER_2(neon_abdl_u64, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s64, i64, i32, i32) -DEF_HELPER_2(neon_mull_u8, i64, i32, i32) -DEF_HELPER_2(neon_mull_s8, i64, i32, i32) -DEF_HELPER_2(neon_mull_u16, i64, i32, i32) -DEF_HELPER_2(neon_mull_s16, i64, i32, i32) - -DEF_HELPER_1(neon_negl_u16, i64, i64) -DEF_HELPER_1(neon_negl_u32, i64, i64) - -DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) -DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) - -DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, fpst) -DEF_HELPER_3(neon_cge_f32, i32, i32, i32, fpst) -DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, fpst) -DEF_HELPER_3(neon_acge_f32, i32, i32, i32, fpst) -DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst) -DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst) -DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst) - -/* iwmmxt_helper.c */ -DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_muluhw, i64, i64, 
i64) -DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) -DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) - -#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ -DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ - -DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) -DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) - -DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) - -DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) - -DEF_IWMMXT_HELPER_SIZE_ENV(mins) -DEF_IWMMXT_HELPER_SIZE_ENV(minu) -DEF_IWMMXT_HELPER_SIZE_ENV(maxs) -DEF_IWMMXT_HELPER_SIZE_ENV(maxu) - -DEF_IWMMXT_HELPER_SIZE_ENV(subn) -DEF_IWMMXT_HELPER_SIZE_ENV(addn) -DEF_IWMMXT_HELPER_SIZE_ENV(subu) -DEF_IWMMXT_HELPER_SIZE_ENV(addu) -DEF_IWMMXT_HELPER_SIZE_ENV(subs) -DEF_IWMMXT_HELPER_SIZE_ENV(adds) - -DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) -DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) - -DEF_HELPER_1(iwmmxt_bcstb, i64, i32) -DEF_HELPER_1(iwmmxt_bcstw, i64, i32) -DEF_HELPER_1(iwmmxt_bcstl, i64, i32) - -DEF_HELPER_1(iwmmxt_addcb, i64, i64) -DEF_HELPER_1(iwmmxt_addcw, i64, i64) -DEF_HELPER_1(iwmmxt_addcl, i64, i64) - -DEF_HELPER_1(iwmmxt_msbb, i32, i64) -DEF_HELPER_1(iwmmxt_msbw, i32, i64) -DEF_HELPER_1(iwmmxt_msbl, i32, i64) - -DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) - -DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) - -DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr) 
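/*
 * DEF_HELPER_* lines like those above (and the local DEF_IWMMXT_HELPER_
 * SIZE_ENV wrapper) are X-macros: the header is included several times
 * with different definitions of DEF_HELPER_FLAGS_n, so one list yields C
 * prototypes, TCG call descriptors and helper enumerators in turn, while
 * flags such as TCG_CALL_NO_RWG tell the TCG optimizer the helper neither
 * reads nor writes guest globals.  A simplified sketch of the prototype-
 * generating pass; the dh_ctype glue below is illustrative, not QEMU's
 * exact spelling:
 */
#define dh_ctype_i32 uint32_t
#define dh_ctype_i64 uint64_t
#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
    dh_ctype_##ret helper_##name(dh_ctype_##t1, dh_ctype_##t2);

/* a hypothetical entry then expands to:
 *   uint32_t helper_example_op(uint32_t, uint32_t);
 */
DEF_HELPER_FLAGS_2(example_op, 0, i32, i32, i32)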
-DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) -DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) - -DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, 
ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) 
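/*
 * All gvec_* helpers above share one calling convention: raw pointers into
 * the vector register file plus a 32-bit descriptor encoding the operation
 * size, decoded with simd_oprsz() from tcg-gvec-desc.h.  A sketch of the
 * element loop behind the signed byte dot product (gvec_sdot_b), with the
 * tail-clearing done by the real implementation omitted:
 */
static void sdot_b_sketch(void *vd, void *vn, void *vm, void *va,
                          uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);  /* vector length in bytes */
    int32_t *d = vd, *a = va;
    int8_t *n = vn, *m = vm;

    for (i = 0; i < opr_sz / 4; i++) {
        /* each 32-bit lane accumulates four 8x8 products */
        d[i] = a[i]
             + n[4 * i + 0] * m[4 * i + 0]
             + n[4 * i + 1] * m[4 * i + 1]
             + n[4 * i + 2] * m[4 * i + 2]
             + n[4 * i + 3] * m[4 * i + 3];
    }
}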
-DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, 
ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) 
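/*
 * The fpst argument threaded through the float helpers above is a
 * float_status pointer (fpu/softfloat.h) carrying the active rounding mode
 * and accumulated exception flags, so even a quiet comparison can raise
 * Invalid on a signalling NaN.  Vector float compares such as gvec_fceq_*
 * produce per-lane masks; a sketch of one single-precision FCEQ lane:
 */
static uint32_t fceq_s_sketch(float32 a, float32 b, float_status *st)
{
    /* all-ones on true, all-zeros on false */
    return -(uint32_t)float32_eq_quiet(a, b, st);
}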
-DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_d, 
TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, fpst) -DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, fpst) -DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, fpst) - -DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_h, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mls_idx_d, 
TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) 
-DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) - -DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +#include "tcg/helper.h" #ifdef TARGET_AARCH64 #include "tcg/helper-a64.h" diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c new file mode 100644 index 0000000..ff13726 --- /dev/null +++ b/target/arm/hvf-stub.c @@ -0,0 +1,20 @@ +/* + * QEMU Hypervisor.framework (HVF) stubs for ARM + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hvf_arm.h" + +uint32_t 
hvf_arm_get_default_ipa_bit_size(void) +{ + g_assert_not_reached(); +} + +uint32_t hvf_arm_get_max_ipa_bit_size(void) +{ + g_assert_not_reached(); +} diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 2439af6..0658a99 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -19,10 +19,12 @@ #include "system/hw_accel.h" #include "hvf_arm.h" #include "cpregs.h" +#include "cpu-sysregs.h" #include <mach/mach_time.h> -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "hw/boards.h" #include "hw/irq.h" #include "qemu/main-loop.h" @@ -150,9 +152,6 @@ void hvf_arm_init_debug(void) g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); } -#define HVF_SYSREG(crn, crm, op0, op1, op2) \ - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) - #define SYSREG_OP0_SHIFT 20 #define SYSREG_OP0_MASK 0x3 #define SYSREG_OP0(sysreg) ((sysreg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK) @@ -184,6 +183,7 @@ void hvf_arm_init_debug(void) #define SYSREG_OSLAR_EL1 SYSREG(2, 0, 1, 0, 4) #define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4) #define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4) +#define SYSREG_LORC_EL1 SYSREG(3, 0, 10, 4, 3) #define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1) #define SYSREG_CNTP_CTL_EL0 SYSREG(3, 3, 14, 2, 1) #define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0) @@ -394,156 +394,34 @@ static const struct hvf_reg_match hvf_fpreg_match[] = { { HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) }, }; -struct hvf_sreg_match { - int reg; - uint32_t key; - uint32_t cp_idx; -}; +/* + * QEMU uses KVM system register ids in the migration format. + * Conveniently, HVF uses the same encoding of the op* and cr* parameters + * within the low 16 bits of the ids. Thus conversion between the + * formats is trivial. 
+ */ -static struct hvf_sreg_match hvf_sreg_match[] = { - { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) }, - - { 
HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) }, - -#ifdef SYNC_NO_RAW_REGS - /* - * The registers below are manually synced on init because they are - * marked as NO_RAW. We still list them to make number space sync easier. - */ - { HV_SYS_REG_MDCCINT_EL1, HVF_SYSREG(0, 2, 2, 0, 0) }, - { HV_SYS_REG_MIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 0) }, - { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) }, - { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) }, -#endif - { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) }, - { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) }, - { HV_SYS_REG_ID_AA64ISAR1_EL1, HVF_SYSREG(0, 6, 3, 0, 1) }, -#ifdef SYNC_NO_MMFR0 - /* We keep the hardware MMFR0 around. HW limits are there anyway */ - { HV_SYS_REG_ID_AA64MMFR0_EL1, HVF_SYSREG(0, 7, 3, 0, 0) }, -#endif - { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) }, - /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ - - { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) }, - { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) }, - { HV_SYS_REG_CPACR_EL1, HVF_SYSREG(1, 0, 3, 0, 2) }, - { HV_SYS_REG_TTBR0_EL1, HVF_SYSREG(2, 0, 3, 0, 0) }, - { HV_SYS_REG_TTBR1_EL1, HVF_SYSREG(2, 0, 3, 0, 1) }, - { HV_SYS_REG_TCR_EL1, HVF_SYSREG(2, 0, 3, 0, 2) }, - - { HV_SYS_REG_APIAKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 0) }, - { HV_SYS_REG_APIAKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 1) }, - { HV_SYS_REG_APIBKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 2) }, - { HV_SYS_REG_APIBKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 3) }, - { HV_SYS_REG_APDAKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 0) }, - { HV_SYS_REG_APDAKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 1) }, - { HV_SYS_REG_APDBKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 2) }, - { HV_SYS_REG_APDBKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 3) }, - { HV_SYS_REG_APGAKEYLO_EL1, HVF_SYSREG(2, 3, 3, 0, 0) }, - { HV_SYS_REG_APGAKEYHI_EL1, HVF_SYSREG(2, 3, 3, 0, 1) }, - - { HV_SYS_REG_SPSR_EL1, HVF_SYSREG(4, 0, 3, 0, 0) }, - { HV_SYS_REG_ELR_EL1, HVF_SYSREG(4, 0, 3, 0, 1) }, - { HV_SYS_REG_SP_EL0, HVF_SYSREG(4, 1, 3, 0, 0) }, - { HV_SYS_REG_AFSR0_EL1, HVF_SYSREG(5, 1, 3, 0, 0) }, - { HV_SYS_REG_AFSR1_EL1, HVF_SYSREG(5, 1, 3, 0, 1) }, - { HV_SYS_REG_ESR_EL1, HVF_SYSREG(5, 2, 3, 0, 0) }, - { HV_SYS_REG_FAR_EL1, HVF_SYSREG(6, 0, 3, 0, 0) }, - { HV_SYS_REG_PAR_EL1, HVF_SYSREG(7, 4, 3, 0, 0) }, - { HV_SYS_REG_MAIR_EL1, HVF_SYSREG(10, 2, 3, 0, 0) }, - { HV_SYS_REG_AMAIR_EL1, HVF_SYSREG(10, 3, 3, 0, 0) }, - { HV_SYS_REG_VBAR_EL1, HVF_SYSREG(12, 0, 3, 0, 0) }, - { HV_SYS_REG_CONTEXTIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 1) }, - { HV_SYS_REG_TPIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 4) }, - { HV_SYS_REG_CNTKCTL_EL1, HVF_SYSREG(14, 1, 3, 0, 0) }, - { HV_SYS_REG_CSSELR_EL1, HVF_SYSREG(0, 0, 3, 2, 0) }, - { HV_SYS_REG_TPIDR_EL0, HVF_SYSREG(13, 0, 3, 3, 2) }, - { HV_SYS_REG_TPIDRRO_EL0, HVF_SYSREG(13, 0, 3, 3, 3) }, - { HV_SYS_REG_CNTV_CTL_EL0, HVF_SYSREG(14, 3, 3, 3, 1) }, - { HV_SYS_REG_CNTV_CVAL_EL0, HVF_SYSREG(14, 3, 3, 3, 2) }, - { HV_SYS_REG_SP_EL1, HVF_SYSREG(4, 1, 3, 4, 0) }, +#define KVMID_TO_HVF(KVM) ((KVM) & 0xffff) +#define HVF_TO_KVMID(HVF) \ + (CP_REG_ARM64 | CP_REG_SIZE_U64 | CP_REG_ARM64_SYSREG | (HVF)) + +/* Verify this at compile-time. */ + +#define DEF_SYSREG(HVF_ID, ...) 
\ + QEMU_BUILD_BUG_ON(HVF_ID != KVMID_TO_HVF(KVMID_AA64_SYS_REG64(__VA_ARGS__))); + +#include "sysreg.c.inc" + +#undef DEF_SYSREG + +#define DEF_SYSREG(HVF_ID, op0, op1, crn, crm, op2) HVF_ID, + +static const hv_sys_reg_t hvf_sreg_list[] = { +#include "sysreg.c.inc" }; +#undef DEF_SYSREG + int hvf_get_registers(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); @@ -551,7 +429,7 @@ int hvf_get_registers(CPUState *cpu) hv_return_t ret; uint64_t val; hv_simd_fp_uchar16_t fpval; - int i; + int i, n; for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { ret = hv_vcpu_get_reg(cpu->accel->fd, hvf_reg_match[i].reg, &val); @@ -580,14 +458,13 @@ int hvf_get_registers(CPUState *cpu) assert_hvf_ok(ret); pstate_write(env, val); - for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) { - if (hvf_sreg_match[i].cp_idx == -1) { - continue; - } + for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) { + uint64_t kvm_id = arm_cpu->cpreg_indexes[i]; + int hvf_id = KVMID_TO_HVF(kvm_id); if (cpu->accel->guest_debug_enabled) { /* Handle debug registers */ - switch (hvf_sreg_match[i].reg) { + switch (hvf_id) { case HV_SYS_REG_DBGBVR0_EL1: case HV_SYS_REG_DBGBCR0_EL1: case HV_SYS_REG_DBGWVR0_EL1: @@ -661,20 +538,22 @@ int hvf_get_registers(CPUState *cpu) * vCPU but simply keep the values from the previous * environment. */ - const ARMCPRegInfo *ri; - ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key); + uint32_t key = kvm_to_cpreg_id(kvm_id); + const ARMCPRegInfo *ri = + get_arm_cp_reginfo(arm_cpu->cp_regs, key); + val = read_raw_cp_reg(env, ri); - arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + arm_cpu->cpreg_values[i] = val; continue; } } } - ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, &val); + ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_id, &val); assert_hvf_ok(ret); - arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + arm_cpu->cpreg_values[i] = val; } assert(write_list_to_cpustate(arm_cpu)); @@ -690,7 +569,7 @@ int hvf_put_registers(CPUState *cpu) hv_return_t ret; uint64_t val; hv_simd_fp_uchar16_t fpval; - int i; + int i, n; for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset); @@ -717,14 +596,13 @@ int hvf_put_registers(CPUState *cpu) aarch64_save_sp(env, arm_current_el(env)); assert(write_cpustate_to_list(arm_cpu, false)); - for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) { - if (hvf_sreg_match[i].cp_idx == -1) { - continue; - } + for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) { + uint64_t kvm_id = arm_cpu->cpreg_indexes[i]; + int hvf_id = KVMID_TO_HVF(kvm_id); if (cpu->accel->guest_debug_enabled) { /* Handle debug registers */ - switch (hvf_sreg_match[i].reg) { + switch (hvf_id) { case HV_SYS_REG_DBGBVR0_EL1: case HV_SYS_REG_DBGBCR0_EL1: case HV_SYS_REG_DBGWVR0_EL1: @@ -798,8 +676,8 @@ int hvf_put_registers(CPUState *cpu) } } - val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx]; - ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, val); + val = arm_cpu->cpreg_values[i]; + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_id, val); assert_hvf_ok(ret); } @@ -811,9 +689,9 @@ int hvf_put_registers(CPUState *cpu) static void flush_cpu_state(CPUState *cpu) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } } @@ -844,14 +722,17 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt) return val; } -static void clamp_id_aa64mmfr0_parange_to_ipa_size(uint64_t *id_aa64mmfr0) +static void 
clamp_id_aa64mmfr0_parange_to_ipa_size(ARMISARegisters *isar) { uint32_t ipa_size = chosen_ipa_bit_size ? chosen_ipa_bit_size : hvf_arm_get_max_ipa_bit_size(); + uint64_t id_aa64mmfr0; /* Clamp down the PARange to the IPA size the kernel supports. */ uint8_t index = round_down_to_parange_index(ipa_size); - *id_aa64mmfr0 = (*id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index; + id_aa64mmfr0 = GET_IDREG(isar, ID_AA64MMFR0); + id_aa64mmfr0 = (id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index; + SET_IDREG(isar, ID_AA64MMFR0, id_aa64mmfr0); } static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) @@ -861,16 +742,17 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) int reg; uint64_t *val; } regs[] = { - { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 }, - { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 }, - { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 }, - { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 }, - { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 }, - { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 }, + { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.idregs[ID_AA64PFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.idregs[ID_AA64PFR1_EL1_IDX] }, + /* Add ID_AA64PFR2_EL1 here when HVF supports it */ + { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.idregs[ID_AA64DFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.idregs[ID_AA64DFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.idregs[ID_AA64ISAR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.idregs[ID_AA64ISAR1_EL1_IDX] }, /* Add ID_AA64ISAR2_EL1 here when HVF supports it */ - { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, - { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, - { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, + { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.idregs[ID_AA64MMFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.idregs[ID_AA64MMFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.idregs[ID_AA64MMFR2_EL1_IDX] }, /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ }; hv_vcpu_t fd; @@ -878,7 +760,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) hv_vcpu_exit_t *exit; int i; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; ahcf->features = (1ULL << ARM_FEATURE_V8) | (1ULL << ARM_FEATURE_NEON) | (1ULL << ARM_FEATURE_AARCH64) | @@ -897,7 +779,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr); r |= hv_vcpu_destroy(fd); - clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar.id_aa64mmfr0); + clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar); /* * Disable SME, which is not properly handled by QEMU hvf yet. 
@@ -909,7 +791,8 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * - fix any assumptions we made that SME implies SVE (since * on the M4 there is SME but not SVE) */ - host_isar.id_aa64pfr1 &= ~R_ID_AA64PFR1_SME_MASK; + SET_IDREG(&host_isar, ID_AA64PFR1, + GET_IDREG(&host_isar, ID_AA64PFR1) & ~R_ID_AA64PFR1_SME_MASK); ahcf->isar = host_isar; @@ -926,7 +809,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ahcf->reset_sctlr |= 0x00800000; /* Make sure we don't advertise AArch32 support for EL0/EL1 */ - if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) { + if ((GET_IDREG(&host_isar, ID_AA64PFR0) & 0xff) != 0x11) { return false; } @@ -1005,7 +888,7 @@ int hvf_arch_init_vcpu(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; - uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_match); + uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_list); uint32_t sregs_cnt = 0; uint64_t pfr; hv_return_t ret; @@ -1030,21 +913,22 @@ int hvf_arch_init_vcpu(CPUState *cpu) /* Populate cp list for all known sysregs */ for (i = 0; i < sregs_match_len; i++) { - const ARMCPRegInfo *ri; - uint32_t key = hvf_sreg_match[i].key; + hv_sys_reg_t hvf_id = hvf_sreg_list[i]; + uint64_t kvm_id = HVF_TO_KVMID(hvf_id); + uint32_t key = kvm_to_cpreg_id(kvm_id); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key); - ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key); if (ri) { assert(!(ri->type & ARM_CP_NO_RAW)); - hvf_sreg_match[i].cp_idx = sregs_cnt; - arm_cpu->cpreg_indexes[sregs_cnt++] = cpreg_to_kvm_id(key); - } else { - hvf_sreg_match[i].cp_idx = -1; + arm_cpu->cpreg_indexes[sregs_cnt++] = kvm_id; } } arm_cpu->cpreg_array_len = sregs_cnt; arm_cpu->cpreg_vmstate_array_len = sregs_cnt; + /* cpreg tuples must be in strictly ascending order */ + qsort(arm_cpu->cpreg_indexes, sregs_cnt, sizeof(uint64_t), compare_u64); + assert(write_cpustate_to_list(arm_cpu, false)); /* Set CP_NO_RAW system registers on init */ @@ -1064,12 +948,12 @@ int hvf_arch_init_vcpu(CPUState *cpu) /* We're limited to underlying hardware caps, override internal versions */ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, - &arm_cpu->isar.id_aa64mmfr0); + &arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); assert_hvf_ok(ret); - clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar.id_aa64mmfr0); + clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar); ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, - arm_cpu->isar.id_aa64mmfr0); + arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); assert_hvf_ok(ret); return 0; @@ -1082,13 +966,13 @@ void hvf_kick_vcpu_thread(CPUState *cpu) } static void hvf_raise_exception(CPUState *cpu, uint32_t excp, - uint32_t syndrome) + uint32_t syndrome, int target_el) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; cpu->exception_index = excp; - env->exception.target_el = 1; + env->exception.target_el = target_el; env->exception.syndrome = syndrome; arm_cpu_do_interrupt(cpu); @@ -1241,11 +1125,10 @@ static bool is_id_sysreg(uint32_t reg) static uint32_t hvf_reg2cp_reg(uint32_t reg) { - return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + return ENCODE_AA64_CP_REG((reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK, + (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK, (reg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK, (reg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK, - (reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK, - (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK, (reg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK); } @@ -1257,6 +1140,9 @@ 
static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, true)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) { return false; @@ -1267,7 +1153,7 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) } else if (ri->readfn) { *val = ri->readfn(env, ri); } else { - *val = CPREG_FIELD64(env, ri); + *val = raw_read(env, ri); } trace_hvf_vgic_read(ri->name, *val); return true; @@ -1352,6 +1238,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1448,7 +1335,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) SYSREG_CRN(reg), SYSREG_CRM(reg), SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); return 1; } @@ -1537,6 +1424,9 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, false)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) { return false; @@ -1545,7 +1435,7 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) if (ri->writefn) { ri->writefn(env, ri, val); } else { - CPREG_FIELD64(env, ri) = val; + raw_write(env, ri, val); } trace_hvf_vgic_write(ri->name, val); @@ -1644,6 +1534,9 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_OSDLR_EL1: /* Dummy register */ return 0; + case SYSREG_LORC_EL1: + /* Dummy register */ + return 0; case SYSREG_ICC_AP0R0_EL1: case SYSREG_ICC_AP0R1_EL1: case SYSREG_ICC_AP0R2_EL1: @@ -1666,6 +1559,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1758,19 +1652,19 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) SYSREG_CRN(reg), SYSREG_CRM(reg), SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); return 1; } static int hvf_inject_interrupts(CPUState *cpu) { - if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_FIQ)) { trace_hvf_inject_fiq(); hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_FIQ, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { trace_hvf_inject_irq(); hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_IRQ, true); @@ -1822,7 +1716,7 @@ static void hvf_wfi(CPUState *cpu) uint64_t nanos; uint32_t cntfrq; - if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) { /* Interrupt pending, no need to wait */ return; } @@ -1909,7 +1803,17 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); bql_unlock(); - assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); + r = hv_vcpu_run(cpu->accel->fd); + bql_lock(); + switch (r) { + case HV_SUCCESS: + break; + case HV_ILLEGAL_GUEST_STATE: + 
trace_hvf_illegal_guest_state(); + /* fall through */ + default: + g_assert_not_reached(); + } /* handle VMEXIT */ uint64_t exit_reason = hvf_exit->reason; @@ -1917,7 +1821,6 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t ec = syn_get_ec(syndrome); ret = 0; - bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. */ @@ -1952,7 +1855,7 @@ int hvf_vcpu_exec(CPUState *cpu) if (!hvf_find_sw_breakpoint(cpu, env->pc)) { /* Re-inject into the guest */ ret = 0; - hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0)); + hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0), 1); } break; } @@ -1990,7 +1893,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t cm = (syndrome >> 8) & 0x1; uint64_t val = 0; - trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address, + trace_hvf_data_abort(hvf_exit->exception.virtual_address, hvf_exit->exception.physical_address, isv, iswrite, s1ptw, len, srt); @@ -2057,13 +1960,13 @@ int hvf_vcpu_exec(CPUState *cpu) cpu_synchronize_state(cpu); if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) { if (!hvf_handle_psci_call(cpu)) { - trace_hvf_unknown_hvc(env->xregs[0]); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */ env->xregs[0] = -1; } } else { - trace_hvf_unknown_hvc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; case EC_AA64_SMC: @@ -2078,7 +1981,7 @@ int hvf_vcpu_exec(CPUState *cpu) } } else { trace_hvf_unknown_smc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; default: @@ -2277,28 +2180,23 @@ static inline bool hvf_arm_hw_debug_active(CPUState *cpu) return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); } -static void hvf_arch_set_traps(void) +static void hvf_arch_set_traps(CPUState *cpu) { - CPUState *cpu; bool should_enable_traps = false; hv_return_t r = HV_SUCCESS; /* Check whether guest debugging is enabled for at least one vCPU; if it * is, enable exiting the guest on all vCPUs */ - CPU_FOREACH(cpu) { - should_enable_traps |= cpu->accel->guest_debug_enabled; - } - CPU_FOREACH(cpu) { - /* Set whether debug exceptions exit the guest */ - r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd, - should_enable_traps); - assert_hvf_ok(r); + should_enable_traps |= cpu->accel->guest_debug_enabled; + /* Set whether debug exceptions exit the guest */ + r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); - /* Set whether accesses to debug registers exit the guest */ - r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd, - should_enable_traps); - assert_hvf_ok(r); - } + /* Set whether accesses to debug registers exit the guest */ + r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); } void hvf_arch_update_guest_debug(CPUState *cpu) @@ -2339,7 +2237,7 @@ void hvf_arch_update_guest_debug(CPUState *cpu) deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0); } - hvf_arch_set_traps(); + hvf_arch_set_traps(cpu); } bool hvf_arch_supports_guest_debug(void) diff --git a/target/arm/hvf/sysreg.c.inc b/target/arm/hvf/sysreg.c.inc new file mode 100644 index 0000000..067a860 --- /dev/null +++ b/target/arm/hvf/sysreg.c.inc @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +DEF_SYSREG(HV_SYS_REG_DBGBVR0_EL1, 2, 0, 
0, 0, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR0_EL1, 2, 0, 0, 0, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR0_EL1, 2, 0, 0, 0, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR0_EL1, 2, 0, 0, 0, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR1_EL1, 2, 0, 0, 1, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR1_EL1, 2, 0, 0, 1, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR1_EL1, 2, 0, 0, 1, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR1_EL1, 2, 0, 0, 1, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR2_EL1, 2, 0, 0, 2, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR2_EL1, 2, 0, 0, 2, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR2_EL1, 2, 0, 0, 2, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR2_EL1, 2, 0, 0, 2, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR3_EL1, 2, 0, 0, 3, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR3_EL1, 2, 0, 0, 3, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR3_EL1, 2, 0, 0, 3, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR3_EL1, 2, 0, 0, 3, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR4_EL1, 2, 0, 0, 4, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR4_EL1, 2, 0, 0, 4, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR4_EL1, 2, 0, 0, 4, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR4_EL1, 2, 0, 0, 4, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR5_EL1, 2, 0, 0, 5, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR5_EL1, 2, 0, 0, 5, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR5_EL1, 2, 0, 0, 5, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR5_EL1, 2, 0, 0, 5, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR6_EL1, 2, 0, 0, 6, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR6_EL1, 2, 0, 0, 6, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR6_EL1, 2, 0, 0, 6, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR6_EL1, 2, 0, 0, 6, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR7_EL1, 2, 0, 0, 7, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR7_EL1, 2, 0, 0, 7, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR7_EL1, 2, 0, 0, 7, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR7_EL1, 2, 0, 0, 7, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR8_EL1, 2, 0, 0, 8, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR8_EL1, 2, 0, 0, 8, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR8_EL1, 2, 0, 0, 8, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR8_EL1, 2, 0, 0, 8, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR9_EL1, 2, 0, 0, 9, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR9_EL1, 2, 0, 0, 9, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR9_EL1, 2, 0, 0, 9, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR9_EL1, 2, 0, 0, 9, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR10_EL1, 2, 0, 0, 10, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR10_EL1, 2, 0, 0, 10, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR10_EL1, 2, 0, 0, 10, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR10_EL1, 2, 0, 0, 10, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR11_EL1, 2, 0, 0, 11, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR11_EL1, 2, 0, 0, 11, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR11_EL1, 2, 0, 0, 11, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR11_EL1, 2, 0, 0, 11, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR12_EL1, 2, 0, 0, 12, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR12_EL1, 2, 0, 0, 12, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR12_EL1, 2, 0, 0, 12, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR12_EL1, 2, 0, 0, 12, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR13_EL1, 2, 0, 0, 13, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR13_EL1, 2, 0, 0, 13, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR13_EL1, 2, 0, 0, 13, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR13_EL1, 2, 0, 0, 13, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR14_EL1, 2, 0, 0, 14, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR14_EL1, 2, 0, 0, 14, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR14_EL1, 2, 0, 0, 14, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR14_EL1, 2, 0, 0, 14, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR15_EL1, 2, 0, 0, 15, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR15_EL1, 2, 0, 0, 15, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR15_EL1, 2, 0, 0, 15, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR15_EL1, 2, 0, 0, 15, 7) + +#ifdef SYNC_NO_RAW_REGS +/* + * The registers below are manually synced on init because they are + * marked as NO_RAW. We still list them to make number space sync easier. 
+ */ +DEF_SYSREG(HV_SYS_REG_MDCCINT_EL1, 2, 0, 0, 2, 0) +DEF_SYSREG(HV_SYS_REG_MIDR_EL1, 3, 0, 0, 0, 0) +DEF_SYSREG(HV_SYS_REG_MPIDR_EL1, 3, 0, 0, 0, 5) +DEF_SYSREG(HV_SYS_REG_ID_AA64PFR0_EL1, 3, 0, 0, 4, 0) +#endif + +DEF_SYSREG(HV_SYS_REG_ID_AA64PFR1_EL1, 3, 0, 0, 4, 1) +/* Add ID_AA64PFR2_EL1 here when HVF supports it */ +DEF_SYSREG(HV_SYS_REG_ID_AA64DFR0_EL1, 3, 0, 0, 5, 0) +DEF_SYSREG(HV_SYS_REG_ID_AA64DFR1_EL1, 3, 0, 0, 5, 1) +DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0) +DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1) + +#ifdef SYNC_NO_MMFR0 +/* We keep the hardware MMFR0 around. HW limits are there anyway */ +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0) +#endif + +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1) +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2) +/* Add ID_AA64MMFR3_EL1 here when HVF supports it */ + +DEF_SYSREG(HV_SYS_REG_MDSCR_EL1, 2, 0, 0, 2, 2) +DEF_SYSREG(HV_SYS_REG_SCTLR_EL1, 3, 0, 1, 0, 0) +DEF_SYSREG(HV_SYS_REG_CPACR_EL1, 3, 0, 1, 0, 2) +DEF_SYSREG(HV_SYS_REG_TTBR0_EL1, 3, 0, 2, 0, 0) +DEF_SYSREG(HV_SYS_REG_TTBR1_EL1, 3, 0, 2, 0, 1) +DEF_SYSREG(HV_SYS_REG_TCR_EL1, 3, 0, 2, 0, 2) + +DEF_SYSREG(HV_SYS_REG_APIAKEYLO_EL1, 3, 0, 2, 1, 0) +DEF_SYSREG(HV_SYS_REG_APIAKEYHI_EL1, 3, 0, 2, 1, 1) +DEF_SYSREG(HV_SYS_REG_APIBKEYLO_EL1, 3, 0, 2, 1, 2) +DEF_SYSREG(HV_SYS_REG_APIBKEYHI_EL1, 3, 0, 2, 1, 3) +DEF_SYSREG(HV_SYS_REG_APDAKEYLO_EL1, 3, 0, 2, 2, 0) +DEF_SYSREG(HV_SYS_REG_APDAKEYHI_EL1, 3, 0, 2, 2, 1) +DEF_SYSREG(HV_SYS_REG_APDBKEYLO_EL1, 3, 0, 2, 2, 2) +DEF_SYSREG(HV_SYS_REG_APDBKEYHI_EL1, 3, 0, 2, 2, 3) +DEF_SYSREG(HV_SYS_REG_APGAKEYLO_EL1, 3, 0, 2, 3, 0) +DEF_SYSREG(HV_SYS_REG_APGAKEYHI_EL1, 3, 0, 2, 3, 1) + +DEF_SYSREG(HV_SYS_REG_SPSR_EL1, 3, 0, 4, 0, 0) +DEF_SYSREG(HV_SYS_REG_ELR_EL1, 3, 0, 4, 0, 1) +DEF_SYSREG(HV_SYS_REG_SP_EL0, 3, 0, 4, 1, 0) +DEF_SYSREG(HV_SYS_REG_AFSR0_EL1, 3, 0, 5, 1, 0) +DEF_SYSREG(HV_SYS_REG_AFSR1_EL1, 3, 0, 5, 1, 1) +DEF_SYSREG(HV_SYS_REG_ESR_EL1, 3, 0, 5, 2, 0) +DEF_SYSREG(HV_SYS_REG_FAR_EL1, 3, 0, 6, 0, 0) +DEF_SYSREG(HV_SYS_REG_PAR_EL1, 3, 0, 7, 4, 0) +DEF_SYSREG(HV_SYS_REG_MAIR_EL1, 3, 0, 10, 2, 0) +DEF_SYSREG(HV_SYS_REG_AMAIR_EL1, 3, 0, 10, 3, 0) +DEF_SYSREG(HV_SYS_REG_VBAR_EL1, 3, 0, 12, 0, 0) +DEF_SYSREG(HV_SYS_REG_CONTEXTIDR_EL1, 3, 0, 13, 0, 1) +DEF_SYSREG(HV_SYS_REG_TPIDR_EL1, 3, 0, 13, 0, 4) +DEF_SYSREG(HV_SYS_REG_CNTKCTL_EL1, 3, 0, 14, 1, 0) +DEF_SYSREG(HV_SYS_REG_CSSELR_EL1, 3, 2, 0, 0, 0) +DEF_SYSREG(HV_SYS_REG_TPIDR_EL0, 3, 3, 13, 0, 2) +DEF_SYSREG(HV_SYS_REG_TPIDRRO_EL0, 3, 3, 13, 0, 3) +DEF_SYSREG(HV_SYS_REG_CNTV_CTL_EL0, 3, 3, 14, 3, 1) +DEF_SYSREG(HV_SYS_REG_CNTV_CVAL_EL0, 3, 3, 14, 3, 2) +DEF_SYSREG(HV_SYS_REG_SP_EL1, 3, 4, 4, 1, 0) diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events index 4fbbe4b..b29a995 100644 --- a/target/arm/hvf/trace-events +++ b/target/arm/hvf/trace-events @@ -2,12 +2,13 @@ hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)" hvf_inject_fiq(void) "injecting FIQ" hvf_inject_irq(void) "injecting IRQ" -hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" +hvf_data_abort(uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, 
uint32_t len, uint32_t srt) "data abort: [va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64 hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")" -hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64 +hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64 hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64 hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]" -hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x" +hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpuid=0x%x" hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]" hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]" +hvf_illegal_guest_state(void) "HV_ILLEGAL_GUEST_STATE" diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index 26c717b..ea82f26 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -11,7 +11,7 @@ #ifndef QEMU_HVF_ARM_H #define QEMU_HVF_ARM_H -#include "cpu.h" +#include "target/arm/cpu-qom.h" /** * hvf_arm_init_debug() - initialize guest debug capabilities @@ -22,23 +22,7 @@ void hvf_arm_init_debug(void); void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); -#ifdef CONFIG_HVF - uint32_t hvf_arm_get_default_ipa_bit_size(void); uint32_t hvf_arm_get_max_ipa_bit_size(void); -#else - -static inline uint32_t hvf_arm_get_default_ipa_bit_size(void) -{ - return 0; -} - -static inline uint32_t hvf_arm_get_max_ipa_bit_size(void) -{ - return 0; -} - -#endif - #endif diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c index 1e86126..bb59697 100644 --- a/target/arm/hyp_gdbstub.c +++ b/target/arm/hyp_gdbstub.c @@ -54,7 +54,7 @@ GArray *hw_breakpoints, *hw_watchpoints; * here so future PC comparisons will work properly. 
*/ -int insert_hw_breakpoint(target_ulong addr) +int insert_hw_breakpoint(vaddr addr) { HWBreakpoint brk = { .bcr = 0x1, /* BCR E=1, enable */ @@ -80,7 +80,7 @@ int insert_hw_breakpoint(target_ulong addr) * Delete a breakpoint and shuffle any above down */ -int delete_hw_breakpoint(target_ulong pc) +int delete_hw_breakpoint(vaddr pc) { int i; for (i = 0; i < hw_breakpoints->len; i++) { @@ -125,7 +125,7 @@ int delete_hw_breakpoint(target_ulong pc) * need to ensure you mask the address as required and set BAS=0xff */ -int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) +int insert_hw_watchpoint(vaddr addr, vaddr len, int type) { HWWatchpoint wp = { .wcr = R_DBGWCR_E_MASK, /* E=1, enable */ @@ -182,7 +182,7 @@ int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) return 0; } -bool check_watchpoint_in_range(int i, target_ulong addr) +bool check_watchpoint_in_range(int i, vaddr addr) { HWWatchpoint *wp = get_hw_wp(i); uint64_t addr_top, addr_bottom = wp->wvr; @@ -214,7 +214,7 @@ bool check_watchpoint_in_range(int i, target_ulong addr) * Delete a breakpoint and shuffle any above down */ -int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) +int delete_hw_watchpoint(vaddr addr, vaddr len, int type) { int i; for (i = 0; i < cur_hw_wps; i++) { @@ -226,7 +226,7 @@ int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) return -ENOENT; } -bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) +bool find_hw_breakpoint(CPUState *cpu, vaddr pc) { int i; @@ -239,7 +239,7 @@ bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) return false; } -CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr) { int i; diff --git a/target/arm/internals.h b/target/arm/internals.h index 28585c0..f539bbe 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -25,11 +25,16 @@ #ifndef TARGET_ARM_INTERNALS_H #define TARGET_ARM_INTERNALS_H +#include "exec/hwaddr.h" +#include "exec/vaddr.h" #include "exec/breakpoint.h" +#include "accel/tcg/tb-cpu-state.h" #include "hw/registerfields.h" #include "tcg/tcg-gvec-desc.h" +#include "system/memory.h" #include "syndrome.h" #include "cpu-features.h" +#include "mmuidx-internal.h" /* register banks for CPU modes */ #define BANK_USRSYS 0 @@ -109,11 +114,6 @@ FIELD(DBGWCR, WT, 20, 1) FIELD(DBGWCR, MASK, 24, 5) FIELD(DBGWCR, SSCE, 29, 1) -#define VTCR_NSW (1u << 29) -#define VTCR_NSA (1u << 30) -#define VSTCR_SW VTCR_NSW -#define VSTCR_SA VTCR_NSA - /* Bit definitions for CPACR (AArch32 only) */ FIELD(CPACR, CP10, 20, 2) FIELD(CPACR, CP11, 22, 2) @@ -197,6 +197,24 @@ FIELD(CPTR_EL3, TCPAC, 31, 1) #define TTBCR_SH1 (1U << 28) #define TTBCR_EAE (1U << 31) +#define TCR2_PNCH (1ULL << 0) +#define TCR2_PIE (1ULL << 1) +#define TCR2_E0POE (1ULL << 2) +#define TCR2_POE (1ULL << 3) +#define TCR2_AIE (1ULL << 4) +#define TCR2_D128 (1ULL << 5) +#define TCR2_PTTWI (1ULL << 10) +#define TCR2_HAFT (1ULL << 11) +#define TCR2_AMEC0 (1ULL << 12) +#define TCR2_AMEC1 (1ULL << 13) +#define TCR2_DISCH0 (1ULL << 14) +#define TCR2_DISCH1 (1ULL << 15) +#define TCR2_A2 (1ULL << 16) +#define TCR2_FNG0 (1ULL << 17) +#define TCR2_FNG1 (1ULL << 18) +#define TCR2_FNGNA0 (1ULL << 20) +#define TCR2_FNGNA1 (1ULL << 21) + FIELD(VTCR, T0SZ, 0, 6) FIELD(VTCR, SL0, 6, 2) FIELD(VTCR, IRGN0, 8, 2) @@ -216,6 +234,9 @@ FIELD(VTCR, NSA, 30, 1) FIELD(VTCR, DS, 32, 1) FIELD(VTCR, SL2, 33, 1) +FIELD(VSTCR, SW, 29, 1) +FIELD(VSTCR, SA, 30, 1) + #define HCRX_ENAS0 (1ULL << 
0) #define HCRX_ENALS (1ULL << 1) #define HCRX_ENASR (1ULL << 2) @@ -228,6 +249,9 @@ FIELD(VTCR, SL2, 33, 1) #define HCRX_CMOW (1ULL << 9) #define HCRX_MCE2 (1ULL << 10) #define HCRX_MSCEN (1ULL << 11) +#define HCRX_TCR2EN (1ULL << 14) +#define HCRX_SCTLR2EN (1ULL << 15) +#define HCRX_GCSEN (1ULL << 22) #define HPFAR_NS (1ULL << 63) @@ -282,14 +306,14 @@ FIELD(CNTHCTL, CNTPMASK, 19, 1) * and never returns because we will longjump back up to the CPU main loop. */ G_NORETURN void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el); + uint64_t syndrome, uint32_t target_el); /* * Similarly, but also use unwinding to restore cpu state. */ G_NORETURN void raise_exception_ra(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el, - uintptr_t ra); + uint64_t syndrome, uint32_t target_el, + uintptr_t ra); /* * For AArch64, map a given EL to an index in the banked_spsr array. @@ -350,7 +374,6 @@ static inline int r14_bank_number(int mode) } void arm_cpu_register(const ARMCPUInfo *info); -void aarch64_cpu_register(const ARMCPUInfo *info); void register_cp_regs_for_features(ARMCPU *cpu); void init_cpreg_list(ARMCPU *cpu); @@ -369,10 +392,12 @@ void arm_restore_state_to_opc(CPUState *cs, const uint64_t *data); #ifdef CONFIG_TCG +TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs); void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); /* Our implementation of TCGCPUOps::cpu_exec_halt */ bool arm_cpu_exec_halt(CPUState *cs); +int arm_cpu_mmu_index(CPUState *cs, bool ifetch); #endif /* CONFIG_TCG */ typedef enum ARMFPRounding { @@ -645,16 +670,12 @@ static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) { return false; } -static inline void arm_handle_psci_call(ARMCPU *cpu) -{ - g_assert_not_reached(); -} #else /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */ bool arm_is_psci_call(ARMCPU *cpu, int excp_type); +#endif /* Actually handle a PSCI call */ void arm_handle_psci_call(ARMCPU *cpu); -#endif /** * arm_clear_exclusive: clear the exclusive monitor @@ -724,8 +745,8 @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; struct ARMMMUFaultInfo { ARMFaultType type; ARMGPCF gpcf; - target_ulong s2addr; - target_ulong paddr; + hwaddr s2addr; + hwaddr paddr; ARMSecuritySpace paddr_space; int level; int domain; @@ -733,6 +754,7 @@ struct ARMMMUFaultInfo { bool s1ptw; bool s1ns; bool ea; + bool dirtybit; /* FEAT_S1PIE, FEAT_S2PIE */ }; /** @@ -964,8 +986,6 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx) return mmu_idx | ARM_MMU_IDX_A; } -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx); - /* Return the MMU index for a v7M CPU in the specified security state */ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); @@ -1008,108 +1028,10 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu) } } -/* - * Return true if this address translation regime has two ranges. - * Note that this will not return the correct answer for AArch32 - * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is - * never called from a context where EL3 can be AArch32. (The - * correct return value for ARMMMUIdx_E3 would be different for - * that case, so we can't just make the function return the - * correct value anyway; we would need an extra "bool e3_is_aarch32" - * argument which all the current callsites would pass as 'false'.) 
- */ -static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - return true; - default: - return false; - } -} - -static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_E30_3_PAN: - return true; - default: - return false; - } -} - -static inline bool regime_is_stage2(ARMMMUIdx mmu_idx) -{ - return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; -} - -/* Return the exception level which controls this address translation regime */ -static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_E3: - case ARMMMUIdx_E30_0: - case ARMMMUIdx_E30_3_PAN: - return 3; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - } -} - -static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E30_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSUser: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MSUserNegPri: - return true; - default: - return false; - } -} - /* Return the SCTLR value which controls this address translation regime */ static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) { - return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; + return env->cp15.sctlr_el[regime_el(mmu_idx)]; } /* @@ -1141,13 +1063,13 @@ static inline uint64_t regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) v |= env->cp15.vtcr_el2 & VTCR_SHARED_FIELD_MASK; return v; } - return env->cp15.tcr_el[regime_el(env, mmu_idx)]; + return env->cp15.tcr_el[regime_el(mmu_idx)]; } /* Return true if the translation regime is using LPAE format page tables */ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) { - int el = regime_el(env, mmu_idx); + int el = regime_el(mmu_idx); if (el == 2 || arm_el_is_aa64(env, el)) { return true; } @@ -1170,7 +1092,7 @@ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) static inline int arm_num_brps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; } @@ -1184,7 +1106,7 @@ static inline int arm_num_brps(ARMCPU *cpu) static inline int arm_num_wrps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; } 
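
Aside on the FIELD_EX64_IDREG() conversions in the surrounding hunks: arm_num_brps(), arm_num_wrps() and arm_num_ctx_cmps() all reduce to extracting a 4-bit "count minus one" field from ID_AA64DFR0_EL1 and adding 1. A stand-alone C sketch of that extraction follows; it assumes the architectural field positions (BRPs [15:12], WRPs [23:20], CTX_CMPs [31:28]) and re-implements a minimal extract64() locally rather than using QEMU's bitops helpers.

#include <stdint.h>
#include <stdio.h>

/* Extract a <length>-bit field starting at bit <start> of a 64-bit value. */
static inline uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ULL >> (64 - length));
}

int main(void)
{
    /* Hypothetical ID_AA64DFR0_EL1 value: BRPs=5, WRPs=3, CTX_CMPs=1. */
    uint64_t dfr0 = 0;
    dfr0 |= (uint64_t)5 << 12;   /* BRPs,     bits [15:12] */
    dfr0 |= (uint64_t)3 << 20;   /* WRPs,     bits [23:20] */
    dfr0 |= (uint64_t)1 << 28;   /* CTX_CMPs, bits [31:28] */

    /* The fields hold "count minus one", hence the + 1 in the QEMU helpers. */
    printf("breakpoints:  %d\n", (int)extract64(dfr0, 12, 4) + 1);  /* 6 */
    printf("watchpoints:  %d\n", (int)extract64(dfr0, 20, 4) + 1);  /* 4 */
    printf("ctx compares: %d\n", (int)extract64(dfr0, 28, 4) + 1);  /* 2 */
    return 0;
}
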
@@ -1198,7 +1120,7 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu)
 {
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
-        return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1;
+        return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, CTX_CMPS) + 1;
     } else {
         return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1;
     }
@@ -1274,6 +1196,11 @@ static inline const char *aarch32_mode_name(uint32_t psr)
 }
 
 /**
+ * arm_cpu_exec_interrupt(): Implementation of the cpu_exec_interrupt hook.
+ */
+bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request);
+
+/**
  * arm_cpu_update_virq: Update CPU_INTERRUPT_VIRQ bit in cs->interrupt_request
  *
  * Update the CPU_INTERRUPT_VIRQ bit in cs->interrupt_request, following
@@ -1354,25 +1281,6 @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx);
 ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env);
 #endif
 
-/**
- * arm_mmu_idx_is_stage1_of_2:
- * @mmu_idx: The ARMMMUIdx to test
- *
- * Return true if @mmu_idx is a NOTLB mmu_idx that is the
- * first stage of a two stage regime.
- */
-static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx mmu_idx)
-{
-    switch (mmu_idx) {
-    case ARMMMUIdx_Stage1_E0:
-    case ARMMMUIdx_Stage1_E1:
-    case ARMMMUIdx_Stage1_E1_PAN:
-        return true;
-    default:
-        return false;
-    }
-}
-
 static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features,
                                                const ARMISARegisters *id)
 {
@@ -1467,7 +1375,7 @@ static inline int arm_granule_bits(ARMGranuleSize gran)
 
 /*
  * Parameters of a given virtual address, as extracted from the
- * translation control register (TCR) for a given regime.
+ * translation controls for a given regime.
  */
 typedef struct ARMVAParameters {
     unsigned tsz : 8;
@@ -1482,6 +1390,7 @@ typedef struct ARMVAParameters {
     bool ha : 1;
     bool hd : 1;
     ARMGranuleSize gran : 2;
+    bool pie : 1;
 } ARMVAParameters;
 
 /**
@@ -1552,6 +1461,13 @@ typedef struct ARMCacheAttrs {
 typedef struct GetPhysAddrResult {
     CPUTLBEntryFull f;
     ARMCacheAttrs cacheattrs;
+    /*
+     * For ARMMMUIdx_Stage2*, the protection installed into f.prot
+     * is the result for AccessType_TTW, i.e. the page table walk itself.
+     * The protection installed into s2prot is the one to be merged
+     * with the stage1 protection.
+     */
+    int s2prot;
 } GetPhysAddrResult;
 
 /**
@@ -1583,30 +1499,27 @@ bool get_phys_addr(CPUARMState *env, vaddr address,
     __attribute__((nonnull));
 
 /**
- * get_phys_addr_with_space_nogpc: get the physical address for a virtual
- * address
+ * get_phys_addr_for_at:
  * @env: CPUARMState
  * @address: virtual address to get physical address for
- * @access_type: 0 for read, 1 for write, 2 for execute
- * @memop: memory operation feeding this access, or 0 for none
+ * @prot_check: PAGE_{READ,WRITE,EXEC}, or 0
  * @mmu_idx: MMU index indicating required translation regime
 * @space: security space for the access
 * @result: set on translation success.
 * @fi: set to fault info if the translation fails
 *
- * Similar to get_phys_addr, but use the given security space and don't perform
- * a Granule Protection Check on the resulting address.
+ * Similar to get_phys_addr, but for use by AccessType_AT, i.e.
+ * system instructions for address translation.
*/ -bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, - MMUAccessType access_type, MemOp memop, - ARMMMUIdx mmu_idx, ARMSecuritySpace space, - GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) +bool get_phys_addr_for_at(CPUARMState *env, vaddr address, unsigned prot_check, + ARMMMUIdx mmu_idx, ARMSecuritySpace space, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) __attribute__((nonnull)); bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool is_secure, GetPhysAddrResult *result, + MMUAccessType access_type, unsigned prot_check, + ARMMMUIdx mmu_idx, bool is_secure, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi, uint32_t *mregion); void arm_log_exception(CPUState *cs); @@ -1622,19 +1535,13 @@ FIELD(PREDDESC, OPRSZ, 0, 6) FIELD(PREDDESC, ESZ, 6, 2) FIELD(PREDDESC, DATA, 8, 24) -/* - * The SVE simd_data field, for memory ops, contains either - * rd (5 bits) or a shift count (2 bits). - */ -#define SVE_MTEDESC_SHIFT 5 - /* Bits within a descriptor passed to the helper_mte_check* functions. */ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, ALIGN, 9, 3) -FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */ +FIELD(MTEDESC, SIZEM1, 12, 32 - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); @@ -1806,10 +1713,12 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env) return (1ULL << 31) | ((1ULL << pmu_num_counters(env)) - 1); } -#ifdef TARGET_AARCH64 GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg); +GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cpu, int base_reg); int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg); +int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg); int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg); int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg); @@ -1824,7 +1733,12 @@ void aarch64_max_tcg_initfn(Object *obj); void aarch64_add_pauth_properties(Object *obj); void aarch64_add_sve_properties(Object *obj); void aarch64_add_sme_properties(Object *obj); -#endif + +/* Return true if the gdbstub is presenting an AArch64 CPU */ +static inline bool arm_gdbstub_is_aarch64(ARMCPU *cpu) +{ + return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); +} /* Read the CONTROL register as the MRS instruction would. 
*/ uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure); @@ -1866,6 +1780,12 @@ void define_debug_regs(ARMCPU *cpu); /* Add the cpreg definitions for TLBI instructions */ void define_tlb_insn_regs(ARMCPU *cpu); +/* Add the cpreg definitions for AT instructions */ +void define_at_insn_regs(ARMCPU *cpu); +/* Add the cpreg definitions for PM cpregs */ +void define_pm_cpregs(ARMCPU *cpu); +/* Add the cpreg definitions for GCS cpregs */ +void define_gcs_cpregs(ARMCPU *cpu); /* Effective value of MDCR_EL2 */ static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env) @@ -1898,8 +1818,6 @@ static inline bool arm_fgt_active(CPUARMState *env, int el) (!arm_feature(env, ARM_FEATURE_EL3) || (env->cp15.scr_el3 & SCR_FGTEN)); } -void assert_hflags_rebuild_correctly(CPUARMState *env); - /* * Although the ARM implementation of hardware assisted debugging * allows for different breakpoints per-core, the current GDB @@ -1941,14 +1859,14 @@ extern GArray *hw_breakpoints, *hw_watchpoints; #define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) #define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) -bool find_hw_breakpoint(CPUState *cpu, target_ulong pc); -int insert_hw_breakpoint(target_ulong pc); -int delete_hw_breakpoint(target_ulong pc); +bool find_hw_breakpoint(CPUState *cpu, vaddr pc); +int insert_hw_breakpoint(vaddr pc); +int delete_hw_breakpoint(vaddr pc); -bool check_watchpoint_in_range(int i, target_ulong addr); -CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); -int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); -int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); +bool check_watchpoint_in_range(int i, vaddr addr); +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr); +int insert_hw_watchpoint(vaddr addr, vaddr len, int type); +int delete_hw_watchpoint(vaddr addr, vaddr len, int type); /* Return the current value of the system counter in ticks */ uint64_t gt_get_countervalue(CPUARMState *env); @@ -1978,5 +1896,14 @@ void vfp_clear_float_status_exc_flags(CPUARMState *env); * specified by mask changing to the values in val. */ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask); +bool arm_pan_enabled(CPUARMState *env); +uint32_t cpsr_read_for_spsr_elx(CPUARMState *env); +void cpsr_write_from_spsr_elx(CPUARMState *env, uint32_t val); + +/* Compare uint64_t for qsort and bsearch. */ +int compare_u64(const void *a, const void *b); + +/* Used in FEAT_MEC to set the MECIDWidthm1 field in the MECIDR_EL2 register. 
*/ +#define MECID_WIDTH 16 #endif diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h index c44d23d..54ae5da 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h @@ -160,9 +160,6 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); #define CP_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define CP_REG_ARM64_SYSREG_OP2_SHIFT 0 -/* No kernel define but it's useful to QEMU */ -#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT) - MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64); MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK); MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT); @@ -180,4 +177,15 @@ MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT); #undef MISMATCH_CHECK +#define KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) \ + (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \ + ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ + ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ + ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ + ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ + ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) + +#define KVMID_AA64_SYS_REG64(op0, op1, crn, crm, op2) \ + (KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) | CP_REG_SIZE_U64) + #endif diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index 965a486..c93462c 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -22,3 +22,105 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) { g_assert_not_reached(); } + +/* + * It's safe to call these functions without KVM support. + * They should either do nothing or return "not supported". + */ +bool kvm_arm_aarch32_supported(void) +{ + return false; +} + +bool kvm_arm_pmu_supported(void) +{ + return false; +} + +bool kvm_arm_sve_supported(void) +{ + return false; +} + +bool kvm_arm_mte_supported(void) +{ + return false; +} + +bool kvm_arm_el2_supported(void) +{ + return false; +} + +/* + * These functions should never actually be called without KVM support. 
+ */ +void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_add_vcpu_properties(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) +{ + g_assert_not_reached(); +} + +int kvm_arm_vgic_probe(void) +{ + g_assert_not_reached(); +} + +void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) +{ + g_assert_not_reached(); +} + +void kvm_arm_pmu_init(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) +{ + g_assert_not_reached(); +} + +void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_enable_mte(Object *cpuobj, Error **errp) +{ + g_assert_not_reached(); +} + +void kvm_arm_reset_vcpu(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level) +{ + g_assert_not_reached(); +} + +void kvm_arm_cpu_pre_save(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +bool kvm_arm_cpu_post_load(ARMCPU *cpu) +{ + g_assert_not_reached(); +} diff --git a/target/arm/kvm.c b/target/arm/kvm.c index da30bdb..0d57081 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -26,11 +26,12 @@ #include "system/kvm_int.h" #include "kvm_arm.h" #include "cpu.h" +#include "cpu-sysregs.h" #include "trace.h" #include "internals.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "gdbstub/enums.h" #include "hw/boards.h" #include "hw/irq.h" @@ -100,8 +101,7 @@ static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); } -bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, - int *fdarray, +bool kvm_arm_create_scratch_host_vcpu(int *fdarray, struct kvm_vcpu_init *init) { int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; @@ -150,40 +150,13 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, struct kvm_vcpu_init preferred; ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred); - if (!ret) { - init->target = preferred.target; - } - } - if (ret >= 0) { - ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); if (ret < 0) { goto err; } - } else if (cpus_to_try) { - /* Old kernel which doesn't know about the - * PREFERRED_TARGET ioctl: we know it will only support - * creating one kind of guest CPU which is its preferred - * CPU type. - */ - struct kvm_vcpu_init try; - - while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { - try.target = *cpus_to_try++; - memcpy(try.features, init->features, sizeof(init->features)); - ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try); - if (ret >= 0) { - break; - } - } - if (ret < 0) { - goto err; - } - init->target = try.target; - } else { - /* Treat a NULL cpus_to_try argument the same as an empty - * list, which means we will fail the call since this must - * be an old kernel which doesn't support PREFERRED_TARGET. 
- */ + init->target = preferred.target; + } + ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); + if (ret < 0) { goto err; } @@ -246,6 +219,29 @@ static bool kvm_arm_pauth_supported(void) kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); } + +static uint64_t idregs_sysreg_to_kvm_reg(ARMSysRegs sysreg) +{ + return ARM64_SYS_REG((sysreg & CP_REG_ARM64_SYSREG_OP0_MASK) >> CP_REG_ARM64_SYSREG_OP0_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP1_MASK) >> CP_REG_ARM64_SYSREG_OP1_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRN_MASK) >> CP_REG_ARM64_SYSREG_CRN_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRM_MASK) >> CP_REG_ARM64_SYSREG_CRM_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP2_MASK) >> CP_REG_ARM64_SYSREG_OP2_SHIFT); +} + +/* read a sysreg value and store it in the idregs */ +static int get_host_cpu_reg(int fd, ARMHostCPUFeatures *ahcf, + ARMIDRegisterIdx index) +{ + uint64_t *reg; + int ret; + + reg = &ahcf->isar.idregs[index]; + ret = read_sys_reg64(fd, reg, + idregs_sysreg_to_kvm_reg(id_register_sysreg[index])); + return ret; +} + static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { /* Identify the feature bits corresponding to the host CPU, and @@ -255,21 +251,11 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) */ int fdarray[3]; bool sve_supported; + bool el2_supported; bool pmu_supported = false; uint64_t features = 0; int err; - /* Old kernels may not know about the PREFERRED_TARGET ioctl: however - * we know these will only support creating one kind of guest CPU, - * which is its preferred CPU type. Fortunately these old kernels - * support only a very limited number of CPUs. - */ - static const uint32_t cpus_to_try[] = { - KVM_ARM_TARGET_AEM_V8, - KVM_ARM_TARGET_FOUNDATION_V8, - KVM_ARM_TARGET_CORTEX_A57, - QEMU_KVM_ARM_TARGET_NONE - }; /* * target = -1 informs kvm_arm_create_scratch_host_vcpu() * to use the preferred target @@ -286,6 +272,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) } /* + * Ask for EL2 if supported. + */ + el2_supported = kvm_arm_el2_supported(); + if (el2_supported) { + init.features[0] |= 1 << KVM_ARM_VCPU_HAS_EL2; + } + + /* * Ask for Pointer Authentication if supported, so that we get * the unsanitized field values for AA64ISAR1_EL1. */ @@ -300,15 +294,15 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) features |= 1ULL << ARM_FEATURE_PMU; } - if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { + if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) { return false; } ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; + int fd = fdarray[2]; - err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, - ARM64_SYS_REG(3, 0, 0, 4, 0)); + err = get_host_cpu_reg(fd, ahcf, ID_AA64PFR0_EL1_IDX); if (unlikely(err < 0)) { /* * Before v4.15, the kernel only exposed a limited number of system @@ -326,31 +320,21 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * ??? Either of these sounds like too much effort just * to work around running a modern host kernel. 
*/ - ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ + SET_IDREG(&ahcf->isar, ID_AA64PFR0, 0x00000011); /* EL1&0, AArch64 only */ err = 0; } else { - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, - ARM64_SYS_REG(3, 0, 0, 4, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, - ARM64_SYS_REG(3, 0, 0, 4, 5)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, - ARM64_SYS_REG(3, 0, 0, 5, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, - ARM64_SYS_REG(3, 0, 0, 5, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, - ARM64_SYS_REG(3, 0, 0, 6, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, - ARM64_SYS_REG(3, 0, 0, 6, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, - ARM64_SYS_REG(3, 0, 0, 6, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, - ARM64_SYS_REG(3, 0, 0, 7, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, - ARM64_SYS_REG(3, 0, 0, 7, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, - ARM64_SYS_REG(3, 0, 0, 7, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3, - ARM64_SYS_REG(3, 0, 0, 7, 3)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64SMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR3_EL1_IDX); /* * Note that if AArch32 support is not present in the host, @@ -359,49 +343,31 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * than skipping the reads and leaving 0, as we must avoid * considering the values in every case. 
*/ - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, - ARM64_SYS_REG(3, 0, 0, 1, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, - ARM64_SYS_REG(3, 0, 0, 1, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, - ARM64_SYS_REG(3, 0, 0, 1, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, - ARM64_SYS_REG(3, 0, 0, 1, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, - ARM64_SYS_REG(3, 0, 0, 1, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, - ARM64_SYS_REG(3, 0, 0, 1, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, - ARM64_SYS_REG(3, 0, 0, 1, 7)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, - ARM64_SYS_REG(3, 0, 0, 2, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, - ARM64_SYS_REG(3, 0, 0, 2, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, - ARM64_SYS_REG(3, 0, 0, 2, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, - ARM64_SYS_REG(3, 0, 0, 2, 3)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, - ARM64_SYS_REG(3, 0, 0, 2, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, - ARM64_SYS_REG(3, 0, 0, 2, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, - ARM64_SYS_REG(3, 0, 0, 2, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, - ARM64_SYS_REG(3, 0, 0, 2, 7)); - - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, + err |= get_host_cpu_reg(fd, ahcf, ID_PFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR4_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR5_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR6_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR4_EL1_IDX); + + err |= read_sys_reg32(fd, &ahcf->isar.mvfr0, ARM64_SYS_REG(3, 0, 0, 3, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr1, ARM64_SYS_REG(3, 0, 0, 3, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr2, ARM64_SYS_REG(3, 0, 0, 3, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, - ARM64_SYS_REG(3, 0, 0, 3, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, - ARM64_SYS_REG(3, 0, 0, 3, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, - ARM64_SYS_REG(3, 0, 0, 3, 6)); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR5_EL1_IDX); /* * DBGDIDR is a bit complicated because the kernel doesn't @@ -413,14 +379,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. * We only do this if the CPU supports AArch32 at EL1. 
*/ - if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { - int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); - int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + if (FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL1) >= 2) { + int wrps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, WRPS); + int brps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, BRPS); int ctx_cmps = - FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); + FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, CTX_CMPS); int version = 6; /* ARMv8 debug architecture */ bool has_el3 = - !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); + !!FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL3); uint32_t dbgdidr = 0; dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); @@ -435,7 +401,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (pmu_supported) { /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, + err |= read_sys_reg64(fd, &ahcf->isar.reset_pmcr_el0, ARM64_SYS_REG(3, 3, 9, 12, 0)); } @@ -447,8 +413,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * enabled SVE support, which resulted in an error rather than RAZ. * So only read the register if we set KVM_ARM_VCPU_SVE above. */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, - ARM64_SYS_REG(3, 0, 0, 4, 4)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ZFR0_EL1_IDX); } } @@ -468,6 +433,10 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) features |= 1ULL << ARM_FEATURE_AARCH64; features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; + if (el2_supported) { + features |= 1ULL << ARM_FEATURE_EL2; + } + ahcf->features = features; return true; @@ -750,17 +719,6 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, memory_region_ref(kd->mr); } -static int compare_u64(const void *a, const void *b) -{ - if (*(uint64_t *)a > *(uint64_t *)b) { - return 1; - } - if (*(uint64_t *)a < *(uint64_t *)b) { - return -1; - } - return 0; -} - /* * cpreg_values are sorted in ascending order by KVM register ID * (see kvm_arm_init_cpreg_list). 
This allows us to cheaply find
@@ -932,6 +890,58 @@ bool write_kvmstate_to_list(ARMCPU *cpu)
     return ok;
 }
 
+/* pretty-print a KVM register */
+#define CP_REG_ARM64_SYSREG_OP(_reg, _op) \
+    ((uint8_t)((_reg & CP_REG_ARM64_SYSREG_ ## _op ## _MASK) >> \
+               CP_REG_ARM64_SYSREG_ ## _op ## _SHIFT))
+
+static gchar *kvm_print_sve_register_name(uint64_t regidx)
+{
+    uint16_t sve_reg = regidx & 0x000000000000ffff;
+
+    if (regidx == KVM_REG_ARM64_SVE_VLS) {
+        return g_strdup_printf("SVE VLS");
+    }
+    /* zreg, preg, ffr */
+    switch (sve_reg & 0xfc00) {
+    case 0:
+        return g_strdup_printf("SVE zreg n:%d slice:%d",
+                               (sve_reg & 0x03e0) >> 5, sve_reg & 0x001f);
+    case 0x0400:
+        return g_strdup_printf("SVE preg n:%d slice:%d",
+                               (sve_reg & 0x01e0) >> 5, sve_reg & 0x001f);
+    case 0x0600:
+        return g_strdup_printf("SVE ffr slice:%d", sve_reg & 0x001f);
+    default:
+        return g_strdup_printf("SVE ???");
+    }
+}
+
+static gchar *kvm_print_register_name(uint64_t regidx)
+{
+    switch ((regidx & KVM_REG_ARM_COPROC_MASK)) {
+    case KVM_REG_ARM_CORE:
+        return g_strdup_printf("core reg %"PRIx64, regidx);
+    case KVM_REG_ARM_DEMUX:
+        return g_strdup_printf("demuxed reg %"PRIx64, regidx);
+    case KVM_REG_ARM64_SYSREG:
+        return g_strdup_printf("op0:%d op1:%d crn:%d crm:%d op2:%d",
+                               CP_REG_ARM64_SYSREG_OP(regidx, OP0),
+                               CP_REG_ARM64_SYSREG_OP(regidx, OP1),
+                               CP_REG_ARM64_SYSREG_OP(regidx, CRN),
+                               CP_REG_ARM64_SYSREG_OP(regidx, CRM),
+                               CP_REG_ARM64_SYSREG_OP(regidx, OP2));
+    case KVM_REG_ARM_FW:
+        return g_strdup_printf("fw reg %d", (int)(regidx & 0xffff));
+    case KVM_REG_ARM64_SVE:
+        return kvm_print_sve_register_name(regidx);
+    case KVM_REG_ARM_FW_FEAT_BMAP:
+        return g_strdup_printf("fw feat reg %d", (int)(regidx & 0xffff));
+    default:
+        return g_strdup_printf("%"PRIx64, regidx);
+    }
+}
+
 bool write_list_to_kvmstate(ARMCPU *cpu, int level)
 {
     CPUState *cs = CPU(cpu);
@@ -959,11 +969,45 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
             g_assert_not_reached();
         }
         if (ret) {
+            gchar *reg_str = kvm_print_register_name(regidx);
+
             /* We might fail for "unknown register" and also for
              * "you tried to set a register which is constant with
              * a different value from what it actually contains".
              */
             ok = false;
+            switch (ret) {
+            case -ENOENT:
+                error_report("Could not set register %s: unknown to KVM",
+                             reg_str);
+                break;
+            case -EINVAL:
+                if ((regidx & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) {
+                    if (!kvm_get_one_reg(cs, regidx, &v32)) {
+                        error_report("Could not set register %s to %x (is %x)",
+                                     reg_str, (uint32_t)cpu->cpreg_values[i],
+                                     v32);
+                    } else {
+                        error_report("Could not set register %s to %x",
+                                     reg_str, (uint32_t)cpu->cpreg_values[i]);
+                    }
+                } else /* U64 */ {
+                    uint64_t v64;
+
+                    if (!kvm_get_one_reg(cs, regidx, &v64)) {
+                        error_report("Could not set register %s to %"PRIx64" (is %"PRIx64")",
+                                     reg_str, cpu->cpreg_values[i], v64);
+                    } else {
+                        error_report("Could not set register %s to %"PRIx64,
+                                     reg_str, cpu->cpreg_values[i]);
+                    }
+                }
+                break;
+            default:
+                error_report("Could not set register %s: %s",
+                             reg_str, strerror(-ret));
+            }
+            g_free(reg_str);
         }
     }
     return ok;
@@ -977,13 +1021,24 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu)
     }
 }
 
-void kvm_arm_cpu_post_load(ARMCPU *cpu)
+bool kvm_arm_cpu_post_load(ARMCPU *cpu)
 {
+    if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) {
+        return false;
+    }
+
+    /* Note that it's OK for the TCG side not to know about
+     * every register in the list; KVM is authoritative if
+     * we're using it.
+ */ + write_list_to_cpustate(cpu); + /* KVM virtual time adjustment */ if (cpu->kvm_adjvtime) { cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); cpu->kvm_vtime_dirty = true; } + + return true; } void kvm_arm_reset_vcpu(ARMCPU *cpu) @@ -1797,6 +1852,11 @@ bool kvm_arm_aarch32_supported(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); } +bool kvm_arm_el2_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL2); +} + bool kvm_arm_sve_supported(void) { return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); @@ -1835,7 +1895,7 @@ uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) probed = true; - if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) { + if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) { error_report("failed to create scratch VCPU with SVE enabled"); abort(); } @@ -1874,6 +1934,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; @@ -1882,8 +1947,7 @@ int kvm_arch_init_vcpu(CPUState *cs) CPUARMState *env = &cpu->env; uint64_t psciver; - if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || - !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { + if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) { error_report("KVM is not supported for this guest CPU type"); return -EINVAL; } @@ -1913,6 +1977,9 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); } + if (cpu->has_el2 && kvm_arm_el2_supported()) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_HAS_EL2; + } /* Do KVM_ARM_VCPU_INIT ioctl */ ret = kvm_arm_vcpu_init(cpu); @@ -2056,7 +2123,7 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { uint64_t val; uint32_t fpr; @@ -2366,10 +2433,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { ram_addr_t ram_addr; hwaddr paddr; + AcpiGhesState *ags; assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); - if (acpi_ghes_present() && addr) { + ags = acpi_ghes_get_state(); + if (ags && addr) { ram_addr = qemu_ram_addr_from_host(addr); if (ram_addr != RAM_ADDR_INVALID && kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { @@ -2387,7 +2456,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) */ if (code == BUS_MCEERR_AR) { kvm_cpu_synchronize_state(c); - if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC, + paddr)) { kvm_inject_arm_sea(c); } else { error_report("failed to record the error"); @@ -2468,3 +2538,32 @@ void kvm_arm_enable_mte(Object *cpuobj, Error **errp) cpu->kvm_mte = true; } } + +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level) +{ + ARMCPU *cpu = arm_cpu; + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + uint32_t linestate_bit; + int irq_id; + + switch (irq) { + case ARM_CPU_IRQ: + irq_id = KVM_ARM_IRQ_CPU_IRQ; + linestate_bit = CPU_INTERRUPT_HARD; + break; + case ARM_CPU_FIQ: + irq_id = KVM_ARM_IRQ_CPU_FIQ; + linestate_bit = CPU_INTERRUPT_FIQ; + break; + default: + g_assert_not_reached(); + } + + if (level) { + env->irq_line_state |= linestate_bit; + } else { + env->irq_line_state &= ~linestate_bit; + } + kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level); +} diff 
--git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 05c3de8..6a9b637 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -12,6 +12,7 @@ #define QEMU_KVM_ARM_H #include "system/kvm.h" +#include "target/arm/cpu-qom.h" #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) @@ -83,8 +84,10 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu); * @cpu: ARMCPU * * Called from cpu_post_load() to update KVM CPU state from the cpreg list. + * + * Returns: true on success, or false if write_list_to_kvmstate failed. */ -void kvm_arm_cpu_post_load(ARMCPU *cpu); +bool kvm_arm_cpu_post_load(ARMCPU *cpu); /** * kvm_arm_reset_vcpu: @@ -94,13 +97,9 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); -#ifdef CONFIG_KVM +struct kvm_vcpu_init; /** * kvm_arm_create_scratch_host_vcpu: - * @cpus_to_try: array of QEMU_KVM_ARM_TARGET_* values (terminated with - * QEMU_KVM_ARM_TARGET_NONE) to try as fallback if the kernel does not - * know the PREFERRED_TARGET ioctl. Passing NULL is the same as passing - * an empty array. * @fdarray: filled in with kvmfd, vmfd, cpufd file descriptors in that order * @init: filled in with the necessary values for creating a host * vcpu. If NULL is provided, will not init the vCPU (though the cpufd @@ -113,8 +112,7 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu); * Returns: true on success (and fdarray and init are filled in), * false on failure (and fdarray and init are not valid). */ -bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, - int *fdarray, +bool kvm_arm_create_scratch_host_vcpu(int *fdarray, struct kvm_vcpu_init *init); /** @@ -163,6 +161,14 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu); */ void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp); +/* + * These "is some KVM subfeature enabled?" functions may be called + * when KVM support is not present, including in the user-mode + * emulators. The kvm-stub.c file is only built into the system + * emulators, so for user-mode emulation we provide "always false" + * stubs here. + */ +#ifndef CONFIG_USER_ONLY /** * kvm_arm_aarch32_supported: * @@ -194,39 +200,13 @@ bool kvm_arm_sve_supported(void); bool kvm_arm_mte_supported(void); /** - * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle - * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case - * for legacy KVM. + * kvm_arm_el2_supported: * - * Returns the number of bits in the IPA address space supported by KVM + * Returns true if KVM can enable EL2 and false otherwise. */ -int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa); - -int kvm_arm_vgic_probe(void); - -void kvm_arm_pmu_init(ARMCPU *cpu); -void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq); - -/** - * kvm_arm_pvtime_init: - * @cpu: ARMCPU - * @ipa: Per-vcpu guest physical base address of the pvtime structures - * - * Initializes PVTIME for the VCPU, setting the PVTIME IPA to @ipa. - */ -void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa); - -int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - -void kvm_arm_enable_mte(Object *cpuobj, Error **errp); - +bool kvm_arm_el2_supported(void); #else -/* - * It's safe to call these functions without KVM support. - * They should either do nothing or return "not supported". - */ static inline bool kvm_arm_aarch32_supported(void) { return false; @@ -247,59 +227,40 @@ static inline bool kvm_arm_mte_supported(void) return false; } -/* - * These functions should never actually be called without KVM support. 
- */ -static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -static inline void kvm_arm_add_vcpu_properties(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) -{ - g_assert_not_reached(); -} - -static inline int kvm_arm_vgic_probe(void) +static inline bool kvm_arm_el2_supported(void) { - g_assert_not_reached(); + return false; } +#endif -static inline void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) -{ - g_assert_not_reached(); -} +/** + * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle + * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case + * for legacy KVM. + * + * Returns the number of bits in the IPA address space supported by KVM + */ +int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa); -static inline void kvm_arm_pmu_init(ARMCPU *cpu) -{ - g_assert_not_reached(); -} +int kvm_arm_vgic_probe(void); -static inline void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) -{ - g_assert_not_reached(); -} +void kvm_arm_pmu_init(ARMCPU *cpu); +void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq); -static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) -{ - g_assert_not_reached(); -} +/** + * kvm_arm_pvtime_init: + * @cpu: ARMCPU + * @ipa: Per-vcpu guest physical base address of the pvtime structures + * + * Initializes PVTIME for the VCPU, setting the PVTIME IPA to @ipa. + */ +void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa); -static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) -{ - g_assert_not_reached(); -} +int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); -static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp) -{ - g_assert_not_reached(); -} +void kvm_arm_enable_mte(Object *cpuobj, Error **errp); -#endif +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level); #endif diff --git a/target/arm/machine.c b/target/arm/machine.c index 978249f..44a0cf8 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -6,7 +6,8 @@ #include "kvm_arm.h" #include "internals.h" #include "cpu-features.h" -#include "migration/cpu.h" +#include "migration/qemu-file-types.h" +#include "migration/vmstate.h" #include "target/arm/gtimer.h" static bool vfp_needed(void *opaque) @@ -220,27 +221,6 @@ static const VMStateDescription vmstate_vfp = { } }; -static bool iwmmxt_needed(void *opaque) -{ - ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - - return arm_feature(env, ARM_FEATURE_IWMMXT); -} - -static const VMStateDescription vmstate_iwmmxt = { - .name = "cpu/iwmmxt", - .version_id = 1, - .minimum_version_id = 1, - .needed = iwmmxt_needed, - .fields = (const VMStateField[]) { - VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16), - VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16), - VMSTATE_END_OF_LIST() - } -}; - -#ifdef TARGET_AARCH64 /* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build, * and ARMPredicateReg is actively empty. This triggers errors * in the expansion of the VMSTATE macros. 
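
(The vmstate hunks below all rely on the optional-subsection pattern: the .needed callback runs at save time, the subsection is only emitted when it returns true, and a destination that never receives it keeps its reset-default state. A minimal sketch of that shape; "cpu/foo", foo_needed() and the field choice are hypothetical, while the macros are the ones used throughout this file:)

    static bool foo_needed(void *opaque)
    {
        ARMCPU *cpu = opaque;

        /* Hypothetical predicate: only migrate when non-zero. */
        return cpu->env.exception.vaddress != 0;
    }

    static const VMStateDescription vmstate_foo = {
        .name = "cpu/foo",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = foo_needed,
        .fields = (const VMStateField[]) {
            VMSTATE_UINT64(env.exception.vaddress, ARMCPU), /* placeholder */
            VMSTATE_END_OF_LIST()
        }
    };
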
@@ -315,12 +295,30 @@ static const VMStateDescription vmstate_za = { .minimum_version_id = 1, .needed = za_needed, .fields = (const VMStateField[]) { - VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0, + VMSTATE_STRUCT_ARRAY(env.za_state.za, ARMCPU, ARM_MAX_VQ * 16, 0, vmstate_vreg, ARMVectorReg), VMSTATE_END_OF_LIST() } }; -#endif /* AARCH64 */ + +static bool zt0_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + return za_needed(cpu) && cpu_isar_feature(aa64_sme2, cpu); +} + +static const VMStateDescription vmstate_zt0 = { + .name = "cpu/zt0", + .version_id = 1, + .minimum_version_id = 1, + .needed = zt0_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.za_state.zt0, ARMCPU, + ARRAY_SIZE(((CPUARMState *)0)->za_state.zt0)), + VMSTATE_END_OF_LIST() + } +}; static bool serror_needed(void *opaque) { @@ -818,6 +816,80 @@ static const VMStateInfo vmstate_cpsr = { .put = put_cpsr, }; +static int get_pstate64(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val = qemu_get_be64(f); + + env->aarch64 = ((val & PSTATE_nRW) == 0); + if (is_a64(env)) { + pstate_write(env, val); + } else { + cpsr_write_from_spsr_elx(env, val); + } + return 0; +} + +static int put_pstate64(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val; + + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read_for_spsr_elx(env); + } + qemu_put_be64(f, val); + return 0; +} + +static bool pstate64_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val; + + if (arm_feature(env, ARM_FEATURE_M)) { + return false; + } + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read_for_spsr_elx(env); + if (val & PSTATE_SS) { + return true; + } + } + return val > UINT32_MAX; +} + +static const VMStateDescription vmstate_pstate64 = { + .name = "cpu/pstate64", + .version_id = 1, + .minimum_version_id = 1, + .needed = pstate64_needed, + .fields = (const VMStateField[]) { + { + .name = "pstate64", + .version_id = 0, + .size = sizeof(uint64_t), + .info = &(const VMStateInfo) { + .name = "pstate64", + .get = get_pstate64, + .put = put_pstate64, + }, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + }, +}; + static int get_power(QEMUFile *f, void *opaque, size_t size, const VMStateField *field) { @@ -850,6 +922,23 @@ static const VMStateInfo vmstate_powered_off = { .put = put_power, }; +static bool syndrome64_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + return cpu->env.exception.syndrome > UINT32_MAX; +} + +static const VMStateDescription vmstate_syndrome64 = { + .name = "cpu/syndrome64", + .version_id = 1, + .minimum_version_id = 1, + .needed = syndrome64_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.exception.syndrome, ARMCPU), + VMSTATE_END_OF_LIST() + }, +}; + static int cpu_pre_save(void *opaque) { ARMCPU *cpu = opaque; @@ -977,15 +1066,9 @@ static int cpu_post_load(void *opaque, int version_id) } if (kvm_enabled()) { - if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) { + if (!kvm_arm_cpu_post_load(cpu)) { return -1; } - /* Note that it's OK for the TCG side not to know about - * every register in the list; KVM is authoritative if - * we're using it. 
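
(The cpu/pstate64 subsection above packs both execution states into one 64-bit value: PSTATE_nRW, bit 4, is zero only in AArch64 state, which is exactly the test get_pstate64() applies before choosing pstate_write() or cpsr_write_from_spsr_elx(). A standalone sketch of that discriminator; restore_pstate64() is a hypothetical stand-in for the real dispatch:)

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PSTATE_nRW (1u << 4)    /* SPSR M[4]: 0 = AArch64, 1 = AArch32 */

    static void restore_pstate64(uint64_t val)
    {
        bool aarch64 = (val & PSTATE_nRW) == 0;

        if (aarch64) {
            printf("0x%" PRIx64 ": AArch64, treat as PSTATE\n", val);
        } else {
            printf("0x%" PRIx64 ": AArch32, treat as SPSR-format CPSR\n", val);
        }
    }

    int main(void)
    {
        restore_pstate64(0x3c5);    /* EL1h, DAIF set: M[4] == 0 */
        restore_pstate64(0x1d3);    /* SVC mode, IRQ/FIQ masked: M[4] == 1 */
        return 0;
    }
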
- */ - write_list_to_cpustate(cpu); - kvm_arm_cpu_post_load(cpu); } else { if (!write_list_to_cpustate(cpu)) { return -1; @@ -1043,6 +1126,12 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16), VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32), VMSTATE_UINT64(env.pc, ARMCPU), + /* + * If any bits are set in the upper 32 bits of cpsr/pstate, + * or if the cpu is in aa32 mode and PSTATE.SS is set, then + * the cpu/pstate64 subsection will override this with the + * full 64 bit state. + */ { .name = "cpsr", .version_id = 0, @@ -1073,7 +1162,19 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT64(env.exclusive_val, ARMCPU), VMSTATE_UINT64(env.exclusive_high, ARMCPU), VMSTATE_UNUSED(sizeof(uint64_t)), - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + /* + * If any bits are set in the upper 32 bits of syndrome, + * then the cpu/syndrome64 subsection will override this + * with the full 64 bit state. + */ + { + .name = "env.exception.syndrome", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_info_uint32, + .flags = VMS_SINGLE, + .offset = offsetoflow32(ARMCPU, env.exception.syndrome), + }, VMSTATE_UINT32(env.exception.fsr, ARMCPU), VMSTATE_UINT64(env.exception.vaddress, ARMCPU), VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), @@ -1090,7 +1191,6 @@ const VMStateDescription vmstate_arm_cpu = { }, .subsections = (const VMStateDescription * const []) { &vmstate_vfp, - &vmstate_iwmmxt, &vmstate_m, &vmstate_thumb2ee, /* pmsav7_rnr must come before pmsav7 so that we have the @@ -1101,13 +1201,14 @@ const VMStateDescription vmstate_arm_cpu = { &vmstate_pmsav7, &vmstate_pmsav8, &vmstate_m_security, -#ifdef TARGET_AARCH64 &vmstate_sve, &vmstate_za, -#endif + &vmstate_zt0, &vmstate_serror, &vmstate_irq_line_state, &vmstate_wfxt_timer, + &vmstate_syndrome64, + &vmstate_pstate64, NULL } }; diff --git a/target/arm/meson.build b/target/arm/meson.build index 3065081..3df7e03 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -1,41 +1,73 @@ arm_ss = ss.source_set() +arm_common_ss = ss.source_set() arm_ss.add(files( - 'cpu.c', - 'debug_helper.c', 'gdbstub.c', - 'helper.c', - 'vfp_fpscr.c', )) -arm_ss.add(zlib) - -arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'), if_false: files('kvm-stub.c')) -arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', - 'gdbstub64.c', + 'gdbstub64.c' +)) + +arm_common_ss.add(files( + 'mmuidx.c', )) arm_system_ss = ss.source_set() +arm_common_system_ss = ss.source_set() arm_system_ss.add(files( + 'arm-qmp-cmds.c', +)) +arm_system_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c')) +arm_system_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) + +arm_user_ss = ss.source_set() +arm_user_ss.add(files('cpu.c')) +arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files( + 'cpu32-stubs.c', +)) +arm_user_ss.add(files( + 'cpregs-gcs.c', + 'cpregs-pmu.c', + 'debug_helper.c', + 'helper.c', + 'vfp_fpscr.c', + 'el2-stubs.c', +)) +arm_user_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: files('common-semi-target.c')) + +arm_common_system_ss.add(files('cpu.c')) +arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( + 'cpu32-stubs.c')) +arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: 
files('common-semi-target.c')) +arm_common_system_ss.add(files( 'arch_dump.c', 'arm-powerctl.c', - 'arm-qmp-cmds.c', 'cortex-regs.c', + 'cpregs-gcs.c', + 'cpregs-pmu.c', + 'cpu-irq.c', + 'debug_helper.c', + 'helper.c', 'machine.c', 'ptw.c', + 'vfp_fpscr.c', )) -arm_user_ss = ss.source_set() - subdir('hvf') if 'CONFIG_TCG' in config_all_accel subdir('tcg') else - arm_ss.add(files('tcg-stubs.c')) + arm_common_system_ss.add(files('tcg-stubs.c')) endif target_arch += {'arm': arm_ss} target_system_arch += {'arm': arm_system_ss} target_user_arch += {'arm': arm_user_ss} +target_common_arch += {'arm': arm_common_ss} +target_common_system_arch += {'arm': arm_common_system_ss} diff --git a/target/arm/mmuidx-internal.h b/target/arm/mmuidx-internal.h new file mode 100644 index 0000000..962b053 --- /dev/null +++ b/target/arm/mmuidx-internal.h @@ -0,0 +1,113 @@ +/* + * QEMU Arm software mmu index internal definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_ARM_MMUIDX_INTERNAL_H +#define TARGET_ARM_MMUIDX_INTERNAL_H + +#include "mmuidx.h" +#include "tcg/debug-assert.h" +#include "hw/registerfields.h" + + +FIELD(MMUIDXINFO, EL, 0, 2) +FIELD(MMUIDXINFO, ELVALID, 2, 1) +FIELD(MMUIDXINFO, REL, 3, 2) +FIELD(MMUIDXINFO, RELVALID, 5, 1) +FIELD(MMUIDXINFO, 2RANGES, 6, 1) +FIELD(MMUIDXINFO, PAN, 7, 1) +FIELD(MMUIDXINFO, USER, 8, 1) +FIELD(MMUIDXINFO, STAGE1, 9, 1) +FIELD(MMUIDXINFO, STAGE2, 10, 1) +FIELD(MMUIDXINFO, GCS, 11, 1) +FIELD(MMUIDXINFO, TG, 12, 5) + +extern const uint32_t arm_mmuidx_table[ARM_MMU_IDX_M + 8]; + +#define arm_mmuidx_is_valid(x) ((unsigned)(x) < ARRAY_SIZE(arm_mmuidx_table)) + +/* Return the exception level associated with this mmu index. */ +static inline int arm_mmu_idx_to_el(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + tcg_debug_assert(FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, ELVALID)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, EL); +} + +/* + * Return the exception level for the address translation regime + * associated with this mmu index. + */ +static inline uint32_t regime_el(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + tcg_debug_assert(FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, RELVALID)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, REL); +} + +/* + * Return true if this address translation regime has two ranges. + * Note that this will not return the correct answer for AArch32 + * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is + * never called from a context where EL3 can be AArch32. (The + * correct return value for ARMMMUIdx_E3 would be different for + * that case, so we can't just make the function return the + * correct value anyway; we would need an extra "bool e3_is_aarch32" + * argument which all the current callsites would pass as 'false'.) + */ +static inline bool regime_has_2_ranges(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, 2RANGES); +} + +/* Return true if Privileged Access Never is enabled for this mmu index. */ +static inline bool regime_is_pan(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, PAN); +} + +/* + * Return true if the exception level associated with this mmu index is 0. + * Differs from arm_mmu_idx_to_el(idx) == 0 in that this allows querying + * Stage1 and Stage2 mmu indexes. 
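
(The FIELD() declarations above come from hw/registerfields.h: each expands to R_MMUIDXINFO_<X>_SHIFT/_LENGTH/_MASK constants, so every regime_*() query below is one table load plus a masked extract. A standalone sketch of the same packed-table idea, with made-up fields and entries:)

    #include <stdint.h>
    #include <stdio.h>

    /* What FIELD(INFO, EL, 0, 2) and FIELD(INFO, PAN, 7, 1) would provide. */
    #define R_INFO_EL_SHIFT  0
    #define R_INFO_EL_MASK   (0x3u << R_INFO_EL_SHIFT)
    #define R_INFO_PAN_SHIFT 7
    #define R_INFO_PAN_MASK  (0x1u << R_INFO_PAN_SHIFT)

    /* Two toy entries in the style of arm_mmuidx_table[]. */
    static const uint32_t info_table[2] = {
        [0] = 1u << R_INFO_EL_SHIFT,                     /* EL1 */
        [1] = (1u << R_INFO_EL_SHIFT) | R_INFO_PAN_MASK, /* EL1 + PAN */
    };

    int main(void)
    {
        for (int i = 0; i < 2; i++) {
            uint32_t info = info_table[i];
            printf("idx %d: el=%u pan=%u\n", i,
                   (info & R_INFO_EL_MASK) >> R_INFO_EL_SHIFT,
                   (info & R_INFO_PAN_MASK) >> R_INFO_PAN_SHIFT);
        }
        return 0;
    }
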
+ */ +static inline bool regime_is_user(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, USER); +} + +/* Return true if this mmu index is stage 1 of a 2-stage translation. */ +static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, STAGE1); +} + +/* Return true if this mmu index is stage 2 of a 2-stage translation. */ +static inline bool regime_is_stage2(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, STAGE2); +} + +/* Return true if this mmu index implies AccessType_GCS. */ +static inline bool regime_is_gcs(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, GCS); +} + +/* Return the GCS MMUIdx for a given regime. */ +static inline ARMMMUIdx regime_to_gcs(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + uint32_t core = FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, TG); + tcg_debug_assert(core != 0); /* core 0 is E10_0, not a GCS index */ + return core | ARM_MMU_IDX_A; +} + +#endif /* TARGET_ARM_MMUIDX_INTERNAL_H */ diff --git a/target/arm/mmuidx.c b/target/arm/mmuidx.c new file mode 100644 index 0000000..a4663c8 --- /dev/null +++ b/target/arm/mmuidx.c @@ -0,0 +1,66 @@ +/* + * QEMU Arm software mmu index definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "mmuidx-internal.h" + + +#define EL(X) ((X << R_MMUIDXINFO_EL_SHIFT) | R_MMUIDXINFO_ELVALID_MASK | \ + ((X == 0) << R_MMUIDXINFO_USER_SHIFT)) +#define REL(X) ((X << R_MMUIDXINFO_REL_SHIFT) | R_MMUIDXINFO_RELVALID_MASK) +#define R2 R_MMUIDXINFO_2RANGES_MASK +#define PAN R_MMUIDXINFO_PAN_MASK +#define USER R_MMUIDXINFO_USER_MASK +#define S1 R_MMUIDXINFO_STAGE1_MASK +#define S2 R_MMUIDXINFO_STAGE2_MASK +#define GCS R_MMUIDXINFO_GCS_MASK +#define TG(X) \ + ((ARMMMUIdx_##X##_GCS & ARM_MMU_IDX_COREIDX_MASK) << R_MMUIDXINFO_TG_SHIFT) + +const uint32_t arm_mmuidx_table[ARM_MMU_IDX_M + 8] = { + /* + * A-profile. + */ + [ARMMMUIdx_E10_0] = EL(0) | REL(1) | R2 | TG(E10_0), + [ARMMMUIdx_E10_0_GCS] = EL(0) | REL(1) | R2 | GCS, + [ARMMMUIdx_E10_1] = EL(1) | REL(1) | R2 | TG(E10_1), + [ARMMMUIdx_E10_1_PAN] = EL(1) | REL(1) | R2 | TG(E10_1) | PAN, + [ARMMMUIdx_E10_1_GCS] = EL(1) | REL(1) | R2 | GCS, + + [ARMMMUIdx_E20_0] = EL(0) | REL(2) | R2 | TG(E20_0), + [ARMMMUIdx_E20_0_GCS] = EL(0) | REL(2) | R2 | GCS, + [ARMMMUIdx_E20_2] = EL(2) | REL(2) | R2 | TG(E20_2), + [ARMMMUIdx_E20_2_PAN] = EL(2) | REL(2) | R2 | TG(E20_2) | PAN, + [ARMMMUIdx_E20_2_GCS] = EL(2) | REL(2) | R2 | GCS, + + [ARMMMUIdx_E2] = EL(2) | REL(2) | TG(E2), + [ARMMMUIdx_E2_GCS] = EL(2) | REL(2) | GCS, + + [ARMMMUIdx_E3] = EL(3) | REL(3) | TG(E3), + [ARMMMUIdx_E3_GCS] = EL(3) | REL(3) | GCS, + [ARMMMUIdx_E30_0] = EL(0) | REL(3), + [ARMMMUIdx_E30_3_PAN] = EL(3) | REL(3) | PAN, + + [ARMMMUIdx_Stage2_S] = REL(2) | S2, + [ARMMMUIdx_Stage2] = REL(2) | S2, + + [ARMMMUIdx_Stage1_E0] = REL(1) | R2 | S1 | USER | TG(Stage1_E0), + [ARMMMUIdx_Stage1_E0_GCS] = REL(1) | R2 | S1 | USER | GCS, + [ARMMMUIdx_Stage1_E1] = REL(1) | R2 | S1 | TG(Stage1_E1), + [ARMMMUIdx_Stage1_E1_PAN] = REL(1) | R2 | S1 | TG(Stage1_E1) | PAN, + [ARMMMUIdx_Stage1_E1_GCS] = REL(1) | R2 | S1 | GCS, + + /* + * M-profile. 
+ */ + [ARMMMUIdx_MUser] = EL(0) | REL(1), + [ARMMMUIdx_MPriv] = EL(1) | REL(1), + [ARMMMUIdx_MUserNegPri] = EL(0) | REL(1), + [ARMMMUIdx_MPrivNegPri] = EL(1) | REL(1), + [ARMMMUIdx_MSUser] = EL(0) | REL(1), + [ARMMMUIdx_MSPriv] = EL(1) | REL(1), + [ARMMMUIdx_MSUserNegPri] = EL(0) | REL(1), + [ARMMMUIdx_MSPrivNegPri] = EL(1) | REL(1), +}; diff --git a/target/arm/mmuidx.h b/target/arm/mmuidx.h new file mode 100644 index 0000000..8d8d273 --- /dev/null +++ b/target/arm/mmuidx.h @@ -0,0 +1,241 @@ +/* + * QEMU Arm software mmu index definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_ARM_MMUIDX_H +#define TARGET_ARM_MMUIDX_H + +/* + * Arm has the following "translation regimes" (as the Arm ARM calls them): + * + * If EL3 is 64-bit: + * + NonSecure EL1 & 0 stage 1 + * + NonSecure EL1 & 0 stage 2 + * + NonSecure EL2 + * + NonSecure EL2 & 0 (ARMv8.1-VHE) + * + Secure EL1 & 0 stage 1 + * + Secure EL1 & 0 stage 2 (FEAT_SEL2) + * + Secure EL2 (FEAT_SEL2) + * + Secure EL2 & 0 (FEAT_SEL2) + * + Realm EL1 & 0 stage 1 (FEAT_RME) + * + Realm EL1 & 0 stage 2 (FEAT_RME) + * + Realm EL2 (FEAT_RME) + * + EL3 + * If EL3 is 32-bit: + * + NonSecure PL1 & 0 stage 1 + * + NonSecure PL1 & 0 stage 2 + * + NonSecure PL2 + * + Secure PL1 & 0 + * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) + * + * For QEMU, an mmu_idx is not quite the same as a translation regime because: + * 1. we need to split the "EL1 & 0" and "EL2 & 0" regimes into two mmu_idxes, + * because they may differ in access permissions even if the VA->PA map is + * the same + * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 + * translation, which means that we have one mmu_idx that deals with two + * concatenated translation regimes [this sort of combined s1+2 TLB is + * architecturally permitted] + * 3. we don't need to allocate an mmu_idx to translations that we won't be + * handling via the TLB. The only way to do a stage 1 translation without + * the immediate stage 2 translation is via the ATS or AT system insns, + * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" + * translation regimes, because they map reasonably well to each other + * and they can't both be active at the same time. + * 5. we want to be able to use the TLB for accesses done as part of a + * stage1 page table walk, rather than having to walk the stage2 page + * table over and over. + * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access + * Never (PAN) bit within PSTATE. + * 7. we fold together most secure and non-secure regimes for A-profile, + * because there are no banked system registers for aarch64, so the + * process of switching between secure and non-secure is + * already heavyweight. + * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, + * because both are in use simultaneously for Secure EL2. + * 9. we need separate indexes for handling AccessType_GCS. 
+ *
+ * This gives us the following list of cases:
+ *
+ * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2)
+ * EL0 EL1&0 stage 1+2 +GCS
+ * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2)
+ * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 PL1&0 stage 1+2 +PAN)
+ * EL1 EL1&0 stage 1+2 +GCS
+ * EL0 EL2&0
+ * EL0 EL2&0 +GCS
+ * EL2 EL2&0
+ * EL2 EL2&0 +PAN
+ * EL2 EL2&0 +GCS
+ * EL2 (aka NS PL2)
+ * EL2 +GCS
+ * EL3 (aka AArch32 S PL1 PL1&0)
+ * EL3 +GCS
+ * AArch32 S PL0 PL1&0 (we call this EL30_0)
+ * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN)
+ * Stage2 Secure
+ * Stage2 NonSecure
+ * plus one TLB per Physical address space: S, NS, Realm, Root
+ *
+ * for a total of 22 different mmu_idx.
+ *
+ * R profile CPUs have an MPU, but can use the same set of MMU indexes
+ * as A profile. They only need to distinguish EL0 and EL1 (and
+ * EL2 for cores like the Cortex-R52).
+ *
+ * M profile CPUs are rather different as they do not have a true MMU.
+ * They have the following different MMU indexes:
+ *  User
+ *  Privileged
+ *  User, execution priority negative (ie the MPU HFNMIENA bit may apply)
+ *  Privileged, execution priority negative (ditto)
+ * If the CPU supports the v8M Security Extension then there are also:
+ *  Secure User
+ *  Secure Privileged
+ *  Secure User, execution priority negative
+ *  Secure Privileged, execution priority negative
+ *
+ * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code
+ * are not quite the same -- different CPU types (most notably M profile
+ * vs A/R profile) would like to use MMU indexes with different semantics,
+ * but since we don't ever need to use all of those in a single CPU we
+ * can avoid having to set NB_MMU_MODES to "total number of A profile MMU
+ * modes + total number of M profile MMU modes". The lower bits of
+ * ARMMMUIdx are the core TLB mmu index, and the higher bits are always
+ * the same for any particular CPU.
+ * Variables of type ARMMMUIdx are always full values, and the core
+ * index values are in variables of type 'int'.
+ *
+ * Our enumeration includes at the end some entries which are not "true"
+ * mmu_idx values in that they don't have corresponding TLBs and are only
+ * valid for doing slow path page table walks.
+ *
+ * The constant names here are patterned after the general style of the names
+ * of the AT/ATS operations.
+ * The values used are carefully arranged to make mmu_idx => EL lookup easy.
+ * For M profile we arrange them to have a bit for priv, a bit for negpri
+ * and a bit for secure.
+ */
+#define ARM_MMU_IDX_A     0x20  /* A profile */
+#define ARM_MMU_IDX_NOTLB 0x40  /* does not have a TLB */
+#define ARM_MMU_IDX_M     0x80  /* M profile */
+
+/* Meanings of the bits for M profile mmu idx values */
+#define ARM_MMU_IDX_M_PRIV   0x1
+#define ARM_MMU_IDX_M_NEGPRI 0x2
+#define ARM_MMU_IDX_M_S      0x4  /* Secure */
+
+#define ARM_MMU_IDX_TYPE_MASK \
+    (ARM_MMU_IDX_A | ARM_MMU_IDX_M | ARM_MMU_IDX_NOTLB)
+#define ARM_MMU_IDX_COREIDX_MASK 0x1f
+
+typedef enum ARMMMUIdx {
+    /*
+     * A-profile.
+ */ + + ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_0_GCS = 1 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_PAN = 3 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_GCS = 4 | ARM_MMU_IDX_A, + + ARMMMUIdx_E20_0 = 5 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_0_GCS = 6 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2 = 7 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_PAN = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_GCS = 9 | ARM_MMU_IDX_A, + + ARMMMUIdx_E2 = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_E2_GCS = 11 | ARM_MMU_IDX_A, + + ARMMMUIdx_E3 = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_E3_GCS = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_0 = 14 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_3_PAN = 15 | ARM_MMU_IDX_A, + + /* + * Used for second stage of an S12 page table walk, or for descriptor + * loads during first stage of an S1 page table walk. Note that both + * are in use simultaneously for SecureEL2: the security state for + * the S2 ptw is selected by the NS bit from the S1 ptw. + */ + ARMMMUIdx_Stage2_S = 16 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 17 | ARM_MMU_IDX_A, + + /* TLBs with 1-1 mapping to the physical address spaces. */ + ARMMMUIdx_Phys_S = 18 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 19 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 20 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 21 | ARM_MMU_IDX_A, + + /* + * These are not allocated TLBs and are used only for AT system + * instructions or for the first stage of an S12 page table walk. + */ + ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E0_GCS = 3 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1_GCS = 4 | ARM_MMU_IDX_NOTLB, + + /* + * M-profile. + */ + ARMMMUIdx_MUser = ARM_MMU_IDX_M, + ARMMMUIdx_MPriv = ARM_MMU_IDX_M | ARM_MMU_IDX_M_PRIV, + ARMMMUIdx_MUserNegPri = ARMMMUIdx_MUser | ARM_MMU_IDX_M_NEGPRI, + ARMMMUIdx_MPrivNegPri = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_NEGPRI, + ARMMMUIdx_MSUser = ARMMMUIdx_MUser | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSPriv = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSUserNegPri = ARMMMUIdx_MUserNegPri | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSPrivNegPri = ARMMMUIdx_MPrivNegPri | ARM_MMU_IDX_M_S, +} ARMMMUIdx; + +/* + * Bit macros for the core-mmu-index values for each index, + * for use when calling tlb_flush_by_mmuidx() and friends. 
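
(Worked example of the encoding above, assuming the enum values as declared: ARMMMUIdx_E10_1 is 2 | 0x20 == 0x22, its core TLB index is 0x22 & 0x1f == 2, and the ARMMMUIdxBit_E10_1 produced by TO_CORE_BIT below is 1 << 2 == 0x4, the bit value handed to tlb_flush_by_mmuidx(). A compile-and-run check, with underscored names standing in for the real ones:)

    #include <assert.h>

    enum { ARM_MMU_IDX_A_ = 0x20, COREIDX_MASK_ = 0x1f };
    enum { E10_1_ = 2 | ARM_MMU_IDX_A_ };                 /* ARMMMUIdx_E10_1 */
    enum { Bit_E10_1_ = 1 << (E10_1_ & COREIDX_MASK_) };  /* TO_CORE_BIT(E10_1) */

    int main(void)
    {
        assert(E10_1_ == 0x22);
        assert(Bit_E10_1_ == 0x4);
        return 0;
    }
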
+ */
+#define TO_CORE_BIT(NAME) \
+    ARMMMUIdxBit_##NAME = 1 << (ARMMMUIdx_##NAME & ARM_MMU_IDX_COREIDX_MASK)
+
+typedef enum ARMMMUIdxBit {
+    TO_CORE_BIT(E10_0),
+    TO_CORE_BIT(E10_0_GCS),
+    TO_CORE_BIT(E10_1),
+    TO_CORE_BIT(E10_1_PAN),
+    TO_CORE_BIT(E10_1_GCS),
+    TO_CORE_BIT(E20_0),
+    TO_CORE_BIT(E20_0_GCS),
+    TO_CORE_BIT(E20_2),
+    TO_CORE_BIT(E20_2_PAN),
+    TO_CORE_BIT(E20_2_GCS),
+    TO_CORE_BIT(E2),
+    TO_CORE_BIT(E2_GCS),
+    TO_CORE_BIT(E3),
+    TO_CORE_BIT(E3_GCS),
+    TO_CORE_BIT(E30_0),
+    TO_CORE_BIT(E30_3_PAN),
+    TO_CORE_BIT(Stage2),
+    TO_CORE_BIT(Stage2_S),
+
+    TO_CORE_BIT(MUser),
+    TO_CORE_BIT(MPriv),
+    TO_CORE_BIT(MUserNegPri),
+    TO_CORE_BIT(MPrivNegPri),
+    TO_CORE_BIT(MSUser),
+    TO_CORE_BIT(MSPriv),
+    TO_CORE_BIT(MSUserNegPri),
+    TO_CORE_BIT(MSPrivNegPri),
+} ARMMMUIdxBit;
+
+#undef TO_CORE_BIT
+
+#define MMU_USER_IDX 0
+
+#endif /* TARGET_ARM_MMUIDX_H */
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 4330900..d4386ed 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -10,8 +10,10 @@
 #include "qemu/log.h"
 #include "qemu/range.h"
 #include "qemu/main-loop.h"
-#include "exec/exec-all.h"
 #include "exec/page-protection.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
+#include "accel/tcg/probe.h"
 #include "cpu.h"
 #include "internals.h"
 #include "cpu-features.h"
@@ -34,8 +36,6 @@ typedef struct S1Translate {
     /*
      * in_space: the security space for this walk. This plus
      * the in_mmu_idx specify the architectural translation regime.
-     * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit,
-     * this field is updated accordingly.
      *
      * Note that the security space for the in_ptw_idx may be different
      * from that for the in_mmu_idx. We do not need to explicitly track
@@ -51,17 +51,36 @@
      */
     ARMSecuritySpace in_space;
     /*
+     * Like in_space, except this may be "downgraded" to NonSecure
+     * by an NSTable bit.
+     */
+    ARMSecuritySpace cur_space;
+    /*
      * in_debug: is this a QEMU debug access (gdbstub, etc)? Debug
      * accesses will not update the guest page table access flags
      * and will not change the state of the softmmu TLBs.
      */
     bool in_debug;
     /*
+     * in_at: is this AccessType_AT?
+     * This is also set for debug, because at heart that is also
+     * an address translation, and simplifies a test.
+     */
+    bool in_at;
+    /*
      * If this is stage 2 of a stage 1+2 page table walk, then this must
      * be true if stage 1 is an EL0 access; otherwise this is ignored.
      * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}.
      */
     bool in_s1_is_el0;
+    /*
+     * The set of PAGE_* bits to be used in the permission check.
+     * This is normally directly related to the access_type, but
+     * may be suppressed for debug or AT insns.
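
(The permission test this field feeds is a subset check: a fault is raised iff some bit requested in in_prot_check is missing from the computed protection, which is why the walk can relax it, e.g. to PAGE_READ for the debug stage-2 walk later in this patch, or to 0 to suppress the check entirely. A standalone sketch of the predicate, with underscored stand-ins for qemu's PAGE_* bits:)

    #include <assert.h>

    #define PAGE_READ_  1
    #define PAGE_WRITE_ 2

    /* Mirrors "ptw->in_prot_check & ~prot" as used below. */
    static int perm_fault(int prot_check, int prot)
    {
        return (prot_check & ~prot) != 0;
    }

    int main(void)
    {
        /* Ordinary store: must find PAGE_WRITE in prot. */
        assert(perm_fault(PAGE_WRITE_, PAGE_READ_));
        assert(!perm_fault(PAGE_WRITE_, PAGE_READ_ | PAGE_WRITE_));
        /* Suppressed check (debug/AT): never faults on permissions. */
        assert(!perm_fault(0, PAGE_READ_));
        return 0;
    }
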
+ */ + uint8_t in_prot_check; + /* Cached EffectiveHCR_EL2_NVx() bit */ + bool in_nv1; bool out_rw; bool out_be; ARMSecuritySpace out_space; @@ -120,7 +139,7 @@ unsigned int arm_pamax(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { unsigned int parange = - FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); /* * id_aa64mmfr0 is a read-only register so values outside of the @@ -150,6 +169,10 @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) return ARMMMUIdx_Stage1_E1; case ARMMMUIdx_E10_1_PAN: return ARMMMUIdx_Stage1_E1_PAN; + case ARMMMUIdx_E10_0_GCS: + return ARMMMUIdx_Stage1_E0_GCS; + case ARMMMUIdx_E10_1_GCS: + return ARMMMUIdx_Stage1_E1_GCS; default: return mmu_idx; } @@ -191,9 +214,9 @@ static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx) return ARMMMUIdx_Phys_Realm; case ARMSS_Secure: if (stage2idx == ARMMMUIdx_Stage2_S) { - s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); + s2walk_secure = !(env->cp15.vstcr_el2 & R_VSTCR_SW_MASK); } else { - s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); + s2walk_secure = !(env->cp15.vtcr_el2 & R_VTCR_NSW_MASK); } return s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; default: @@ -216,9 +239,9 @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) return env->cp15.vsttbr_el2; } if (ttbrn == 0) { - return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; + return env->cp15.ttbr0_el[regime_el(mmu_idx)]; } else { - return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; + return env->cp15.ttbr1_el[regime_el(mmu_idx)]; } } @@ -257,8 +280,10 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_0_GCS: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E10_1_GCS: /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */ hcr_el2 = arm_hcr_el2_eff_secstate(env, space); if (hcr_el2 & HCR_TGE) { @@ -267,8 +292,10 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, break; case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E0_GCS: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_Stage1_E1_GCS: /* HCR.DC means SCTLR_EL1.M behaves as 0 */ hcr_el2 = arm_hcr_el2_eff_secstate(env, space); if (hcr_el2 & HCR_DC) { @@ -277,10 +304,14 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, break; case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_0_GCS: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E20_2_GCS: case ARMMMUIdx_E2: + case ARMMMUIdx_E2_GCS: case ARMMMUIdx_E3: + case ARMMMUIdx_E3_GCS: case ARMMMUIdx_E30_0: case ARMMMUIdx_E30_3_PAN: break; @@ -301,6 +332,7 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, static bool granule_protection_check(CPUARMState *env, uint64_t paddress, ARMSecuritySpace pspace, + ARMSecuritySpace ss, ARMMMUFaultInfo *fi) { MemTxAttrs attrs = { @@ -330,7 +362,7 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, * physical address size is invalid. 
*/ pps = FIELD_EX64(gpccr, GPCCR, PPS); - if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) { + if (pps > FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE)) { goto fault_walk; } pps = pamax_map[pps]; @@ -369,18 +401,37 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, l0gptsz = 30 + FIELD_EX64(gpccr, GPCCR, L0GPTSZ); /* - * GPC Priority 2: Secure, Realm or Root address exceeds PPS. + * GPC Priority 2: Access to Secure, NonSecure or Realm is prevented + * by one of the GPCCR_EL3 address space disable bits (R_TCWMD). + * All of these bits are checked vs aa64_rme_gpc2 in gpccr_write. + */ + { + static const uint8_t disable_masks[4] = { + [ARMSS_Secure] = R_GPCCR_SPAD_MASK, + [ARMSS_NonSecure] = R_GPCCR_NSPAD_MASK, + [ARMSS_Root] = 0, + [ARMSS_Realm] = R_GPCCR_RLPAD_MASK, + }; + + if (gpccr & disable_masks[pspace]) { + goto fault_fail; + } + } + + /* + * GPC Priority 3: Secure, Realm or Root address exceeds PPS. * R_CPDSB: A NonSecure physical address input exceeding PPS * does not experience any fault. + * R_PBPSH: Other address spaces have fault suppressed by APPSAA. */ if (paddress & ~pps_mask) { - if (pspace == ARMSS_NonSecure) { + if (pspace == ARMSS_NonSecure || FIELD_EX64(gpccr, GPCCR, APPSAA)) { return true; } - goto fault_size; + goto fault_fail; } - /* GPC Priority 3: the base address of GPTBR_EL3 exceeds PPS. */ + /* GPC Priority 4: the base address of GPTBR_EL3 exceeds PPS. */ tableaddr = env->cp15.gptbr_el3 << 12; if (tableaddr & ~pps_mask) { goto fault_size; @@ -461,18 +512,30 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, break; case 0b1111: /* all access */ return true; - case 0b1000: - case 0b1001: - case 0b1010: - case 0b1011: + case 0b1000: /* secure */ + if (!cpu_isar_feature(aa64_sel2, cpu)) { + goto fault_walk; + } + /* fall through */ + case 0b1001: /* non-secure */ + case 0b1010: /* root */ + case 0b1011: /* realm */ if (pspace == (gpi & 3)) { return true; } break; + case 0b1101: /* non-secure only */ + /* aa64_rme_gpc2 was checked in gpccr_write */ + if (FIELD_EX64(gpccr, GPCCR, NSO)) { + return (pspace == ARMSS_NonSecure && + (ss == ARMSS_NonSecure || ss == ARMSS_Root)); + } + goto fault_walk; default: goto fault_walk; /* reserved */ } + fault_fail: fi->gpcf = GPCF_Fail; goto fault_common; fault_eabt: @@ -573,12 +636,14 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, * From gdbstub, do not use softmmu so that we don't modify the * state of the cpu at all, including softmmu tlb contents. 
*/ - ARMSecuritySpace s2_space = S2_security_space(ptw->in_space, s2_mmu_idx); + ARMSecuritySpace s2_space + = S2_security_space(ptw->cur_space, s2_mmu_idx); S1Translate s2ptw = { .in_mmu_idx = s2_mmu_idx, .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), .in_space = s2_space, .in_debug = true, + .in_prot_check = PAGE_READ, }; GetPhysAddrResult s2 = { }; @@ -615,7 +680,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, } if (regime_is_stage2(s2_mmu_idx)) { - uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->cur_space); if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { /* @@ -626,7 +691,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, fi->s2addr = addr; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, s2_mmu_idx); return false; } } @@ -642,7 +707,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, fi->s2addr = addr; fi->stage2 = regime_is_stage2(s2_mmu_idx); fi->s1ptw = fi->stage2; - fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, s2_mmu_idx); return false; } @@ -735,7 +800,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, uint64_t new_val, S1Translate *ptw, ARMMMUFaultInfo *fi) { -#if defined(TARGET_AARCH64) && defined(CONFIG_TCG) +#if defined(CONFIG_ATOMIC64) && defined(CONFIG_TCG) uint64_t cur_val; void *host = ptw->out_host; @@ -829,7 +894,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, fi->s2addr = ptw->out_virt; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = fault_s1ns(ptw->in_space, ptw->in_ptw_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, ptw->in_ptw_idx); return 0; } @@ -947,7 +1012,7 @@ static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot) { return ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, - regime_is_user(env, mmu_idx)); + regime_is_user(mmu_idx)); } /* @@ -973,7 +1038,7 @@ static int simple_ap_to_rw_prot_is_user(int ap, bool is_user) static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) { - return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(mmu_idx)); } static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, @@ -1006,7 +1071,7 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, } type = (desc & 3); domain = (desc >> 5) & 0x0f; - if (regime_el(env, ptw->in_mmu_idx) == 1) { + if (regime_el(ptw->in_mmu_idx) == 1) { dacr = env->cp15.dacr_ns; } else { dacr = env->cp15.dacr_s; @@ -1059,11 +1124,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, ap = (desc >> (4 + ((address >> 9) & 6))) & 3; result->f.lg_page_size = 12; break; - case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ + case 3: /* 1k page, or ARMv6 "extended small (4k) page" */ if (type == 1) { - /* ARMv6/XScale extended small page format */ - if (arm_feature(env, ARM_FEATURE_XSCALE) - || arm_feature(env, ARM_FEATURE_V6)) { + /* ARMv6 extended small page format */ + if (arm_feature(env, ARM_FEATURE_V6)) { phys_addr = (desc & 0xfffff000) | (address & 0xfff); result->f.lg_page_size = 12; } else { @@ -1087,7 +1151,7 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, } result->f.prot = ap_to_rw_prot(env, ptw->in_mmu_idx, ap, domain_prot); result->f.prot |= result->f.prot ? 
PAGE_EXEC : 0; - if (!(result->f.prot & (1 << access_type))) { + if (ptw->in_prot_check & ~result->f.prot) { /* Access permission fault. */ fi->type = ARMFault_Permission; goto do_fault; @@ -1146,7 +1210,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, /* Page or Section. */ domain = (desc >> 5) & 0x0f; } - if (regime_el(env, mmu_idx) == 1) { + if (regime_el(mmu_idx) == 1) { dacr = env->cp15.dacr_ns; } else { dacr = env->cp15.dacr_s; @@ -1210,7 +1274,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, g_assert_not_reached(); } } - out_space = ptw->in_space; + out_space = ptw->cur_space; if (ns) { /* * The NS bit will (as required by the architecture) have no effect if @@ -1240,8 +1304,8 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, } result->f.prot = get_S1prot(env, mmu_idx, false, user_rw, prot_rw, - xn, pxn, result->f.attrs.space, out_space); - if (!(result->f.prot & (1 << access_type))) { + xn, pxn, ptw->in_space, out_space); + if (ptw->in_prot_check & ~result->f.prot) { /* Access permission fault. */ fi->type = ARMFault_Permission; goto do_fault; @@ -1264,7 +1328,7 @@ do_fault: * @xn: XN (execute-never) bits * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot_noexecute(int s2ap) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -1274,12 +1338,6 @@ static int get_S2prot_noexecute(int s2ap) if (s2ap & 2) { prot |= PAGE_WRITE; } - return prot; -} - -static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) -{ - int prot = get_S2prot_noexecute(s2ap); if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { switch (xn) { @@ -1311,6 +1369,44 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) return prot; } +static int get_S2prot_indirect(CPUARMState *env, GetPhysAddrResult *result, + int pi_index, int po_index, bool s1_is_el0) +{ + /* Last index is (priv, unpriv, ttw) */ + static const uint8_t perm_table[16][3] = { + /* 0 */ { 0, 0, 0 }, /* no access */ + /* 1 */ { 0, 0, 0 }, /* reserved */ + /* 2 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 3 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 4 */ { PAGE_WRITE, PAGE_WRITE, 0 }, + /* 5 */ { 0, 0, 0 }, /* reserved */ + /* 6 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 7 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 8 */ { PAGE_READ, PAGE_READ, PAGE_READ }, + /* 9 */ { PAGE_READ, PAGE_READ | PAGE_EXEC, PAGE_READ }, + /* A */ { PAGE_READ | PAGE_EXEC, PAGE_READ, PAGE_READ }, + /* B */ { PAGE_READ | PAGE_EXEC, PAGE_READ | PAGE_EXEC, PAGE_READ }, + /* C */ { PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE }, + /* D */ { PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE }, + /* E */ { PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE }, + /* F */ { PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE }, + }; + + uint64_t pir = (env->cp15.scr_el3 & SCR_PIEN ? 
env->cp15.s2pir_el2 : 0); + int s2pi = extract64(pir, pi_index * 4, 4); + + result->f.prot = perm_table[s2pi][2]; + return perm_table[s2pi][s1_is_el0]; +} + /* * Translate section/page access permissions to protection flags * @env: CPUARMState @@ -1328,7 +1424,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) { ARMCPU *cpu = env_archcpu(env); - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); bool have_wxn; int wxn = 0; @@ -1345,10 +1441,10 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, * We make the IMPDEF choices that SCR_EL3.SIF and Realm EL2&0 * do not affect EPAN. */ - if (user_rw && regime_is_pan(env, mmu_idx)) { + if (user_rw && regime_is_pan(mmu_idx)) { prot_rw = 0; } else if (cpu_isar_feature(aa64_pan3, cpu) && is_aa64 && - regime_is_pan(env, mmu_idx) && + regime_is_pan(mmu_idx) && (regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) { prot_rw = 0; } @@ -1405,7 +1501,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, xn = pxn || (user_rw & PAGE_WRITE); } } else if (arm_feature(env, ARM_FEATURE_V7)) { - switch (regime_el(env, mmu_idx)) { + switch (regime_el(mmu_idx)) { case 1: case 3: if (is_user) { @@ -1432,11 +1528,115 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, return prot_rw | PAGE_EXEC; } +/* Extra page permission bits, during get_S1prot_indirect only. */ +#define PAGE_GCS (1 << 3) +#define PAGE_WXN (1 << 4) +#define PAGE_OVERLAY (1 << 5) +QEMU_BUILD_BUG_ON(PAGE_RWX & (PAGE_GCS | PAGE_WXN | PAGE_OVERLAY)); + +static int get_S1prot_indirect(CPUARMState *env, S1Translate *ptw, + ARMMMUIdx mmu_idx, int pi_index, int po_index, + ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) +{ + static const uint8_t perm_table[16] = { + /* 0 */ PAGE_OVERLAY, /* no access */ + /* 1 */ PAGE_OVERLAY | PAGE_READ, + /* 2 */ PAGE_OVERLAY | PAGE_EXEC, + /* 3 */ PAGE_OVERLAY | PAGE_READ | PAGE_EXEC, + /* 4 */ PAGE_OVERLAY, /* reserved */ + /* 5 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE, + /* 6 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE | PAGE_EXEC | PAGE_WXN, + /* 7 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE | PAGE_EXEC, + /* 8 */ PAGE_READ, + /* 9 */ PAGE_READ | PAGE_GCS, + /* A */ PAGE_READ | PAGE_EXEC, + /* B */ 0, /* reserved */ + /* C */ PAGE_READ | PAGE_WRITE, + /* D */ 0, /* reserved */ + /* E */ PAGE_READ | PAGE_WRITE | PAGE_EXEC, + /* F */ 0, /* reserved */ + }; + + uint32_t el = regime_el(mmu_idx); + uint64_t pir = env->cp15.pir_el[el]; + uint64_t pire0 = 0; + int perm; + + if (el < 3) { + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_PIEN)) { + pir = 0; + } else if (el == 2) { + pire0 = env->cp15.pire0_el2; + } else if (!ptw->in_nv1) { + pire0 = env->cp15.pir_el[0]; + } + } + perm = perm_table[extract64(pir, pi_index * 4, 4)]; + + if (regime_has_2_ranges(mmu_idx)) { + int p_perm = perm; + int u_perm = perm_table[extract64(pire0, pi_index * 4, 4)]; + + if ((p_perm & (PAGE_EXEC | PAGE_GCS)) && + (u_perm & (PAGE_WRITE | PAGE_GCS))) { + p_perm &= ~(PAGE_RWX | PAGE_GCS); + u_perm &= ~(PAGE_RWX | PAGE_GCS); + } + if ((u_perm & (PAGE_RWX | PAGE_GCS)) && regime_is_pan(mmu_idx)) { + p_perm &= ~(PAGE_READ | PAGE_WRITE); + } + perm = regime_is_user(mmu_idx) ? u_perm : p_perm; + } + + if (in_pa != out_pa) { + switch (in_pa) { + case ARMSS_Root: + /* + * R_ZWRVD: permission fault for insn fetched from non-Root, + * I_WWBFB: SIF has no effect in EL3. 
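
(For reference, the 4-bit permission-indirection index fed into these tables is assembled from descriptor bits {54:53, 51, 6}, as in the pi extraction later in this patch. A standalone sketch of that assembly, with a local stand-in for qemu's extract64():)

    #include <assert.h>
    #include <stdint.h>

    static unsigned ext64(uint64_t v, int pos, int len)
    {
        return (v >> pos) & ((1ull << len) - 1);
    }

    int main(void)
    {
        /* Descriptor with bits 6, 51 and 54:53 all set: PIIndex == 0xf. */
        uint64_t desc = (1ull << 6) | (1ull << 51) | (3ull << 53);
        unsigned pi = ext64(desc, 6, 1)
                    | (ext64(desc, 51, 1) << 1)
                    | (ext64(desc, 53, 2) << 2);

        assert(pi == 0xf);
        return 0;
    }
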
+ */ + perm &= ~(PAGE_EXEC | PAGE_GCS); + break; + case ARMSS_Realm: + /* + * R_PKTDS: permission fault for insn fetched from non-Realm, + * for Realm EL2 or EL2&0. The corresponding fault for EL1&0 + * happens during any stage2 translation. + */ + if (el == 2) { + perm &= ~(PAGE_EXEC | PAGE_GCS); + } + break; + case ARMSS_Secure: + if (env->cp15.scr_el3 & SCR_SIF) { + perm &= ~(PAGE_EXEC | PAGE_GCS); + } + break; + default: + /* Input NonSecure must have output NonSecure. */ + g_assert_not_reached(); + } + } + + if (regime_is_gcs(mmu_idx)) { + /* + * Note that the one s1perms.gcs bit controls both read and write + * access via AccessType_GCS. See AArch64.S1CheckPermissions. + */ + perm = (perm & PAGE_GCS ? PAGE_READ | PAGE_WRITE : 0); + } else if (perm & PAGE_WXN) { + perm &= ~PAGE_EXEC; + } + + return perm & PAGE_RWX; +} + static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, ARMMMUIdx mmu_idx) { uint64_t tcr = regime_tcr(env, mmu_idx); - uint32_t el = regime_el(env, mmu_idx); + uint32_t el = regime_el(mmu_idx); int select, tsz; bool epd, hpd; @@ -1457,8 +1657,12 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, } tsz = sextract32(tcr, 0, 4) + 8; select = 0; - hpd = false; epd = false; + /* + * Stage2 does not have hierarchical permissions. + * Thus disabling them makes things easier during ptw. + */ + hpd = true; } else if (el == 2) { /* HTCR */ tsz = extract32(tcr, 0, 3); @@ -1623,12 +1827,6 @@ static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds, } } -static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw) -{ - uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); - return (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1); -} - /** * get_phys_addr_lpae: perform one stage of page table walk, LPAE format * @@ -1658,13 +1856,13 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint64_t ttbr; hwaddr descaddr, indexmask, indexmask_grainsize; uint32_t tableattrs; - target_ulong page_size; + uint64_t page_size; uint64_t attrs; int32_t stride; int addrsize, inputsize, outputsize; uint64_t tcr = regime_tcr(env, mmu_idx); - int ap, xn, pxn; - uint32_t el = regime_el(env, mmu_idx); + int ap, prot; + uint32_t el = regime_el(mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); uint64_t descriptor, new_descriptor; @@ -1681,6 +1879,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, level = 0; /* + * Cache NV1 before we adjust ptw->in_space for NSTable. + * Note that this is only relevant for EL1&0, and that + * computing it would assert for ARMSS_Root. + */ + if (el == 1) { + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + ptw->in_nv1 = (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1); + } + + /* * If TxSZ is programmed to a value larger than the maximum, * or smaller than the effective minimum, it is IMPLEMENTATION * DEFINED whether we behave as if the field were programmed @@ -1701,7 +1909,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * ID_AA64MMFR0 is a read-only register so values outside of the * supported mappings can be considered an implementation error. */ - ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + ps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); ps = MIN(ps, param.ps); assert(ps < ARRAY_SIZE(pamax_map)); outputsize = pamax_map[ps]; @@ -1731,7 +1939,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * validation to do here. 
*/ if (inputsize < addrsize) { - target_ulong top_bits = sextract64(address, inputsize, + uint64_t top_bits = sextract64(address, inputsize, addrsize - inputsize); if (-top_bits != param.select) { /* The gap between the two regions is a Translation fault */ @@ -1843,7 +2051,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * NonSecure. With RME, the EL3 translation regime does not change * from Root to NonSecure. */ - if (ptw->in_space == ARMSS_Secure + if (ptw->cur_space == ARMSS_Secure && !regime_is_stage2(mmu_idx) && extract32(tableattrs, 4, 1)) { /* @@ -1853,7 +2061,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS); QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2); ptw->in_ptw_idx += 1; - ptw->in_space = ARMSS_NonSecure; + ptw->cur_space = ARMSS_NonSecure; } if (!S1_ptw_translate(env, ptw, descaddr, fi)) { @@ -1920,7 +2128,12 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); - if (likely(!ptw->in_debug)) { + /* + * For AccessType_AT, DB is not updated (AArch64.SetDirtyFlag), + * and it is IMPLEMENTATION DEFINED whether AF is updated + * (AArch64.SetAccessFlag; qemu chooses to not update). + */ + if (likely(!ptw->in_at)) { /* * Access flag. * If HA is enabled, prepare to update the descriptor below. @@ -1959,21 +2172,31 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * except NSTable (which we have already handled). */ attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); - if (!regime_is_stage2(mmu_idx)) { - if (!param.hpd) { - attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ - /* - * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. - */ - attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ - } + if (!param.hpd) { + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ } ap = extract32(attrs, 6, 2); - out_space = ptw->in_space; + out_space = ptw->cur_space; if (regime_is_stage2(mmu_idx)) { + if (param.pie) { + int pi = extract64(attrs, 6, 1) + | (extract64(attrs, 51, 1) << 1) + | (extract64(attrs, 53, 2) << 2); + int po = extract64(attrs, 60, 3); + prot = get_S2prot_indirect(env, result, pi, po, ptw->in_s1_is_el0); + } else { + int xn = extract64(attrs, 53, 2); + prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + /* Install TTW permissions in f.prot. */ + result->f.prot = prot & (PAGE_READ | PAGE_WRITE); + } /* * R_GYNXY: For stage2 in Realm security state, bit 55 is NS. * The bit remains ignored for other security states. 
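
(The top_bits test above is the canonical-address check for a two-range regime: sign-extend everything above inputsize, and the result must be all-zeros for the TTBR0 half, select == 0, or all-ones for the TTBR1 half, select == 1. A standalone sketch with a local reimplementation of sextract64(); the arithmetic right shift of a negative value is implementation-defined in ISO C but behaves as expected on all supported hosts:)

    #include <assert.h>
    #include <stdint.h>

    static int64_t sext64(uint64_t v, int pos, int len)
    {
        return ((int64_t)(v << (64 - pos - len))) >> (64 - len);
    }

    int main(void)
    {
        int inputsize = 48, addrsize = 64;

        /* TTBR0 half: top bits zero, so -top_bits == 0 == select. */
        assert(-sext64(0x00007fffffff0000ull, inputsize,
                       addrsize - inputsize) == 0);
        /* TTBR1 half: top bits one, so -top_bits == 1 == select. */
        assert(-sext64(0xffff800000000000ull, inputsize,
                       addrsize - inputsize) == 1);
        return 0;
    }
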
@@ -1982,11 +2205,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, */ if (out_space == ARMSS_Realm && extract64(attrs, 55, 1)) { out_space = ARMSS_NonSecure; - result->f.prot = get_S2prot_noexecute(ap); - } else { - xn = extract64(attrs, 53, 2); - result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + prot &= ~PAGE_EXEC; } + result->s2prot = prot; result->cacheattrs.is_s2_format = true; result->cacheattrs.attrs = extract32(attrs, 2, 4); @@ -2000,7 +2221,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, int nse, ns = extract32(attrs, 5, 1); uint8_t attrindx; uint64_t mair; - int user_rw, prot_rw; switch (out_space) { case ARMSS_Root: @@ -2049,37 +2269,57 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, default: g_assert_not_reached(); } - xn = extract64(attrs, 54, 1); - pxn = extract64(attrs, 53, 1); - if (el == 1 && nv_nv1_enabled(env, ptw)) { + if (param.pie) { + int pi = extract64(attrs, 6, 1) + | (extract64(attrs, 51, 1) << 1) + | (extract64(attrs, 53, 2) << 2); + int po = extract64(attrs, 60, 3); /* - * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, the block/page - * descriptor bit 54 holds PXN, 53 is RES0, and the effective value - * of UXN is 0. Similarly for bits 59 and 60 in table descriptors - * (which we have already folded into bits 53 and 54 of attrs). - * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0. - * Similarly, APTable[0] from the table descriptor is treated as 0; - * we already folded this into AP[1] and squashing that to 0 does - * the right thing. + * Note that we modified ptw->in_space earlier for NSTable, but + * result->f.attrs retains a copy of the original security space. */ - pxn = xn; - xn = 0; - ap &= ~1; - } + prot = get_S1prot_indirect(env, ptw, mmu_idx, pi, po, + result->f.attrs.space, out_space); + } else if (regime_is_gcs(mmu_idx)) { + /* + * While one must use indirect permissions to successfully + * use GCS instructions, AArch64.S1DirectBasePermissions + * faithfully supplies s1perms.gcs = 0, Just In Case. + */ + prot = 0; + } else { + int xn = extract64(attrs, 54, 1); + int pxn = extract64(attrs, 53, 1); + int user_rw, prot_rw; - user_rw = simple_ap_to_rw_prot_is_user(ap, true); - prot_rw = simple_ap_to_rw_prot_is_user(ap, false); - /* - * Note that we modified ptw->in_space earlier for NSTable, but - * result->f.attrs retains a copy of the original security space. - */ - result->f.prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw, - xn, pxn, result->f.attrs.space, out_space); + if (el == 1 && ptw->in_nv1) { + /* + * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, + * the block/page descriptor bit 54 holds PXN, + * 53 is RES0, and the effective value of UXN is 0. + * Similarly for bits 59 and 60 in table descriptors + * (which we have already folded into bits 53 and 54 of attrs). + * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0. + * Similarly, APTable[0] from the table descriptor is treated + * as 0; we already folded this into AP[1] and squashing + * that to 0 does the right thing. 
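 *
 * (Worked example: a descriptor with AP == 0b11, UXN == 1 and
 * PXN == 0 becomes ap == 0b10, xn == 0, pxn == 1 after the rewrite
 * below -- EL0 access is squashed and the old UXN bit is
 * reinterpreted as PXN.)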
+ */ + pxn = xn; + xn = 0; + ap &= ~1; + } + + user_rw = simple_ap_to_rw_prot_is_user(ap, true); + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); + prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw, + xn, pxn, ptw->in_space, out_space); + } + result->f.prot = prot; /* Index into MAIR registers for cache attributes */ attrindx = extract32(attrs, 2, 3); - mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + mair = env->cp15.mair_el[regime_el(mmu_idx)]; assert(attrindx <= 7); result->cacheattrs.is_s2_format = false; result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); @@ -2121,11 +2361,27 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.tlb_fill_flags = 0; } - if (!(result->f.prot & (1 << access_type))) { + if (ptw->in_prot_check & ~prot) { fi->type = ARMFault_Permission; goto do_fault; } + /* S1PIE and S2PIE both have a bit for software dirty page tracking. */ + if (access_type == MMU_DATA_STORE && param.pie) { + /* + * For S1PIE, bit 7 is nDirty and both HA and HD are checked. + * For S2PIE, bit 7 is Dirty and only HD is checked. + */ + bool bit7 = extract64(attrs, 7, 1); + if (regime_is_stage2(mmu_idx) + ? !bit7 && !param.hd + : bit7 && !(param.ha && param.hd)) { + fi->type = ARMFault_Permission; + fi->dirtybit = true; + goto do_fault; + } + } + /* If FEAT_HAFDBS has made changes, update the PTE. */ if (new_descriptor != descriptor) { new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi); @@ -2173,7 +2429,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, fi->level = level; fi->stage2 = regime_is_stage2(mmu_idx); } - fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, mmu_idx); return true; } @@ -2188,7 +2444,7 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t mask; uint32_t base; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { /* MPU disabled. 
*/ @@ -2355,7 +2611,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, ARMCPU *cpu = env_archcpu(env); int n; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); bool secure = arm_space_is_secure(ptw->in_space); result->f.phys_addr = address; @@ -2535,13 +2791,13 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, fi->type = ARMFault_Permission; fi->level = 1; - return !(result->f.prot & (1 << access_type)); + return (ptw->in_prot_check & ~result->f.prot) != 0; } static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t secure) { - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { return env->pmsav8.hprbar; } else { return env->pmsav8.rbar[secure]; @@ -2551,7 +2807,7 @@ static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t secure) { - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { return env->pmsav8.hprlar; } else { return env->pmsav8.rlar[secure]; @@ -2559,8 +2815,9 @@ static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, } bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool secure, GetPhysAddrResult *result, + MMUAccessType access_type, unsigned prot_check, + ARMMMUIdx mmu_idx, bool secure, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi, uint32_t *mregion) { /* @@ -2574,7 +2831,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, * memory system to use a subpage. */ ARMCPU *cpu = env_archcpu(env); - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); int n; int matchregion = -1; bool hit = false; @@ -2582,7 +2839,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); int region_counter; - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { region_counter = cpu->pmsav8r_hdregion; } else { region_counter = cpu->pmsav7_dregion; @@ -2708,7 +2965,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 1; } - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { result->f.prot = simple_ap_to_rw_prot_is_user(ap, mmu_idx != ARMMMUIdx_E2); } else { @@ -2717,7 +2974,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, if (!arm_feature(env, ARM_FEATURE_M)) { uint8_t attrindx = extract32(matched_rlar, 1, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + uint64_t mair = env->cp15.mair_el[regime_el(mmu_idx)]; uint8_t sh = extract32(matched_rlar, 3, 2); if (regime_sctlr(env, mmu_idx) & SCTLR_WXN && @@ -2725,7 +2982,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 0x1; } - if ((regime_el(env, mmu_idx) == 1) && + if ((regime_el(mmu_idx) == 1) && regime_sctlr(env, mmu_idx) & SCTLR_UWXN && ap == 0x1) { pxn = 0x1; } @@ -2748,7 +3005,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, if (arm_feature(env, ARM_FEATURE_M)) { fi->level = 1; } - return !(result->f.prot & (1 << access_type)); + return (prot_check & ~result->f.prot) != 0; } static bool v8m_is_sau_exempt(CPUARMState *env, @@ -2950,8 +3207,8 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, } } - ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure, - result, fi, NULL); + ret = pmsav8_mpu_lookup(env, address, access_type, ptw->in_prot_check, + mmu_idx, secure, result, fi, NULL); if (sattrs.subpage) { 
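/*
 * (Aside: ptw->in_prot_check passed just above, and the
 * "(prot_check & ~prot) != 0" tests elsewhere in this patch,
 * generalize the old "!(prot & (1 << access_type))" single-bit test.
 * A caller may now demand any subset of PAGE_READ, PAGE_WRITE and
 * PAGE_EXEC at once, or pass 0 to suppress the permission fault
 * entirely: prot_check == PAGE_READ | PAGE_WRITE faults on a
 * read-only page, while prot_check == 0 never faults on permissions.)
 */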
result->f.lg_page_size = 0; } @@ -3210,7 +3467,7 @@ static bool get_phys_addr_disabled(CPUARMState *env, break; default: - r_el = regime_el(env, mmu_idx); + r_el = regime_el(mmu_idx); if (arm_el_is_aa64(env, r_el)) { int pamax = arm_pamax(env_archcpu(env)); uint64_t tcr = env->cp15.tcr_el[r_el]; @@ -3318,7 +3575,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ - result->f.prot &= s1_prot; + result->f.prot = s1_prot & result->s2prot; /* If S2 fails, return early. */ if (ret) { @@ -3370,9 +3627,9 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, */ if (in_space == ARMSS_Secure) { result->f.attrs.secure = - !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW)) + !(env->cp15.vstcr_el2 & (R_VSTCR_SA_MASK | R_VSTCR_SW_MASK)) && (ipa_secure - || !(env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW))); + || !(env->cp15.vtcr_el2 & (R_VTCR_NSA_MASK | R_VTCR_NSW_MASK))); result->f.attrs.space = arm_secure_to_space(result->f.attrs.secure); } @@ -3393,6 +3650,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, * cannot upgrade a NonSecure translation regime's attributes * to Secure or Realm. */ + ptw->cur_space = ptw->in_space; result->f.attrs.space = ptw->in_space; result->f.attrs.secure = arm_space_is_secure(ptw->in_space); @@ -3454,7 +3712,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, break; } - result->f.attrs.user = regime_is_user(env, mmu_idx); + result->f.attrs.user = regime_is_user(mmu_idx); /* * Fast Context Switch Extension. This doesn't exist at all in v8. @@ -3462,7 +3720,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, */ if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 && !arm_feature(env, ARM_FEATURE_V8)) { - if (regime_el(env, mmu_idx) == 3) { + if (regime_el(mmu_idx) == 3) { address += env->cp15.fcseidr_s; } else { address += env->cp15.fcseidr_ns; @@ -3528,47 +3786,58 @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, return true; } if (!granule_protection_check(env, result->f.phys_addr, - result->f.attrs.space, fi)) { + result->f.attrs.space, ptw->in_space, fi)) { fi->type = ARMFault_GPCFOnOutput; return true; } return false; } -bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, - MMUAccessType access_type, MemOp memop, - ARMMMUIdx mmu_idx, ARMSecuritySpace space, - GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) +bool get_phys_addr_for_at(CPUARMState *env, vaddr address, + unsigned prot_check, ARMMMUIdx mmu_idx, + ARMSecuritySpace space, GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) { S1Translate ptw = { .in_mmu_idx = mmu_idx, .in_space = space, + .in_at = true, + .in_prot_check = prot_check, }; - return get_phys_addr_nogpc(env, &ptw, address, access_type, - memop, result, fi); + /* + * I_MXTJT: Granule protection checks are not performed on the final + * address of a successful translation. This is a translation not a + * memory reference, so MMU_DATA_LOAD is arbitrary (the exact protection + * check is handled or bypassed by .in_prot_check) and "memop = MO_8" + * bypasses any alignment check. 
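 *
 * (Example: ats_s1e1a() and friends pass prot_check == 0, so
 * AT S1E1A reports a successful translation even for a page with
 * no permissions at all, while the AT S1E1R/W style helpers pass
 * PAGE_READ or PAGE_WRITE to reproduce the architectural
 * permission check.)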
+ */ + return get_phys_addr_nogpc(env, &ptw, address, + MMU_DATA_LOAD, MO_8, result, fi); } -bool get_phys_addr(CPUARMState *env, vaddr address, - MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +static ARMSecuritySpace +arm_mmu_idx_to_security_space(CPUARMState *env, ARMMMUIdx mmu_idx) { - S1Translate ptw = { - .in_mmu_idx = mmu_idx, - }; ARMSecuritySpace ss; switch (mmu_idx) { case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_0_GCS: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E10_1_GCS: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_0_GCS: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E20_2_GCS: case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E0_GCS: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_Stage1_E1_GCS: case ARMMMUIdx_E2: + case ARMMMUIdx_E2_GCS: ss = arm_security_space_below_el3(env); break; case ARMMMUIdx_Stage2: @@ -3597,6 +3866,7 @@ bool get_phys_addr(CPUARMState *env, vaddr address, ss = ARMSS_Secure; break; case ARMMMUIdx_E3: + case ARMMMUIdx_E3_GCS: case ARMMMUIdx_E30_0: case ARMMMUIdx_E30_3_PAN: if (arm_feature(env, ARM_FEATURE_AARCH64) && @@ -3616,28 +3886,36 @@ bool get_phys_addr(CPUARMState *env, vaddr address, g_assert_not_reached(); } - ptw.in_space = ss; + return ss; +} + +bool get_phys_addr(CPUARMState *env, vaddr address, + MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), + .in_prot_check = 1 << access_type, + }; + return get_phys_addr_gpc(env, &ptw, address, access_type, memop, result, fi); } -hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, - MemTxAttrs *attrs) +static hwaddr arm_cpu_get_phys_page(CPUARMState *env, vaddr addr, + MemTxAttrs *attrs, ARMMMUIdx mmu_idx) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - ARMMMUIdx mmu_idx = arm_mmu_idx(env); - ARMSecuritySpace ss = arm_security_space(env); S1Translate ptw = { .in_mmu_idx = mmu_idx, - .in_space = ss, + .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), .in_debug = true, + .in_at = true, + .in_prot_check = 0, }; GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - bool ret; - - ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, 0, &res, &fi); + bool ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, 0, &res, &fi); *attrs = res.f.attrs; if (ret) { @@ -3645,3 +3923,33 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, } return res.f.phys_addr; } + +hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, + MemTxAttrs *attrs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + + hwaddr res = arm_cpu_get_phys_page(env, addr, attrs, mmu_idx); + + if (res != -1) { + return res; + } + + /* + * Memory may be accessible for an "unprivileged load/store" variant. + * In this case, get_a64_user_mem_index function generates an op using an + * unprivileged mmu idx, so we need to try with it. 
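 *
 * (Example: an LDTR executed at EL1 translates with the unprivileged
 * regime ARMMMUIdx_E10_0 even though arm_mmu_idx() reports E10_1, so
 * a debugger probing the page that load touched must retry with
 * E10_0, as the switch below does.)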
+ */ + switch (mmu_idx) { + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E10_0); + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E20_0); + default: + return -1; + } +} diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index 3244e07..bff61f0 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -63,6 +63,7 @@ enum arm_exception_class { EC_MOP = 0x27, EC_AA32_FPTRAP = 0x28, EC_AA64_FPTRAP = 0x2c, + EC_GCS = 0x2d, EC_SERROR = 0x2f, EC_BREAKPOINT = 0x30, EC_BREAKPOINT_SAME_EL = 0x31, @@ -80,8 +81,26 @@ typedef enum { SME_ET_Streaming, SME_ET_NotStreaming, SME_ET_InactiveZA, + SME_ET_InaccessibleZT0, } SMEExceptionType; +typedef enum { + GCS_ET_DataCheck, + GCS_ET_EXLOCK, + GCS_ET_GCSSTR_GCSSTTR, +} GCSExceptionType; + +typedef enum { + GCS_IT_RET_nPauth = 0, + GCS_IT_GCSPOPM = 1, + GCS_IT_RET_PauthA = 2, + GCS_IT_RET_PauthB = 3, + GCS_IT_GCSSS1 = 4, + GCS_IT_GCSSS2 = 5, + GCS_IT_GCSPOPCX = 8, + GCS_IT_GCSPOPX = 9, +} GCSInstructionType; + #define ARM_EL_EC_LENGTH 6 #define ARM_EL_EC_SHIFT 26 #define ARM_EL_IL_SHIFT 25 @@ -350,6 +369,23 @@ static inline uint32_t syn_pcalignment(void) return (EC_PCALIGNMENT << ARM_EL_EC_SHIFT) | ARM_EL_IL; } +static inline uint32_t syn_gcs_data_check(GCSInstructionType it, int rn) +{ + return ((EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (GCS_ET_DataCheck << 20) | (rn << 5) | it); +} + +static inline uint32_t syn_gcs_exlock(void) +{ + return (EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | (GCS_ET_EXLOCK << 20); +} + +static inline uint32_t syn_gcs_gcsstr(int ra, int rn) +{ + return ((EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (GCS_ET_GCSSTR_GCSSTTR << 20) | (ra << 10) | (rn << 5)); +} + static inline uint32_t syn_serror(uint32_t extra) { return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra; diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c index 93a15ca..aeeede8 100644 --- a/target/arm/tcg-stubs.c +++ b/target/arm/tcg-stubs.c @@ -16,20 +16,11 @@ void write_v7m_exception(CPUARMState *env, uint32_t new_exc) g_assert_not_reached(); } -void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, +void raise_exception_ra(CPUARMState *env, uint32_t excp, uint64_t syndrome, uint32_t target_el, uintptr_t ra) { g_assert_not_reached(); } -/* Temporarily while cpu_get_tb_cpu_state() is still in common code */ -void assert_hflags_rebuild_correctly(CPUARMState *env) -{ -} - -/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */ -void define_tlb_insn_regs(ARMCPU *cpu) -{ -} /* With KVM, we never use float_status, so these can be no-ops */ void arm_set_default_fp_behaviours(float_status *s) diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 8c798cd..01b1b3e 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -156,6 +156,16 @@ MOVZ . 10 100101 .. ................ ..... @movw_32 MOVK . 11 100101 .. ................ ..... @movw_64 MOVK . 11 100101 .. ................ ..... @movw_32 +# Min/Max (immediate) + +@minmaxi_s sf:1 .. ........... imm:s8 rn:5 rd:5 &rri_sf +@minmaxi_u sf:1 .. ........... imm:8 rn:5 rd:5 &rri_sf + +SMAX_i . 00 1000111 0000 ........ ..... ..... @minmaxi_s +SMIN_i . 00 1000111 0010 ........ ..... ..... @minmaxi_s +UMAX_i . 00 1000111 0001 ........ ..... ..... @minmaxi_u +UMIN_i . 00 1000111 0011 ........ ..... ..... 
@minmaxi_u + # Bitfield &bitfield rd rn sf immr imms @@ -238,6 +248,7 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111 AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111 ESB 1101 0101 0000 0011 0010 0010 000 11111 + GCSB 1101 0101 0000 0011 0010 0010 011 11111 PACIAZ 1101 0101 0000 0011 0010 0011 000 11111 PACIASP 1101 0101 0000 0011 0010 0011 001 11111 PACIBZ 1101 0101 0000 0011 0010 0011 010 11111 @@ -246,6 +257,7 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB AUTIASP 1101 0101 0000 0011 0010 0011 101 11111 AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111 AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111 + CHKFEAT 1101 0101 0000 0011 0010 0101 000 11111 ] # The canonical NOP has CRm == op2 == 0, but all of the space # that isn't specifically allocated to an instruction must NOP @@ -536,6 +548,13 @@ SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 +# Atomic 128-bit memory operations +&atomic128 rn rt rt2 a r +@atomic128 ........ a:1 r:1 . rt2:5 ...... rn:5 rt:5 &atomic128 +LDCLRP 00011001 . . 1 ..... 000100 ..... ..... @atomic128 +LDSETP 00011001 . . 1 ..... 001100 ..... ..... @atomic128 +SWPP 00011001 . . 1 ..... 100000 ..... ..... @atomic128 + # Load/store register (pointer authentication) # LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous @@ -553,6 +572,9 @@ LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 +# GCSSTR, GCSSTTR +GCSSTR 11011001 000 11111 000 unpriv:1 11 rn:5 rt:5 + # Load/store multiple structures # The 4-bit opcode in [15:12] encodes repeat count and structure elements &ldst_mult rm rn rt sz q p rpt selem @@ -698,6 +720,11 @@ GMI 1 00 11010110 ..... 000101 ..... ..... @rrr PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr +SMAX . 00 11010110 ..... 011000 ..... ..... @rrr_sf +SMIN . 00 11010110 ..... 011010 ..... ..... @rrr_sf +UMAX . 00 11010110 ..... 011001 ..... ..... @rrr_sf +UMIN . 00 11010110 ..... 011011 ..... ..... @rrr_sf + # Data Processing (1-source) @rr . .......... ..... ...... rn:5 rd:5 &rr @@ -711,6 +738,10 @@ REV64 1 10 11010110 00000 000011 ..... ..... @rr CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf CLS . 10 11010110 00000 000101 ..... ..... @rr_sf +CTZ . 10 11010110 00000 000110 ..... ..... @rr_sf +CNT . 10 11010110 00000 000111 ..... ..... @rr_sf +ABS . 10 11010110 00000 001000 ..... ..... @rr_sf + &pacaut rd rn z @pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c index 9a555c7..6701398 100644 --- a/target/arm/tcg/arith_helper.c +++ b/target/arm/tcg/arith_helper.c @@ -6,11 +6,12 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" -#include "cpu.h" -#include "exec/helper-proto.h" #include "qemu/crc32c.h" #include <zlib.h> /* for crc32 */ +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + /* * Note that signed overflow is undefined in C. The following routines are * careful to use unsigned types where modulo arithmetic is required. 
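The EC_GCS (0x2d) syndromes introduced in the syndrome.h hunk above pack an exception type at bit 20, a register number at bit 5 and, for data checks, the faulting instruction type in the low bits. A standalone sketch that reproduces syn_gcs_data_check() with one worked value; the 'u' suffix on EC_GCS only keeps the shift in unsigned arithmetic:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ARM_EL_EC_SHIFT  26
    #define ARM_EL_IL        (1u << 25)
    #define EC_GCS           0x2du
    #define GCS_ET_DataCheck 0
    #define GCS_IT_GCSPOPM   1

    /* Mirrors syn_gcs_data_check() from the syndrome.h hunk above. */
    static uint32_t syn_gcs_data_check(int it, int rn)
    {
        return (EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL
             | (GCS_ET_DataCheck << 20) | (rn << 5) | it;
    }

    int main(void)
    {
        /* GCSPOPM faulting on Xn == X30: 0xb6000000 | (30 << 5) | 1. */
        printf("0x%08" PRIx32 "\n", syn_gcs_data_check(GCS_IT_GCSPOPM, 30));
        /* prints 0xb60003c1 */
        return 0;
    }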
diff --git a/target/arm/tcg/cpregs-at.c b/target/arm/tcg/cpregs-at.c new file mode 100644 index 0000000..0e8f229 --- /dev/null +++ b/target/arm/tcg/cpregs-at.c @@ -0,0 +1,556 @@ +/* + * System instructions for address translation + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu-features.h" +#include "internals.h" +#include "cpregs.h" + + +static int par_el1_shareability(GetPhysAddrResult *res) +{ + /* + * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC + * memory -- see pseudocode PAREncodeShareability(). + */ + if (((res->cacheattrs.attrs & 0xf0) == 0) || + res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { + return 2; + } + return res->cacheattrs.shareability; +} + +static uint64_t do_ats_write(CPUARMState *env, uint64_t value, + unsigned prot_check, ARMMMUIdx mmu_idx, + ARMSecuritySpace ss) +{ + uint64_t par64; + bool format64 = false; + ARMMMUFaultInfo fi = {}; + GetPhysAddrResult res = {}; + bool ret = get_phys_addr_for_at(env, value, prot_check, + mmu_idx, ss, &res, &fi); + + /* + * ATS operations only do S1 or S1+S2 translations, so we never + * have to deal with the ARMCacheAttrs format for S2 only. + */ + assert(!res.cacheattrs.is_s2_format); + + if (ret) { + /* + * Some kinds of translation fault must cause exceptions rather + * than being reported in the PAR. + */ + int current_el = arm_current_el(env); + int target_el; + uint32_t syn, fsr, fsc; + bool take_exc = false; + + if (fi.s1ptw && current_el == 1 + && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { + /* + * Synchronous stage 2 fault on an access made as part of the + * translation table walk for AT S1E0* or AT S1E1* insn + * executed from NS EL1. If this is a synchronous external abort + * and SCR_EL3.EA == 1, then we take a synchronous external abort + * to EL3. Otherwise the fault is taken as an exception to EL2, + * and HPFAR_EL2 holds the faulting IPA. + */ + if (fi.type == ARMFault_SyncExternalOnWalk && + (env->cp15.scr_el3 & SCR_EA)) { + target_el = 3; + } else { + env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4; + if (arm_is_secure_below_el3(env) && fi.s1ns) { + env->cp15.hpfar_el2 |= HPFAR_NS; + } + target_el = 2; + } + take_exc = true; + } else if (fi.type == ARMFault_SyncExternalOnWalk) { + /* + * Synchronous external aborts during a translation table walk + * are taken as Data Abort exceptions. + */ + if (fi.stage2) { + if (current_el == 3) { + target_el = 3; + } else { + target_el = 2; + } + } else { + target_el = exception_target_el(env); + } + take_exc = true; + } + + if (take_exc) { + /* Construct FSR and FSC using same logic as arm_deliver_fault() */ + if (target_el == 2 || arm_el_is_aa64(env, target_el) || + arm_s1_regime_using_lpae_format(env, mmu_idx)) { + fsr = arm_fi_to_lfsc(&fi); + fsc = extract32(fsr, 0, 6); + } else { + fsr = arm_fi_to_sfsc(&fi); + fsc = 0x3f; + } + /* + * Report exception with ESR indicating a fault due to a + * translation table walk for a cache maintenance instruction. 
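 *
 * (Worked example: for a stage-2 fault at IPA 0x40002345, the code
 * above stores extract64(0x40002345, 12, 47) << 4 == 0x400020 in
 * HPFAR_EL2 -- the IPA's page number placed in the FIPA field at
 * bit 4.)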
+ */ + syn = syn_data_abort_no_iss(current_el == target_el, 0, + fi.ea, 1, fi.s1ptw, 1, fsc); + env->exception.vaddress = value; + env->exception.fsr = fsr; + raise_exception(env, EXCP_DATA_ABORT, syn, target_el); + } + } + + if (is_a64(env)) { + format64 = true; + } else if (arm_feature(env, ARM_FEATURE_LPAE)) { + /* + * ATS1Cxx: + * * TTBCR.EAE determines whether the result is returned using the + * 32-bit or the 64-bit PAR format + * * Instructions executed in Hyp mode always use the 64bit format + * + * ATS1S2NSOxx uses the 64bit format if any of the following is true: + * * The Non-secure TTBCR.EAE bit is set to 1 + * * The implementation includes EL2, and the value of HCR.VM is 1 + * + * (Note that HCR.DC makes HCR.VM behave as if it is 1.) + * + * ATS1Hx always uses the 64bit format. + */ + format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); + + if (arm_feature(env, ARM_FEATURE_EL2)) { + if (mmu_idx == ARMMMUIdx_E10_0 || + mmu_idx == ARMMMUIdx_E10_1 || + mmu_idx == ARMMMUIdx_E10_1_PAN) { + format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); + } else { + format64 |= arm_current_el(env) == 2; + } + } + } + + if (format64) { + /* Create a 64-bit PAR */ + par64 = (1 << 11); /* LPAE bit always set */ + if (!ret) { + par64 |= res.f.phys_addr & ~0xfffULL; + if (!res.f.attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ + par64 |= par_el1_shareability(&res) << 7; /* SH */ + } else { + uint32_t fsr = arm_fi_to_lfsc(&fi); + + par64 |= 1; /* F */ + par64 |= (fsr & 0x3f) << 1; /* FS */ + if (fi.stage2) { + par64 |= (1 << 9); /* S */ + } + if (fi.s1ptw) { + par64 |= (1 << 8); /* PTW */ + } + } + } else { + /* + * fsr is a DFSR/IFSR value for the short descriptor + * translation table format (with WnR always clear). + * Convert it to a 32-bit PAR. + */ + if (!ret) { + /* We do not set any attribute bits in the PAR */ + if (res.f.lg_page_size == 24 + && arm_feature(env, ARM_FEATURE_V7)) { + par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); + } else { + par64 = res.f.phys_addr & 0xfffff000; + } + if (!res.f.attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + } else { + uint32_t fsr = arm_fi_to_sfsc(&fi); + + par64 = ((fsr & (1 << 10)) >> 5) | ((fsr & (1 << 12)) >> 6) | + ((fsr & 0xf) << 1) | 1; + } + } + return par64; +} + +static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? 
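/* odd opc2 values encode the write variants (ATS1CPW, ATS12NSOUW, ...) */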
PAGE_WRITE : PAGE_READ; + uint64_t par64; + ARMMMUIdx mmu_idx; + int el = arm_current_el(env); + ARMSecuritySpace ss = arm_security_space(env); + + switch (ri->opc2 & 6) { + case 0: + /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ + switch (el) { + case 3: + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_E30_3_PAN; + } else { + mmu_idx = ARMMMUIdx_E3; + } + break; + case 2: + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ + /* fall through */ + case 1: + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_Stage1_E1_PAN; + } else { + mmu_idx = ARMMMUIdx_Stage1_E1; + } + break; + default: + g_assert_not_reached(); + } + break; + case 2: + /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_E30_0; + break; + case 2: + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ + mmu_idx = ARMMMUIdx_Stage1_E0; + break; + case 1: + mmu_idx = ARMMMUIdx_Stage1_E0; + break; + default: + g_assert_not_reached(); + } + break; + case 4: + /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ + mmu_idx = ARMMMUIdx_E10_1; + ss = ARMSS_NonSecure; + break; + case 6: + /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ + mmu_idx = ARMMMUIdx_E10_0; + ss = ARMSS_NonSecure; + break; + default: + g_assert_not_reached(); + } + + par64 = do_ats_write(env, value, access_perm, mmu_idx, ss); + + A32_BANKED_CURRENT_REG_SET(env, par, par64); +} + +static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ; + uint64_t par64; + + /* There is no SecureEL2 for AArch32. */ + par64 = do_ats_write(env, value, access_perm, ARMMMUIdx_E2, + ARMSS_NonSecure); + + A32_BANKED_CURRENT_REG_SET(env, par, par64); +} + +static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* + * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level + * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can + * only happen when executing at EL3 because that combination also causes an + * illegal exception return. We don't need to check FEAT_RME either, because + * scr_write() ensures that the NSE bit is not set otherwise. + */ + if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { + return CP_ACCESS_UNDEFINED; + } + return CP_ACCESS_OK; +} + +static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 3 && + !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { + return CP_ACCESS_UNDEFINED; + } + return at_e012_access(env, ri, isread); +} + +static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) { + return CP_ACCESS_TRAP_EL2; + } + return at_e012_access(env, ri, isread); +} + +static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ; + ARMMMUIdx mmu_idx; + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); + bool for_el3 = false; + ARMSecuritySpace ss; + + switch (ri->opc2 & 6) { + case 0: + switch (ri->opc1) { + case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = regime_e20 ? + ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN; + } else { + mmu_idx = regime_e20 ? 
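/* HCR_EL2.{E2H,TGE} == {1,1} redirects the EL1&0 regime to EL2&0 */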
ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; + } + break; + case 4: /* AT S1E2R, AT S1E2W */ + mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; + break; + case 6: /* AT S1E3R, AT S1E3W */ + mmu_idx = ARMMMUIdx_E3; + for_el3 = true; + break; + default: + g_assert_not_reached(); + } + break; + case 2: /* AT S1E0R, AT S1E0W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0; + break; + case 4: /* AT S12E1R, AT S12E1W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1; + break; + case 6: /* AT S12E0R, AT S12E0W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0; + break; + default: + g_assert_not_reached(); + } + + ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env); + env->cp15.par_el[1] = do_ats_write(env, value, access_perm, mmu_idx, ss); +} + +static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (ri->opc2 & 4) { + /* + * The ATS12NSO* operations must trap to EL3 or EL2 if executed in + * Secure EL1 (which can only happen if EL3 is AArch64). + * They are simply UNDEF if executed from NS EL1. + * They function normally from EL2 or EL3. + */ + if (arm_current_el(env) == 1) { + if (arm_is_secure_below_el3(env)) { + if (env->cp15.scr_el3 & SCR_EEL2) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_UNDEFINED; + } + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo vapa_ats_reginfo[] = { + /* This underdecoding is safe because the reginfo is NO_RAW. */ + { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_W, .accessfn = ats_access, + .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, +}; + +static const ARMCPRegInfo v8_ats_reginfo[] = { + /* 64 bit address translation operations */ + { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E0R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E0W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 
= 4, .crn = 7, .crm = 8, .opc2 = 7, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ + { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write64 }, + { .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write64 }, +}; + +static const ARMCPRegInfo el2_ats_reginfo[] = { + /* + * Unlike the other EL2-related AT operations, these must + * UNDEF from EL3 if EL2 is not implemented, which is why we + * define them here rather than with the rest of the AT ops. + */ + { .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL2_W, .accessfn = at_s1e2_access, + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, + { .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL2_W, .accessfn = at_s1e2_access, + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, + /* + * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE + * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 + * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose + * to behave as if SCR.NS was 1. + */ + { .name = "ATS1HR", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL2_W, + .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, + { .name = "ATS1HW", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL2_W, + .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, +}; + +static const ARMCPRegInfo ats1e1_reginfo[] = { + { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1RP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1WP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, +}; + +static const ARMCPRegInfo ats1cp_reginfo[] = { + { .name = "ATS1CPRP", + .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write }, + { .name = "ATS1CPWP", + .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write }, +}; + +static void ats_s1e1a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); + ARMMMUIdx mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; + ARMSecuritySpace ss = arm_security_space_below_el3(env); + + env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss); +} + +static void ats_s1e2a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + ARMMMUIdx mmu_idx = hcr_el2 & HCR_E2H ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; + ARMSecuritySpace ss = arm_security_space_below_el3(env); + + env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss); +} + +static void ats_s1e3a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + env->cp15.par_el[1] = do_ats_write(env, value, 0, ARMMMUIdx_E3, + arm_security_space(env)); +} + +static const ARMCPRegInfo ats1a_reginfo[] = { + { .name = "AT_S1E1A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1A, + .accessfn = at_s1e01_access, .writefn = ats_s1e1a }, + { .name = "AT_S1E2A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_s1e2_access, .writefn = ats_s1e2a }, + { .name = "AT_S1E3A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_s1e3a }, +}; + +void define_at_insn_regs(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_VAPA)) { + define_arm_cp_regs(cpu, vapa_ats_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V8)) { + define_arm_cp_regs(cpu, v8_ats_reginfo); + } + if (arm_feature(env, ARM_FEATURE_EL2) + || (arm_feature(env, ARM_FEATURE_EL3) + && arm_feature(env, ARM_FEATURE_V8))) { + define_arm_cp_regs(cpu, el2_ats_reginfo); + } + if (cpu_isar_feature(aa64_ats1e1, cpu)) { + define_arm_cp_regs(cpu, ats1e1_reginfo); + } + if (cpu_isar_feature(aa32_ats1e1, cpu)) { + define_arm_cp_regs(cpu, ats1cp_reginfo); + } + if (cpu_isar_feature(aa64_ats1a, cpu)) { + define_arm_cp_regs(cpu, ats1a_reginfo); + } +} diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c index c4dd309..dc249ce 100644 --- a/target/arm/tcg/cpu-v7m.c +++ b/target/arm/tcg/cpu-v7m.c @@ -45,6 +45,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V6); set_feature(&cpu->env, ARM_FEATURE_M); @@ -58,51 +59,53 @@ static void cortex_m0_initfn(Object *obj) * by looking at ID register fields. We use the same values as * for the M3. 
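 *
 * (Note: the SET_IDREG()/GET_IDREG() accessors used below are not
 * defined in this hunk; presumably they index a generic idregs[]
 * array in ARMISARegisters by register name, roughly
 * "isar->idregs[ID_PFR0_IDX] = value", replacing one named struct
 * field per ID register.)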
*/ - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m3_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); set_feature(&cpu->env, ARM_FEATURE_M_MAIN); cpu->midr = 0x410fc231; cpu->pmsav7_dregion = 8; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m4_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -113,26 +116,27 @@ static void cortex_m4_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000000; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 
0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -143,26 +147,27 @@ static void cortex_m7_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00100030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02112000; - cpu->isar.id_isar2 = 0x20232231; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00100030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02112000); + SET_IDREG(isar, ID_ISAR2, 0x20232231); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m33_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_M); @@ -175,28 +180,29 @@ static void cortex_m33_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00101F40; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000210); + SET_IDREG(isar, ID_DFR0, 0x00200000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00101F40); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); + SET_IDREG(isar, CLIDR, 0x00000000); cpu->ctr = 0x8000c000; } static void cortex_m55_initfn(Object *obj) { ARMCPU 
*cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_V8_1M); @@ -212,39 +218,47 @@ static void cortex_m55_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12100211; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x20000030; - cpu->isar.id_pfr1 = 0x00000230; - cpu->isar.id_dfr0 = 0x10200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00111040; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000011; - cpu->isar.id_isar0 = 0x01103110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; /* caches not implemented */ + SET_IDREG(isar, ID_PFR0, 0x20000030); + SET_IDREG(isar, ID_PFR1, 0x00000230); + SET_IDREG(isar, ID_DFR0, 0x10200000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00111040); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000011); + SET_IDREG(isar, ID_ISAR0, 0x01103110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); + SET_IDREG(isar, CLIDR, 0x00000000); /* caches not implemented */ cpu->ctr = 0x8303c003; } static const TCGCPUOps arm_v7m_tcg_ops = { + /* ARM processors have a weak memory model */ + .guest_default_memory_order = 0, + .mttcg_supported = true, + .initialize = arm_translate_init, .translate_code = arm_translate_code, + .get_tb_cpu_state = arm_get_tb_cpu_state, .synchronize_from_tb = arm_cpu_synchronize_from_tb, .debug_excp_handler = arm_debug_excp_handler, .restore_state_to_opc = arm_restore_state_to_opc, + .mmu_index = arm_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = arm_cpu_record_sigsegv, .record_sigbus = arm_cpu_record_sigbus, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, + .cpu_exec_reset = cpu_reset, .do_interrupt = arm_v7m_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, @@ -254,14 +268,13 @@ static const TCGCPUOps arm_v7m_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void arm_v7m_class_init(ObjectClass *oc, void *data) +static void arm_v7m_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); acc->info = data; cc->tcg_ops = &arm_v7m_tcg_ops; - cc->gdb_core_xml_file = "arm-m-profile.xml"; } static const ARMCPUInfo arm_v7m_cpus[] = { diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index 2c45b7e..f076141 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -23,18 +23,19 @@ void aa32_max_features(ARMCPU *cpu) { uint32_t t; + ARMISARegisters *isar = &cpu->isar; /* Add additional features supported by QEMU */ - t = cpu->isar.id_isar5; + t = GET_IDREG(isar, ID_ISAR5); t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */ t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */ t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */ t = 
FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* FEAT_FCMA */ - cpu->isar.id_isar5 = t; + SET_IDREG(isar, ID_ISAR5, t); - t = cpu->isar.id_isar6; + t = GET_IDREG(isar, ID_ISAR6); t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */ t = FIELD_DP32(t, ID_ISAR6, DP, 1); /* Feat_DotProd */ t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */ @@ -42,7 +43,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */ t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */ t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */ - cpu->isar.id_isar6 = t; + SET_IDREG(isar, ID_ISAR6, t); t = cpu->isar.mvfr1; t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */ @@ -54,38 +55,34 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ cpu->isar.mvfr2 = t; - t = cpu->isar.id_mmfr3; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - cpu->isar.id_mmfr3 = t; + FIELD_DP32_IDREG(isar, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - t = cpu->isar.id_mmfr4; + t = GET_IDREG(isar, ID_MMFR4); t = FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */ t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */ - cpu->isar.id_mmfr4 = t; + SET_IDREG(isar, ID_MMFR4, t); - t = cpu->isar.id_mmfr5; - t = FIELD_DP32(t, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - cpu->isar.id_mmfr5 = t; + FIELD_DP32_IDREG(isar, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - t = cpu->isar.id_pfr0; + t = GET_IDREG(isar, ID_PFR0); t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CSV2 */ t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */ - cpu->isar.id_pfr0 = t; + SET_IDREG(isar, ID_PFR0, t); - t = cpu->isar.id_pfr2; + t = GET_IDREG(isar, ID_PFR2); t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */ t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */ - cpu->isar.id_pfr2 = t; + SET_IDREG(isar, ID_PFR2, t); - t = cpu->isar.id_dfr0; + t = GET_IDREG(isar, ID_DFR0); t = FIELD_DP32(t, ID_DFR0, COPDBG, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP32(t, ID_DFR0, COPSDBG, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */ - cpu->isar.id_dfr0 = t; + SET_IDREG(isar, ID_DFR0, t); /* Debug ID registers. */ @@ -115,9 +112,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, DBGDEVID1, PCSROFFSET, 2); cpu->isar.dbgdevid1 = t; - t = cpu->isar.id_dfr1; - t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_dfr1 = t; + FIELD_DP32_IDREG(isar, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ } /* CPU models. These are not needed for the AArch64 linux-user build. */ @@ -140,7 +135,7 @@ static void arm926_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -182,7 +177,7 @@ static void arm1026_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. 
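 *
 * (Note: FIELD_DP32_IDREG() is evidently the read-modify-write form
 * of FIELD_DP32() -- it deposits the value into the named field of
 * the named ID register in place, here setting ID_ISAR1.Jazelle to 1
 * while leaving the other fields untouched.)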
*/ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -206,6 +201,7 @@ static void arm1026_initfn(Object *obj) static void arm1136_r2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an * older core than plain "arm1136". In particular this does not @@ -226,24 +222,25 @@ static void arm1136_r2_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); + SET_IDREG(isar, ID_AFR0, 0x3); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static void arm1136_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1136"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -257,24 +254,25 @@ static void arm1136_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); + SET_IDREG(isar, ID_AFR0, 0x3); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static void arm1176_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1176"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -289,24 +287,25 @@ static void arm1176_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x33; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222100; - cpu->isar.id_isar0 = 0x0140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231121; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x01141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x11); + 
SET_IDREG(isar, ID_DFR0, 0x33); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222100); + SET_IDREG(isar, ID_ISAR0, 0x0140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231121); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x01141); cpu->reset_auxcr = 7; } static void arm11mpcore_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm11mpcore"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -318,18 +317,18 @@ static void arm11mpcore_initfn(Object *obj) cpu->isar.mvfr0 = 0x11111111; cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0; - cpu->id_afr0 = 0x2; - cpu->isar.id_mmfr0 = 0x01100103; - cpu->isar.id_mmfr1 = 0x10020302; - cpu->isar.id_mmfr2 = 0x01222000; - cpu->isar.id_isar0 = 0x00100011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11221011; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0); + SET_IDREG(isar, ID_AFR0, 0x2); + SET_IDREG(isar, ID_MMFR0, 0x01100103); + SET_IDREG(isar, ID_MMFR1, 0x10020302); + SET_IDREG(isar, ID_MMFR2, 0x01222000); + SET_IDREG(isar, ID_ISAR0, 0x00100011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11221011); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 1; } @@ -343,6 +342,7 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = { static void cortex_a8_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a8"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -357,21 +357,21 @@ static void cortex_a8_initfn(Object *obj) cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x400; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x31100003; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01202000; - cpu->isar.id_mmfr3 = 0x11; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x12112111; - cpu->isar.id_isar2 = 0x21232031; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x400); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x31100003); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01202000); + SET_IDREG(isar, ID_MMFR3, 0x11); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x12112111); + SET_IDREG(isar, ID_ISAR2, 0x21232031); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x15141000; - cpu->clidr = (1 << 27) | (2 << 24) | 3; + SET_IDREG(isar, CLIDR, (1 << 27) | (2 << 24) | 3); cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. 
*/ @@ -412,6 +412,7 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = { static void cortex_a9_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a9"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -432,21 +433,21 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x000; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x00100103; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01230000; - cpu->isar.id_mmfr3 = 0x00002111; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x000); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x00100103); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01230000); + SET_IDREG(isar, ID_MMFR3, 0x00002111); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x35141000; - cpu->clidr = (1 << 27) | (1 << 24) | 3; + SET_IDREG(isar, CLIDR, (1 << 27) | (1 << 24) | 3); cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ cpu->isar.reset_pmcr_el0 = 0x41093000; @@ -479,6 +480,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a7"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -497,27 +499,27 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); /* * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. 
*/ - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f005; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x1; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ @@ -528,6 +530,7 @@ static void cortex_a7_initfn(Object *obj) static void cortex_a15_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a15"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -548,23 +551,23 @@ static void cortex_a15_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f021; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x0; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ @@ -585,27 +588,28 @@ static const ARMCPRegInfo cortexr5_cp_reginfo[] = { static void cortex_r5_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x411fc153; /* r1p3 */ - cpu->isar.id_pfr0 = 0x0131; - cpu->isar.id_pfr1 = 0x001; - cpu->isar.id_dfr0 = 0x010400; - cpu->id_afr0 = 0x0; - cpu->isar.id_mmfr0 = 0x0210030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0x0211; - cpu->isar.id_isar0 = 0x02101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232141; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x0010142; - cpu->isar.id_isar5 = 0x0; - cpu->isar.id_isar6 = 0x0; + SET_IDREG(isar, ID_PFR0, 0x0131); + SET_IDREG(isar, ID_PFR1, 0x001); + SET_IDREG(isar, ID_DFR0, 0x010400); + SET_IDREG(isar, ID_AFR0, 0x0); + SET_IDREG(isar, ID_MMFR0, 0x0210030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0x0211); + 
SET_IDREG(isar, ID_ISAR0, 0x02101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232141); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x0010142); + SET_IDREG(isar, ID_ISAR5, 0x0); + SET_IDREG(isar, ID_ISAR6, 0x0); cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; cpu->isar.reset_pmcr_el0 = 0x41151800; @@ -720,6 +724,7 @@ static const ARMCPRegInfo cortex_r52_cp_reginfo[] = { static void cortex_r52_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -737,23 +742,23 @@ static void cortex_r52_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8144c004; cpu->reset_sctlr = 0x30c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x10111001; - cpu->isar.id_dfr0 = 0x03010006; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00211040; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0xf0102211; - cpu->isar.id_mmfr4 = 0x00000010; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232142; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x00010001; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x10111001); + SET_IDREG(isar, ID_DFR0, 0x03010006); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00211040); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0xf0102211); + SET_IDREG(isar, ID_MMFR4, 0x00000010); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232142); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x00010001); cpu->isar.dbgdidr = 0x77168000; - cpu->clidr = (1 << 27) | (1 << 24) | 0x3; + SET_IDREG(isar, CLIDR, (1 << 27) | (1 << 24) | 0x3); cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ @@ -802,144 +807,6 @@ static void sa1110_initfn(Object *obj) cpu->reset_sctlr = 0x00000070; } -static void pxa250_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052100; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa255_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d00; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa260_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052903; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa261_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d05; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa262_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); 
- set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d06; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054110; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054111; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054112; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054113; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054114; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c5_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054117; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - #ifndef TARGET_AARCH64 /* * -cpu max: a CPU with as many features enabled as our emulation supports. 
@@ -949,6 +816,7 @@ static void pxa270c5_initfn(Object *obj) static void arm_max_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* aarch64_a57_initfn, advertising none of the aarch64 features */ cpu->dtb_compatible = "arm,cortex-a57"; @@ -968,23 +836,23 @@ static void arm_max_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); cpu->isar.reset_pmcr_el0 = 0x41013000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ @@ -1026,31 +894,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "ti925t", .initfn = ti925t_initfn }, { .name = "sa1100", .initfn = sa1100_initfn }, { .name = "sa1110", .initfn = sa1110_initfn }, - { .name = "pxa250", .initfn = pxa250_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa255", .initfn = pxa255_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa260", .initfn = pxa260_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa261", .initfn = pxa261_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa262", .initfn = pxa262_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - /* "pxa270" is an alias for "pxa270-a0" */ - { .name = "pxa270", .initfn = pxa270a0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-a0", .initfn = pxa270a0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-a1", .initfn = pxa270a1_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-b0", .initfn = pxa270b0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-b1", .initfn = pxa270b1_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 
29ab0ac..1bffe66 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -32,6 +32,7 @@ static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a35"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -48,29 +49,29 @@ static void aarch64_a35_initfn(Object *obj) cpu->midr = 0x411fd040; cpu->revidr = 0; cpu->ctr = 0x84448004; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64pfr1 = 0; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64dfr1 = 0; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64isar1 = 0; - cpu->isar.id_aa64mmfr0 = 0x00101122; - cpu->isar.id_aa64mmfr1 = 0; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64PFR1, 0); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64ISAR1, 0); + SET_IDREG(isar, ID_AA64MMFR0, 0x00101122); + SET_IDREG(isar, ID_AA64MMFR1, 0); + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->dcz_blocksize = 4; /* From B2.4 AArch64 Virtual Memory control registers */ @@ -157,11 +158,9 @@ static bool cpu_arm_get_rme(Object *obj, Error **errp) static void cpu_arm_set_rme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, RME, value); - cpu->isar.id_aa64pfr0 = t; + /* Enable FEAT_RME_GPC2 */ + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, RME, value ? 
2 : 0); } static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name, @@ -204,6 +203,7 @@ static const Property arm_cpu_lpa2_property = static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a55"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -217,34 +217,34 @@ static void aarch64_a55_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x0000000010112222ull; - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x0000000010112222ull); + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x412FD050; /* r2p0 */ cpu->revidr = 0; @@ -276,6 +276,7 @@ static void aarch64_a55_initfn(Object *obj) static void aarch64_a72_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a72"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -295,29 +296,29 @@ static void aarch64_a72_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - 
cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41023000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 48KB L1 dcache */ @@ -335,6 +336,7 @@ static void aarch64_a72_initfn(Object *obj) static void aarch64_a76_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a76"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -348,34 +350,34 @@ static void aarch64_a76_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x8444C004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + 
SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0b1; /* r4p1 */ cpu->revidr = 0; @@ -405,9 +407,83 @@ static void aarch64_a76_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x410b3000; } +static void aarch64_a78ae_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; + + cpu->dtb_compatible = "arm,cortex-a78ae"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by 3.2.4 AArch64 registers by functional group */ + SET_IDREG(isar, CLIDR, 0x82000023); + cpu->ctr = 0x9444c004; + cpu->dcz_blocksize = 4; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0010100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000001200031ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000100001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); + cpu->midr = 0x410fd423; /* r0p3 */ + cpu->revidr = 0; + + /* From 3.2.33 CCSIDR_EL1 */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7); + /* 64KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2); + /* 512KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 512 * KiB, 7); + + /* From 3.2.118 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From 3.4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + /* From 3.4.8 ICC_CTLR_EL3 */ + cpu->gic_pribits = 5; + + /* From 3.5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From 5.5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x41223000; +} + static void aarch64_a64fx_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; 
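/*
 * Every hunk in this conversion follows the same pattern: a named
 * struct member such as cpu->isar.id_aa64pfr0 becomes a slot in one
 * indexed array, accessed through GET_IDREG/SET_IDREG, with
 * FIELD_DP32_IDREG/FIELD_DP64_IDREG as read-modify-write shorthands.
 * A minimal sketch of that plumbing, assuming the FIELD_DP64 macro
 * from hw/registerfields.h; the enum and struct names below are
 * illustrative, not the exact definitions used by target/arm:
 */
#include <stdint.h>

typedef enum IdRegIdx {
    IDX_ID_AA64PFR0,
    IDX_ID_AA64ISAR0,
    IDX_CLIDR,
    /* ... one slot per ID register ... */
    IDX_NUM_IDREGS,
} IdRegIdx;

typedef struct IdRegs {
    uint64_t regs[IDX_NUM_IDREGS];
} IdRegs;

/* Token-paste the register name into its array slot. */
#define GET_IDREG(isar, name)     ((isar)->regs[IDX_ ## name])
#define SET_IDREG(isar, name, v)  ((isar)->regs[IDX_ ## name] = (v))

/* Read-modify-write a single field of one ID register. */
#define FIELD_DP64_IDREG(isar, name, field, v) \
    SET_IDREG(isar, name, \
              FIELD_DP64(GET_IDREG(isar, name), name, field, v))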
cpu->dtb_compatible = "arm,a64fx"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -422,19 +498,19 @@ static void aarch64_a64fx_initfn(Object *obj) cpu->revidr = 0x00000000; cpu->ctr = 0x86668006; cpu->reset_sctlr = 0x30000180; - cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */ - cpu->isar.id_aa64pfr1 = 0x0000000000000000; - cpu->isar.id_aa64dfr0 = 0x0000000010305408; - cpu->isar.id_aa64dfr1 = 0x0000000000000000; - cpu->id_aa64afr0 = 0x0000000000000000; - cpu->id_aa64afr1 = 0x0000000000000000; - cpu->isar.id_aa64mmfr0 = 0x0000000000001122; - cpu->isar.id_aa64mmfr1 = 0x0000000011212100; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011; - cpu->isar.id_aa64isar0 = 0x0000000010211120; - cpu->isar.id_aa64isar1 = 0x0000000000010001; - cpu->isar.id_aa64zfr0 = 0x0000000000000000; - cpu->clidr = 0x0000000080000023; + SET_IDREG(isar, ID_AA64PFR0, 0x0000000101111111); /* No RAS Extensions */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408); + SET_IDREG(isar, ID_AA64DFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64AFR0, 0x0000000000000000); + SET_IDREG(isar, ID_AA64AFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000001122); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000011212100); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000000010211120); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000010001); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000000000000000); + SET_IDREG(isar, CLIDR, 0x0000000080000023); /* 64KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 7); /* 64KB L1 icache */ @@ -581,6 +657,7 @@ static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) static void aarch64_neoverse_n1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -594,34 +671,34 @@ static void aarch64_neoverse_n1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x8444c004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000110305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 
0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0c1; /* r4p1 */ cpu->revidr = 0; @@ -656,6 +733,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) static void aarch64_neoverse_v1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-v1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -669,37 +747,37 @@ static void aarch64_neoverse_v1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by 3.2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0xb444c004; /* With DIC and IDC set */ cpu->dcz_blocksize = 4; - cpu->id_aa64afr0 = 0x00000000; - cpu->id_aa64afr1 = 0x00000000; - cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; - cpu->isar.id_aa64dfr1 = 0x00000000; - cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0011100001211032ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; - cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x15011099; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; - cpu->isar.id_isar6 = 0x01100111; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64AFR0, 0x00000000); + SET_IDREG(isar, ID_AA64AFR1, 0x00000000); + SET_IDREG(isar, ID_AA64DFR0, 0x000001f210305519ull); + SET_IDREG(isar, ID_AA64DFR1, 0x00000000); + SET_IDREG(isar, ID_AA64ISAR0, 0x1011111110212120ull); /* with FEAT_RNG */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0011100001211032ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0220011102101011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1101110120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x15011099); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + 
SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); + SET_IDREG(isar, ID_ISAR6, 0x01100111); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x411FD402; /* r1p2 */ cpu->revidr = 0; @@ -735,7 +813,7 @@ static void aarch64_neoverse_v1_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; /* From 3.7.5 ID_AA64ZFR0_EL1 */ - cpu->isar.id_aa64zfr0 = 0x0000100000100000; + SET_IDREG(isar, ID_AA64ZFR0, 0x0000100000100000); cpu->sve_vq.supported = (1 << 0) /* 128bit */ | (1 << 1); /* 256bit */ @@ -882,6 +960,7 @@ static const ARMCPRegInfo cortex_a710_cp_reginfo[] = { static void aarch64_a710_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a710"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -897,39 +976,39 @@ static void aarch64_a710_initfn(Object *obj) /* Ordered by Section B.4: AArch64 registers */ cpu->midr = 0x412FD471; /* r2p1 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x21021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x21021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f010305619ull; - cpu->isar.id_aa64dfr1 = 0; - cpu->id_aa64afr0 = 0; - cpu->id_aa64afr1 = 0; - cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */ - cpu->isar.id_aa64isar1 = 0x0010111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull; - cpu->clidr = 0x0000001482000023ull; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, 
ID_AA64DFR0, 0x000011f010305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64AFR0, 0); + SET_IDREG(isar, ID_AA64AFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x0221111110212120ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0010111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011110101011ull); + SET_IDREG(isar, CLIDR, 0x0000001482000023ull); cpu->gm_blocksize = 4; cpu->ctr = 0x000000049444c004ull; cpu->dcz_blocksize = 4; @@ -983,6 +1062,7 @@ static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = { static void aarch64_neoverse_n2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n2"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -998,39 +1078,39 @@ static void aarch64_neoverse_n2_initfn(Object *obj) /* Ordered by Section B.5: AArch64 ID registers */ cpu->midr = 0x410FD493; /* r0p3 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f210305619ull; - cpu->isar.id_aa64dfr1 = 0; - cpu->id_aa64afr0 = 0; - cpu->id_aa64afr1 = 0; - cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0011111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull; - cpu->clidr = 0x0000001482000023ull; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64DFR0, 0x000011f210305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64AFR0, 0); + SET_IDREG(isar, ID_AA64AFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x1221111110212120ull); /* with Crypto and FEAT_RNG */ 
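/*
 * The hex constants stored by SET_IDREG pack one 4-bit feature field
 * per nibble, as laid out in the Arm ARM. In ID_AA64ISAR0 just above,
 * for instance, ATOMIC occupies bits [23:20] and RNDR bits [63:60],
 * so 0x1221111110212120 advertises ATOMIC=2 (FEAT_LSE) and RNDR=1
 * (FEAT_RNG). A stand-alone sanity-check sketch, not QEMU code:
 */
#include <stdint.h>
#include <stdio.h>

/* Extract a 4-bit ID field given the bit position of its LSB. */
static unsigned id_field(uint64_t reg, unsigned lsb)
{
    return (reg >> lsb) & 0xf;
}

int main(void)
{
    uint64_t id_aa64isar0 = 0x1221111110212120ull;

    printf("ATOMIC=%u RNDR=%u\n",
           id_field(id_aa64isar0, 20),   /* prints ATOMIC=2 */
           id_field(id_aa64isar0, 60));  /* prints RNDR=1 */
    return 0;
}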
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0011111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011112101011ull); + SET_IDREG(isar, CLIDR, 0x0000001482000023ull); cpu->gm_blocksize = 4; cpu->ctr = 0x00000004b444c004ull; cpu->dcz_blocksize = 4; @@ -1083,6 +1163,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) void aarch64_max_tcg_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; uint64_t t; uint32_t u; @@ -1118,10 +1199,10 @@ void aarch64_max_tcg_initfn(Object *obj) * We're going to set FEAT_S2FWB, which mandates that CLIDR_EL1.{LoUU,LoUIS} * are zero. */ - u = cpu->clidr; + u = GET_IDREG(isar, CLIDR); u = FIELD_DP32(u, CLIDR_EL1, LOUIS, 0); u = FIELD_DP32(u, CLIDR_EL1, LOUU, 0); - cpu->clidr = u; + SET_IDREG(isar, CLIDR, u); /* * Set CTR_EL0.DIC and IDC to tell the guest it doesn't need to @@ -1133,12 +1214,12 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, CTR_EL0, DIC, 1); cpu->ctr = t; - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */ t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); /* FEAT_CRC32 */ - t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); /* FEAT_LSE */ + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 3); /* FEAT_LSE, FEAT_LSE128 */ t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); /* FEAT_RDM */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); /* FEAT_SHA3 */ t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); /* FEAT_SM3 */ @@ -1148,9 +1229,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */ t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */ t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */ - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */ t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED); t = FIELD_DP64(t, ID_AA64ISAR1, API, 1); @@ -1164,16 +1245,18 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */ t = FIELD_DP64(t, ID_AA64ISAR1, XS, 1); /* FEAT_XS */ - cpu->isar.id_aa64isar1 = t; + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64isar2; + t = GET_IDREG(isar, ID_AA64ISAR2); t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ - cpu->isar.id_aa64isar2 = t; + t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 1); /* FEAT_CSSC */ + t = FIELD_DP64(t, ID_AA64ISAR2, ATS1A, 1); /* FEAT_ATS1A */ + SET_IDREG(isar, ID_AA64ISAR2, t); - t = cpu->isar.id_aa64pfr0; + t = GET_IDREG(isar, ID_AA64PFR0); t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */ @@ -1182,9 +1265,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */ - cpu->isar.id_aa64pfr0 = t; + SET_IDREG(isar, ID_AA64PFR0, t); 
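/*
 * These FIELD_DP64 chains only advertise features; translation-time
 * checks read the same fields back through small predicates. A sketch
 * of that consumer side, assuming the FIELD/FIELD_EX64 helpers from
 * hw/registerfields.h and a GET_IDREG accessor as sketched earlier
 * (the predicate names here are illustrative):
 */
#include <stdbool.h>

FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)

static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id)
{
    /* ID fields grow monotonically: any nonzero value implies FEAT_LSE. */
    return FIELD_EX64(GET_IDREG(id, ID_AA64ISAR0), ID_AA64ISAR0, ATOMIC) != 0;
}

static inline bool isar_feature_aa64_lse128(const ARMISARegisters *id)
{
    /* ATOMIC >= 3 is FEAT_LSE128, as advertised for -cpu max just above. */
    return FIELD_EX64(GET_IDREG(id, ID_AA64ISAR0), ID_AA64ISAR0, ATOMIC) >= 3;
}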
- t = cpu->isar.id_aa64pfr1; + t = GET_IDREG(isar, ID_AA64PFR1); t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */ t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */ /* @@ -1194,12 +1277,13 @@ void aarch64_max_tcg_initfn(Object *obj) */ t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ - t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64PFR1, SME, 2); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */ - cpu->isar.id_aa64pfr1 = t; + t = FIELD_DP64(t, ID_AA64PFR1, GCS, 1); /* FEAT_GCS */ + SET_IDREG(isar, ID_AA64PFR1, t); - t = cpu->isar.id_aa64mmfr0; + t = GET_IDREG(isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */ @@ -1207,9 +1291,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */ t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */ t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(isar, ID_AA64MMFR0, t); - t = cpu->isar.id_aa64mmfr1; + t = GET_IDREG(isar, ID_AA64MMFR1); t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ @@ -1222,9 +1306,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ - cpu->isar.id_aa64mmfr1 = t; + SET_IDREG(isar, ID_AA64MMFR1, t); - t = cpu->isar.id_aa64mmfr2; + t = GET_IDREG(isar, ID_AA64MMFR2); t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */ t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */ @@ -1238,39 +1322,50 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ - cpu->isar.id_aa64mmfr2 = t; + SET_IDREG(isar, ID_AA64MMFR2, t); - t = cpu->isar.id_aa64mmfr3; + t = GET_IDREG(isar, ID_AA64MMFR3); + t = FIELD_DP64(t, ID_AA64MMFR3, TCRX, 1); /* FEAT_TCR2 */ + t = FIELD_DP64(t, ID_AA64MMFR3, SCTLRX, 1); /* FEAT_SCTLR2 */ + t = FIELD_DP64(t, ID_AA64MMFR3, MEC, 1); /* FEAT_MEC */ t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */ - cpu->isar.id_aa64mmfr3 = t; + t = FIELD_DP64(t, ID_AA64MMFR3, S1PIE, 1); /* FEAT_S1PIE */ + t = FIELD_DP64(t, ID_AA64MMFR3, S2PIE, 1); /* FEAT_S2PIE */ + SET_IDREG(isar, ID_AA64MMFR3, t); - t = cpu->isar.id_aa64zfr0; - t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); + t = GET_IDREG(isar, ID_AA64ZFR0); + t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 2); /* FEAT_SVE2p1 */ t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */ t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */ t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */ + t = FIELD_DP64(t, ID_AA64ZFR0, B16B16, 1); /* FEAT_SVE_B16B16 */ t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */ t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */ t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); 
/* FEAT_I8MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */ - cpu->isar.id_aa64zfr0 = t; + SET_IDREG(isar, ID_AA64ZFR0, t); - t = cpu->isar.id_aa64dfr0; + t = GET_IDREG(isar, ID_AA64DFR0); t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_aa64dfr0 = t; + SET_IDREG(isar, ID_AA64DFR0, t); - t = cpu->isar.id_aa64smfr0; + t = GET_IDREG(isar, ID_AA64SMFR0); t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, BI32I32, 1); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, F16F16, 1); /* FEAT_SME_F16F16 */ + t = FIELD_DP64(t, ID_AA64SMFR0, B16B16, 1); /* FEAT_SME_B16B16 */ + t = FIELD_DP64(t, ID_AA64SMFR0, I16I32, 5); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */ + t = FIELD_DP64(t, ID_AA64SMFR0, SMEVER, 2); /* FEAT_SME2p1 */ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */ - cpu->isar.id_aa64smfr0 = t; + SET_IDREG(isar, ID_AA64SMFR0, t); /* Replicate the same data to the 32-bit id registers. */ aa32_max_features(cpu); @@ -1304,6 +1399,11 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + /* + * The Cortex-A78AE differs slightly from the plain Cortex-A78. We don't + * currently model the latter. 
+ */ + { .name = "cortex-a78ae", .initfn = aarch64_a78ae_initfn }, { .name = "cortex-a710", .initfn = aarch64_a710_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, @@ -1316,7 +1416,7 @@ static void aarch64_cpu_register_types(void) size_t i; for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { - aarch64_cpu_register(&aarch64_cpus[i]); + arm_cpu_register(&aarch64_cpus[i]); } } diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c index 7cadd61..3428bd1 100644 --- a/target/arm/tcg/crypto_helper.c +++ b/target/arm/tcg/crypto_helper.c @@ -10,14 +10,16 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" -#include "cpu.h" -#include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "crypto/aes-round.h" #include "crypto/sm4.h" #include "vec_internal.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + union CRYPTO_STATE { uint8_t bytes[16]; uint32_t words[4]; diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c index 2617cde..2429cab 100644 --- a/target/arm/tcg/gengvec64.c +++ b/target/arm/tcg/gengvec64.c @@ -369,3 +369,14 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } + +void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, + gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, + }; + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 9244848..ba1d775 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -29,8 +29,11 @@ #include "internals.h" #include "qemu/crc32c.h" #include "exec/cpu-common.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "qemu/int128.h" #include "qemu/atomic128.h" #include "fpu/softfloat.h" @@ -399,6 +402,8 @@ AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) +AH_MINMAX_HELPER(sme2_ah_fmax_b16, bfloat16, bfloat16, max) +AH_MINMAX_HELPER(sme2_ah_fmin_b16, bfloat16, bfloat16, min) /* 64-bit versions of the CRC helpers. Note that although the operation * (and the prototypes of crc32c() and crc32() mean that only the bottom @@ -571,6 +576,7 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status) return ret; } +#ifndef CONFIG_USER_ONLY static int el_from_spsr(uint32_t spsr) { /* Return the exception level that this SPSR is requesting a return to, @@ -609,32 +615,12 @@ static int el_from_spsr(uint32_t spsr) } } -static void cpsr_write_from_spsr_elx(CPUARMState *env, - uint32_t val) -{ - uint32_t mask; - - /* Save SPSR_ELx.SS into PSTATE. 
*/ - env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); - val &= ~PSTATE_SS; - - /* Move DIT to the correct location for CPSR */ - if (val & PSTATE_DIT) { - val &= ~PSTATE_DIT; - val |= CPSR_DIT; - } - - mask = aarch32_cpsr_valid_mask(env->features, \ - &env_archcpu(env)->isar); - cpsr_write(env, val, mask, CPSRWriteRaw); -} - void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) { ARMCPU *cpu = env_archcpu(env); int cur_el = arm_current_el(env); unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); - uint32_t spsr = env->banked_spsr[spsr_idx]; + uint64_t spsr = env->banked_spsr[spsr_idx]; int new_el; bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; @@ -653,15 +639,6 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) spsr &= ~PSTATE_SS; } - /* - * FEAT_RME forbids return from EL3 with an invalid security state. - * We don't need an explicit check for FEAT_RME here because we enforce - * in scr_write() that you can't set the NSE bit without it. - */ - if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { - goto illegal_return; - } - new_el = el_from_spsr(spsr); if (new_el == -1) { goto illegal_return; } @@ -673,6 +650,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + /* + * FEAT_RME forbids return from EL3 to a lower exception level + * with an invalid security state. + * We don't need an explicit check for FEAT_RME here because we enforce + * in scr_write() that you can't set the NSE bit without it. + */ + if (cur_el == 3 && new_el < 3 && + (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { + goto illegal_return; + } + if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { /* Return to an EL which is configured for a different register width */ goto illegal_return; } @@ -687,6 +675,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + /* + * If GetCurrentEXLOCKEN, the exception return path must use GCSPOPCX, + * which will set PSTATE.EXLOCK. We need not explicitly check FEAT_GCS, + * because GCSCR_ELx cannot be set without it.
+ */ + if (new_el == cur_el && + (env->cp15.gcscr_el[cur_el] & GCSCR_EXLOCKEN) && + !(env->pstate & PSTATE_EXLOCK)) { + goto illegal_return; + } + bql_lock(); arm_call_pre_el_change_hook(cpu); bql_unlock(); @@ -780,6 +779,7 @@ illegal_return: qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); } +#endif /* !CONFIG_USER_ONLY */ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) { @@ -1147,7 +1147,6 @@ static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, env->ZF = 1; /* our env->ZF encoding is inverted */ env->CF = 0; env->VF = 0; - return; } void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) @@ -1547,7 +1546,6 @@ static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, env->ZF = 1; /* our env->ZF encoding is inverted */ env->CF = 0; env->VF = 0; - return; } void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 8502346..b6008b5 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -80,7 +80,6 @@ DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) -DEF_HELPER_2(exception_return, void, env, i64) DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64) @@ -145,3 +144,7 @@ DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32 DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +#ifndef CONFIG_USER_ONLY +DEF_HELPER_2(exception_return, void, env, i64) +#endif diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h index 858d691..c551797 100644 --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -33,101 +33,147 @@ DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, 
ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_h, 
TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, 
TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) 
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_7(sme_fmopa_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_bfmopa_w, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sme_fmops_w_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_fmops_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_fmops_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_fmops_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_bfmops_w, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_bfmops, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sme_ah_fmops_w_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_bfmops_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_ah_bfmops, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG, @@ -144,3 +190,168 @@ DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sme2_bmopa_s, TCG_CALL_NO_RWG, + void, 
ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme2_smopa2_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme2_umopa2_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(sme2_fdot_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sme2_fdot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sme2_fvdot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_suvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_usvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_smlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_usmlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_smlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_sumlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) 
+DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_scvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_ucvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_3(sme2_sqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_zip2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_uzp2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_zip4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_s, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_uzp4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_sclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_uclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_fclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_fclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_fclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_bfclamp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sme2_sel_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index 0b1b588..c3541a8 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -676,11 +676,21 @@ DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_tbl_d, 
TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) @@ -701,12 +711,22 @@ DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -937,10 +957,17 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) +DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32) DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32) DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_while2l, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_while2g, TCG_CALL_NO_RWG, i32, ptr, i32, i32) + +DEF_HELPER_FLAGS_3(sve_whilecl, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_whilecg, TCG_CALL_NO_RWG, i32, ptr, i32, i32) + DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(sve_subri_s, 
TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) @@ -1071,6 +1098,55 @@ DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fminqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, i64, i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, @@ -1120,6 +1196,8 @@ DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fadd_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, @@ -1127,6 +1205,8 @@ DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fsub_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, @@ -1134,6 +1214,8 @@ DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmul_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG, @@ -1148,6 +1230,8 @@ DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmin_b16, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, @@ -1155,6 +1239,8 @@ DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, @@ -1162,6 +1248,8 @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, @@ -1169,6 +1257,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, @@ -1176,6 +1266,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fminnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, @@ -1183,6 +1275,8 @@ DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmaxnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG, @@ -1447,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1454,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1461,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1468,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, 
fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1475,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1482,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1489,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1547,945 +1655,1015 @@ DEF_HELPER_FLAGS_4(sve2_usubw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_usubw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_usubw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
-DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, 
tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
-DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1squ_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1squ_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1squ_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ld1squ_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1sq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1sq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1dq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1dq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1sq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1sq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1dq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st1dq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_ldqq_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_ldqq_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_ldqq_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_ldqq_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_stqq_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_stqq_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, ptr, ptr, tl, i32)
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_stqq_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
+DEF_HELPER_FLAGS_6(sve_stqq_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i64)
 DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -2922,3 +3100,69 @@ DEF_HELPER_FLAGS_4(sve2_sqshlu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqshlu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqshlu_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqshlu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_addqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_addqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_addqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_addqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_smaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_smaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_smaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_smaxqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_sminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_sminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_sminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_sminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_umaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_umaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_umaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_umaxqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_uminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(pext, TCG_CALL_NO_RWG, void, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_orqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_orqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_orqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_orqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_eorqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_eorqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_eorqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_eorqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2p1_andqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_andqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_andqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_andqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(pmov_pv_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(pmov_pv_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(pmov_pv_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(pmov_vp_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(pmov_vp_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(pmov_vp_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2p1_ld1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_ld1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+
+DEF_HELPER_FLAGS_5(sve2p1_st1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64)
+DEF_HELPER_FLAGS_5(sve2p1_st1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl,
i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h new file mode 100644 index 0000000..4636d1b --- /dev/null +++ b/target/arm/tcg/helper.h @@ -0,0 +1,1123 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) + +DEF_HELPER_3(add_setq, i32, env, i32, i32) +DEF_HELPER_3(add_saturate, i32, env, i32, i32) +DEF_HELPER_3(sub_saturate, i32, env, i32, i32) +DEF_HELPER_3(add_usaturate, i32, env, i32, i32) +DEF_HELPER_3(sub_usaturate, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32) +DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32) + +#define PAS_OP(pfx) \ + DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr) + +PAS_OP(s) +PAS_OP(u) +#undef PAS_OP + +#define PAS_OP(pfx) \ + DEF_HELPER_2(pfx ## add8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## add16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \ + DEF_HELPER_2(pfx ## subaddx, i32, i32, i32) +PAS_OP(q) +PAS_OP(sh) +PAS_OP(uq) +PAS_OP(uh) +#undef PAS_OP + +DEF_HELPER_3(ssat, i32, env, i32, i32) +DEF_HELPER_3(usat, i32, env, i32, i32) +DEF_HELPER_3(ssat16, i32, env, i32, i32) +DEF_HELPER_3(usat16, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) + +DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, + i32, i32, i32, i32) +DEF_HELPER_2(exception_internal, noreturn, env, i32) +DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32) +DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32) +DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32) +DEF_HELPER_2(exception_swstep, noreturn, env, i32) +DEF_HELPER_2(exception_pc_alignment, noreturn, env, vaddr) +DEF_HELPER_1(setend, void, env) +DEF_HELPER_2(wfi, void, env, i32) +DEF_HELPER_1(wfe, void, env) +DEF_HELPER_2(wfit, void, env, i64) +DEF_HELPER_1(yield, void, env) +DEF_HELPER_1(pre_hvc, void, env) +DEF_HELPER_2(pre_smc, void, env, i32) +DEF_HELPER_1(vesb, void, env) + +DEF_HELPER_3(cpsr_write, void, env, i32, i32) +DEF_HELPER_2(cpsr_write_eret, void, env, i32) +DEF_HELPER_1(cpsr_read, i32, env) + +DEF_HELPER_3(v7m_msr, void, env, i32, i32) +DEF_HELPER_2(v7m_mrs, i32, env, i32) + +DEF_HELPER_2(v7m_bxns, void, env, i32) +DEF_HELPER_2(v7m_blxns, void, env, i32) + +DEF_HELPER_3(v7m_tt, i32, env, i32, i32) + +DEF_HELPER_1(v7m_preserve_fp_state, void, env) + +DEF_HELPER_2(v7m_vlstm, void, env, i32) +DEF_HELPER_2(v7m_vlldm, void, env, i32) + +DEF_HELPER_2(v8m_stackcheck, void, env, i32) + +DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) + +DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(lookup_cp_reg, 
TCG_CALL_NO_RWG_SE, cptr, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) +DEF_HELPER_2(get_cp_reg, i32, env, cptr) +DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) +DEF_HELPER_2(get_cp_reg64, i64, env, cptr) + +DEF_HELPER_2(get_r13_banked, i32, env, i32) +DEF_HELPER_3(set_r13_banked, void, env, i32, i32) + +DEF_HELPER_3(mrs_banked, i32, env, i32, i32) +DEF_HELPER_4(msr_banked, void, env, i32, i32, i32) + +DEF_HELPER_2(get_user_reg, i32, env, i32) +DEF_HELPER_3(set_user_reg, void, env, i32, i32) + +DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) + +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, vaddr, i32, i32, i32) + +DEF_HELPER_1(vfp_get_fpscr, i32, env) +DEF_HELPER_2(vfp_set_fpscr, void, env, i32) + +DEF_HELPER_3(vfp_addh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_adds, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_addd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_subh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_subs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_subd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_mulh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_muls, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_muld, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_divh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_divs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_divd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_maxh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_maxs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_maxd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_minh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_mins, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_mind, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_maxnums, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_minnumh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_minnums, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_minnumd, f64, f64, f64, fpst) +DEF_HELPER_2(vfp_sqrth, f16, f16, fpst) +DEF_HELPER_2(vfp_sqrts, f32, f32, fpst) +DEF_HELPER_2(vfp_sqrtd, f64, f64, fpst) +DEF_HELPER_3(vfp_cmph, void, f16, f16, env) +DEF_HELPER_3(vfp_cmps, void, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) +DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env) +DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) +DEF_HELPER_3(vfp_cmped, void, f64, f64, env) + +DEF_HELPER_2(vfp_fcvtds, f64, f32, fpst) +DEF_HELPER_2(vfp_fcvtsd, f32, f64, fpst) +DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, fpst) +DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, fpst) + +DEF_HELPER_2(vfp_uitoh, f16, i32, fpst) +DEF_HELPER_2(vfp_uitos, f32, i32, fpst) +DEF_HELPER_2(vfp_uitod, f64, i32, fpst) +DEF_HELPER_2(vfp_sitoh, f16, i32, fpst) +DEF_HELPER_2(vfp_sitos, f32, i32, fpst) +DEF_HELPER_2(vfp_sitod, f64, i32, fpst) + +DEF_HELPER_2(vfp_touih, i32, f16, fpst) +DEF_HELPER_2(vfp_touis, i32, f32, fpst) +DEF_HELPER_2(vfp_touid, i32, f64, fpst) +DEF_HELPER_2(vfp_touizh, i32, f16, fpst) +DEF_HELPER_2(vfp_touizs, i32, f32, fpst) +DEF_HELPER_2(vfp_touizd, i32, f64, fpst) +DEF_HELPER_2(vfp_tosih, s32, f16, fpst) +DEF_HELPER_2(vfp_tosis, s32, f32, fpst) +DEF_HELPER_2(vfp_tosid, s32, f64, fpst) +DEF_HELPER_2(vfp_tosizh, s32, f16, fpst) 
+DEF_HELPER_2(vfp_tosizs, s32, f32, fpst) +DEF_HELPER_2(vfp_tosizd, s32, f64, fpst) + +DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toshh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toulh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toslh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_touqh, i64, f16, i32, fpst) +DEF_HELPER_3(vfp_tosqh, i64, f16, i32, fpst) +DEF_HELPER_3(vfp_toshs, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosls, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosqs, i64, f32, i32, fpst) +DEF_HELPER_3(vfp_touhs, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touls, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touqs, i64, f32, i32, fpst) +DEF_HELPER_3(vfp_toshd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosld, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosqd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tould, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touqd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_shtos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sltos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sqtos, f32, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_ultos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_uqtos, f32, i64, i32, fpst) +DEF_HELPER_3(vfp_shtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sltod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sqtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_ultod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uqtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_shtoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sltoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_ultoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, fpst) +DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, fpst) + +DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, fpst) + +DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, fpst) + +DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, 
fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, fpst, i32) + +DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, fpst) +DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, fpst) +DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) + +DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64) + +DEF_HELPER_3(shl_cc, i32, env, i32, i32) +DEF_HELPER_3(shr_cc, i32, env, i32, i32) +DEF_HELPER_3(sar_cc, i32, env, i32, i32) +DEF_HELPER_3(ror_cc, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, fpst) + +DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env) +DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, fpst) + +DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) + +/* neon_helper.c */ +DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) + +DEF_HELPER_2(neon_shl_u16, i32, i32, i32) +DEF_HELPER_2(neon_shl_s16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) +DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) +DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) 
+DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) +DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_2(neon_add_u8, i32, i32, i32) +DEF_HELPER_2(neon_add_u16, i32, i32, i32) +DEF_HELPER_2(neon_sub_u8, i32, i32, i32) +DEF_HELPER_2(neon_sub_u16, 
i32, i32, i32) +DEF_HELPER_2(neon_mul_u8, i32, i32, i32) +DEF_HELPER_2(neon_mul_u16, i32, i32, i32) + +DEF_HELPER_2(neon_tst_u8, i32, i32, i32) +DEF_HELPER_2(neon_tst_u16, i32, i32, i32) +DEF_HELPER_2(neon_tst_u32, i32, i32, i32) + +DEF_HELPER_1(neon_clz_u8, i32, i32) +DEF_HELPER_1(neon_clz_u16, i32, i32) +DEF_HELPER_1(neon_cls_s8, i32, i32) +DEF_HELPER_1(neon_cls_s16, i32, i32) +DEF_HELPER_1(neon_cls_s32, i32, i32) +DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) +DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32) +DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32) +DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) +DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32) +DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32) + +DEF_HELPER_1(neon_narrow_u8, i64, i64) +DEF_HELPER_1(neon_narrow_u16, i64, i64) +DEF_HELPER_2(neon_unarrow_sat8, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u8, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s8, i64, env, i64) +DEF_HELPER_2(neon_unarrow_sat16, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u16, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s16, i64, env, i64) +DEF_HELPER_2(neon_unarrow_sat32, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u32, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s32, i64, env, i64) +DEF_HELPER_1(neon_narrow_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_high_u16, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64) +DEF_HELPER_1(neon_widen_u8, i64, i32) +DEF_HELPER_1(neon_widen_s8, i64, i32) +DEF_HELPER_1(neon_widen_u16, i64, i32) +DEF_HELPER_1(neon_widen_s16, i64, i32) + +DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) +DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u64, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s64, i64, i32, i32) +DEF_HELPER_2(neon_mull_u8, i64, i32, i32) +DEF_HELPER_2(neon_mull_s8, i64, i32, i32) +DEF_HELPER_2(neon_mull_u16, i64, i32, i32) +DEF_HELPER_2(neon_mull_s16, i64, i32, i32) + +DEF_HELPER_1(neon_negl_u16, i64, i64) +DEF_HELPER_1(neon_negl_u32, i64, i64) + +DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_cge_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acge_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst) +DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst) + 
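Each DEF_HELPER_* entry in this new header is a declaration in QEMU's helper meta-language rather than a plain C prototype. When the file is pulled in through exec/helper-proto.h.inc (as the hflags.c hunk further down does via HELPER_H), the macros expand to prototypes whose type tokens map roughly as i32/i64 -> uint32_t/uint64_t, s32 -> int32_t, ptr -> void *, tl -> target_ulong, env -> CPUARMState *, and fpst -> float_status *. A minimal sketch of the expansion, assuming the stock macro machinery from include/exec/helper-head.h.inc (helper-head.h in older trees; the exact form varies across QEMU versions):

    /* DEF_HELPER_2(neon_add_u8, i32, i32, i32) declares, roughly: */
    uint32_t helper_neon_add_u8(uint32_t a, uint32_t b);

    /*
     * DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32)
     * additionally attaches TCG call flags -- here "no read or write
     * of TCG globals" -- which let the optimizer keep guest registers
     * cached in host registers across the call:
     */
    int32_t helper_sdiv(CPUARMState *env, int32_t num, int32_t den);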
+DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) + +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) +DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) + +DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_idx_4b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_4b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_idx_4h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_4h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sudot_idx_4b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usdot_idx_4b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_idx_2h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_2h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) 
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fadd_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + 
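The gvec_* declarations above and below all share one calling convention: pointers into the destination and source vectors, optionally a float_status, and a 32-bit descriptor into which the translator packed the active and maximum vector sizes (decoded with simd_oprsz()/simd_maxsz() from tcg/tcg-gvec-desc.h). As a rough sketch of the loop such a helper expands to in target/arm/tcg/vec_helper.c -- a paraphrase of the stock DO_3OP pattern, not the verbatim source:

    void helper_gvec_fadd_s(void *vd, void *vn, void *vm,
                            float_status *stat, uint32_t desc)
    {
        intptr_t i, opr_sz = simd_oprsz(desc);   /* active bytes */
        float32 *d = vd, *n = vn, *m = vm;

        for (i = 0; i < opr_sz / sizeof(float32); i++) {
            d[i] = float32_add(n[i], m[i], stat);
        }
        /*
         * Zero from opr_sz up to the maximum vector size, matching the
         * architectural "high bits read as zero" rule; clear_tail() is
         * the file-local helper vec_helper.c uses for this.
         */
        clear_tail(d, opr_sz, simd_maxsz(desc));
    }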
+DEF_HELPER_FLAGS_5(gvec_fsub_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmul_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_recps_nf_h, 
TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmla_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmul_idx_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmla_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmls_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, + void, ptr, 
ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_bfmls_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, fpst) + +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, 
ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(sme2_bfvdot_idx, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmlsl, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmlsl_idx, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl_idx, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti2_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_1h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti2_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti2_4b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti2_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti4_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_1h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti4_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(sme2_luti4_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(sme2_luti4_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 8d79b8b..5c9b9be 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -9,9 +9,13 @@
 #include "cpu.h"
 #include "internals.h"
 #include "cpu-features.h"
-#include "exec/helper-proto.h"
+#include "exec/translation-block.h"
+#include "accel/tcg/cpu-ops.h"
 #include "cpregs.h"
 
+#define HELPER_H "tcg/helper.h"
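The #define HELPER_H / #include "exec/helper-proto.h.inc" pair introduced here replaces the old blanket #include "exec/helper-proto.h": the .h.inc version expands whatever helper list HELPER_H names, so the file pulls in prototypes for exactly its own target's helpers. Each DEF_HELPER_FLAGS_N entry above is consumed by several such expansion passes (C prototypes, TCG call-emission stubs, runtime call descriptors), and the TCG_CALL_NO_RWG flag declares that the helper neither reads nor writes TCG globals, so cached guest registers need not be spilled around the call. A minimal sketch of the prototype pass only, with simplified stand-ins for QEMU's real dh_ctype_* machinery:

/*
 * Sketch only: the real macros live in QEMU's helper-head/helper-proto
 * machinery; these type mappings are simplified stand-ins.
 */
#define dh_ctype_void void
#define dh_ctype_ptr  void *
#define dh_ctype_i32  uint32_t

#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4)        \
    dh_ctype_##ret helper_##name(dh_ctype_##t1, dh_ctype_##t2,      \
                                 dh_ctype_##t3, dh_ctype_##t4);

/* The gvec_xar_d entry above would then expand to roughly:
 *     void helper_gvec_xar_d(void *, void *, void *, uint32_t);
 */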
+#include "exec/helper-proto.h.inc" + static inline bool fgt_svc(CPUARMState *env, int el) { /* @@ -210,6 +214,31 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } +/* + * Return the exception level to which exceptions should be taken for ZT0. + * C.f. the ARM pseudocode function CheckSMEZT0Enabled, after the ZA check. + */ +static int zt0_exception_el(CPUARMState *env, int el) +{ +#ifndef CONFIG_USER_ONLY + if (el <= 1 + && !el_is_in_host(env, el) + && !FIELD_EX64(env->vfp.smcr_el[1], SMCR, EZT0)) { + return 1; + } + if (el <= 2 + && arm_is_el2_enabled(env) + && !FIELD_EX64(env->vfp.smcr_el[2], SMCR, EZT0)) { + return 2; + } + if (arm_feature(env, ARM_FEATURE_EL3) + && !FIELD_EX64(env->vfp.smcr_el[3], SMCR, EZT0)) { + return 3; + } +#endif + return 0; +} + static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, ARMMMUIdx mmu_idx) { @@ -229,6 +258,11 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, TBII, tbii); DP_TBFLAG_A64(flags, TBID, tbid); + /* E2H is used by both VHE and NV2. */ + if (hcr & HCR_E2H) { + DP_TBFLAG_A64(flags, E2H, 1); + } + if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { int sve_el = sve_exception_el(env, el); @@ -265,7 +299,14 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, PSTATE_SM, 1); DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el)); } - DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA)); + + if (FIELD_EX64(env->svcr, SVCR, ZA)) { + DP_TBFLAG_A64(flags, PSTATE_ZA, 1); + if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) { + int zt0_el = zt0_exception_el(env, el); + DP_TBFLAG_A64(flags, ZT0EXC_EL, zt0_el); + } + } } sctlr = regime_sctlr(env, stage1); @@ -354,9 +395,6 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } if (hcr & HCR_NV2) { DP_TBFLAG_A64(flags, NV2, 1); - if (hcr & HCR_E2H) { - DP_TBFLAG_A64(flags, NV2_MEM_E20, 1); - } if (env->cp15.sctlr_el[2] & SCTLR_EE) { DP_TBFLAG_A64(flags, NV2_MEM_BE, 1); } @@ -413,6 +451,44 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); } + if (cpu_isar_feature(aa64_gcs, env_archcpu(env))) { + /* C.f. GCSEnabled */ + if (env->cp15.gcscr_el[el] & GCSCR_PCRSEL) { + switch (el) { + default: + if (!el_is_in_host(env, el) + && !(arm_hcrx_el2_eff(env) & HCRX_GCSEN)) { + break; + } + /* fall through */ + case 2: + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_GCSEN)) { + break; + } + /* fall through */ + case 3: + DP_TBFLAG_A64(flags, GCS_EN, 1); + break; + } + } + + /* C.f. GCSReturnValueCheckEnabled */ + if (env->cp15.gcscr_el[el] & GCSCR_RVCHKEN) { + DP_TBFLAG_A64(flags, GCS_RVCEN, 1); + } + + /* C.f. CheckGCSSTREnabled */ + if (!(env->cp15.gcscr_el[el] & GCSCR_STREN)) { + DP_TBFLAG_A64(flags, GCSSTR_EL, el ? 
el : 1); + } else if (el == 1 + && EX_TBFLAG_ANY(flags, FGT_ACTIVE) + && !FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], + HFGITR_EL2, NGCSSTR_EL1)) { + DP_TBFLAG_A64(flags, GCSSTR_EL, 2); + } + } + if (env->vfp.fpcr & FPCR_AH) { DP_TBFLAG_A64(flags, AH, 1); } @@ -498,7 +574,7 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el) env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx); } -void assert_hflags_rebuild_correctly(CPUARMState *env) +static void assert_hflags_rebuild_correctly(CPUARMState *env) { #ifdef CONFIG_DEBUG_TCG CPUARMTBFlags c = env->hflags; @@ -506,10 +582,116 @@ void assert_hflags_rebuild_correctly(CPUARMState *env) if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) { fprintf(stderr, "TCG hflags mismatch " - "(current:(0x%08x,0x" TARGET_FMT_lx ")" - " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n", + "(current:(0x%08x,0x%016" PRIx64 ")" + " rebuilt:(0x%08x,0x%016" PRIx64 ")\n", c.flags, c.flags2, r.flags, r.flags2); abort(); } #endif } + +static bool mve_no_pred(CPUARMState *env) +{ + /* + * Return true if there is definitely no predication of MVE + * instructions by VPR or LTPSIZE. (Returning false even if there + * isn't any predication is OK; generated code will just be + * a little worse.) + * If the CPU does not implement MVE then this TB flag is always 0. + * + * NOTE: if you change this logic, the "recalculate s->mve_no_pred" + * logic in gen_update_fp_context() needs to be updated to match. + * + * We do not include the effect of the ECI bits here -- they are + * tracked in other TB flags. This simplifies the logic for + * "when did we emit code that changes the MVE_NO_PRED TB flag + * and thus need to end the TB?". + */ + if (cpu_isar_feature(aa32_mve, env_archcpu(env))) { + return false; + } + if (env->v7m.vpr) { + return false; + } + if (env->v7m.ltpsize < 4) { + return false; + } + return true; +} + +TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs) +{ + CPUARMState *env = cpu_env(cs); + CPUARMTBFlags flags; + vaddr pc; + + assert_hflags_rebuild_correctly(env); + flags = env->hflags; + + if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { + pc = env->pc; + if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { + DP_TBFLAG_A64(flags, BTYPE, env->btype); + } + } else { + pc = env->regs[15]; + + if (arm_feature(env, ARM_FEATURE_M)) { + if (arm_feature(env, ARM_FEATURE_M_SECURITY) && + FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) + != env->v7m.secure) { + DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1); + } + + if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) && + (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) || + (env->v7m.secure && + !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) { + /* + * ASPEN is set, but FPCA/SFPA indicate that there is no + * active FP context; we must create a new FP context before + * executing any FP insn. + */ + DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1); + } + + bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; + if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { + DP_TBFLAG_M32(flags, LSPACT, 1); + } + + if (mve_no_pred(env)) { + DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); + } + } else { + /* Note that VECLEN+VECSTRIDE are RES0 for M-profile. 
*/ + DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); + DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { + DP_TBFLAG_A32(flags, VFPEN, 1); + } + } + + DP_TBFLAG_AM32(flags, THUMB, env->thumb); + DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits); + } + + /* + * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. + */ + if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { + DP_TBFLAG_ANY(flags, PSTATE__SS, 1); + } + + return (TCGTBCPUState){ + .pc = pc, + .flags = flags.flags, + .cs_base = flags.flags2, + }; +} diff --git a/target/arm/tcg/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c deleted file mode 100644 index 610b1b2..0000000 --- a/target/arm/tcg/iwmmxt_helper.c +++ /dev/null @@ -1,670 +0,0 @@ -/* - * iwMMXt micro operations for XScale. - * - * Copyright (c) 2007 OpenedHand, Ltd. - * Written by Andrzej Zaborowski <andrew@openedhand.com> - * Copyright (c) 2008 CodeSourcery - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" - -#include "cpu.h" -#include "exec/helper-proto.h" - -/* iwMMXt macros extracted from GNU gdb. */ - -/* Set the SIMD wCASF flags for 8, 16, 32 or 64-bit operations. */ -#define SIMD8_SET(v, n, b) ((v != 0) << ((((b) + 1) * 4) + (n))) -#define SIMD16_SET(v, n, h) ((v != 0) << ((((h) + 1) * 8) + (n))) -#define SIMD32_SET(v, n, w) ((v != 0) << ((((w) + 1) * 16) + (n))) -#define SIMD64_SET(v, n) ((v != 0) << (32 + (n))) -/* Flags to pass as "n" above. */ -#define SIMD_NBIT -1 -#define SIMD_ZBIT -2 -#define SIMD_CBIT -3 -#define SIMD_VBIT -4 -/* Various status bit macros. */ -#define NBIT8(x) ((x) & 0x80) -#define NBIT16(x) ((x) & 0x8000) -#define NBIT32(x) ((x) & 0x80000000) -#define NBIT64(x) ((x) & 0x8000000000000000ULL) -#define ZBIT8(x) (((x) & 0xff) == 0) -#define ZBIT16(x) (((x) & 0xffff) == 0) -#define ZBIT32(x) (((x) & 0xffffffff) == 0) -#define ZBIT64(x) (x == 0) -/* Sign extension macros. 
*/ -#define EXTEND8H(a) ((uint16_t) (int8_t) (a)) -#define EXTEND8(a) ((uint32_t) (int8_t) (a)) -#define EXTEND16(a) ((uint32_t) (int16_t) (a)) -#define EXTEND16S(a) ((int32_t) (int16_t) (a)) -#define EXTEND32(a) ((uint64_t) (int32_t) (a)) - -uint64_t HELPER(iwmmxt_maddsq)(uint64_t a, uint64_t b) -{ - a = (( - EXTEND16S((a >> 0) & 0xffff) * EXTEND16S((b >> 0) & 0xffff) + - EXTEND16S((a >> 16) & 0xffff) * EXTEND16S((b >> 16) & 0xffff) - ) & 0xffffffff) | ((uint64_t) ( - EXTEND16S((a >> 32) & 0xffff) * EXTEND16S((b >> 32) & 0xffff) + - EXTEND16S((a >> 48) & 0xffff) * EXTEND16S((b >> 48) & 0xffff) - ) << 32); - return a; -} - -uint64_t HELPER(iwmmxt_madduq)(uint64_t a, uint64_t b) -{ - a = (( - ((a >> 0) & 0xffff) * ((b >> 0) & 0xffff) + - ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff) - ) & 0xffffffff) | (( - ((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) + - ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff) - ) << 32); - return a; -} - -uint64_t HELPER(iwmmxt_sadb)(uint64_t a, uint64_t b) -{ -#define abs(x) (((x) >= 0) ? x : -x) -#define SADB(SHR) abs((int) ((a >> SHR) & 0xff) - (int) ((b >> SHR) & 0xff)) - return - SADB(0) + SADB(8) + SADB(16) + SADB(24) + - SADB(32) + SADB(40) + SADB(48) + SADB(56); -#undef SADB -} - -uint64_t HELPER(iwmmxt_sadw)(uint64_t a, uint64_t b) -{ -#define SADW(SHR) \ - abs((int) ((a >> SHR) & 0xffff) - (int) ((b >> SHR) & 0xffff)) - return SADW(0) + SADW(16) + SADW(32) + SADW(48); -#undef SADW -} - -uint64_t HELPER(iwmmxt_mulslw)(uint64_t a, uint64_t b) -{ -#define MULS(SHR) ((uint64_t) ((( \ - EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ - ) >> 0) & 0xffff) << SHR) - return MULS(0) | MULS(16) | MULS(32) | MULS(48); -#undef MULS -} - -uint64_t HELPER(iwmmxt_mulshw)(uint64_t a, uint64_t b) -{ -#define MULS(SHR) ((uint64_t) ((( \ - EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ - ) >> 16) & 0xffff) << SHR) - return MULS(0) | MULS(16) | MULS(32) | MULS(48); -#undef MULS -} - -uint64_t HELPER(iwmmxt_mululw)(uint64_t a, uint64_t b) -{ -#define MULU(SHR) ((uint64_t) ((( \ - ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ - ) >> 0) & 0xffff) << SHR) - return MULU(0) | MULU(16) | MULU(32) | MULU(48); -#undef MULU -} - -uint64_t HELPER(iwmmxt_muluhw)(uint64_t a, uint64_t b) -{ -#define MULU(SHR) ((uint64_t) ((( \ - ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ - ) >> 16) & 0xffff) << SHR) - return MULU(0) | MULU(16) | MULU(32) | MULU(48); -#undef MULU -} - -uint64_t HELPER(iwmmxt_macsw)(uint64_t a, uint64_t b) -{ -#define MACS(SHR) ( \ - EXTEND16((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff)) - return (int64_t) (MACS(0) + MACS(16) + MACS(32) + MACS(48)); -#undef MACS -} - -uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b) -{ -#define MACU(SHR) ( \ - (uint32_t) ((a >> SHR) & 0xffff) * \ - (uint32_t) ((b >> SHR) & 0xffff)) - return MACU(0) + MACU(16) + MACU(32) + MACU(48); -#undef MACU -} - -#define NZBIT8(x, i) \ - SIMD8_SET(NBIT8((x) & 0xff), SIMD_NBIT, i) | \ - SIMD8_SET(ZBIT8((x) & 0xff), SIMD_ZBIT, i) -#define NZBIT16(x, i) \ - SIMD16_SET(NBIT16((x) & 0xffff), SIMD_NBIT, i) | \ - SIMD16_SET(ZBIT16((x) & 0xffff), SIMD_ZBIT, i) -#define NZBIT32(x, i) \ - SIMD32_SET(NBIT32((x) & 0xffffffff), SIMD_NBIT, i) | \ - SIMD32_SET(ZBIT32((x) & 0xffffffff), SIMD_ZBIT, i) -#define NZBIT64(x) \ - SIMD64_SET(NBIT64(x), SIMD_NBIT) | \ - SIMD64_SET(ZBIT64(x), SIMD_ZBIT) -#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3) \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) 
& 0xff) << 0) | (((b >> SH0) & 0xff) << 8) | \ - (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) | \ - (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) | \ - (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | \ - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) & 0xffff) << 0) | \ - (((b >> SH0) & 0xffff) << 16) | \ - (((a >> SH2) & 0xffff) << 32) | \ - (((b >> SH2) & 0xffff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) | \ - NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) & 0xffffffff) << 0) | \ - (((b >> SH0) & 0xffffffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - (((x >> SH0) & 0xff) << 0) | \ - (((x >> SH1) & 0xff) << 16) | \ - (((x >> SH2) & 0xff) << 32) | \ - (((x >> SH3) & 0xff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - (((x >> SH0) & 0xffff) << 0) | \ - (((x >> SH2) & 0xffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = (((x >> SH0) & 0xffffffff) << 0); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) | \ - ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) | \ - ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) | \ - ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) | \ - ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = EXTEND32((x >> SH0) & 0xffffffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ - return x; \ -} -IWMMXT_OP_UNPACK(l, 0, 8, 16, 24) -IWMMXT_OP_UNPACK(h, 32, 40, 48, 56) - -#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O) \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) | \ - CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) | \ - CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) | \ - CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 
1) | \ - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) | \ - CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | \ - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = CMP(0, Tl, O, 0xffffffff) | \ - CMP(32, Tl, O, 0xffffffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ - return a; \ -} -#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ - (TYPE) ((b >> SHR) & MASK)) ? (uint64_t) MASK : 0) << SHR) -IWMMXT_OP_CMP(cmpeq, uint8_t, uint16_t, uint32_t, ==) -IWMMXT_OP_CMP(cmpgts, int8_t, int16_t, int32_t, >) -IWMMXT_OP_CMP(cmpgtu, uint8_t, uint16_t, uint32_t, >) -#undef CMP -#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ - (TYPE) ((b >> SHR) & MASK)) ? a : b) & ((uint64_t) MASK << SHR)) -IWMMXT_OP_CMP(mins, int8_t, int16_t, int32_t, <) -IWMMXT_OP_CMP(minu, uint8_t, uint16_t, uint32_t, <) -IWMMXT_OP_CMP(maxs, int8_t, int16_t, int32_t, >) -IWMMXT_OP_CMP(maxu, uint8_t, uint16_t, uint32_t, >) -#undef CMP -#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ - OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) -IWMMXT_OP_CMP(subn, uint8_t, uint16_t, uint32_t, -) -IWMMXT_OP_CMP(addn, uint8_t, uint16_t, uint32_t, +) -#undef CMP -/* TODO Signed- and Unsigned-Saturation */ -#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ - OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) -IWMMXT_OP_CMP(subu, uint8_t, uint16_t, uint32_t, -) -IWMMXT_OP_CMP(addu, uint8_t, uint16_t, uint32_t, +) -IWMMXT_OP_CMP(subs, int8_t, int16_t, int32_t, -) -IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +) -#undef CMP -#undef IWMMXT_OP_CMP - -#define AVGB(SHR) ((( \ - ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR) -#define IWMMXT_OP_AVGB(r) \ -uint64_t HELPER(iwmmxt_avgb##r)(CPUARMState *env, uint64_t a, uint64_t b) \ -{ \ - const int round = r; \ - a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) | \ - AVGB(32) | AVGB(40) | AVGB(48) | AVGB(56); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - SIMD8_SET(ZBIT8((a >> 0) & 0xff), SIMD_ZBIT, 0) | \ - SIMD8_SET(ZBIT8((a >> 8) & 0xff), SIMD_ZBIT, 1) | \ - SIMD8_SET(ZBIT8((a >> 16) & 0xff), SIMD_ZBIT, 2) | \ - SIMD8_SET(ZBIT8((a >> 24) & 0xff), SIMD_ZBIT, 3) | \ - SIMD8_SET(ZBIT8((a >> 32) & 0xff), SIMD_ZBIT, 4) | \ - SIMD8_SET(ZBIT8((a >> 40) & 0xff), SIMD_ZBIT, 5) | \ - SIMD8_SET(ZBIT8((a >> 48) & 0xff), SIMD_ZBIT, 6) | \ - SIMD8_SET(ZBIT8((a >> 56) & 0xff), SIMD_ZBIT, 7); \ - return a; \ -} -IWMMXT_OP_AVGB(0) -IWMMXT_OP_AVGB(1) -#undef IWMMXT_OP_AVGB -#undef AVGB - -#define AVGW(SHR) ((( \ - ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR) -#define IWMMXT_OP_AVGW(r) \ -uint64_t HELPER(iwmmxt_avgw##r)(CPUARMState *env, uint64_t a, uint64_t b) \ -{ \ - const int round = r; \ - a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - SIMD16_SET(ZBIT16((a >> 0) & 0xffff), SIMD_ZBIT, 0) | \ - SIMD16_SET(ZBIT16((a >> 16) & 0xffff), SIMD_ZBIT, 1) | \ - SIMD16_SET(ZBIT16((a >> 32) & 0xffff), SIMD_ZBIT, 2) | \ - SIMD16_SET(ZBIT16((a >> 
48) & 0xffff), SIMD_ZBIT, 3); \ - return a; \ -} -IWMMXT_OP_AVGW(0) -IWMMXT_OP_AVGW(1) -#undef IWMMXT_OP_AVGW -#undef AVGW - -uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n) -{ - a >>= n << 3; - a |= b << (64 - (n << 3)); - return a; -} - -uint64_t HELPER(iwmmxt_insr)(uint64_t x, uint32_t a, uint32_t b, uint32_t n) -{ - x &= ~((uint64_t) b << n); - x |= (uint64_t) (a & b) << n; - return x; -} - -uint32_t HELPER(iwmmxt_setpsr_nz)(uint64_t x) -{ - return SIMD64_SET((x == 0), SIMD_ZBIT) | - SIMD64_SET((x & (1ULL << 63)), SIMD_NBIT); -} - -uint64_t HELPER(iwmmxt_bcstb)(uint32_t arg) -{ - arg &= 0xff; - return - ((uint64_t) arg << 0 ) | ((uint64_t) arg << 8 ) | - ((uint64_t) arg << 16) | ((uint64_t) arg << 24) | - ((uint64_t) arg << 32) | ((uint64_t) arg << 40) | - ((uint64_t) arg << 48) | ((uint64_t) arg << 56); -} - -uint64_t HELPER(iwmmxt_bcstw)(uint32_t arg) -{ - arg &= 0xffff; - return - ((uint64_t) arg << 0 ) | ((uint64_t) arg << 16) | - ((uint64_t) arg << 32) | ((uint64_t) arg << 48); -} - -uint64_t HELPER(iwmmxt_bcstl)(uint32_t arg) -{ - return arg | ((uint64_t) arg << 32); -} - -uint64_t HELPER(iwmmxt_addcb)(uint64_t x) -{ - return - ((x >> 0) & 0xff) + ((x >> 8) & 0xff) + - ((x >> 16) & 0xff) + ((x >> 24) & 0xff) + - ((x >> 32) & 0xff) + ((x >> 40) & 0xff) + - ((x >> 48) & 0xff) + ((x >> 56) & 0xff); -} - -uint64_t HELPER(iwmmxt_addcw)(uint64_t x) -{ - return - ((x >> 0) & 0xffff) + ((x >> 16) & 0xffff) + - ((x >> 32) & 0xffff) + ((x >> 48) & 0xffff); -} - -uint64_t HELPER(iwmmxt_addcl)(uint64_t x) -{ - return (x & 0xffffffff) + (x >> 32); -} - -uint32_t HELPER(iwmmxt_msbb)(uint64_t x) -{ - return - ((x >> 7) & 0x01) | ((x >> 14) & 0x02) | - ((x >> 21) & 0x04) | ((x >> 28) & 0x08) | - ((x >> 35) & 0x10) | ((x >> 42) & 0x20) | - ((x >> 49) & 0x40) | ((x >> 56) & 0x80); -} - -uint32_t HELPER(iwmmxt_msbw)(uint64_t x) -{ - return - ((x >> 15) & 0x01) | ((x >> 30) & 0x02) | - ((x >> 45) & 0x04) | ((x >> 52) & 0x08); -} - -uint32_t HELPER(iwmmxt_msbl)(uint64_t x) -{ - return ((x >> 31) & 0x01) | ((x >> 62) & 0x02); -} - -/* FIXME: Split wCASF setting into a separate op to avoid env use. 
*/ -uint64_t HELPER(iwmmxt_srlw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) | - (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) | - (((x & (0xffffll << 32)) >> n) & (0xffffll << 32)) | - (((x & (0xffffll << 48)) >> n) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_srll)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x & (0xffffffffll << 0)) >> n) | - ((x >> n) & (0xffffffffll << 32)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_srlq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x >>= n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_sllw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) | - (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) | - (((x & (0xffffll << 32)) << n) & (0xffffll << 32)) | - (((x & (0xffffll << 48)) << n) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_slll)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x << n) & (0xffffffffll << 0)) | - ((x & (0xffffffffll << 32)) << n); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_sllq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x <<= n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_sraw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) | - ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) | - ((uint64_t) ((EXTEND16(x >> 32) >> n) & 0xffff) << 32) | - ((uint64_t) ((EXTEND16(x >> 48) >> n) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_sral)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) | - (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_sraq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (int64_t) x >> n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_rorw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((((x & (0xffffll << 0)) >> n) | - ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) | - ((((x & (0xffffll << 16)) >> n) | - ((x & (0xffffll << 16)) << (16 - n))) & (0xffffll << 16)) | - ((((x & (0xffffll << 32)) >> n) | - ((x & (0xffffll << 32)) << (16 - n))) & (0xffffll << 32)) | - ((((x & (0xffffll << 48)) >> n) | - ((x & (0xffffll << 48)) << (16 - n))) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x & (0xffffffffll << 0)) >> n) | - ((x >> n) & (0xffffffffll << 32)) | - ((x << (32 - n)) & (0xffffffffll << 0)) | - ((x & (0xffffffffll << 32)) << (32 - n)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); 
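/*
 * Illustration, not part of the patch: the NZBIT8/16/32/64 macros used by
 * all of these deleted helpers compute per-lane negative (N) and zero (Z)
 * flags and pack them into the wCASF status word via the SIMD*_SET macros
 * defined above, which place lane w's N bit just below bit (w + 1) * g
 * (g = 4, 8, 16 or 32 by element size) and its Z bit one lower. The same
 * computation for two 32-bit lanes, written out plainly:
 */
static inline uint64_t wcasf_nz_2x32(uint64_t x)
{
    uint32_t lane0 = (uint32_t)x;
    uint32_t lane1 = (uint32_t)(x >> 32);
    uint64_t flags = 0;

    flags |= (uint64_t)(lane0 >> 31) << 15;   /* N, lane 0: bit 1*16 - 1 */
    flags |= (uint64_t)(lane0 == 0)  << 14;   /* Z, lane 0: bit 1*16 - 2 */
    flags |= (uint64_t)(lane1 >> 31) << 31;   /* N, lane 1: bit 2*16 - 1 */
    flags |= (uint64_t)(lane1 == 0)  << 30;   /* Z, lane 1: bit 2*16 - 2 */
    return flags;
}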
- return x; -} - -uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ror64(x, n); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_shufh)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) | - (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) | - (((x >> ((n << 0) & 0x30)) & 0xffff) << 32) | - (((x >> ((n >> 2) & 0x30)) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -/* TODO: Unsigned-Saturation */ -uint64_t HELPER(iwmmxt_packuw)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | - (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | - (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | - (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); - return a; -} - -uint64_t HELPER(iwmmxt_packul)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | - (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); - return a; -} - -uint64_t HELPER(iwmmxt_packuq)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); - return a; -} - -/* TODO: Signed-Saturation */ -uint64_t HELPER(iwmmxt_packsw)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | - (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | - (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | - (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); - return a; -} - -uint64_t HELPER(iwmmxt_packsl)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | - (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); - return a; -} - -uint64_t HELPER(iwmmxt_packsq)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); - return a; -} - -uint64_t HELPER(iwmmxt_muladdsl)(uint64_t c, uint32_t a, uint32_t b) -{ - return c + ((int32_t) EXTEND32(a) * (int32_t) EXTEND32(b)); -} - -uint64_t HELPER(iwmmxt_muladdsw)(uint64_t c, uint32_t a, uint32_t b) -{ - c += EXTEND32(EXTEND16S((a >> 0) & 0xffff) * - EXTEND16S((b >> 0) & 0xffff)); - c += EXTEND32(EXTEND16S((a >> 16) & 0xffff) * - EXTEND16S((b >> 16) & 0xffff)); - return c; -} - -uint64_t HELPER(iwmmxt_muladdswl)(uint64_t c, uint32_t a, uint32_t b) -{ - return c + (EXTEND32(EXTEND16S(a & 0xffff) * - EXTEND16S(b & 0xffff))); -} diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index f7354f3..d856e3b 100644 
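The m_helper.c hunks that follow all apply one conversion: cpu_stl_data_ra()/cpu_ldl_data_ra(), where the memory operation is implied by the function name, become cpu_stl_mmu()/cpu_ldl_mmu() with an explicit MemOpIdx, and MO_ALIGN is added so the accesses are alignment-checked (the do_v7m_function_return hunk makes the same change to an existing MemOpIdx, MO_LEUL becoming MO_LEUL | MO_ALIGN). A standalone sketch of the pattern, using only calls visible in the patch; the wrapper name v7m_store_word is invented for illustration:

static void v7m_store_word(CPUARMState *env, uint32_t addr,
                           uint32_t val, uintptr_t ra)
{
    /* Old form, memop and mmu index implicit:
     *     cpu_stl_data_ra(env, addr, val, ra);
     */
    ARMMMUIdx mmu_idx = arm_mmu_idx(env);       /* current translation regime */
    MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN,
                                 arm_to_core_mmu_idx(mmu_idx));
    /* Target-endian 32-bit alignment-checked store; on a fault the
     * exception unwinds to the helper's return address ra. */
    cpu_stl_mmu(env, addr, val, oi, ra);
}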
--- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -15,10 +15,9 @@ #include "qemu/main-loop.h" #include "qemu/bitops.h" #include "qemu/log.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #ifdef CONFIG_TCG -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "semihosting/common-semi.h" #endif #if !defined(CONFIG_USER_ONLY) @@ -633,8 +632,11 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) } /* Note that these stores can throw exceptions on MPU faults */ - cpu_stl_data_ra(env, sp, nextinst, GETPC()); - cpu_stl_data_ra(env, sp + 4, saved_psr, GETPC()); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); + cpu_stl_mmu(env, sp, nextinst, oi, GETPC()); + cpu_stl_mmu(env, sp + 4, saved_psr, oi, GETPC()); env->regs[13] = sp; env->regs[14] = 0xfeffffff; @@ -1049,6 +1051,9 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK; uintptr_t ra = GETPC(); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); assert(env->v7m.secure); @@ -1074,7 +1079,7 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) * Note that we do not use v7m_stack_write() here, because the * accesses should not set the FSR bits for stacking errors if they * fail. (In pseudocode terms, they are AccType_NORMAL, not AccType_STACK - * or AccType_LAZYFP). Faults in cpu_stl_data_ra() will throw exceptions + * or AccType_LAZYFP). Faults in cpu_stl_mmu() will throw exceptions * and longjmp out. */ if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) { @@ -1090,12 +1095,12 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) if (i >= 16) { faddr += 8; /* skip the slot for the FPSCR */ } - cpu_stl_data_ra(env, faddr, slo, ra); - cpu_stl_data_ra(env, faddr + 4, shi, ra); + cpu_stl_mmu(env, faddr, slo, oi, ra); + cpu_stl_mmu(env, faddr + 4, shi, oi, ra); } - cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra); + cpu_stl_mmu(env, fptr + 0x40, vfp_get_fpscr(env), oi, ra); if (cpu_isar_feature(aa32_mve, cpu)) { - cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra); + cpu_stl_mmu(env, fptr + 0x44, env->v7m.vpr, oi, ra); } /* @@ -1122,6 +1127,9 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) { ARMCPU *cpu = env_archcpu(env); uintptr_t ra = GETPC(); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); /* fptr is the value of Rn, the frame pointer we load the FP regs from */ assert(env->v7m.secure); @@ -1156,16 +1164,16 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) faddr += 8; /* skip the slot for the FPSCR and VPR */ } - slo = cpu_ldl_data_ra(env, faddr, ra); - shi = cpu_ldl_data_ra(env, faddr + 4, ra); + slo = cpu_ldl_mmu(env, faddr, oi, ra); + shi = cpu_ldl_mmu(env, faddr + 4, oi, ra); dn = (uint64_t) shi << 32 | slo; *aa32_vfp_dreg(env, i / 2) = dn; } - fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra); + fpscr = cpu_ldl_mmu(env, fptr + 0x40, oi, ra); vfp_set_fpscr(env, fpscr); if (cpu_isar_feature(aa32_mve, cpu)) { - env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra); + env->v7m.vpr = cpu_ldl_mmu(env, fptr + 0x44, oi, ra); } } @@ -1938,7 +1946,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) * do them as secure, so work out what MMU index that is. 
*/ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); - oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx)); + oi = make_memop_idx(MO_LEUL | MO_ALIGN, arm_to_core_mmu_idx(mmu_idx)); newpc = cpu_ldl_mmu(env, frameptr, oi, 0); newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0); @@ -2821,8 +2829,8 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) ARMMMUFaultInfo fi = {}; /* We can ignore the return value as prot is always set */ - pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, targetsec, - &res, &fi, &mregion); + pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, PAGE_READ, mmu_idx, + targetsec, &res, &fi, &mregion); if (mregion == -1) { mrvalid = false; mregion = 0; diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index dd12cce..1b11565 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -30,18 +30,10 @@ arm_ss.add(files( 'translate-mve.c', 'translate-neon.c', 'translate-vfp.c', - 'crypto_helper.c', - 'hflags.c', - 'iwmmxt_helper.c', 'm_helper.c', 'mve_helper.c', - 'neon_helper.c', 'op_helper.c', - 'tlb_helper.c', 'vec_helper.c', - 'tlb-insns.c', - 'arith_helper.c', - 'vfp_helper.c', )) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( @@ -63,3 +55,25 @@ arm_system_ss.add(files( arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) + +arm_common_ss.add(zlib) + +arm_common_ss.add(files( + 'arith_helper.c', + 'crypto_helper.c', +)) + +arm_common_system_ss.add(files( + 'cpregs-at.c', + 'hflags.c', + 'neon_helper.c', + 'tlb_helper.c', + 'tlb-insns.c', + 'vfp_helper.c', +)) +arm_user_ss.add(files( + 'hflags.c', + 'neon_helper.c', + 'tlb_helper.c', + 'vfp_helper.c', +)) diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index 5d6d8a1..bb48fe3 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -21,22 +21,23 @@ #include "qemu/log.h" #include "cpu.h" #include "internals.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "exec/page-protection.h" #ifdef CONFIG_USER_ONLY #include "user/cpu_loop.h" #include "user/page-protection.h" #else -#include "exec/ram_addr.h" +#include "system/physmem.h" #endif -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/helper-proto.h" +#include "exec/tlb-flags.h" #include "accel/tcg/cpu-ops.h" #include "qapi/error.h" #include "qemu/guest-random.h" #include "mte_helper.h" - static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) { if (exclude == 0xffff) { @@ -62,6 +63,7 @@ uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, bool probe, uintptr_t ra) { #ifdef CONFIG_USER_ONLY + const size_t page_data_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1); uint64_t clean_ptr = useronly_clean_ptr(ptr); int flags = page_get_flags(clean_ptr); uint8_t *tags; @@ -82,7 +84,7 @@ uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, return NULL; } - tags = page_get_target_data(clean_ptr); + tags = page_get_target_data(clean_ptr, page_data_size); index = extract32(ptr, LOG2_TAG_GRANULE + 1, TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1); @@ -187,7 +189,7 @@ uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, */ if (tag_access == MMU_DATA_STORE) { ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; - cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); } return 
memory_region_get_ram_ptr(mr) + xlat; @@ -590,7 +592,7 @@ static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr, * which is rather sooner than "normal". But the alternative * is waiting until the next syscall. */ - qemu_cpu_kick(env_cpu(env)); + cpu_exit(env_cpu(env)); #endif } @@ -603,7 +605,7 @@ void mte_check_fail(CPUARMState *env, uint32_t desc, int el, reg_el, tcf; uint64_t sctlr; - reg_el = regime_el(env, arm_mmu_idx); + reg_el = regime_el(arm_mmu_idx); sctlr = env->cp15.sctlr_el[reg_el]; switch (arm_mmu_idx) { diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c index 274003e..63ddcf3 100644 --- a/target/arm/tcg/mve_helper.c +++ b/target/arm/tcg/mve_helper.c @@ -22,8 +22,7 @@ #include "internals.h" #include "vec_internal.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/tcg.h" #include "fpu/softfloat.h" #include "crypto/clmul.h" @@ -149,13 +148,15 @@ static void mve_advance_vpt(CPUARMState *env) } /* For loads, predicated lanes are zeroed instead of keeping their old values */ -#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \ +#define DO_VLDR(OP, MFLAG, MSIZE, MTYPE, LDTYPE, ESIZE, TYPE) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \ { \ TYPE *d = vd; \ uint16_t mask = mve_element_mask(env); \ uint16_t eci_mask = mve_eci_mask(env); \ unsigned b, e; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ /* \ * R_SXTM allows the dest reg to become UNKNOWN for abandoned \ * beats so we don't care if we update part of the dest and \ @@ -164,46 +165,48 @@ static void mve_advance_vpt(CPUARMState *env) for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \ if (eci_mask & (1 << b)) { \ d[H##ESIZE(e)] = (mask & (1 << b)) ? 
\ - cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \ + (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0;\ } \ addr += MSIZE; \ } \ mve_advance_vpt(env); \ } -#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \ +#define DO_VSTR(OP, MFLAG, MSIZE, STTYPE, ESIZE, TYPE) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \ { \ TYPE *d = vd; \ uint16_t mask = mve_element_mask(env); \ unsigned b, e; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \ if (mask & (1 << b)) { \ - cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \ + cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \ } \ addr += MSIZE; \ } \ mve_advance_vpt(env); \ } -DO_VLDR(vldrb, 1, ldub, 1, uint8_t) -DO_VLDR(vldrh, 2, lduw, 2, uint16_t) -DO_VLDR(vldrw, 4, ldl, 4, uint32_t) +DO_VLDR(vldrb, MO_UB, 1, uint8_t, ldb, 1, uint8_t) +DO_VLDR(vldrh, MO_TEUW, 2, uint16_t, ldw, 2, uint16_t) +DO_VLDR(vldrw, MO_TEUL, 4, uint32_t, ldl, 4, uint32_t) -DO_VSTR(vstrb, 1, stb, 1, uint8_t) -DO_VSTR(vstrh, 2, stw, 2, uint16_t) -DO_VSTR(vstrw, 4, stl, 4, uint32_t) +DO_VSTR(vstrb, MO_UB, 1, stb, 1, uint8_t) +DO_VSTR(vstrh, MO_TEUW, 2, stw, 2, uint16_t) +DO_VSTR(vstrw, MO_TEUL, 4, stl, 4, uint32_t) -DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t) -DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t) -DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t) -DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t) -DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t) -DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t) +DO_VLDR(vldrb_sh, MO_SB, 1, int8_t, ldb, 2, int16_t) +DO_VLDR(vldrb_sw, MO_SB, 1, int8_t, ldb, 4, int32_t) +DO_VLDR(vldrb_uh, MO_UB, 1, uint8_t, ldb, 2, uint16_t) +DO_VLDR(vldrb_uw, MO_UB, 1, uint8_t, ldb, 4, uint32_t) +DO_VLDR(vldrh_sw, MO_TESW, 2, int16_t, ldw, 4, int32_t) +DO_VLDR(vldrh_uw, MO_TEUW, 2, uint16_t, ldw, 4, uint32_t) -DO_VSTR(vstrb_h, 1, stb, 2, int16_t) -DO_VSTR(vstrb_w, 1, stb, 4, int32_t) -DO_VSTR(vstrh_w, 2, stw, 4, int32_t) +DO_VSTR(vstrb_h, MO_UB, 1, stb, 2, int16_t) +DO_VSTR(vstrb_w, MO_UB, 1, stb, 4, int32_t) +DO_VSTR(vstrh_w, MO_TEUW, 2, stw, 4, int32_t) #undef DO_VLDR #undef DO_VSTR @@ -215,7 +218,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) * For loads, predicated lanes are zeroed instead of retaining * their previous values. */ -#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \ +#define DO_VLDR_SG(OP, MFLAG, MTYPE, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB)\ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \ uint32_t base) \ { \ @@ -225,13 +228,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H##ESIZE(e)]); \ d[H##ESIZE(e)] = (mask & 1) ? 
\ - cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \ + (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0; \ if (WB) { \ m[H##ESIZE(e)] = addr; \ } \ @@ -240,7 +245,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) } /* We know here TYPE is unsigned so always the same as the offset type */ -#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \ +#define DO_VSTR_SG(OP, MFLAG, STTYPE, ESIZE, TYPE, ADDRFN, WB) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \ uint32_t base) \ { \ @@ -250,13 +255,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H##ESIZE(e)]); \ if (mask & 1) { \ - cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \ + cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \ } \ if (WB) { \ m[H##ESIZE(e)] = addr; \ @@ -283,13 +290,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H4(e & ~1)]); \ addr += 4 * (e & 1); \ - d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \ + d[H4(e)] = (mask & 1) ? cpu_ldl_mmu(env, addr, oi, GETPC()) : 0; \ if (WB && (e & 1)) { \ m[H4(e & ~1)] = addr - 4; \ } \ @@ -307,6 +316,8 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \ if (!(eci_mask & 1)) { \ continue; \ @@ -314,7 +325,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) addr = ADDRFN(base, m[H4(e & ~1)]); \ addr += 4 * (e & 1); \ if (mask & 1) { \ - cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \ + cpu_stl_mmu(env, addr, d[H4(e)], oi, GETPC()); \ } \ if (WB && (e & 1)) { \ m[H4(e & ~1)] = addr - 4; \ @@ -328,40 +339,44 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) #define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2)) #define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3)) -DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_sh, MO_SB, int8_t, ldb, 2, int16_t, uint16_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_sw, MO_SB, int8_t, ldb, 4, int32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_sw, MO_TESW, int16_t, ldw, 4, int32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_ub, MO_UB, uint8_t, ldb, 1, uint8_t, uint8_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_uh, MO_UB, uint8_t, ldb, 2, uint16_t, uint16_t, 
ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_uw, MO_UB, uint8_t, ldb, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_uh, MO_TEUW, uint16_t, ldw, 2, uint16_t, uint16_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_uw, MO_TEUW, uint16_t, ldw, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrw_sg_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false) DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false) +DO_VLDR_SG(vldrh_sg_os_sw, MO_TESW, int16_t, ldw, 4, + int32_t, uint32_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrh_sg_os_uh, MO_TEUW, uint16_t, ldw, 2, + uint16_t, uint16_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrh_sg_os_uw, MO_TEUW, uint16_t, ldw, 4, + uint32_t, uint32_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrw_sg_os_uw, MO_TEUL, uint32_t, ldl, 4, + uint32_t, uint32_t, ADDR_ADD_OSW, false) DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false) -DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false) -DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false) -DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false) -DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrb_sg_ub, MO_UB, stb, 1, uint8_t, ADDR_ADD, false) +DO_VSTR_SG(vstrb_sg_uh, MO_UB, stb, 2, uint16_t, ADDR_ADD, false) +DO_VSTR_SG(vstrb_sg_uw, MO_UB, stb, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrh_sg_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD, false) +DO_VSTR_SG(vstrh_sg_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrw_sg_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, false) DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false) -DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false) -DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false) +DO_VSTR_SG(vstrh_sg_os_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD_OSH, false) +DO_VSTR_SG(vstrh_sg_os_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD_OSH, false) +DO_VSTR_SG(vstrw_sg_os_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD_OSW, false) DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false) -DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true) +DO_VLDR_SG(vldrw_sg_wb_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true) DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true) -DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true) +DO_VSTR_SG(vstrw_sg_wb_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, true) DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) /* @@ -388,13 +403,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint16_t mask = mve_eci_mask(env); \ static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 4; e++, data >>= 8) { \ uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \ qd[H1(off[beat])] = data; \ @@ -412,13 +429,15 @@ 
DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint32_t addr, data; \ int y; /* y counts 0 2 0 2 */ \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 8 + (beat & 1) * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \ qd[H2(off[beat])] = data; \ data >>= 16; \ @@ -437,13 +456,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint32_t addr, data; \ uint32_t *qd; \ int y; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ y = (beat + (O1 & 2)) & 3; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \ qd[H4(off[beat] >> 2)] = data; \ @@ -474,13 +495,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint8_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 2; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 4; e++, data >>= 8) { \ qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \ qd[H1(off[beat] + (e >> 1))] = data; \ @@ -498,13 +521,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) uint32_t addr, data; \ int e; \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 2; e++, data >>= 16) { \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \ qd[H2(off[beat])] = data; \ @@ -521,13 +546,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint32_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat]; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \ qd[H4(off[beat] >> 3)] = data; \ } \ @@ -550,6 +577,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint16_t mask = mve_eci_mask(env); \ static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -561,7 +590,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \ 
data = (data << 8) | qd[H1(off[beat])]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -575,6 +604,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint32_t addr, data; \ int y; /* y counts 0 2 0 2 */ \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -585,7 +616,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) data = qd[H2(off[beat])]; \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \ data |= qd[H2(off[beat])] << 16; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -599,6 +630,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint32_t addr, data; \ uint32_t *qd; \ int y; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -608,7 +641,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) y = (beat + (O1 & 2)) & 3; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \ data = qd[H4(off[beat] >> 2)]; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -636,6 +669,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint8_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -647,7 +682,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \ data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -661,6 +696,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) uint32_t addr, data; \ int e; \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -672,7 +709,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \ data = (data << 16) | qd[H2(off[beat])]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -685,6 +722,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint32_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -693,7 +732,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) addr = base + off[beat]; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \ data = qd[H4(off[beat] >> 3)]; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -2165,27 +2204,6 @@ DO_VSHLL_ALL(vshllt, true) DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \ DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN) -static inline uint64_t do_urshr(uint64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else if (sh == 64) { - return x >> 63; - } else 
{ - return 0; - } -} - -static inline int64_t do_srshr(int64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else { - /* Rounding the sign bit always produces 0. */ - return 0; - } -} - DO_VSHRN_ALL(vshrn, DO_SHR) DO_VSHRN_ALL(vrshrn, do_urshr) diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index e2cc7cf..8d288f3 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -9,11 +9,13 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "vec_internal.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) @@ -227,15 +229,30 @@ NEON_GVEC_VOP2(gvec_srshl_h, int16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, src2, 16, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) NEON_GVEC_VOP2(gvec_srshl_s, int32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, src2, 32, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_d(src1, (int8_t)src2, true, NULL)) NEON_GVEC_VOP2(gvec_srshl_d, int64_t) #undef NEON_FN +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, src2, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_d, int64_t) +#undef NEON_FN + uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift) { return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL); @@ -259,15 +276,30 @@ NEON_GVEC_VOP2(gvec_urshl_h, uint16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int16_t)src2, 16, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) NEON_GVEC_VOP2(gvec_urshl_s, int32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, src2, 32, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_d(src1, (int8_t)src2, true, NULL)) NEON_GVEC_VOP2(gvec_urshl_d, int64_t) #undef NEON_FN +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, src2, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_d, int64_t) +#undef NEON_FN + uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift) { return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL); diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index 30786fd..4fbd219 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -20,10 +20,11 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "cpregs.h" #define SIGNBIT (uint32_t)0x80000000 @@ -45,7 +46,7 @@ int exception_target_el(CPUARMState *env) } void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) + uint64_t syndrome, uint32_t target_el) { CPUState *cs = env_cpu(env); @@ -69,7 +70,7 @@ void raise_exception(CPUARMState *env, uint32_t excp, cpu_loop_exit(cs); } -void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, 
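/*
 * Note: the syndrome argument widens from uint32_t to uint64_t here and
 * in raise_exception() above, presumably so that ESR_ELx bits above
 * bit 31 (e.g. the ISS2 field) can be delivered with the exception.
 */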
+void raise_exception_ra(CPUARMState *env, uint32_t excp, uint64_t syndrome, uint32_t target_el, uintptr_t ra) { CPUState *cs = env_cpu(env); @@ -767,12 +768,6 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, assert(ri != NULL); - if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14 - && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) { - res = CP_ACCESS_UNDEFINED; - goto fail; - } - if (ri->accessfn) { res = ri->accessfn(env, ri, isread); } @@ -886,6 +881,13 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, } syndrome = syn_uncategorized(); break; + case CP_ACCESS_EXLOCK: + /* + * CP_ACCESS_EXLOCK is always directed to the current EL, + * which is going to be the same as the usual target EL. + */ + syndrome = syn_gcs_exlock(); + break; default: g_assert_not_reached(); } @@ -1221,7 +1223,7 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) } } -void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, +void HELPER(probe_access)(CPUARMState *env, vaddr ptr, uint32_t access_type, uint32_t mmu_idx, uint32_t size) { diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c index c4b1430..c591c30 100644 --- a/target/arm/tcg/pauth_helper.c +++ b/target/arm/tcg/pauth_helper.c @@ -21,8 +21,7 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "qemu/xxhash.h" diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode index 628804e..6bb9aa2 100644 --- a/target/arm/tcg/sme.decode +++ b/target/arm/tcg/sme.decode @@ -22,30 +22,139 @@ ### SME Misc ZERO 11000000 00 001 00000000000 imm:8 +ZERO_zt0 11000000 01 001 00000000000 00000001 ### SME Move into/from Array %mova_rs 13:2 !function=plus_12 -&mova esz rs pg zr za_imm v:bool to_vec:bool +%mova_rv 13:2 !function=plus_8 +&mova_a rv zr off +&mova_p esz rs pg zr za off v:bool +&mova_t esz rs zr za off v:bool -MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \ - &mova to_vec=0 rs=%mova_rs -MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \ - &mova to_vec=0 rs=%mova_rs esz=4 +MOVA_tz 11000000 00 00000 0 v:1 .. pg:3 zr:5 0 off:4 \ + &mova_p rs=%mova_rs esz=0 za=0 +MOVA_tz 11000000 01 00000 0 v:1 .. pg:3 zr:5 0 za:1 off:3 \ + &mova_p rs=%mova_rs esz=1 +MOVA_tz 11000000 10 00000 0 v:1 .. pg:3 zr:5 0 za:2 off:2 \ + &mova_p rs=%mova_rs esz=2 +MOVA_tz 11000000 11 00000 0 v:1 .. pg:3 zr:5 0 za:3 off:1 \ + &mova_p rs=%mova_rs esz=3 +MOVA_tz 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za:4 \ + &mova_p rs=%mova_rs esz=4 off=0 -MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \ - &mova to_vec=1 rs=%mova_rs -MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \ - &mova to_vec=1 rs=%mova_rs esz=4 +MOVA_zt 11000000 00 00001 0 v:1 .. pg:3 0 off:4 zr:5 \ + &mova_p rs=%mova_rs esz=0 za=0 +MOVA_zt 11000000 01 00001 0 v:1 .. pg:3 0 za:1 off:3 zr:5 \ + &mova_p rs=%mova_rs esz=1 +MOVA_zt 11000000 10 00001 0 v:1 .. pg:3 0 za:2 off:2 zr:5 \ + &mova_p rs=%mova_rs esz=2 +MOVA_zt 11000000 11 00001 0 v:1 .. pg:3 0 za:3 off:1 zr:5 \ + &mova_p rs=%mova_rs esz=3 +MOVA_zt 11000000 11 00001 1 v:1 .. pg:3 0 za:4 zr:5 \ + &mova_p rs=%mova_rs esz=4 off=0 + +MOVA_tz2 11000000 00 00010 0 v:1 .. 000 zr:4 0 00 off:3 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_tz2 11000000 01 00010 0 v:1 .. 000 zr:4 0 00 za:1 off:2 \ + &mova_t rs=%mova_rs esz=1 +MOVA_tz2 11000000 10 00010 0 v:1 .. 
000 zr:4 0 00 za:2 off:1 \ + &mova_t rs=%mova_rs esz=2 +MOVA_tz2 11000000 11 00010 0 v:1 .. 000 zr:4 0 00 za:3 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_zt2 11000000 00 00011 0 v:1 .. 000 00 off:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_zt2 11000000 01 00011 0 v:1 .. 000 00 za:1 off:2 zr:4 0 \ + &mova_t rs=%mova_rs esz=1 +MOVA_zt2 11000000 10 00011 0 v:1 .. 000 00 za:2 off:1 zr:4 0 \ + &mova_t rs=%mova_rs esz=2 +MOVA_zt2 11000000 11 00011 0 v:1 .. 000 00 za:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_tz4 11000000 00 00010 0 v:1 .. 001 zr:3 00 000 off:2 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_tz4 11000000 01 00010 0 v:1 .. 001 zr:3 00 000 za:1 off:1 \ + &mova_t rs=%mova_rs esz=1 +MOVA_tz4 11000000 10 00010 0 v:1 .. 001 zr:3 00 000 za:2 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVA_tz4 11000000 11 00010 0 v:1 .. 001 zr:3 00 00 za:3 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_zt4 11000000 00 00011 0 v:1 .. 001 000 off:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_zt4 11000000 01 00011 0 v:1 .. 001 000 za:1 off:1 zr:3 00 \ + &mova_t rs=%mova_rs esz=1 +MOVA_zt4 11000000 10 00011 0 v:1 .. 001 000 za:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVA_zt4 11000000 11 00011 0 v:1 .. 001 00 za:3 zr:3 00 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_az2 11000000 00 00010 00 .. 010 zr:4 000 off:3 \ + &mova_a rv=%mova_rv +MOVA_az4 11000000 00 00010 00 .. 011 zr:3 0000 off:3 \ + &mova_a rv=%mova_rv + +MOVA_za2 11000000 00 00011 00 .. 010 00 off:3 zr:4 0 \ + &mova_a rv=%mova_rv +MOVA_za4 11000000 00 00011 00 .. 011 00 off:3 zr:3 00 \ + &mova_a rv=%mova_rv + +### SME Move and Zero + +MOVAZ_za2 11000000 00000110 0 .. 01010 off:3 zr:4 0 \ + &mova_a rv=%mova_rv +MOVAZ_za4 11000000 00000110 0 .. 01110 off:3 zr:3 00 \ + &mova_a rv=%mova_rv + +MOVAZ_zt 11000000 00 00001 0 v:1 .. 0001 off:4 zr:5 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt 11000000 01 00001 0 v:1 .. 0001 za:1 off:3 zr:5 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt 11000000 10 00001 0 v:1 .. 0001 za:2 off:2 zr:5 \ + &mova_t rs=%mova_rs esz=2 +MOVAZ_zt 11000000 11 00001 0 v:1 .. 0001 za:3 off:1 zr:5 \ + &mova_t rs=%mova_rs esz=3 +MOVAZ_zt 11000000 11 00001 1 v:1 .. 0001 za:4 zr:5 \ + &mova_t rs=%mova_rs esz=4 off=0 + +MOVAZ_zt2 11000000 00 00011 0 v:1 .. 00010 off:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt2 11000000 01 00011 0 v:1 .. 00010 za:1 off:2 zr:4 0 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt2 11000000 10 00011 0 v:1 .. 00010 za:2 off:1 zr:4 0 \ + &mova_t rs=%mova_rs esz=2 +MOVAZ_zt2 11000000 11 00011 0 v:1 .. 00010 za:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVAZ_zt4 11000000 00 00011 0 v:1 .. 001100 off:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt4 11000000 01 00011 0 v:1 .. 001100 za:1 off:1 zr:3 00 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt4 11000000 10 00011 0 v:1 .. 001100 za:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVAZ_zt4 11000000 11 00011 0 v:1 .. 00110 za:3 zr:3 00 \ + &mova_t rs=%mova_rs esz=3 off=0 + +### SME Move into/from ZT0 + +MOVT_rzt 1100 0000 0100 1100 0 off:3 00 11111 rt:5 +MOVT_ztr 1100 0000 0100 1110 0 off:3 00 11111 rt:5 ### SME Memory -&ldst esz rs pg rn rm za_imm v:bool st:bool +&ldst esz rs pg rn rm za off v:bool st:bool -LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ - &ldst rs=%mova_rs -LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ - &ldst esz=4 rs=%mova_rs +LDST1 1110000 0 00 st:1 rm:5 v:1 .. pg:3 rn:5 0 off:4 \ + &ldst rs=%mova_rs esz=0 za=0 +LDST1 1110000 0 01 st:1 rm:5 v:1 .. 
pg:3 rn:5 0 za:1 off:3 \ + &ldst rs=%mova_rs esz=1 +LDST1 1110000 0 10 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:2 off:2 \ + &ldst rs=%mova_rs esz=2 +LDST1 1110000 0 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:3 off:1 \ + &ldst rs=%mova_rs esz=3 +LDST1 1110000 1 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:4 \ + &ldst rs=%mova_rs esz=4 off=0 &ldstr rv rn imm @ldstr ....... ... . ...... .. ... rn:5 . imm:4 \ @@ -54,6 +163,12 @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr +&ldstzt0 rn +@ldstzt0 ....... ... . ...... .. ... rn:5 ..... &ldstzt0 + +LDR_zt0 1110000 100 0 111111 00 000 ..... 00000 @ldstzt0 +STR_zt0 1110000 100 1 111111 00 000 ..... 00000 @ldstzt0 + ### SME Add Vector to Array &adda zad zn pm pn @@ -68,14 +183,18 @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 ### SME Outer Product &op zad zn zm pm pn sub:bool +@op_16 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 ... zad:1 &op @op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op @op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op +FMOPA_h 10000001 100 ..... ... ... ..... . 100 . @op_16 FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 -BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 -FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 +BFMOPA 10000001 101 ..... ... ... ..... . 100 . @op_16 + +BFMOPA_w 10000001 100 ..... ... ... ..... . 00 .. @op_32 +FMOPA_w_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32 SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32 @@ -86,3 +205,789 @@ SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64 SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64 USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64 UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64 + +BMOPA 1000000 0 10 0 ..... ... ... ..... . 10 .. @op_32 +SMOPA2_s 1010000 0 10 0 ..... ... ... ..... . 10 .. @op_32 +UMOPA2_s 1010000 1 10 0 ..... ... ... ..... . 10 .. @op_32 + +### SME2 Multi-vector Multiple and Single SVE Destructive + +%zd_ax2 1:4 !function=times_2 +%zd_ax4 2:3 !function=times_4 + +&z2z_en zdn zm esz n +@z2z_2x1 ....... . esz:2 .. zm:4 ....0. ..... .... . \ + &z2z_en n=2 zdn=%zd_ax2 +@z2z_4x1 ....... . esz:2 .. zm:4 ....1. ..... ...0 . \ + &z2z_en n=4 zdn=%zd_ax4 + +SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_2x1 +SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_4x1 +UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_2x1 +UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_4x1 +SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_2x1 +SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_4x1 +UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_2x1 +UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_4x1 + +FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_2x1 +FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_4x1 +FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_2x1 +FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_4x1 +FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_2x1 +FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_4x1 +FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_2x1 +FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_4x1 + +SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_2x1 +SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_4x1 +URSHL_n1 1100000 1 .. 10 .... 
1010.0 10001 .... 1 @z2z_2x1 +URSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 1 @z2z_4x1 + +ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_2x1 +ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_4x1 + +SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_2x1 +SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_4x1 + +### SME2 Multi-vector Multiple Vectors SVE Destructive + +%zm_ax2 17:4 !function=times_2 +%zm_ax4 18:3 !function=times_4 + +@z2z_2x2 ....... . esz:2 . ....0 ....0. ..... .... . \ + &z2z_en n=2 zdn=%zd_ax2 zm=%zm_ax2 +@z2z_4x4 ....... . esz:2 . ...00 ....1. ..... ...0 . \ + &z2z_en n=4 zdn=%zd_ax4 zm=%zm_ax4 + +SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_2x2 +SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_4x4 +UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_2x2 +UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_4x4 +SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_2x2 +SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_4x4 +UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_2x2 +UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_4x4 + +FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_2x2 +FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_4x4 +FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_2x2 +FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_4x4 +FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_2x2 +FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_4x4 +FMINNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 1 @z2z_2x2 +FMINNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 1 @z2z_4x4 + +SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_2x2 +SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_4x4 +URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_2x2 +URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_4x4 + +SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_2x2 +SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_4x4 + +### SME2 Multi-vector Multiple and Single Array Vectors + +&azz_n n off rv zn zm +@azz_nx1_o3 ........ .... zm:4 ...... zn:5 .. off:3 &azz_n rv=%mova_rv + +ADD_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2 +ADD_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4 +ADD_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2 +ADD_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4 + +SUB_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2 +SUB_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4 +SUB_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2 +SUB_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4 + +%off3_x2 0:3 !function=times_2 +%off2_x2 0:2 !function=times_2 + +@azz_nx1_o3x2 ........ ... . zm:4 . .. ... zn:5 .. ... \ + &azz_n off=%off3_x2 rv=%mova_rv +@azz_nx1_o2x2 ........ ... . zm:4 . .. ... zn:5 ... .. \ + &azz_n off=%off2_x2 rv=%mova_rv + +FMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1 +FMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2 +FMLAL_n1 11000001 001 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4 + +FMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1 +FMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2 +FMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4 + +BFMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1 +BFMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2 +BFMLAL_n1 11000001 001 1 .... 0 .. 
010 ..... 100 .. @azz_nx1_o2x2 n=4 + +BFMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1 +BFMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2 +BFMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4 + +FDOT_n1 11000001 001 0 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=2 +FDOT_n1 11000001 001 1 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=4 + +BFDOT_n1 11000001 001 0 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=2 +BFDOT_n1 11000001 001 1 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=4 + +USDOT_n1 11000001 001 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2 +USDOT_n1 11000001 001 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4 + +SUDOT_n1 11000001 001 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2 +SUDOT_n1 11000001 001 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4 + +SDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2 +SDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4 +SDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2 +SDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4 +SDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2 +SDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4 + +UDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2 +UDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=4 +UDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2 +UDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=4 +UDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2 +UDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4 + +SMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1 +SMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2 +SMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4 + +SMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1 +SMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2 +SMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4 + +UMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1 +UMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2 +UMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=4 + +UMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1 +UMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2 +UMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4 + +%off2_x4 0:2 !function=times_4 +%off1_x4 0:1 !function=times_4 + +@azz_nx1_o2x4 ........ ... . zm:4 . .. ... zn:5 ... .. \ + &azz_n off=%off2_x4 rv=%mova_rv +@azz_nx1_o1x4 ........ ... . zm:4 . .. ... zn:5 .... . \ + &azz_n off=%off1_x4 rv=%mova_rv + +SMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1 +SMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1 +SMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2 +SMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2 +SMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4 +SMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4 + +SMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1 +SMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1 +SMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2 +SMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2 +SMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0100 . 
@azz_nx1_o1x4 n=4 +SMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4 + +UMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1 +UMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1 +UMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2 +UMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2 +UMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4 +UMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4 + +UMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1 +UMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1 +UMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2 +UMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2 +UMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4 +UMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4 + +USMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 001 .. @azz_nx1_o2x4 n=1 +USMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=2 +USMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4 + +SUMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2 +SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4 + +BFMLA_n1 11000001 011 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2 +FMLA_n1_h 11000001 001 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2 +FMLA_n1_s 11000001 001 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2 +FMLA_n1_d 11000001 011 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2 + +BFMLA_n1 11000001 011 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_h 11000001 001 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_s 11000001 001 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_d 11000001 011 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4 + +BFMLS_n1 11000001 011 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_h 11000001 001 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_s 11000001 001 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_d 11000001 011 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2 + +BFMLS_n1 11000001 011 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_h 11000001 001 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_s 11000001 001 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_d 11000001 011 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4 + +### SME2 Multi-vector Multiple Array Vectors + +%zn_ax2 6:4 !function=times_2 +%zn_ax4 7:3 !function=times_4 + +@azz_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 +@azz_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 + +ADD_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3 +ADD_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3 +ADD_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3 +ADD_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3 + +SUB_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3 +SUB_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3 +SUB_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3 +SUB_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3 + +@azz_2x2_o2x2 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off2_x2 +@azz_4x4_o2x2 ........ ... ..... . .. ... ..... ... .. 
\ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off2_x2 + +FMLAL_nn 11000001 101 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2 +FMLAL_nn 11000001 101 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2 + +FMLSL_nn 11000001 101 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2 +FMLSL_nn 11000001 101 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2 + +BFMLAL_nn 11000001 101 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2 +BFMLAL_nn 11000001 101 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2 + +BFMLSL_nn 11000001 101 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2 +BFMLSL_nn 11000001 101 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2 + +FDOT_nn 11000001 101 ....0 0 .. 100 ....0 00 ... @azz_2x2_o3 +FDOT_nn 11000001 101 ...01 0 .. 100 ...00 00 ... @azz_4x4_o3 + +BFDOT_nn 11000001 101 ....0 0 .. 100 ....0 10 ... @azz_2x2_o3 +BFDOT_nn 11000001 101 ...01 0 .. 100 ...00 10 ... @azz_4x4_o3 + +USDOT_nn 11000001 101 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3 +USDOT_nn 11000001 101 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3 + +SDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3 +SDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3 +SDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3 +SDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3 +SDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3 +SDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3 + +UDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3 +UDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 10 ... @azz_4x4_o3 +UDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3 +UDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 10 ... @azz_4x4_o3 +UDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 11 ... @azz_2x2_o3 +UDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 11 ... @azz_4x4_o3 + +SMLAL_nn 11000001 111 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2 +SMLAL_nn 11000001 111 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2 + +SMLSL_nn 11000001 111 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2 +SMLSL_nn 11000001 111 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2 + +UMLAL_nn 11000001 111 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2 +UMLAL_nn 11000001 111 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2 + +UMLSL_nn 11000001 111 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2 +UMLSL_nn 11000001 111 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2 + +@azz_2x2_o1x4 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off1_x4 +@azz_4x4_o1x4 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off1_x4 + +SMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4 +SMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4 +SMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4 +SMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4 + +SMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4 +SMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4 +SMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4 +SMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4 + +UMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4 +UMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4 +UMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4 +UMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4 + +UMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4 +UMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4 +UMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 1100 . 
@azz_4x4_o1x4 +UMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4 + +USMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0010 . @azz_2x2_o1x4 +USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0010 . @azz_4x4_o1x4 + +BFMLA_nn 11000001 111 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3 +FMLA_nn_h 11000001 101 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3 +FMLA_nn_s 11000001 101 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3 +FMLA_nn_d 11000001 111 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3 + +BFMLA_nn 11000001 111 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3 +FMLA_nn_h 11000001 101 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3 +FMLA_nn_s 11000001 101 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3 +FMLA_nn_d 11000001 111 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3 + +BFMLS_nn 11000001 111 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3 +FMLS_nn_h 11000001 101 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3 +FMLS_nn_s 11000001 101 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3 +FMLS_nn_d 11000001 111 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3 + +BFMLS_nn 11000001 111 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3 +FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3 +FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3 +FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3 + +&az_n n off rv zm +@az_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &az_n n=2 rv=%mova_rv zm=%zn_ax2 +@az_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &az_n n=4 rv=%mova_rv zm=%zn_ax4 + +FADD_nn_h 11000001 101 00100 0 .. 111 ....0 00 ... @az_2x2_o3 +FADD_nn_s 11000001 101 00000 0 .. 111 ....0 00 ... @az_2x2_o3 +FADD_nn_d 11000001 111 00000 0 .. 111 ....0 00 ... @az_2x2_o3 +FADD_nn_h 11000001 101 00101 0 .. 111 ...00 00 ... @az_4x4_o3 +FADD_nn_s 11000001 101 00001 0 .. 111 ...00 00 ... @az_4x4_o3 +FADD_nn_d 11000001 111 00001 0 .. 111 ...00 00 ... @az_4x4_o3 + +FSUB_nn_h 11000001 101 00100 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_s 11000001 101 00000 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_d 11000001 111 00000 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_h 11000001 101 00101 0 .. 111 ...00 01 ... @az_4x4_o3 +FSUB_nn_s 11000001 101 00001 0 .. 111 ...00 01 ... @az_4x4_o3 +FSUB_nn_d 11000001 111 00001 0 .. 111 ...00 01 ... @az_4x4_o3 + +BFADD_nn 11000001 111 00100 0 .. 111 ....0 00 ... @az_2x2_o3 +BFADD_nn 11000001 111 00101 0 .. 111 ...00 00 ... @az_4x4_o3 +BFSUB_nn 11000001 111 00100 0 .. 111 ....0 01 ... @az_2x2_o3 +BFSUB_nn 11000001 111 00101 0 .. 111 ...00 01 ... @az_4x4_o3 + +### SME2 Multi-vector Indexed + +&azx_n n off rv zn zm idx + +%idx3_15_10 15:1 10:2 +%idx2_10_2 10:2 2:1 + +@azx_1x1_o3x2 ........ .... zm:4 . .. . .. zn:5 .. ... \ + &azx_n n=1 rv=%mova_rv off=%off3_x2 idx=%idx3_15_10 +@azx_2x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \ + &azx_n n=2 rv=%mova_rv off=%off2_x2 zn=%zn_ax2 idx=%idx2_10_2 +@azx_4x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \ + &azx_n n=4 rv=%mova_rv off=%off2_x2 zn=%zn_ax4 idx=%idx2_10_2 + +FMLAL_nx 11000001 1000 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2 +FMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2 +FMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2 + +FMLSL_nx 11000001 1000 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2 +FMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2 +FMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2 + +BFMLAL_nx 11000001 1000 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2 +BFMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2 +BFMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 10 ... 
@azx_4x1_o2x2 + +BFMLSL_nx 11000001 1000 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2 +BFMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2 +BFMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2 + +@azx_2x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 +@azx_4x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 +@azx_2x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 +@azx_4x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 + +FDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_i2_o3 +FDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_i2_o3 + +BFDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_i2_o3 +BFDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_i2_o3 + +FVDOT 11000001 0101 .... 0 .. 0 .. ....0 01 ... @azx_2x1_i2_o3 +BFVDOT 11000001 0101 .... 0 .. 0 .. ....0 11 ... @azx_2x1_i2_o3 + +SDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_i2_o3 +SDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_i2_o3 +SDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 00 ... @azx_2x1_i2_o3 +SDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 00 ... @azx_4x1_i2_o3 +SDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 01 ... @azx_2x1_i1_o3 +SDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 01 ... @azx_4x1_i1_o3 + +UDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 10 ... @azx_2x1_i2_o3 +UDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_i2_o3 +UDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 10 ... @azx_2x1_i2_o3 +UDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 10 ... @azx_4x1_i2_o3 +UDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 11 ... @azx_2x1_i1_o3 +UDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 11 ... @azx_4x1_i1_o3 + +USDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 01 ... @azx_2x1_i2_o3 +USDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 01 ... @azx_4x1_i2_o3 + +SUDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 11 ... @azx_2x1_i2_o3 +SUDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 11 ... @azx_4x1_i2_o3 + +SVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 00 ... @azx_2x1_i2_o3 +SVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 00 ... @azx_4x1_i2_o3 +SVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 01 ... @azx_4x1_i1_o3 + +UVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 10 ... @azx_2x1_i2_o3 +UVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 10 ... @azx_4x1_i2_o3 +UVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 11 ... @azx_4x1_i1_o3 + +SUVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 11 ... @azx_4x1_i2_o3 +USVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 01 ... @azx_4x1_i2_o3 + +SMLAL_nx 11000001 1100 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2 +SMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2 +SMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2 + +SMLSL_nx 11000001 1100 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2 +SMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2 +SMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2 + +UMLAL_nx 11000001 1100 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2 +UMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2 +UMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2 + +UMLSL_nx 11000001 1100 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2 +UMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2 +UMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 11 ... 
@azx_4x1_o2x2 + +%idx4_15_10 15:1 10:3 +%idx4_10_1 10:2 1:2 +%idx3_10_1 10:1 1:2 + +@azx_1x1_i4_o2 ........ .... zm:4 . .. ... zn:5 ... .. \ + &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx4_15_10 +@azx_1x1_i3_o2 ........ .... zm:4 . .. ... zn:5 ... .. \ + &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx3_15_10 +@azx_2x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx4_10_1 +@azx_2x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx3_10_1 +@azx_4x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx4_10_1 +@azx_4x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx3_10_1 + +SMLALL_nx_s 11000001 0000 .... . .. ... ..... 000 .. @azx_1x1_i4_o2 +SMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 000 .. @azx_1x1_i3_o2 +SMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 00 ... @azx_2x1_i4_o1 +SMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 00 ... @azx_2x1_i3_o1 +SMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 00 ... @azx_4x1_i4_o1 +SMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 00 ... @azx_4x1_i3_o1 + +SMLSLL_nx_s 11000001 0000 .... . .. ... ..... 010 .. @azx_1x1_i4_o2 +SMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 010 .. @azx_1x1_i3_o2 +SMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 01 ... @azx_2x1_i4_o1 +SMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 01 ... @azx_2x1_i3_o1 +SMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 01 ... @azx_4x1_i4_o1 +SMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 01 ... @azx_4x1_i3_o1 + +UMLALL_nx_s 11000001 0000 .... . .. ... ..... 100 .. @azx_1x1_i4_o2 +UMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 100 .. @azx_1x1_i3_o2 +UMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 10 ... @azx_2x1_i4_o1 +UMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 10 ... @azx_2x1_i3_o1 +UMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 10 ... @azx_4x1_i4_o1 +UMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 10 ... @azx_4x1_i3_o1 + +UMLSLL_nx_s 11000001 0000 .... . .. ... ..... 110 .. @azx_1x1_i4_o2 +UMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 110 .. @azx_1x1_i3_o2 +UMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 11 ... @azx_2x1_i4_o1 +UMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 11 ... @azx_2x1_i3_o1 +UMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 11 ... @azx_4x1_i4_o1 +UMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 11 ... @azx_4x1_i3_o1 + +USMLALL_nx_s 11000001 0000 .... . .. ... ..... 001 .. @azx_1x1_i4_o2 +USMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 00 ... @azx_2x1_i4_o1 +USMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 00 ... @azx_4x1_i4_o1 + +SUMLALL_nx_s 11000001 0000 .... . .. ... ..... 101 .. @azx_1x1_i4_o2 +SUMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 10 ... @azx_2x1_i4_o1 +SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1 + +%idx3_10_3 10:2 3:1 +@azx_2x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 idx=%idx3_10_3 +@azx_4x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 idx=%idx3_10_3 + +BFMLA_nx 11000001 0001 .... 0 .. 1.. ....1 0 .... @azx_2x1_i3_o3 +FMLA_nx_h 11000001 0001 .... 0 .. 1.. ....0 0 .... @azx_2x1_i3_o3 +FMLA_nx_s 11000001 0101 .... 0 .. 0.. ....0 00 ... @azx_2x1_i2_o3 +FMLA_nx_d 11000001 1101 .... 0 .. 00. ....0 00 ... @azx_2x1_i1_o3 + +BFMLA_nx 11000001 0001 .... 1 .. 1.. ...01 0 .... @azx_4x1_i3_o3 +FMLA_nx_h 11000001 0001 .... 1 .. 1.. ...00 0 .... 
@azx_4x1_i3_o3 +FMLA_nx_s 11000001 0101 .... 1 .. 0.. ...00 00 ... @azx_4x1_i2_o3 +FMLA_nx_d 11000001 1101 .... 1 .. 00. ...00 00 ... @azx_4x1_i1_o3 + +BFMLS_nx 11000001 0001 .... 0 .. 1.. ....1 1 .... @azx_2x1_i3_o3 +FMLS_nx_h 11000001 0001 .... 0 .. 1.. ....0 1 .... @azx_2x1_i3_o3 +FMLS_nx_s 11000001 0101 .... 0 .. 0.. ....0 10 ... @azx_2x1_i2_o3 +FMLS_nx_d 11000001 1101 .... 0 .. 00. ....0 10 ... @azx_2x1_i1_o3 + +BFMLS_nx 11000001 0001 .... 1 .. 1.. ...01 1 .... @azx_4x1_i3_o3 +FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3 +FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3 +FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3 + +### SME2 Add / Sub array accumulators + +ADD_aaz_s 11000001 101 000000 .. 111 ....0 10 ... @az_2x2_o3 +ADD_aaz_s 11000001 101 000010 .. 111 ...00 10 ... @az_4x4_o3 +ADD_aaz_d 11000001 111 000000 .. 111 ....0 10 ... @az_2x2_o3 +ADD_aaz_d 11000001 111 000010 .. 111 ...00 10 ... @az_4x4_o3 + +SUB_aaz_s 11000001 101 000000 .. 111 ....0 11 ... @az_2x2_o3 +SUB_aaz_s 11000001 101 000010 .. 111 ...00 11 ... @az_4x4_o3 +SUB_aaz_d 11000001 111 000000 .. 111 ....0 11 ... @az_2x2_o3 +SUB_aaz_d 11000001 111 000010 .. 111 ...00 11 ... @az_4x4_o3 + +### SME2 Multi-vector SVE Constructive Unary + +&zz_e zd zn esz +&zz_n zd zn n +@zz_1x2 ........ ... ..... ...... ..... zd:5 \ + &zz_n n=1 zn=%zn_ax2 +@zz_1x4 ........ ... ..... ...... ..... zd:5 \ + &zz_n n=1 zn=%zn_ax4 +@zz_2x1 ........ ... ..... ...... zn:5 ..... \ + &zz_n n=1 zd=%zd_ax2 +@zz_2x2 ........ ... ..... ...... .... . ..... \ + &zz_n n=2 zd=%zd_ax2 zn=%zn_ax2 +@zz_4x4 ........ ... ..... ...... .... . ..... \ + &zz_n n=4 zd=%zd_ax4 zn=%zn_ax4 +@zz_4x2_n1 ........ ... ..... ...... .... . ..... \ + &zz_n n=1 zd=%zd_ax4 zn=%zn_ax2 + +BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2 +BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2 + +FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2 +FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2 + +FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1 +FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1 + +FCVTZS 11000001 001 00001 111000 ....0 ....0 @zz_2x2 +FCVTZS 11000001 001 10001 111000 ...00 ...00 @zz_4x4 +FCVTZU 11000001 001 00001 111000 ....1 ....0 @zz_2x2 +FCVTZU 11000001 001 10001 111000 ...01 ...00 @zz_4x4 + +SCVTF 11000001 001 00010 111000 ....0 ....0 @zz_2x2 +SCVTF 11000001 001 10010 111000 ...00 ...00 @zz_4x4 +UCVTF 11000001 001 00010 111000 ....1 ....0 @zz_2x2 +UCVTF 11000001 001 10010 111000 ...01 ...00 @zz_4x4 + +FRINTN 11000001 101 01000 111000 ....0 ....0 @zz_2x2 +FRINTN 11000001 101 11000 111000 ...00 ...00 @zz_4x4 +FRINTP 11000001 101 01001 111000 ....0 ....0 @zz_2x2 +FRINTP 11000001 101 11001 111000 ...00 ...00 @zz_4x4 +FRINTM 11000001 101 01010 111000 ....0 ....0 @zz_2x2 +FRINTM 11000001 101 11010 111000 ...00 ...00 @zz_4x4 +FRINTA 11000001 101 01100 111000 ....0 ....0 @zz_2x2 +FRINTA 11000001 101 11100 111000 ...00 ...00 @zz_4x4 + +SQCVT_sh 11000001 001 00011 111000 ....0 ..... @zz_1x2 +UQCVT_sh 11000001 001 00011 111000 ....1 ..... @zz_1x2 +SQCVTU_sh 11000001 011 00011 111000 ....0 ..... @zz_1x2 + +SQCVT_sb 11000001 001 10011 111000 ...00 ..... @zz_1x4 +UQCVT_sb 11000001 001 10011 111000 ...01 ..... @zz_1x4 +SQCVTU_sb 11000001 011 10011 111000 ...00 ..... @zz_1x4 + +SQCVT_dh 11000001 101 10011 111000 ...00 ..... @zz_1x4 +UQCVT_dh 11000001 101 10011 111000 ...01 ..... @zz_1x4 +SQCVTU_dh 11000001 111 10011 111000 ...00 ..... @zz_1x4 + +SQCVTN_sb 11000001 001 10011 111000 ...10 ..... 
@zz_1x4 +UQCVTN_sb 11000001 001 10011 111000 ...11 ..... @zz_1x4 +SQCVTUN_sb 11000001 011 10011 111000 ...10 ..... @zz_1x4 + +SQCVTN_dh 11000001 101 10011 111000 ...10 ..... @zz_1x4 +UQCVTN_dh 11000001 101 10011 111000 ...11 ..... @zz_1x4 +SQCVTUN_dh 11000001 111 10011 111000 ...10 ..... @zz_1x4 + +SUNPK_2bh 11000001 011 00101 111000 ..... ....0 @zz_2x1 +SUNPK_2hs 11000001 101 00101 111000 ..... ....0 @zz_2x1 +SUNPK_2sd 11000001 111 00101 111000 ..... ....0 @zz_2x1 + +UUNPK_2bh 11000001 011 00101 111000 ..... ....1 @zz_2x1 +UUNPK_2hs 11000001 101 00101 111000 ..... ....1 @zz_2x1 +UUNPK_2sd 11000001 111 00101 111000 ..... ....1 @zz_2x1 + +SUNPK_4bh 11000001 011 10101 111000 ....0 ...00 @zz_4x2_n1 +SUNPK_4hs 11000001 101 10101 111000 ....0 ...00 @zz_4x2_n1 +SUNPK_4sd 11000001 111 10101 111000 ....0 ...00 @zz_4x2_n1 + +UUNPK_4bh 11000001 011 10101 111000 ....0 ...01 @zz_4x2_n1 +UUNPK_4hs 11000001 101 10101 111000 ....0 ...01 @zz_4x2_n1 +UUNPK_4sd 11000001 111 10101 111000 ....0 ...01 @zz_4x2_n1 + +ZIP_4 11000001 esz:2 1 10110 111000 ...00 ... 00 \ + &zz_e zd=%zd_ax4 zn=%zn_ax4 +ZIP_4 11000001 001 10111 111000 ...00 ... 00 \ + &zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4 + +UZP_4 11000001 esz:2 1 10110 111000 ...00 ... 10 \ + &zz_e zd=%zd_ax4 zn=%zn_ax4 +UZP_4 11000001 001 10111 111000 ...00 ... 10 \ + &zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4 + +### SME2 Multi-vector SVE Constructive Binary + +&rshr zd zn shift + +%rshr_sh_shift 16:4 !function=rsub_16 +%rshr_sb_shift 16:5 !function=rsub_32 +%rshr_dh_shift 22:1 16:5 !function=rsub_64 + +@rshr_sh ........ .... .... ...... ..... zd:5 \ + &rshr zn=%zn_ax2 shift=%rshr_sh_shift +@rshr_sb ........ ... ..... ...... ..... zd:5 \ + &rshr zn=%zn_ax4 shift=%rshr_sb_shift +@rshr_dh ........ ... ..... ...... ..... zd:5 \ + &rshr zn=%zn_ax4 shift=%rshr_dh_shift + +SQRSHR_sh 11000001 1110 .... 110101 ....0 ..... @rshr_sh +UQRSHR_sh 11000001 1110 .... 110101 ....1 ..... @rshr_sh +SQRSHRU_sh 11000001 1111 .... 110101 ....0 ..... @rshr_sh + +SQRSHR_sb 11000001 011 ..... 110110 ...00 ..... @rshr_sb +SQRSHR_dh 11000001 1.1 ..... 110110 ...00 ..... @rshr_dh +UQRSHR_sb 11000001 011 ..... 110110 ...01 ..... @rshr_sb +UQRSHR_dh 11000001 1.1 ..... 110110 ...01 ..... @rshr_dh +SQRSHRU_sb 11000001 011 ..... 110110 ...10 ..... @rshr_sb +SQRSHRU_dh 11000001 1.1 ..... 110110 ...10 ..... @rshr_dh + +SQRSHRN_sh 01000101 1011 .... 001010 ....0 ..... @rshr_sh +UQRSHRN_sh 01000101 1011 .... 001110 ....0 ..... @rshr_sh +SQRSHRUN_sh 01000101 1011 .... 000010 ....0 ..... @rshr_sh + +SQRSHRN_sb 11000001 011 ..... 110111 ...00 ..... @rshr_sb +SQRSHRN_dh 11000001 1.1 ..... 110111 ...00 ..... @rshr_dh +UQRSHRN_sb 11000001 011 ..... 110111 ...01 ..... @rshr_sb +UQRSHRN_dh 11000001 1.1 ..... 110111 ...01 ..... @rshr_dh +SQRSHRUN_sb 11000001 011 ..... 110111 ...10 ..... @rshr_sb +SQRSHRUN_dh 11000001 1.1 ..... 110111 ...10 ..... @rshr_dh + +&zzz_e zd zn zm esz + +ZIP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 0 \ + &zzz_e zd=%zd_ax2 +ZIP_2 11000001 00 1 zm:5 110101 zn:5 .... 0 \ + &zzz_e zd=%zd_ax2 esz=4 + +UZP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 1 \ + &zzz_e zd=%zd_ax2 +UZP_2 11000001 00 1 zm:5 110101 zn:5 .... 1 \ + &zzz_e zd=%zd_ax2 esz=4 + +&zzz_en zd zn zm esz n + +FCLAMP 11000001 esz:2 1 zm:5 110000 zn:5 .... 0 \ + &zzz_en zd=%zd_ax2 n=2 +FCLAMP 11000001 esz:2 1 zm:5 110010 zn:5 ...0 0 \ + &zzz_en zd=%zd_ax4 n=4 + +SCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 
0 \ + &zzz_en zd=%zd_ax2 n=2 +SCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 0 \ + &zzz_en zd=%zd_ax4 n=4 + +UCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 1 \ + &zzz_en zd=%zd_ax2 n=2 +UCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 1 \ + &zzz_en zd=%zd_ax4 n=4 + +### SME2 Multi-vector SVE Select + +%sel_pg 10:3 !function=plus_8 + +SEL 11000001 esz:2 1 ....0 100 ... ....0 ....0 \ + n=2 zd=%zd_ax2 zn=%zn_ax2 zm=%zm_ax2 pg=%sel_pg +SEL 11000001 esz:2 1 ...01 100 ... ...00 ...00 \ + n=4 zd=%zd_ax4 zn=%zn_ax4 zm=%zm_ax4 pg=%sel_pg + +### SME Multiple Zero + +&zero_za rv off ngrp nvec + +ZERO_za 11000000 000011 000 .. 0000000000 off:3 \ + &zero_za ngrp=2 nvec=1 rv=%mova_rv +ZERO_za 11000000 000011 100 .. 0000000000 off:3 \ + &zero_za ngrp=4 nvec=1 rv=%mova_rv + +ZERO_za 11000000 000011 001 .. 0000000000 ... \ + &zero_za ngrp=1 nvec=2 rv=%mova_rv off=%off3_x2 +ZERO_za 11000000 000011 010 .. 0000000000 0.. \ + &zero_za ngrp=2 nvec=2 rv=%mova_rv off=%off2_x2 +ZERO_za 11000000 000011 011 .. 0000000000 0.. \ + &zero_za ngrp=4 nvec=2 rv=%mova_rv off=%off2_x2 + +ZERO_za 11000000 000011 101 .. 0000000000 0.. \ + &zero_za ngrp=1 nvec=4 rv=%mova_rv off=%off2_x4 +ZERO_za 11000000 000011 110 .. 0000000000 00. \ + &zero_za ngrp=2 nvec=4 rv=%mova_rv off=%off1_x4 +ZERO_za 11000000 000011 111 .. 0000000000 00. \ + &zero_za ngrp=4 nvec=4 rv=%mova_rv off=%off1_x4 + +### SME Lookup Table Read + +&lut zd zn idx + +# LUTI2, consecutive +LUTI2_c_1b 1100 0000 1100 11 idx:4 00 00 zn:5 zd:5 &lut +LUTI2_c_1h 1100 0000 1100 11 idx:4 01 00 zn:5 zd:5 &lut +LUTI2_c_1s 1100 0000 1100 11 idx:4 10 00 zn:5 zd:5 &lut + +LUTI2_c_2b 1100 0000 1000 11 idx:3 1 00 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI2_c_2h 1100 0000 1000 11 idx:3 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI2_c_2s 1100 0000 1000 11 idx:3 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2 + +LUTI2_c_4b 1100 0000 1000 11 idx:2 10 00 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI2_c_4h 1100 0000 1000 11 idx:2 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI2_c_4s 1100 0000 1000 11 idx:2 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4 + +# LUTI2, strided (must check zd alignment) +LUTI2_s_2b 1100 0000 1001 11 idx:3 1 00 00 zn:5 zd:5 &lut +LUTI2_s_2h 1100 0000 1001 11 idx:3 1 01 00 zn:5 zd:5 &lut + +LUTI2_s_4b 1100 0000 1001 11 idx:2 10 00 00 zn:5 zd:5 &lut +LUTI2_s_4h 1100 0000 1001 11 idx:2 10 01 00 zn:5 zd:5 &lut + +# LUTI4, consecutive +LUTI4_c_1b 1100 0000 1100 101 idx:3 00 00 zn:5 zd:5 &lut +LUTI4_c_1h 1100 0000 1100 101 idx:3 01 00 zn:5 zd:5 &lut +LUTI4_c_1s 1100 0000 1100 101 idx:3 10 00 zn:5 zd:5 &lut + +LUTI4_c_2b 1100 0000 1000 101 idx:2 1 00 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI4_c_2h 1100 0000 1000 101 idx:2 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI4_c_2s 1100 0000 1000 101 idx:2 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2 + +LUTI4_c_4h 1100 0000 1000 101 idx:1 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI4_c_4s 1100 0000 1000 101 idx:1 10 10 00 zn:5 ... 
00 &lut zd=%zd_ax4 + +# LUTI4, strided (must check zd alignment) +LUTI4_s_2b 1100 0000 1001 101 idx:2 1 00 00 zn:5 zd:5 &lut +LUTI4_s_2h 1100 0000 1001 101 idx:2 1 01 00 zn:5 zd:5 &lut + +LUTI4_s_4h 1100 0000 1001 101 idx:1 10 01 00 zn:5 zd:5 &lut diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index dcc48e4..075360d 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -22,13 +22,20 @@ #include "internals.h" #include "tcg/tcg-gvec-desc.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" #include "qemu/int128.h" #include "fpu/softfloat.h" #include "vec_internal.h" #include "sve_ldst_internal.h" + +static bool vectors_overlap(ARMVectorReg *x, unsigned nx, + ARMVectorReg *y, unsigned ny) +{ + return !(x + nx <= y || y + ny <= x); +} + void helper_set_svcr(CPUARMState *env, uint32_t val, uint32_t mask) { aarch64_set_svcr(env, val, mask); @@ -39,12 +46,12 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) uint32_t i; /* - * Special case clearing the entire ZA space. + * Special case clearing the entire ZArray. * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any * parts of the ZA storage outside of SVL. */ if (imm == 0xff) { - memset(env->zarray, 0, sizeof(env->zarray)); + memset(env->za_state.za, 0, sizeof(env->za_state.za)); return; } @@ -54,7 +61,7 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) */ for (i = 0; i < svl; i++) { if (imm & (1 << (i % 8))) { - memset(&env->zarray[i], 0, svl); + memset(&env->za_state.za[i], 0, svl); } } } @@ -206,6 +213,110 @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc) #undef DO_MOVA_Z +void HELPER(sme2_mova_zc_b)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint8_t *src = vsrc; + uint8_t *dst = vdst; + size_t i, n = simd_oprsz(desc); + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_h)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint16_t *src = vsrc; + uint16_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 2; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_s)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint32_t *src = vsrc; + uint32_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 4; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_d)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint64_t *src = vsrc; + uint64_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 8; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2p1_movaz_zc_b)(void *vdst, void *vsrc, uint32_t desc) +{ + uint8_t *src = vsrc; + uint8_t *dst = vdst; + size_t i, n = simd_oprsz(desc); + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_h)(void *vdst, void *vsrc, uint32_t desc) +{ + uint16_t *src = vsrc; + uint16_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 2; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_s)(void *vdst, void *vsrc, uint32_t desc) +{ + uint32_t *src = vsrc; + uint32_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 4; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void 
HELPER(sme2p1_movaz_zc_d)(void *vdst, void *vsrc, uint32_t desc) +{ + uint64_t *src = vsrc; + uint64_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 8; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_q)(void *vdst, void *vsrc, uint32_t desc) +{ + Int128 *src = vsrc; + Int128 *dst = vdst; + size_t i, n = simd_oprsz(desc) / 16; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + memset(&src[tile_vslice_index(i)], 0, 16); + } +} + /* * Clear elements in a tile slice comprising len bytes. */ @@ -314,6 +425,26 @@ static void copy_vertical_q(void *vdst, const void *vsrc, size_t len) } } +void HELPER(sme2_mova_cz_b)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_b(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_h)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_h(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_s)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_s(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_d)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_d(vdst, vsrc, simd_oprsz(desc)); +} + /* * Host and TLB primitives for vertical tile slice addressing. */ @@ -344,54 +475,22 @@ static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ TLB(env, useronly_clean_ptr(addr), val, ra); \ } -/* - * The ARMVectorReg elements are stored in host-endian 64-bit units. - * For 128-bit quantities, the sequence defined by the Elem[] pseudocode - * corresponds to storing the two 64-bit pieces in little-endian order. - */ -#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \ -static inline void HNAME##_host(void *za, intptr_t off, void *host) \ -{ \ - uint64_t val0 = HOST(host), val1 = HOST(host + 8); \ - uint64_t *ptr = za + off; \ - ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ -} \ +#define DO_LDQ(HNAME, VNAME) \ static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ { \ HNAME##_host(za, tile_vslice_offset(off), host); \ } \ -static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \ - uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \ - uint64_t *ptr = za + off; \ - ptr[0] = BE ? val1 : val0, ptr[1] = BE ? 
val0 : val1; \ -} \ static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ target_ulong addr, uintptr_t ra) \ { \ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ } -#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \ -static inline void HNAME##_host(void *za, intptr_t off, void *host) \ -{ \ - uint64_t *ptr = za + off; \ - HOST(host, ptr[BE]); \ - HOST(host + 8, ptr[!BE]); \ -} \ +#define DO_STQ(HNAME, VNAME) \ static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ { \ HNAME##_host(za, tile_vslice_offset(off), host); \ } \ -static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - uint64_t *ptr = za + off; \ - TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \ - TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \ -} \ static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ target_ulong addr, uintptr_t ra) \ { \ @@ -406,8 +505,8 @@ DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra) DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra) DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra) -DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra) -DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra) +DO_LDQ(sve_ld1qq_be, sme_ld1q_be) +DO_LDQ(sve_ld1qq_le, sme_ld1q_le) DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra) DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra) @@ -417,8 +516,8 @@ DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra) DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra) DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra) -DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra) -DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra) +DO_STQ(sve_st1qq_be, sme_st1q_be) +DO_STQ(sve_st1qq_le, sme_st1q_le) #undef DO_LD #undef DO_ST @@ -567,19 +666,16 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, - target_ulong addr, uint32_t desc, uintptr_t ra, + target_ulong addr, uint64_t desc, uintptr_t ra, const int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn, ClearFn *clr_fn, CopyFn *cpy_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. 
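If top-byte-ignore is not enabled for this address, or TCMA marks the access as unchecked for its logical tag, the MTE descriptor is dropped and the load proceeds with no tag checks.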
*/ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -592,28 +688,28 @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, #define DO_LD(L, END, ESZ) \ void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ clear_horizontal, copy_horizontal); \ } \ void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ clear_vertical_##L, copy_vertical_##L); \ } \ void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ clear_horizontal, copy_horizontal); \ } \ void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ @@ -755,16 +851,13 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, - uint32_t desc, uintptr_t ra, int esz, bool vertical, + uint64_t desc, uintptr_t ra, int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. 
*/ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -777,25 +870,25 @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, #define DO_ST(L, END, ESZ) \ void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ } \ void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ } \ void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ } \ void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ @@ -903,28 +996,69 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, } } -void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, float_status *fpst_in, uint32_t desc) +static void do_fmopa_h(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, float_status *fpst, uint32_t desc, + uint16_t negx, int negf) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) << 31; - uint16_t *pn = vpn, *pm = vpm; - float_status fpst; - /* - * Make a copy of float_status because this operation does not - * update the cumulative fp exception status. It also produces - * default nans. 
- */
-    fpst = *fpst_in;
-    set_default_nan_mode(true, &fpst);
+    for (row = 0; row < oprsz; ) {
+        uint16_t pa = pn[H2(row >> 4)];
+        do {
+            if (pa & 1) {
+                void *vza_row = vza + tile_vslice_offset(row);
+                uint16_t n = *(uint16_t *)(vzn + H1_2(row)) ^ negx;
+
+                for (col = 0; col < oprsz; ) {
+                    uint16_t pb = pm[H2(col >> 4)];
+                    do {
+                        if (pb & 1) {
+                            uint16_t *a = vza_row + H1_2(col);
+                            uint16_t *m = vzm + H1_2(col);
+                            *a = float16_muladd(n, *m, *a, negf, fpst);
+                        }
+                        col += 2;
+                        pb >>= 2;
+                    } while (col & 15);
+                }
+            }
+            row += 2;
+            pa >>= 2;
+        } while (row & 15);
+    }
+}
+
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0);
+}
+
+void HELPER(sme_fmops_h)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 15, 0);
+}
+
+void HELPER(sme_ah_fmops_h)(void *vza, void *vzn, void *vzm, void *vpn,
+                            void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 0,
+               float_muladd_negate_product);
+}
+
+static void do_fmopa_s(void *vza, void *vzn, void *vzm, uint16_t *pn,
+                       uint16_t *pm, float_status *fpst, uint32_t desc,
+                       uint32_t negx, int negf)
+{
+    intptr_t row, col, oprsz = simd_maxsz(desc);
 
     for (row = 0; row < oprsz; ) {
         uint16_t pa = pn[H2(row >> 4)];
         do {
             if (pa & 1) {
                 void *vza_row = vza + tile_vslice_offset(row);
-                uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
+                uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ negx;
 
                 for (col = 0; col < oprsz; ) {
                     uint16_t pb = pm[H2(col >> 4)];
@@ -932,7 +1066,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
                         if (pb & 1) {
                             uint32_t *a = vza_row + H1_4(col);
                             uint32_t *m = vzm + H1_4(col);
-                            *a = float32_muladd(n, *m, *a, 0, &fpst);
+                            *a = float32_muladd(n, *m, *a, negf, fpst);
                         }
                         col += 4;
                         pb >>= 4;
@@ -945,32 +1079,116 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
     }
 }
 
-void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
-                         void *vpm, float_status *fpst_in, uint32_t desc)
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
 {
-    intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
-    uint64_t neg = (uint64_t)simd_data(desc) << 63;
-    uint64_t *za = vza, *zn = vzn, *zm = vzm;
-    uint8_t *pn = vpn, *pm = vpm;
-    float_status fpst = *fpst_in;
+    do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0);
+}
 
-    set_default_nan_mode(true, &fpst);
+void HELPER(sme_fmops_s)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 31, 0);
+}
+
+void HELPER(sme_ah_fmops_s)(void *vza, void *vzn, void *vzm, void *vpn,
+                            void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 0,
+               float_muladd_negate_product);
+}
+
+static void do_fmopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm, uint8_t *pn,
+                       uint8_t *pm, float_status *fpst, uint32_t desc,
+                       uint64_t negx, int negf)
+{
+    intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
 
     for (row = 0; row < oprsz; ++row) {
         if (pn[H1(row)] & 1) {
             uint64_t *za_row = &za[tile_vslice_index(row)];
-            uint64_t n = zn[row] ^ neg;
+            uint64_t n = zn[row] ^ negx;
 
             for (col = 0; col < oprsz; ++col) {
                 if (pm[H1(col)] & 1) {
                     uint64_t *a = &za_row[col];
-                    *a = float64_muladd(n, zm[col], *a, 0, &fpst);
+                    *a = float64_muladd(n, zm[col], *a, negf, fpst);
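+                    /*
+                     * negx has already flipped the sign of n for FMOPS;
+                     * negf instead folds the negation into the fused
+                     * multiply-add, so NaN operands propagate with their
+                     * sign unchanged for the FEAT_AFP variants.
+                     */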
                 }
             }
         }
     }
 }
 
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0);
+}
+
+void HELPER(sme_fmops_d)(void *vza, void *vzn, void *vzm, void *vpn,
+                         void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 1ull << 63, 0);
+}
+
+void HELPER(sme_ah_fmops_d)(void *vza, void *vzn, void *vzm, void *vpn,
+                            void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 0,
+               float_muladd_negate_product);
+}
+
+static void do_bfmopa(void *vza, void *vzn, void *vzm, uint16_t *pn,
+                      uint16_t *pm, float_status *fpst, uint32_t desc,
+                      uint16_t negx, int negf)
+{
+    intptr_t row, col, oprsz = simd_maxsz(desc);
+
+    for (row = 0; row < oprsz; ) {
+        uint16_t pa = pn[H2(row >> 4)];
+        do {
+            if (pa & 1) {
+                void *vza_row = vza + tile_vslice_offset(row);
+                uint16_t n = *(uint16_t *)(vzn + H1_2(row)) ^ negx;
+
+                for (col = 0; col < oprsz; ) {
+                    uint16_t pb = pm[H2(col >> 4)];
+                    do {
+                        if (pb & 1) {
+                            uint16_t *a = vza_row + H1_2(col);
+                            uint16_t *m = vzm + H1_2(col);
+                            *a = bfloat16_muladd(n, *m, *a, negf, fpst);
+                        }
+                        col += 2;
+                        pb >>= 2;
+                    } while (col & 15);
+                }
+            }
+            row += 2;
+            pa >>= 2;
+        } while (row & 15);
+    }
+}
+
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
+                        void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0);
+}
+
+void HELPER(sme_bfmops)(void *vza, void *vzn, void *vzm, void *vpn,
+                        void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 15, 0);
+}
+
+void HELPER(sme_ah_bfmops)(void *vza, void *vzn, void *vzm, void *vpn,
+                           void *vpm, float_status *fpst, uint32_t desc)
+{
+    do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 0,
+              float_muladd_negate_product);
+}
+
 /*
  * Alter PAIR as needed for controlling predicates being false,
  * and for NEG on an enabled row element.
@@ -991,6 +1209,20 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
     return pair;
 }
 
+static inline uint32_t f16mop_ah_neg_adj_pair(uint32_t pair, uint32_t pg)
+{
+    uint32_t l = pg & 1 ? float16_ah_chs(pair) : 0;
+    uint32_t h = pg & 4 ? float16_ah_chs(pair >> 16) : 0;
+    return l | (h << 16);
+}
+
+static inline uint32_t bf16mop_ah_neg_adj_pair(uint32_t pair, uint32_t pg)
+{
+    uint32_t l = pg & 1 ? bfloat16_ah_chs(pair) : 0;
+    uint32_t h = pg & 4 ? bfloat16_ah_chs(pair >> 16) : 0;
+    return l | (h << 16);
+}
+
 static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
                           float_status *s_f16, float_status *s_std,
                           float_status *s_odd)
@@ -1005,49 +1237,67 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
      * - we have pre-set-up copy of s_std which is set to round-to-odd,
      *   for the multiply (see below)
      */
-    float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
-    float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
-    float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
-    float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
-    float64 t64;
+    float16 h1r = e1 & 0xffff;
+    float16 h1c = e1 >> 16;
+    float16 h2r = e2 & 0xffff;
+    float16 h2c = e2 >> 16;
     float32 t32;
 
-    /*
-     * The ARM pseudocode function FPDot performs both multiplies
-     * and the add with a single rounding operation. Emulate this
-     * by performing the first multiply in round-to-odd, then doing
-     * the second multiply as fused multiply-add, and rounding to
-     * float32 all in one step.
- */ - t64 = float64_mul(e1r, e2r, s_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); + /* C.f. FPProcessNaNs4 */ + if (float16_is_any_nan(h1r) || float16_is_any_nan(h1c) || + float16_is_any_nan(h2r) || float16_is_any_nan(h2c)) { + float16 t16; + + if (float16_is_signaling_nan(h1r, s_f16)) { + t16 = h1r; + } else if (float16_is_signaling_nan(h1c, s_f16)) { + t16 = h1c; + } else if (float16_is_signaling_nan(h2r, s_f16)) { + t16 = h2r; + } else if (float16_is_signaling_nan(h2c, s_f16)) { + t16 = h2c; + } else if (float16_is_any_nan(h1r)) { + t16 = h1r; + } else if (float16_is_any_nan(h1c)) { + t16 = h1c; + } else if (float16_is_any_nan(h2r)) { + t16 = h2r; + } else { + t16 = h2c; + } + t32 = float16_to_float32(t16, true, s_f16); + } else { + float64 e1r = float16_to_float64(h1r, true, s_f16); + float64 e1c = float16_to_float64(h1c, true, s_f16); + float64 e2r = float16_to_float64(h2r, true, s_f16); + float64 e2c = float16_to_float64(h2c, true, s_f16); + float64 t64; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, s_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); - /* This conversion is exact, because we've already rounded. */ - t32 = float64_to_float32(t64, s_std); + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, s_std); + } /* The final accumulation step is not fused. */ return float32_add(sum, t32, s_std); } -void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, CPUARMState *env, uint32_t desc) +static void do_fmopa_w_h(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, CPUARMState *env, uint32_t desc, + uint32_t negx, bool ah_neg) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) * 0x80008000u; - uint16_t *pn = vpn, *pm = vpm; - float_status fpst_odd, fpst_std, fpst_f16; + float_status fpst_odd = env->vfp.fp_status[FPST_ZA]; - /* - * Make copies of the fp status fields we use, because this operation - * does not update the cumulative fp exception status. It also - * produces default NaNs. We also need a second copy of fp_status with - * round-to-odd -- see above. 
- */ - fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; - fpst_std = env->vfp.fp_status[FPST_A64]; - set_default_nan_mode(true, &fpst_std); - set_default_nan_mode(true, &fpst_f16); - fpst_odd = fpst_std; set_float_rounding_mode(float_round_to_odd, &fpst_odd); for (row = 0; row < oprsz; ) { @@ -1056,7 +1306,11 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = f16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1067,7 +1321,9 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, m = f16mop_adj_pair(m, pcol, 0); *a = f16_dotadd(*a, n, m, - &fpst_f16, &fpst_std, &fpst_odd); + &env->vfp.fp_status[FPST_ZA_F16], + &env->vfp.fp_status[FPST_ZA], + &fpst_odd); } col += 4; pcol >>= 4; @@ -1079,12 +1335,103 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, } } -void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, - void *vpn, void *vpm, CPUARMState *env, uint32_t desc) +void HELPER(sme_fmopa_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0, false); +} + +void HELPER(sme_fmops_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0x80008000u, false); +} + +void HELPER(sme_ah_fmops_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0, true); +} + +void HELPER(sme2_fdot_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, oprsz = simd_maxsz(desc); + bool za = extract32(desc, SIMD_DATA_SHIFT, 1); + float_status *fpst_std = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; + float_status *fpst_f16 = &env->vfp.fp_status[za ? FPST_ZA_F16 : FPST_A64_F16]; + float_status fpst_odd = *fpst_std; + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = vm; + + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < oprsz / sizeof(float32); ++i) { + d[H4(i)] = f16_dotadd(a[H4(i)], n[H4(i)], m[H4(i)], + fpst_f16, fpst_std, &fpst_odd); + } +} + +void HELPER(sme2_fdot_idx_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, oprsz = simd_maxsz(desc); + intptr_t elements = oprsz / sizeof(float32); + intptr_t eltspersegment = MIN(4, elements); + int idx = extract32(desc, SIMD_DATA_SHIFT, 2); + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status *fpst_std = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; + float_status *fpst_f16 = &env->vfp.fp_status[za ? 
FPST_ZA_F16 : FPST_A64_F16]; + float_status fpst_odd = *fpst_std; + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = (uint32_t *)vm + H4(idx); + + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < elements; i += eltspersegment) { + uint32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[H4(i + j)] = f16_dotadd(a[H4(i + j)], n[H4(i + j)], mm, + fpst_f16, fpst_std, &fpst_odd); + } + } +} + +void HELPER(sme2_fvdot_idx_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, oprsz = simd_maxsz(desc); + intptr_t elements = oprsz / sizeof(float32); + intptr_t eltspersegment = MIN(4, elements); + int idx = extract32(desc, SIMD_DATA_SHIFT, 2); + int sel = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status fpst_odd, *fpst_std, *fpst_f16; + float32 *d = vd, *a = va; + uint16_t *n0 = vn; + uint16_t *n1 = vn + sizeof(ARMVectorReg); + uint32_t *m = (uint32_t *)vm + H4(idx); + + fpst_std = &env->vfp.fp_status[FPST_ZA]; + fpst_f16 = &env->vfp.fp_status[FPST_ZA_F16]; + fpst_odd = *fpst_std; + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < elements; i += eltspersegment) { + uint32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = f16_dotadd(a[i + H4(j)], nn, mm, + fpst_f16, fpst_std, &fpst_odd); + } + } +} + +static void do_bfmopa_w(void *vza, void *vzn, void *vzm, + uint16_t *pn, uint16_t *pm, CPUARMState *env, + uint32_t desc, uint32_t negx, bool ah_neg) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) * 0x80008000u; - uint16_t *pn = vpn, *pm = vpm; float_status fpst, fpst_odd; if (is_ebf(env, &fpst, &fpst_odd)) { @@ -1094,7 +1441,11 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = bf16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1121,7 +1472,11 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = bf16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1144,6 +1499,24 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, } } +void HELPER(sme_bfmopa_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0, false); +} + +void HELPER(sme_bfmops_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0x80008000u, false); +} + +void HELPER(sme_ah_bfmops_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0, true); +} + typedef uint32_t IMOPFn32(uint32_t, uint32_t, uint32_t, uint8_t, bool); static inline void do_imopa_s(uint32_t *za, uint32_t *zn, uint32_t *zm, uint8_t *pn, uint8_t *pm, @@ -1188,7 +1561,7 @@ static inline void do_imopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm, } } -#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \ +#define DEF_IMOP_8x4_32(NAME, NTYPE, MTYPE) \ 
static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ { \ uint32_t sum = 0; \ @@ -1201,7 +1574,7 @@ static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ return neg ? a - sum : a + sum; \ } -#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \ +#define DEF_IMOP_16x4_64(NAME, NTYPE, MTYPE) \ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ { \ uint64_t sum = 0; \ @@ -1214,27 +1587,1070 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ return neg ? a - sum : a + sum; \ } -DEF_IMOP_32(smopa_s, int8_t, int8_t) -DEF_IMOP_32(umopa_s, uint8_t, uint8_t) -DEF_IMOP_32(sumopa_s, int8_t, uint8_t) -DEF_IMOP_32(usmopa_s, uint8_t, int8_t) +DEF_IMOP_8x4_32(smopa_s, int8_t, int8_t) +DEF_IMOP_8x4_32(umopa_s, uint8_t, uint8_t) +DEF_IMOP_8x4_32(sumopa_s, int8_t, uint8_t) +DEF_IMOP_8x4_32(usmopa_s, uint8_t, int8_t) -DEF_IMOP_64(smopa_d, int16_t, int16_t) -DEF_IMOP_64(umopa_d, uint16_t, uint16_t) -DEF_IMOP_64(sumopa_d, int16_t, uint16_t) -DEF_IMOP_64(usmopa_d, uint16_t, int16_t) +DEF_IMOP_16x4_64(smopa_d, int16_t, int16_t) +DEF_IMOP_16x4_64(umopa_d, uint16_t, uint16_t) +DEF_IMOP_16x4_64(sumopa_d, int16_t, uint16_t) +DEF_IMOP_16x4_64(usmopa_d, uint16_t, int16_t) -#define DEF_IMOPH(NAME, S) \ - void HELPER(sme_##NAME##_##S)(void *vza, void *vzn, void *vzm, \ +#define DEF_IMOPH(P, NAME, S) \ + void HELPER(P##_##NAME##_##S)(void *vza, void *vzn, void *vzm, \ void *vpn, void *vpm, uint32_t desc) \ { do_imopa_##S(vza, vzn, vzm, vpn, vpm, desc, NAME##_##S); } -DEF_IMOPH(smopa, s) -DEF_IMOPH(umopa, s) -DEF_IMOPH(sumopa, s) -DEF_IMOPH(usmopa, s) +DEF_IMOPH(sme, smopa, s) +DEF_IMOPH(sme, umopa, s) +DEF_IMOPH(sme, sumopa, s) +DEF_IMOPH(sme, usmopa, s) + +DEF_IMOPH(sme, smopa, d) +DEF_IMOPH(sme, umopa, d) +DEF_IMOPH(sme, sumopa, d) +DEF_IMOPH(sme, usmopa, d) + +static uint32_t bmopa_s(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) +{ + uint32_t sum = ctpop32(~(n ^ m)); + if (neg) { + sum = -sum; + } + if (!(p & 1)) { + sum = 0; + } + return a + sum; +} + +DEF_IMOPH(sme2, bmopa, s) + +#define DEF_IMOP_16x2_32(NAME, NTYPE, MTYPE) \ +static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ +{ \ + uint32_t sum = 0; \ + /* Apply P to N as a mask, making the inactive elements 0. */ \ + n &= expand_pred_h(p); \ + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ + sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ + return neg ? 
a - sum : a + sum; \ +} + +DEF_IMOP_16x2_32(smopa2_s, int16_t, int16_t) +DEF_IMOP_16x2_32(umopa2_s, uint16_t, uint16_t) + +DEF_IMOPH(sme2, smopa2, s) +DEF_IMOPH(sme2, umopa2, s) + +#define DO_VDOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD, HN) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t svl = simd_oprsz(desc); \ + intptr_t elements = svl / sizeof(TYPED); \ + intptr_t eltperseg = 16 / sizeof(TYPED); \ + intptr_t nreg = sizeof(TYPED) / sizeof(TYPEN); \ + intptr_t vstride = (svl / nreg) * sizeof(ARMVectorReg); \ + intptr_t zstride = sizeof(ARMVectorReg) / sizeof(TYPEN); \ + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT, 2); \ + TYPEN *n = vn; \ + TYPEM *m = vm; \ + for (intptr_t r = 0; r < nreg; r++) { \ + TYPED *d = vd + r * vstride; \ + for (intptr_t seg = 0; seg < elements; seg += eltperseg) { \ + intptr_t s = seg + idx; \ + for (intptr_t e = seg; e < seg + eltperseg; e++) { \ + TYPED sum = d[HD(e)]; \ + for (intptr_t i = 0; i < nreg; i++) { \ + TYPED nn = n[i * zstride + HN(nreg * e + r)]; \ + TYPED mm = m[HN(nreg * s + i)]; \ + sum += nn * mm; \ + } \ + d[HD(e)] = sum; \ + } \ + } \ + } \ +} + +DO_VDOT_IDX(sme2_svdot_idx_4b, int32_t, int8_t, int8_t, H4, H1) +DO_VDOT_IDX(sme2_uvdot_idx_4b, uint32_t, uint8_t, uint8_t, H4, H1) +DO_VDOT_IDX(sme2_suvdot_idx_4b, int32_t, int8_t, uint8_t, H4, H1) +DO_VDOT_IDX(sme2_usvdot_idx_4b, int32_t, uint8_t, int8_t, H4, H1) + +DO_VDOT_IDX(sme2_svdot_idx_4h, int64_t, int16_t, int16_t, H8, H2) +DO_VDOT_IDX(sme2_uvdot_idx_4h, uint64_t, uint16_t, uint16_t, H8, H2) + +DO_VDOT_IDX(sme2_svdot_idx_2h, int32_t, int16_t, int16_t, H4, H2) +DO_VDOT_IDX(sme2_uvdot_idx_2h, uint32_t, uint16_t, uint16_t, H4, H2) + +#undef DO_VDOT_IDX + +#define DO_MLALL(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \ + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \ + TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \ + for (intptr_t i = 0; i < elements; ++i) { \ + TYPEW nn = n[HN(i * 4 + sel)]; \ + TYPEM mm = m[HN(i * 4 + sel)]; \ + d[HW(i)] = a[HW(i)] OP (nn * mm); \ + } \ +} + +DO_MLALL(sme2_smlall_s, int32_t, int8_t, int8_t, H4, H1, +) +DO_MLALL(sme2_smlall_d, int64_t, int16_t, int16_t, H8, H2, +) +DO_MLALL(sme2_smlsll_s, int32_t, int8_t, int8_t, H4, H1, -) +DO_MLALL(sme2_smlsll_d, int64_t, int16_t, int16_t, H8, H2, -) + +DO_MLALL(sme2_umlall_s, uint32_t, uint8_t, uint8_t, H4, H1, +) +DO_MLALL(sme2_umlall_d, uint64_t, uint16_t, uint16_t, H8, H2, +) +DO_MLALL(sme2_umlsll_s, uint32_t, uint8_t, uint8_t, H4, H1, -) +DO_MLALL(sme2_umlsll_d, uint64_t, uint16_t, uint16_t, H8, H2, -) + +DO_MLALL(sme2_usmlall_s, uint32_t, uint8_t, int8_t, H4, H1, +) + +#undef DO_MLALL + +#define DO_MLALL_IDX(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \ + intptr_t eltspersegment = 16 / sizeof(TYPEW); \ + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \ + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 4); \ + TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \ + for (intptr_t i = 0; i < elements; i += eltspersegment) { \ + TYPEW mm = m[HN(i * 4 + idx)]; \ + for (intptr_t j = 0; j < eltspersegment; ++j) { \ + TYPEN nn = n[HN((i + j) * 4 + sel)]; \ + d[HW(i + j)] = a[HW(i + j)] OP (nn * mm); \ + } \ + } \ +} + +DO_MLALL_IDX(sme2_smlall_idx_s, int32_t, int8_t, int8_t, H4, H1, +) 
+DO_MLALL_IDX(sme2_smlall_idx_d, int64_t, int16_t, int16_t, H8, H2, +)
+DO_MLALL_IDX(sme2_smlsll_idx_s, int32_t, int8_t, int8_t, H4, H1, -)
+DO_MLALL_IDX(sme2_smlsll_idx_d, int64_t, int16_t, int16_t, H8, H2, -)
+
+DO_MLALL_IDX(sme2_umlall_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_umlall_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, +)
+DO_MLALL_IDX(sme2_umlsll_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, -)
+DO_MLALL_IDX(sme2_umlsll_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, -)
+
+DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_sumlall_idx_s, uint32_t, int8_t, uint8_t, H4, H1, +)
+
+#undef DO_MLALL_IDX
+
+/* Convert and compress */
+void HELPER(sme2_bfcvt)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    ARMVectorReg scratch;
+    size_t oprsz = simd_oprsz(desc);
+    size_t i, n = oprsz / 4;
+    float32 *s0 = vs;
+    float32 *s1 = vs + sizeof(ARMVectorReg);
+    bfloat16 *d = vd;
+
+    if (vd == s1) {
+        s1 = memcpy(&scratch, s1, oprsz);
+    }
+
+    for (i = 0; i < n; ++i) {
+        d[H2(i)] = float32_to_bfloat16(s0[H4(i)], fpst);
+    }
+    for (i = 0; i < n; ++i) {
+        d[H2(i) + n] = float32_to_bfloat16(s1[H4(i)], fpst);
+    }
+}
-DEF_IMOPH(smopa, d)
-DEF_IMOPH(umopa, d)
-DEF_IMOPH(sumopa, d)
-DEF_IMOPH(usmopa, d)
+void HELPER(sme2_fcvt_n)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    ARMVectorReg scratch;
+    size_t oprsz = simd_oprsz(desc);
+    size_t i, n = oprsz / 4;
+    float32 *s0 = vs;
+    float32 *s1 = vs + sizeof(ARMVectorReg);
+    float16 *d = vd;
+
+    if (vd == s1) {
+        s1 = memcpy(&scratch, s1, oprsz);
+    }
+
+    for (i = 0; i < n; ++i) {
+        d[H2(i)] = sve_f32_to_f16(s0[H4(i)], fpst);
+    }
+    for (i = 0; i < n; ++i) {
+        d[H2(i) + n] = sve_f32_to_f16(s1[H4(i)], fpst);
+    }
+}
+
+#define SQCVT2(NAME, TW, TN, HW, HN, SAT) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
+    TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
+    TN *d = vd; \
+    if (vectors_overlap(vd, 1, vs, 2)) { \
+        d = (TN *)&scratch; \
+    } \
+    for (size_t i = 0; i < n; ++i) { \
+        d[HN(i)] = SAT(s0[HW(i)]); \
+        d[HN(i + n)] = SAT(s1[HW(i)]); \
+    } \
+    if (d != vd) { \
+        memcpy(vd, d, oprsz); \
+    } \
+}
+
+SQCVT2(sme2_sqcvt_sh, int32_t, int16_t, H4, H2, do_ssat_h)
+SQCVT2(sme2_uqcvt_sh, uint32_t, uint16_t, H4, H2, do_usat_h)
+SQCVT2(sme2_sqcvtu_sh, int32_t, uint16_t, H4, H2, do_usat_h)
+
+#undef SQCVT2
+
+#define SQCVT4(NAME, TW, TN, HW, HN, SAT) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
+    TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
+    TW *s2 = vs + 2 * sizeof(ARMVectorReg); \
+    TW *s3 = vs + 3 * sizeof(ARMVectorReg); \
+    TN *d = vd; \
+    if (vectors_overlap(vd, 1, vs, 4)) { \
+        d = (TN *)&scratch; \
+    } \
+    for (size_t i = 0; i < n; ++i) { \
+        d[HN(i)] = SAT(s0[HW(i)]); \
+        d[HN(i + n)] = SAT(s1[HW(i)]); \
+        d[HN(i + 2 * n)] = SAT(s2[HW(i)]); \
+        d[HN(i + 3 * n)] = SAT(s3[HW(i)]); \
+    } \
+    if (d != vd) { \
+        memcpy(vd, d, oprsz); \
+    } \
+}
+
+SQCVT4(sme2_sqcvt_sb, int32_t, int8_t, H4, H1, do_ssat_b)
+SQCVT4(sme2_uqcvt_sb, uint32_t, uint8_t, H4, H1, do_usat_b)
+SQCVT4(sme2_sqcvtu_sb, int32_t, uint8_t, H4, H1, do_usat_b)
+
+SQCVT4(sme2_sqcvt_dh, int64_t, int16_t, H8, H2, do_ssat_h)
+SQCVT4(sme2_uqcvt_dh, uint64_t, uint16_t, H8, H2, do_usat_h)
+SQCVT4(sme2_sqcvtu_dh, int64_t, uint16_t, H8, H2, do_usat_h)
+
+#undef SQCVT4
+
+#define SQRSHR2(NAME, TW, TN, HW, HN, RSHR, SAT) \
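+/* Round-shift each wide element right, then saturate to the narrow type. */ \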
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
+    int shift = simd_data(desc); \
+    TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
+    TN *d = vd; \
+    if (vectors_overlap(vd, 1, vs, 2)) { \
+        d = (TN *)&scratch; \
+    } \
+    for (size_t i = 0; i < n; ++i) { \
+        d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \
+        d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \
+    } \
+    if (d != vd) { \
+        memcpy(vd, d, oprsz); \
+    } \
+}
+
+SQRSHR2(sme2_sqrshr_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h)
+SQRSHR2(sme2_uqrshr_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h)
+SQRSHR2(sme2_sqrshru_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h)
+
+#undef SQRSHR2
+
+#define SQRSHR4(NAME, TW, TN, HW, HN, RSHR, SAT) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
+    int shift = simd_data(desc); \
+    TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
+    TW *s2 = vs + 2 * sizeof(ARMVectorReg); \
+    TW *s3 = vs + 3 * sizeof(ARMVectorReg); \
+    TN *d = vd; \
+    if (vectors_overlap(vd, 1, vs, 4)) { \
+        d = (TN *)&scratch; \
+    } \
+    for (size_t i = 0; i < n; ++i) { \
+        d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \
+        d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \
+        d[HN(i + 2 * n)] = SAT(RSHR(s2[HW(i)], shift)); \
+        d[HN(i + 3 * n)] = SAT(RSHR(s3[HW(i)], shift)); \
+    } \
+    if (d != vd) { \
+        memcpy(vd, d, oprsz); \
+    } \
+}
+
+SQRSHR4(sme2_sqrshr_sb, int32_t, int8_t, H4, H1, do_srshr, do_ssat_b)
+SQRSHR4(sme2_uqrshr_sb, uint32_t, uint8_t, H4, H1, do_urshr, do_usat_b)
+SQRSHR4(sme2_sqrshru_sb, int32_t, uint8_t, H4, H1, do_srshr, do_usat_b)
+
+SQRSHR4(sme2_sqrshr_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h)
+SQRSHR4(sme2_uqrshr_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h)
+SQRSHR4(sme2_sqrshru_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h)
+
+#undef SQRSHR4
+
+/* Convert and interleave */
+void HELPER(sme2_bfcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float32 *s0 = vs;
+    float32 *s1 = vs + sizeof(ARMVectorReg);
+    bfloat16 *d = vd;
+
+    for (i = 0; i < n; ++i) {
+        bfloat16 d0 = float32_to_bfloat16(s0[H4(i)], fpst);
+        bfloat16 d1 = float32_to_bfloat16(s1[H4(i)], fpst);
+        d[H2(i * 2 + 0)] = d0;
+        d[H2(i * 2 + 1)] = d1;
+    }
+}
+
+void HELPER(sme2_fcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float32 *s0 = vs;
+    float32 *s1 = vs + sizeof(ARMVectorReg);
+    float16 *d = vd;
+
+    for (i = 0; i < n; ++i) {
+        float16 d0 = sve_f32_to_f16(s0[H4(i)], fpst);
+        float16 d1 = sve_f32_to_f16(s1[H4(i)], fpst);
+        d[H2(i * 2 + 0)] = d0;
+        d[H2(i * 2 + 1)] = d1;
+    }
+}
+
+#define SQCVTN2(NAME, TW, TN, HW, HN, SAT) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch; \
+    size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \
+    TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \
+    TN *d = vd; \
+    if (vectors_overlap(vd, 1, vs, 2)) { \
+        d = (TN *)&scratch; \
+    } \
+    for (size_t i = 0; i < n; ++i) { \
+        d[HN(2 * i + 0)] = SAT(s0[HW(i)]); \
+        d[HN(2 * i + 1)] = SAT(s1[HW(i)]); \
+    } \
+    if (d != vd) { \
+        memcpy(vd, d, oprsz); \
+    } \
+}
+
+SQCVTN2(sme2_sqcvtn_sh, int32_t, int16_t, H4, H2, do_ssat_h)
+SQCVTN2(sme2_uqcvtn_sh, uint32_t, uint16_t, H4, H2, do_usat_h)
+SQCVTN2(sme2_sqcvtun_sh, int32_t, uint16_t, H4, H2, do_usat_h)
+
+#undef SQCVTN2
+
+#define SQCVTN4(NAME, TW, TN, HW, HN, SAT) \
+void
HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(4 * i + 0)] = SAT(s0[HW(i)]); \ + d[HN(4 * i + 1)] = SAT(s1[HW(i)]); \ + d[HN(4 * i + 2)] = SAT(s2[HW(i)]); \ + d[HN(4 * i + 3)] = SAT(s3[HW(i)]); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQCVTN4(sme2_sqcvtn_sb, int32_t, int8_t, H4, H1, do_ssat_b) +SQCVTN4(sme2_uqcvtn_sb, uint32_t, uint8_t, H4, H1, do_usat_b) +SQCVTN4(sme2_sqcvtun_sb, int32_t, uint8_t, H4, H1, do_usat_b) + +SQCVTN4(sme2_sqcvtn_dh, int64_t, int16_t, H8, H2, do_ssat_h) +SQCVTN4(sme2_uqcvtn_dh, uint64_t, uint16_t, H8, H2, do_usat_h) +SQCVTN4(sme2_sqcvtun_dh, int64_t, uint16_t, H8, H2, do_usat_h) + +#undef SQCVTN4 + +#define SQRSHRN2(NAME, TW, TN, HW, HN, RSHR, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 2)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(2 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(2 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHRN2(sme2_sqrshrn_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h) +SQRSHRN2(sme2_uqrshrn_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h) +SQRSHRN2(sme2_sqrshrun_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h) + +#undef SQRSHRN2 + +#define SQRSHRN4(NAME, TW, TN, HW, HN, RSHR, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(4 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(4 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \ + d[HN(4 * i + 2)] = SAT(RSHR(s2[HW(i)], shift)); \ + d[HN(4 * i + 3)] = SAT(RSHR(s3[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHRN4(sme2_sqrshrn_sb, int32_t, int8_t, H4, H1, do_srshr, do_ssat_b) +SQRSHRN4(sme2_uqrshrn_sb, uint32_t, uint8_t, H4, H1, do_urshr, do_usat_b) +SQRSHRN4(sme2_sqrshrun_sb, int32_t, uint8_t, H4, H1, do_srshr, do_usat_b) + +SQRSHRN4(sme2_sqrshrn_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h) +SQRSHRN4(sme2_uqrshrn_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h) +SQRSHRN4(sme2_sqrshrun_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h) + +#undef SQRSHRN4 + +/* Expand and convert */ +void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + ARMVectorReg scratch; + size_t oprsz = simd_oprsz(desc); + size_t i, n = oprsz / 4; + float16 *s = vs; + float32 *d0 = vd; + float32 *d1 = vd + sizeof(ARMVectorReg); + + if (vectors_overlap(vd, 1, vs, 2)) { + s = memcpy(&scratch, s, oprsz); + } + + for (i = 0; i < n; ++i) { + d0[H4(i)] = sve_f16_to_f32(s[H2(i)], fpst); + } + for (i = 0; i < n; ++i) { + 
        d1[H4(i)] = sve_f16_to_f32(s[H2(n + i)], fpst);
+    }
+}
+
+#define UNPK(NAME, SREG, TW, TN, HW, HN) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch[SREG]; \
+    size_t oprsz = simd_oprsz(desc); \
+    size_t n = oprsz / sizeof(TW); \
+    if (vectors_overlap(vd, 2 * SREG, vs, SREG)) { \
+        vs = memcpy(scratch, vs, sizeof(scratch)); \
+    } \
+    for (size_t r = 0; r < SREG; ++r) { \
+        TN *s = vs + r * sizeof(ARMVectorReg); \
+        for (size_t i = 0; i < 2; ++i) { \
+            TW *d = vd + (2 * r + i) * sizeof(ARMVectorReg); \
+            for (size_t e = 0; e < n; ++e) { \
+                d[HW(e)] = s[HN(i * n + e)]; \
+            } \
+        } \
+    } \
+}
+
+UNPK(sme2_sunpk2_bh, 1, int16_t, int8_t, H2, H1)
+UNPK(sme2_sunpk2_hs, 1, int32_t, int16_t, H4, H2)
+UNPK(sme2_sunpk2_sd, 1, int64_t, int32_t, H8, H4)
+
+UNPK(sme2_sunpk4_bh, 2, int16_t, int8_t, H2, H1)
+UNPK(sme2_sunpk4_hs, 2, int32_t, int16_t, H4, H2)
+UNPK(sme2_sunpk4_sd, 2, int64_t, int32_t, H8, H4)
+
+UNPK(sme2_uunpk2_bh, 1, uint16_t, uint8_t, H2, H1)
+UNPK(sme2_uunpk2_hs, 1, uint32_t, uint16_t, H4, H2)
+UNPK(sme2_uunpk2_sd, 1, uint64_t, uint32_t, H8, H4)
+
+UNPK(sme2_uunpk4_bh, 2, uint16_t, uint8_t, H2, H1)
+UNPK(sme2_uunpk4_hs, 2, uint32_t, uint16_t, H4, H2)
+UNPK(sme2_uunpk4_sd, 2, uint64_t, uint32_t, H8, H4)
+
+#undef UNPK
+
+/* Deinterleave and convert. */
+void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float16 *s = vs;
+    float32 *d0 = vd;
+    float32 *d1 = vd + sizeof(ARMVectorReg);
+
+    for (i = 0; i < n; ++i) {
+        float32 v0 = sve_f16_to_f32(s[H2(i * 2 + 0)], fpst);
+        float32 v1 = sve_f16_to_f32(s[H2(i * 2 + 1)], fpst);
+        d0[H4(i)] = v0;
+        d1[H4(i)] = v1;
+    }
+}
+
+void HELPER(sme2_scvtf)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float32 *d = vd;
+    int32_t *s = vs;
+
+    for (i = 0; i < n; ++i) {
+        d[i] = int32_to_float32(s[i], fpst);
+    }
+}
+
+void HELPER(sme2_ucvtf)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float32 *d = vd;
+    uint32_t *s = vs;
+
+    for (i = 0; i < n; ++i) {
+        d[i] = uint32_to_float32(s[i], fpst);
+    }
+}
+
+#define ZIP2(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+    ARMVectorReg scratch[2]; \
+    size_t oprsz = simd_oprsz(desc); \
+    size_t pairs = oprsz / (sizeof(TYPE) * 2); \
+    TYPE *n = vn, *m = vm; \
+    if (vectors_overlap(vd, 2, vn, 1)) { \
+        n = memcpy(&scratch[0], vn, oprsz); \
+    } \
+    if (vectors_overlap(vd, 2, vm, 1)) { \
+        m = memcpy(&scratch[1], vm, oprsz); \
+    } \
+    for (size_t r = 0; r < 2; ++r) { \
+        TYPE *d = vd + r * sizeof(ARMVectorReg); \
+        size_t base = r * pairs; \
+        for (size_t p = 0; p < pairs; ++p) { \
+            d[H(2 * p + 0)] = n[base + H(p)]; \
+            d[H(2 * p + 1)] = m[base + H(p)]; \
+        } \
+    } \
+}
+
+ZIP2(sme2_zip2_b, uint8_t, H1)
+ZIP2(sme2_zip2_h, uint16_t, H2)
+ZIP2(sme2_zip2_s, uint32_t, H4)
+ZIP2(sme2_zip2_d, uint64_t, )
+ZIP2(sme2_zip2_q, Int128, )
+
+#undef ZIP2
+
+#define ZIP4(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch[4]; \
+    size_t oprsz = simd_oprsz(desc); \
+    size_t quads = oprsz / (sizeof(TYPE) * 4); \
+    TYPE *s0, *s1, *s2, *s3; \
+    if (vs == vd) { \
+        vs = memcpy(scratch, vs, sizeof(scratch)); \
+    } \
+    s0 = vs; \
+    s1 = vs + sizeof(ARMVectorReg); \
+    s2 = vs + 2 * sizeof(ARMVectorReg); \
+    s3 = vs + 3 * sizeof(ARMVectorReg); \
+    for (size_t r = 0; r < 4; ++r) { \
+        TYPE *d = vd + r * sizeof(ARMVectorReg); \
+        size_t base = r *
quads; \ + for (size_t q = 0; q < quads; ++q) { \ + d[H(4 * q + 0)] = s0[base + H(q)]; \ + d[H(4 * q + 1)] = s1[base + H(q)]; \ + d[H(4 * q + 2)] = s2[base + H(q)]; \ + d[H(4 * q + 3)] = s3[base + H(q)]; \ + } \ + } \ +} + +ZIP4(sme2_zip4_b, uint8_t, H1) +ZIP4(sme2_zip4_h, uint16_t, H2) +ZIP4(sme2_zip4_s, uint32_t, H4) +ZIP4(sme2_zip4_d, uint64_t, ) +ZIP4(sme2_zip4_q, Int128, ) + +#undef ZIP4 + +#define UZP2(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + ARMVectorReg scratch[2]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t pairs = oprsz / (sizeof(TYPE) * 2); \ + TYPE *d0 = vd, *d1 = vd + sizeof(ARMVectorReg); \ + if (vectors_overlap(vd, 2, vn, 1)) { \ + vn = memcpy(&scratch[0], vn, oprsz); \ + } \ + if (vectors_overlap(vd, 2, vm, 1)) { \ + vm = memcpy(&scratch[1], vm, oprsz); \ + } \ + for (size_t r = 0; r < 2; ++r) { \ + TYPE *s = r ? vm : vn; \ + size_t base = r * pairs; \ + for (size_t p = 0; p < pairs; ++p) { \ + d0[base + H(p)] = s[H(2 * p + 0)]; \ + d1[base + H(p)] = s[H(2 * p + 1)]; \ + } \ + } \ +} + +UZP2(sme2_uzp2_b, uint8_t, H1) +UZP2(sme2_uzp2_h, uint16_t, H2) +UZP2(sme2_uzp2_s, uint32_t, H4) +UZP2(sme2_uzp2_d, uint64_t, ) +UZP2(sme2_uzp2_q, Int128, ) + +#undef UZP2 + +#define UZP4(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch[4]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t quads = oprsz / (sizeof(TYPE) * 4); \ + TYPE *d0, *d1, *d2, *d3; \ + if (vs == vd) { \ + vs = memcpy(scratch, vs, sizeof(scratch)); \ + } \ + d0 = vd; \ + d1 = vd + sizeof(ARMVectorReg); \ + d2 = vd + 2 * sizeof(ARMVectorReg); \ + d3 = vd + 3 * sizeof(ARMVectorReg); \ + for (size_t r = 0; r < 4; ++r) { \ + TYPE *s = vs + r * sizeof(ARMVectorReg); \ + size_t base = r * quads; \ + for (size_t q = 0; q < quads; ++q) { \ + d0[base + H(q)] = s[H(4 * q + 0)]; \ + d1[base + H(q)] = s[H(4 * q + 1)]; \ + d2[base + H(q)] = s[H(4 * q + 2)]; \ + d3[base + H(q)] = s[H(4 * q + 3)]; \ + } \ + } \ +} + +UZP4(sme2_uzp4_b, uint8_t, H1) +UZP4(sme2_uzp4_h, uint16_t, H2) +UZP4(sme2_uzp4_s, uint32_t, H4) +UZP4(sme2_uzp4_d, uint64_t, ) +UZP4(sme2_uzp4_q, Int128, ) + +#undef UZP4 + +#define ICLAMP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + size_t stride = sizeof(ARMVectorReg) / sizeof(TYPE); \ + size_t elements = simd_oprsz(desc) / sizeof(TYPE); \ + size_t nreg = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (size_t e = 0; e < elements; e++) { \ + TYPE nn = n[H(e)], mm = m[H(e)]; \ + for (size_t r = 0; r < nreg; r++) { \ + TYPE *dd = &d[r * stride + H(e)]; \ + *dd = MIN(MAX(*dd, nn), mm); \ + } \ + } \ +} + +ICLAMP(sme2_sclamp_b, int8_t, H1) +ICLAMP(sme2_sclamp_h, int16_t, H2) +ICLAMP(sme2_sclamp_s, int32_t, H4) +ICLAMP(sme2_sclamp_d, int64_t, H8) + +ICLAMP(sme2_uclamp_b, uint8_t, H1) +ICLAMP(sme2_uclamp_h, uint16_t, H2) +ICLAMP(sme2_uclamp_s, uint32_t, H4) +ICLAMP(sme2_uclamp_d, uint64_t, H8) + +#undef ICLAMP + +/* + * Note the argument ordering to minnum and maxnum must match + * the ARM pseudocode so that NaNs are propagated properly. 
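+ * The computation below is minnum(maxnum(nn, *dd), mm), i.e. the
+ * accumulator is clamped into the closed interval [nn, mm].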
+ */ +#define FCLAMP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *fpst, uint32_t desc) \ +{ \ + size_t stride = sizeof(ARMVectorReg) / sizeof(TYPE); \ + size_t elements = simd_oprsz(desc) / sizeof(TYPE); \ + size_t nreg = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (size_t e = 0; e < elements; e++) { \ + TYPE nn = n[H(e)], mm = m[H(e)]; \ + for (size_t r = 0; r < nreg; r++) { \ + TYPE *dd = &d[r * stride + H(e)]; \ + *dd = TYPE##_minnum(TYPE##_maxnum(nn, *dd, fpst), mm, fpst); \ + } \ + } \ +} + +FCLAMP(sme2_fclamp_h, float16, H2) +FCLAMP(sme2_fclamp_s, float32, H4) +FCLAMP(sme2_fclamp_d, float64, H8) +FCLAMP(sme2_bfclamp, bfloat16, H2) + +#undef FCLAMP + +void HELPER(sme2_sel_b)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint8_t); + DecodeCounter p = decode_counter(png, vl, MO_8); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H1(e)] = m[H1(e)]; + } + for (int e = split; e < elements; e++) { + d[H1(e)] = n[H1(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H1(e)] = n[H1(e)]; + } + for (int e = split; e < elements; e++) { + d[H1(e)] = m[H1(e)]; + } + } + } + } + } else { + int estride = 1 << p.lg2_stride; + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H1(e)] = m[H1(e)]; + } + for (; e < elements; e += estride) { + d[H1(e)] = n[H1(e)]; + for (int i = 1; i < estride; i++) { + d[H1(e + i)] = m[H1(e + i)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += estride) { + d[H1(e)] = n[H1(e)]; + for (int i = 1; i < estride; i++) { + d[H1(e + i)] = m[H1(e + i)]; + } + } + for (; e < elements; e++) { + d[H1(e)] = m[H1(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_h)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint16_t); + DecodeCounter p = decode_counter(png, vl, MO_16); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) 
{ + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H2(e)] = m[H2(e)]; + } + for (int e = split; e < elements; e++) { + d[H2(e)] = n[H2(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H2(e)] = n[H2(e)]; + } + for (int e = split; e < elements; e++) { + d[H2(e)] = m[H2(e)]; + } + } + } + } + } else { + int estride = 1 << p.lg2_stride; + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H2(e)] = m[H2(e)]; + } + for (; e < elements; e += estride) { + d[H2(e)] = n[H2(e)]; + for (int i = 1; i < estride; i++) { + d[H2(e + i)] = m[H2(e + i)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += estride) { + d[H2(e)] = n[H2(e)]; + for (int i = 1; i < estride; i++) { + d[H2(e + i)] = m[H2(e + i)]; + } + } + for (; e < elements; e++) { + d[H2(e)] = m[H2(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_s)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint32_t); + DecodeCounter p = decode_counter(png, vl, MO_32); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H4(e)] = m[H4(e)]; + } + for (int e = split; e < elements; e++) { + d[H4(e)] = n[H4(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H4(e)] = n[H4(e)]; + } + for (int e = split; e < elements; e++) { + d[H4(e)] = m[H4(e)]; + } + } + } + } + } else { + /* p.esz must be MO_64, so stride must be 2. 
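+           Each 64-bit predicate element therefore governs a pair of
+           adjacent 32-bit lanes, handled as e and e + 1 below.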
*/ + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H4(e)] = m[H4(e)]; + } + for (; e < elements; e += 2) { + d[H4(e)] = n[H4(e)]; + d[H4(e + 1)] = m[H4(e + 1)]; + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += 2) { + d[H4(e)] = n[H4(e)]; + d[H4(e + 1)] = m[H4(e + 1)]; + } + for (; e < elements; e++) { + d[H4(e)] = m[H4(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_d)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint64_t); + DecodeCounter p = decode_counter(png, vl, MO_64); + + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint64_t *d = vd + r * sizeof(ARMVectorReg); + uint64_t *n = vn + r * sizeof(ARMVectorReg); + uint64_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + memcpy(d, m, split * sizeof(uint64_t)); + memcpy(d + split, n + split, + (elements - split) * sizeof(uint64_t)); + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint64_t *d = vd + r * sizeof(ARMVectorReg); + uint64_t *n = vn + r * sizeof(ARMVectorReg); + uint64_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + memcpy(d, n, split * sizeof(uint64_t)); + memcpy(d + split, m + split, + (elements - split) * sizeof(uint64_t)); + } + } + } +} diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index 04b6fcc..ab63cfa 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -30,6 +30,7 @@ %size_23 23:2 %dtype_23_13 23:2 13:2 %index3_22_19 22:1 19:2 +%index3_22_17 22:1 17:2 %index3_19_11 19:2 11:1 %index2_20_11 20:1 11:1 @@ -57,6 +58,11 @@ # as propagated via the MOVPRFX instruction. %reg_movprfx 0:5 +%rn_ax2 6:4 !function=times_2 + +%pnd 0:3 !function=plus_8 +%pnn 5:3 !function=plus_8 + ########################################################################### # Named attribute sets. These are used to make nice(er) names # when creating helpers common to those for the individual @@ -102,6 +108,7 @@ # Two operand @pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz @rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz +@rd_rnx2 ........ ... ..... ...... ..... rd:5 &rr_esz rn=%rn_ax2 # Two operand with governing predicate, flags setting @pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s @@ -131,11 +138,11 @@ @rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \ &rrrr_esz ra=%reg_movprfx -# Four operand with unused vector element size -@rda_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 \ - &rrrr_esz esz=0 ra=%reg_movprfx -@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \ - &rrrr_esz esz=0 rn=%reg_movprfx +# Four operand with explicit vector element size +@rda_rn_rm_ex ........ ... rm:5 ... ... rn:5 rd:5 \ + &rrrr_esz ra=%reg_movprfx +@rdn_ra_rm_ex ........ ... 
rm:5 ... ... ra:5 rd:5 \ + &rrrr_esz rn=%reg_movprfx # Three operand with "memory" size, aka immediate left shift @rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri @@ -222,6 +229,9 @@ @rprr_load_dt ....... dtype:4 rm:5 ... pg:3 rn:5 rd:5 &rprr_load @rpri_load_dt ....... dtype:4 . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load +@rprr_load ....... .... rm:5 ... pg:3 rn:5 rd:5 &rprr_load +@rpri_load ....... .... . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load + @rprr_load_msz ....... .... rm:5 ... pg:3 rn:5 rd:5 \ &rprr_load dtype=%msz_dtype @rpri_load_msz ....... .... . imm:s4 ... pg:3 rn:5 rd:5 \ @@ -245,7 +255,7 @@ # Stores; user must fill in ESZ, MSZ, NREG as needed. @rprr_store ....... .. .. rm:5 ... pg:3 rn:5 rd:5 &rprr_store -@rpri_store_msz ....... msz:2 .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store +@rpri_store ....... .. .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store @rprr_store_esz_n0 ....... .. esz:2 rm:5 ... pg:3 rn:5 rd:5 \ &rprr_store nreg=0 @rprr_scatter_store ....... msz:2 .. rm:5 ... pg:3 rn:5 rd:5 \ @@ -320,6 +330,11 @@ ORV 00000100 .. 011 000 001 ... ..... ..... @rd_pg_rn EORV 00000100 .. 011 001 001 ... ..... ..... @rd_pg_rn ANDV 00000100 .. 011 010 001 ... ..... ..... @rd_pg_rn +# SVE2.1 bitwise logical reduction (quadwords) +ORQV 00000100 .. 011 100 001 ... ..... ..... @rd_pg_rn +EORQV 00000100 .. 011 101 001 ... ..... ..... @rd_pg_rn +ANDQV 00000100 .. 011 110 001 ... ..... ..... @rd_pg_rn + # SVE constructive prefix (predicated) MOVPRFX_z 00000100 .. 010 000 001 ... ..... ..... @rd_pg_rn MOVPRFX_m 00000100 .. 010 001 001 ... ..... ..... @rd_pg_rn @@ -335,6 +350,13 @@ UMAXV 00000100 .. 001 001 001 ... ..... ..... @rd_pg_rn SMINV 00000100 .. 001 010 001 ... ..... ..... @rd_pg_rn UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn +# SVE2.1 segment reduction +ADDQV 00000100 .. 000 101 001 ... ..... ..... @rd_pg_rn +SMAXQV 00000100 .. 001 100 001 ... ..... ..... @rd_pg_rn +SMINQV 00000100 .. 001 110 001 ... ..... ..... @rd_pg_rn +UMAXQV 00000100 .. 001 101 001 ... ..... ..... @rd_pg_rn +UMINQV 00000100 .. 001 111 001 ... ..... ..... @rd_pg_rn + ### SVE Shift by Immediate - Predicated Group # SVE bitwise shift by immediate (predicated) @@ -428,12 +450,12 @@ XAR 00000100 .. 1 ..... 001 101 rm:5 rd:5 &rrri_esz \ rn=%reg_movprfx esz=%tszimm16_esz imm=%tszimm16_shr # SVE2 bitwise ternary operations -EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0 -BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0 -BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 +EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0 +BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0 +BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 ### SVE Index Generation Group @@ -559,6 +581,14 @@ DUP_s 00000101 .. 1 00000 001110 ..... ..... @rd_rn DUP_x 00000101 .. 1 ..... 
001000 rn:5 rd:5 \ &rri imm=%imm7_22_16 +# SVE Permute Vector - one source quadwords +DUPQ 00000101 001 imm:4 1 001001 rn:5 rd:5 &rri_esz esz=0 +DUPQ 00000101 001 imm:3 10 001001 rn:5 rd:5 &rri_esz esz=1 +DUPQ 00000101 001 imm:2 100 001001 rn:5 rd:5 &rri_esz esz=2 +DUPQ 00000101 001 imm:1 1000 001001 rn:5 rd:5 &rri_esz esz=3 + +EXTQ 00000101 0110 imm:4 001001 rn:5 rd:5 &rri + # SVE insert SIMD&FP scalar register INSR_f 00000101 .. 1 10100 001110 ..... ..... @rdn_rm @@ -568,6 +598,22 @@ INSR_r 00000101 .. 1 00100 001110 ..... ..... @rdn_rm # SVE reverse vector elements REV_v 00000101 .. 1 11000 001110 ..... ..... @rd_rn +# SVE move predicate to/from vector + +PMOV_pv 00000101 00 101 01 0001110 rn:5 0 rd:4 \ + &rri_esz esz=0 imm=0 +PMOV_pv 00000101 00 101 1 imm:1 0001110 rn:5 0 rd:4 &rri_esz esz=1 +PMOV_pv 00000101 01 101 imm:2 0001110 rn:5 0 rd:4 &rri_esz esz=2 +PMOV_pv 00000101 1. 101 .. 0001110 rn:5 0 rd:4 \ + &rri_esz esz=3 imm=%index3_22_17 + +PMOV_vp 00000101 00 101 01 1001110 0 rn:4 rd:5 \ + &rri_esz esz=0 imm=0 +PMOV_vp 00000101 00 101 1 imm:1 1001110 0 rn:4 rd:5 &rri_esz esz=1 +PMOV_vp 00000101 01 101 imm:2 1001110 0 rn:4 rd:5 &rri_esz esz=2 +PMOV_vp 00000101 1. 101 .. 1001110 0 rn:4 rd:5 \ + &rri_esz esz=3 imm=%index3_22_17 + # SVE vector table lookup TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm @@ -614,6 +660,15 @@ UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0 TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0 TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0 +# SVE2.1 permute vector elements (quadwords) +ZIPQ1 01000100 .. 0 ..... 111 000 ..... ..... @rd_rn_rm +ZIPQ2 01000100 .. 0 ..... 111 001 ..... ..... @rd_rn_rm +UZPQ1 01000100 .. 0 ..... 111 010 ..... ..... @rd_rn_rm +UZPQ2 01000100 .. 0 ..... 111 011 ..... ..... @rd_rn_rm + +TBLQ 01000100 .. 0 ..... 111 110 ..... ..... @rd_rn_rm +TBXQ 00000101 .. 1 ..... 001 101 ..... ..... @rd_rn_rm + ### SVE Permute - Predicated Group # SVE compress active elements @@ -725,6 +780,7 @@ PTEST 00100101 01 010000 11 pg:4 0 rn:4 0 0000 # SVE predicate initialize PTRUE 00100101 esz:2 01100 s:1 111000 pat:5 0 rd:4 +PTRUE_cnt 00100101 esz:2 1000000111100000010 ... rd=%pnd # SVE initialize FFR SETFFR 00100101 0010 1100 1001 0000 0000 0000 @@ -765,7 +821,8 @@ BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s ### SVE Predicate Count Group # SVE predicate count -CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn +CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn +CNTP_c 00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5 # SVE inc/dec register by predicate count INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1 @@ -786,11 +843,35 @@ SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000 # SVE integer compare scalar count and limit -WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4 +&while esz rd rn rm sf u eq +WHILE_lt 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4 &while +WHILE_gt 00100101 esz:2 1 rm:5 000 sf:1 u:1 0 rn:5 eq:1 rd:4 &while # SVE2 pointer conflict compare WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 +# SVE2.1 predicate pair +%pd_pair 1:3 !function=times_2 +@while_pair ........ esz:2 . rm:5 .... u:1 . rn:5 . ... eq:1 \ + &while rd=%pd_pair sf=1 + +WHILE_lt_pair 00100101 .. 1 ..... 0101 . 1 ..... 1 ... . @while_pair +WHILE_gt_pair 00100101 .. 1 ..... 0101 . 0 ..... 1 ... . @while_pair + +# SVE2.1 predicate as count +@while_cnt ........ esz:2 . rm:5 .... u:1 . rn:5 . 
eq:1 ... \ + &while rd=%pnd sf=1 + +WHILE_lt_cnt2 00100101 .. 1 ..... 0100 . 1 ..... 1 . ... @while_cnt +WHILE_lt_cnt4 00100101 .. 1 ..... 0110 . 1 ..... 1 . ... @while_cnt +WHILE_gt_cnt2 00100101 .. 1 ..... 0100 . 0 ..... 1 . ... @while_cnt +WHILE_gt_cnt4 00100101 .. 1 ..... 0110 . 0 ..... 1 . ... @while_cnt + +# SVE2.1 extract mask predicate from predicate-as-counter +&pext rd rn esz imm +PEXT_1 00100101 esz:2 1 00000 0111 00 imm:2 ... 1 rd:4 &pext rn=%pnn +PEXT_2 00100101 esz:2 1 00000 0111 010 imm:1 ... 1 rd:4 &pext rn=%pnn + ### SVE Integer Wide Immediate - Unpredicated Group # SVE broadcast floating-point immediate (unpredicated) @@ -851,10 +932,13 @@ CDOT_zzzz 01000100 esz:2 0 rm:5 0001 rot:2 rn:5 rd:5 ra=%reg_movprfx #### SVE Multiply - Indexed # SVE integer dot product (indexed) -SDOT_zzxw_s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 -SDOT_zzxw_d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 -UDOT_zzxw_s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 -UDOT_zzxw_d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 +SDOT_zzxw_4s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 +SDOT_zzxw_4d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 +UDOT_zzxw_4s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 +UDOT_zzxw_4d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 + +SDOT_zzxw_2s 01000100 10 0 ..... 110010 ..... ..... @rrxr_2 esz=2 +UDOT_zzxw_2s 01000100 10 0 ..... 110011 ..... ..... @rrxr_2 esz=2 # SVE2 integer multiply-add (indexed) MLA_zzxz_h 01000100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=1 @@ -873,8 +957,8 @@ SQRDMLSH_zzxz_s 01000100 10 1 ..... 000101 ..... ..... @rrxr_2 esz=2 SQRDMLSH_zzxz_d 01000100 11 1 ..... 000101 ..... ..... @rrxr_1 esz=3 # SVE mixed sign dot product (indexed) -USDOT_zzxw_s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2 -SUDOT_zzxw_s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2 +USDOT_zzxw_4s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2 +SUDOT_zzxw_4s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2 # SVE2 saturating multiply-add (indexed) SQDMLALB_zzxw_s 01000100 10 1 ..... 0010.0 ..... ..... @rrxr_3a esz=2 @@ -968,9 +1052,11 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \ ### SVE FP Multiply-Add Indexed Group # SVE floating-point multiply-add (indexed) +FMLA_zzxz 01100100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=0 FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1 FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 +FMLS_zzxz 01100100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=0 FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1 FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 @@ -978,6 +1064,7 @@ FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 ### SVE FP Multiply Indexed Group # SVE floating-point multiply (indexed) +FMUL_zzx 01100100 0. 1 ..... 001010 ..... ..... @rrx_3 esz=0 FMUL_zzx 01100100 0. 1 ..... 001000 ..... ..... @rrx_3 esz=1 FMUL_zzx 01100100 10 1 ..... 001000 ..... ..... @rrx_2 esz=2 FMUL_zzx 01100100 11 1 ..... 001000 ..... ..... @rrx_1 esz=3 @@ -990,6 +1077,14 @@ FMINNMV 01100101 .. 000 101 001 ... ..... ..... @rd_pg_rn FMAXV 01100101 .. 000 110 001 ... ..... ..... @rd_pg_rn FMINV 01100101 .. 000 111 001 ... ..... ..... @rd_pg_rn +### SVE FP recursive reduction (quadwords) + +FADDQV 01100100 .. 010 000 101 ... ..... ..... @rd_pg_rn +FMAXNMQV 01100100 .. 010 100 101 ... 
..... ..... @rd_pg_rn +FMINNMQV 01100100 .. 010 101 101 ... ..... ..... @rd_pg_rn +FMAXQV 01100100 .. 010 110 101 ... ..... ..... @rd_pg_rn +FMINQV 01100100 .. 010 111 101 ... ..... ..... @rd_pg_rn + ## SVE Floating Point Unary Operations - Unpredicated Group FRECPE 01100101 .. 001 110 001100 ..... ..... @rd_rn @@ -1151,12 +1246,24 @@ LD1_zpiz 1000010 .. 01 ..... 1.. ... ..... ..... \ # SVE contiguous load (scalar plus scalar) LD_zprr 1010010 .... ..... 010 ... ..... ..... @rprr_load_dt nreg=0 +# LD1W (128-bit element) +LD_zprr 1010010 1000 rm:5 100 pg:3 rn:5 rd:5 \ + &rprr_load dtype=16 nreg=0 +# LD1D (128-bit element) +LD_zprr 1010010 1100 rm:5 100 pg:3 rn:5 rd:5 \ + &rprr_load dtype=17 nreg=0 # SVE contiguous first-fault load (scalar plus scalar) LDFF1_zprr 1010010 .... ..... 011 ... ..... ..... @rprr_load_dt nreg=0 # SVE contiguous load (scalar plus immediate) LD_zpri 1010010 .... 0.... 101 ... ..... ..... @rpri_load_dt nreg=0 +# LD1W (128-bit element) +LD_zpri 1010010 1000 1 imm:s4 001 pg:3 rn:5 rd:5 \ + &rpri_load dtype=16 nreg=0 +# LD1D (128-bit element) +LD_zpri 1010010 1100 1 imm:s4 001 pg:3 rn:5 rd:5 \ + &rpri_load dtype=17 nreg=0 # SVE contiguous non-fault load (scalar plus immediate) LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 @@ -1166,12 +1273,26 @@ LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 # SVE load multiple structures (scalar plus scalar) # LD2B, LD2H, LD2W, LD2D; etc. LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz +# LD[234]Q +LD_zprr 1010010 01 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=1 +LD_zprr 1010010 10 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=2 +LD_zprr 1010010 11 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=3 # SVE contiguous non-temporal load (scalar plus immediate) # LDNT1B, LDNT1H, LDNT1W, LDNT1D # SVE load multiple structures (scalar plus immediate) # LD2B, LD2H, LD2W, LD2D; etc. LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz +# LD[234]Q +LD_zpri 1010010 01 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=1 +LD_zpri 1010010 10 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=2 +LD_zpri 1010010 11 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=3 # SVE load and broadcast quadword (scalar plus scalar) LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \ @@ -1222,6 +1343,10 @@ LD1_zprz 1100010 10 1. ..... 1.. ... ..... ..... \ LD1_zprz 1100010 11 1. ..... 11. ... ..... ..... \ @rprr_g_load_sc esz=3 msz=3 u=1 +# LD1Q. Note that this is subtly different from LD1_zprz because +# it is vector + scalar, not scalar + vector. +LD1Q 1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 + # SVE 64-bit gather load (vector plus immediate) LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \ @rpri_g_load esz=3 @@ -1245,8 +1370,20 @@ STR_zri 1110010 11 0. ..... 010 ... ..... ..... @rd_rn_i9 # SVE contiguous store (scalar plus immediate) # ST1B, ST1H, ST1W, ST1D; require msz <= esz -ST_zpri 1110010 .. esz:2 0.... 111 ... ..... ..... \ - @rpri_store_msz nreg=0 +ST_zpri 1110010 00 esz:2 0.... 111 ... ..... ..... \ + @rpri_store msz=0 nreg=0 +ST_zpri 1110010 01 esz:2 0.... 111 ... ..... ..... \ + @rpri_store msz=1 nreg=0 +ST_zpri 1110010 10 10 0.... 111 ... ..... ..... \ + @rpri_store msz=2 esz=2 nreg=0 +ST_zpri 1110010 10 11 0.... 111 ... ..... ..... \ + @rpri_store msz=2 esz=3 nreg=0 +ST_zpri 1110010 11 11 0.... 111 ... ..... ..... \ + @rpri_store msz=3 esz=3 nreg=0 +ST_zpri 1110010 10 00 0.... 111 ... ..... ..... 
\ + @rpri_store msz=2 esz=4 nreg=0 +ST_zpri 1110010 11 10 0.... 111 ... ..... ..... \ + @rpri_store msz=3 esz=4 nreg=0 # SVE contiguous store (scalar plus scalar) # ST1B, ST1H, ST1W, ST1D; require msz <= esz @@ -1255,20 +1392,40 @@ ST_zprr 1110010 00 .. ..... 010 ... ..... ..... \ @rprr_store_esz_n0 msz=0 ST_zprr 1110010 01 .. ..... 010 ... ..... ..... \ @rprr_store_esz_n0 msz=1 -ST_zprr 1110010 10 .. ..... 010 ... ..... ..... \ - @rprr_store_esz_n0 msz=2 +ST_zprr 1110010 10 10 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=2 nreg=0 +ST_zprr 1110010 10 11 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=3 nreg=0 ST_zprr 1110010 11 11 ..... 010 ... ..... ..... \ @rprr_store msz=3 esz=3 nreg=0 +ST_zprr 1110010 10 00 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=4 nreg=0 +ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \ + @rprr_store msz=3 esz=4 nreg=0 # SVE contiguous non-temporal store (scalar plus immediate) (nreg == 0) # SVE store multiple structures (scalar plus immediate) (nreg != 0) ST_zpri 1110010 .. nreg:2 1.... 111 ... ..... ..... \ - @rpri_store_msz esz=%size_23 + @rpri_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zpri 11100100 01 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=1 +ST_zpri 11100100 10 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=2 +ST_zpri 11100100 11 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=3 # SVE contiguous non-temporal store (scalar plus scalar) (nreg == 0) # SVE store multiple structures (scalar plus scalar) (nreg != 0) -ST_zprr 1110010 msz:2 nreg:2 ..... 011 ... ..... ..... \ - @rprr_store esz=%size_23 +ST_zprr 1110010 .. nreg:2 ..... 011 ... ..... ..... \ + @rprr_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zprr 11100100 01 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=1 +ST_zprr 11100100 10 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=2 +ST_zprr 11100100 11 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=3 # SVE 32-bit scatter store (scalar plus 32-bit scaled offsets) # Require msz > 0 && msz <= esz. @@ -1293,6 +1450,10 @@ ST1_zprz 1110010 .. 01 ..... 101 ... ..... ..... \ ST1_zprz 1110010 .. 00 ..... 101 ... ..... ..... \ @rprr_scatter_store xs=2 esz=3 scale=0 +# ST1Q. Note that this is subtly different from ST1_zprz because +# it is vector + scalar, not scalar + vector. +ST1Q 1110 0100 001 rm:5 001 pg:3 rn:5 rd:5 + # SVE 64-bit scatter store (vector plus immediate) ST1_zpiz 1110010 .. 10 ..... 101 ... ..... ..... \ @rpri_scatter_store esz=3 @@ -1450,9 +1611,9 @@ EORTB 01000101 .. 0 ..... 10010 1 ..... ..... @rd_rn_rm ## SVE integer matrix multiply accumulate -SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 -USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 -UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 +SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 +USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 +UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 ## SVE2 bitwise permute @@ -1504,13 +1665,22 @@ UABA 01000101 .. 0 ..... 11111 1 ..... ..... @rd_rn_rm #### SVE2 Narrowing ## SVE2 saturating extract narrow - # Bits 23, 18-16 are zero, limited in the translator via esz < 3 & imm == 0. -SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl + +{ + SQCVTN_sh 01000101 00 1 10001 010 000 ....0 ..... @rd_rnx2 esz=1 + SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl +} SQXTNT 01000101 .. 1 ..... 010 001 ..... ..... 
@rd_rn_tszimm_shl -UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl +{ + UQCVTN_sh 01000101 00 1 10001 010 010 ....0 ..... @rd_rnx2 esz=1 + UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl +} UQXTNT 01000101 .. 1 ..... 010 011 ..... ..... @rd_rn_tszimm_shl -SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl +{ + SQCVTUN_sh 01000101 00 1 10001 010 100 ....0 ..... @rd_rnx2 esz=1 + SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl +} SQXTUNT 01000101 .. 1 ..... 010 101 ..... ..... @rd_rn_tszimm_shl ## SVE2 bitwise shift right narrow @@ -1597,14 +1767,17 @@ UMLSLT_zzzw 01000100 .. 0 ..... 010 111 ..... ..... @rda_rn_rm CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx -## SVE mixed sign dot product +## SVE dot product + +SDOT_zzzz_2s 01000100 00 0 ..... 110 010 ..... ..... @rda_rn_rm_ex esz=2 +UDOT_zzzz_2s 01000100 00 0 ..... 110 011 ..... ..... @rda_rn_rm_ex esz=2 -USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm +USDOT_zzzz_4s 01000100 10 0 ..... 011 110 ..... ..... @rda_rn_rm_ex esz=2 ### SVE2 floating point matrix multiply accumulate -BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 -FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 -FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 +BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=1 +FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=2 +FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=3 ### SVE2 Memory Gather Load Group @@ -1654,26 +1827,35 @@ FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0 FLOGB 01100101 00 011 esz:2 0101 pg:3 rn:5 rd:5 &rpr_esz ### SVE2 floating-point multiply-add long (vectors) -FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 -FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 -FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0 -FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0 +FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2 +FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2 +FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2 -BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 -BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 +BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2 +BFMLSLB_zzzw 01100100 11 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFMLSLT_zzzw 01100100 11 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2 -### SVE2 floating-point bfloat16 dot-product -BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 +### SVE2 floating-point dot-product +FDOT_zzzz 01100100 00 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 ### SVE2 floating-point multiply-add long (indexed) + FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 + BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... 
@rrxr_3a esz=2 BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 +BFMLSLB_zzxw 01100100 11 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 +BFMLSLT_zzxw 01100100 11 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 -### SVE2 floating-point bfloat16 dot-product (indexed) +### SVE2 floating-point dot-product (indexed) + +FDOT_zzxz 01100100 00 1 ..... 010000 ..... ..... @rrxr_2 esz=2 BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2 ### SVE broadcast predicate element @@ -1700,3 +1882,55 @@ PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \ SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm + +FCLAMP 01100100 .. 1 ..... 001001 ..... ..... @rda_rn_rm + +### SVE2p1 multi-vec contiguous load + +&zcrr_ldst rd png rn rm esz nreg +&zcri_ldst rd png rn imm esz nreg +%png 10:3 !function=plus_8 +%zd_ax2 1:4 !function=times_2 +%zd_ax4 2:3 !function=times_4 + +LD1_zcrr 10100000000 rm:5 0 esz:2 ... rn:5 .... - \ + &zcrr_ldst %png rd=%zd_ax2 nreg=2 +LD1_zcrr 10100000000 rm:5 1 esz:2 ... rn:5 ... 0- \ + &zcrr_ldst %png rd=%zd_ax4 nreg=4 + +ST1_zcrr 10100000001 rm:5 0 esz:2 ... rn:5 .... - \ + &zcrr_ldst %png rd=%zd_ax2 nreg=2 +ST1_zcrr 10100000001 rm:5 1 esz:2 ... rn:5 ... 0- \ + &zcrr_ldst %png rd=%zd_ax4 nreg=4 + +LD1_zcri 101000000100 imm:s4 0 esz:2 ... rn:5 .... - \ + &zcri_ldst %png rd=%zd_ax2 nreg=2 +LD1_zcri 101000000100 imm:s4 1 esz:2 ... rn:5 ... 0- \ + &zcri_ldst %png rd=%zd_ax4 nreg=4 + +ST1_zcri 101000000110 imm:s4 0 esz:2 ... rn:5 .... - \ + &zcri_ldst %png rd=%zd_ax2 nreg=2 +ST1_zcri 101000000110 imm:s4 1 esz:2 ... rn:5 ... 0- \ + &zcri_ldst %png rd=%zd_ax4 nreg=4 + +# Note: N bit and 0 bit (for nreg4) still mashed in rd. +# This is handled within gen_ldst_c(). +LD1_zcrr_stride 10100001000 rm:5 0 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=2 +LD1_zcrr_stride 10100001000 rm:5 1 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=4 + +ST1_zcrr_stride 10100001001 rm:5 0 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=2 +ST1_zcrr_stride 10100001001 rm:5 1 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=4 + +LD1_zcri_stride 101000010100 imm:s4 0 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=2 +LD1_zcri_stride 101000010100 imm:s4 1 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=4 + +ST1_zcri_stride 101000010110 imm:s4 0 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=2 +ST1_zcri_stride 101000010110 imm:s4 1 esz:2 ... 
rn:5 rd:5 \ + &zcri_ldst %png nreg=4 diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index d786b4b..c442fcb 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -20,15 +20,19 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internals.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "tcg/tcg.h" #include "vec_internal.h" #include "sve_ldst_internal.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" #include "accel/tcg/cpu-ops.h" +#include "accel/tcg/probe.h" #ifdef CONFIG_USER_ONLY #include "user/page-protection.h" #endif @@ -119,6 +123,11 @@ static inline uint64_t expand_pred_s(uint8_t byte) return word[byte & 0x11]; } +static inline uint64_t expand_pred_d(uint8_t byte) +{ + return -(uint64_t)(byte & 1); +} + #define LOGICAL_PPPP(NAME, FUNC) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ { \ @@ -202,6 +211,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ #define DO_EOR(N, M) (N ^ M) #define DO_ORR(N, M) (N | M) #define DO_BIC(N, M) (N & ~M) +#define DO_ORC(N, M) (N | ~M) #define DO_ADD(N, M) (N + M) #define DO_SUB(N, M) (N - M) #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) @@ -523,14 +533,9 @@ DO_ZPZZ(sve2_uhsub_zpzz_h, uint16_t, H1_2, DO_HSUB_BHS) DO_ZPZZ(sve2_uhsub_zpzz_s, uint32_t, H1_4, DO_HSUB_BHS) DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D) -static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max) -{ - return val >= max ? max : val <= min ? min : val; -} - -#define DO_SQADD_B(n, m) do_sat_bhs((int64_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SQADD_H(n, m) do_sat_bhs((int64_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SQADD_S(n, m) do_sat_bhs((int64_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SQADD_B(n, m) do_ssat_b((int64_t)n + m) +#define DO_SQADD_H(n, m) do_ssat_h((int64_t)n + m) +#define DO_SQADD_S(n, m) do_ssat_s((int64_t)n + m) static inline int64_t do_sqadd_d(int64_t n, int64_t m) { @@ -547,9 +552,9 @@ DO_ZPZZ(sve2_sqadd_zpzz_h, int16_t, H1_2, DO_SQADD_H) DO_ZPZZ(sve2_sqadd_zpzz_s, int32_t, H1_4, DO_SQADD_S) DO_ZPZZ_D(sve2_sqadd_zpzz_d, int64_t, do_sqadd_d) -#define DO_UQADD_B(n, m) do_sat_bhs((int64_t)n + m, 0, UINT8_MAX) -#define DO_UQADD_H(n, m) do_sat_bhs((int64_t)n + m, 0, UINT16_MAX) -#define DO_UQADD_S(n, m) do_sat_bhs((int64_t)n + m, 0, UINT32_MAX) +#define DO_UQADD_B(n, m) do_usat_b((int64_t)n + m) +#define DO_UQADD_H(n, m) do_usat_h((int64_t)n + m) +#define DO_UQADD_S(n, m) do_usat_s((int64_t)n + m) static inline uint64_t do_uqadd_d(uint64_t n, uint64_t m) { @@ -562,9 +567,9 @@ DO_ZPZZ(sve2_uqadd_zpzz_h, uint16_t, H1_2, DO_UQADD_H) DO_ZPZZ(sve2_uqadd_zpzz_s, uint32_t, H1_4, DO_UQADD_S) DO_ZPZZ_D(sve2_uqadd_zpzz_d, uint64_t, do_uqadd_d) -#define DO_SQSUB_B(n, m) do_sat_bhs((int64_t)n - m, INT8_MIN, INT8_MAX) -#define DO_SQSUB_H(n, m) do_sat_bhs((int64_t)n - m, INT16_MIN, INT16_MAX) -#define DO_SQSUB_S(n, m) do_sat_bhs((int64_t)n - m, INT32_MIN, INT32_MAX) +#define DO_SQSUB_B(n, m) do_ssat_b((int64_t)n - m) +#define DO_SQSUB_H(n, m) do_ssat_h((int64_t)n - m) +#define DO_SQSUB_S(n, m) do_ssat_s((int64_t)n - m) static inline int64_t do_sqsub_d(int64_t n, int64_t m) { @@ -581,9 +586,9 @@ DO_ZPZZ(sve2_sqsub_zpzz_h, int16_t, H1_2, DO_SQSUB_H) DO_ZPZZ(sve2_sqsub_zpzz_s, int32_t, H1_4, DO_SQSUB_S) DO_ZPZZ_D(sve2_sqsub_zpzz_d, int64_t, do_sqsub_d) -#define 
DO_UQSUB_B(n, m) do_sat_bhs((int64_t)n - m, 0, UINT8_MAX) -#define DO_UQSUB_H(n, m) do_sat_bhs((int64_t)n - m, 0, UINT16_MAX) -#define DO_UQSUB_S(n, m) do_sat_bhs((int64_t)n - m, 0, UINT32_MAX) +#define DO_UQSUB_B(n, m) do_usat_b((int64_t)n - m) +#define DO_UQSUB_H(n, m) do_usat_h((int64_t)n - m) +#define DO_UQSUB_S(n, m) do_usat_s((int64_t)n - m) static inline uint64_t do_uqsub_d(uint64_t n, uint64_t m) { @@ -595,12 +600,9 @@ DO_ZPZZ(sve2_uqsub_zpzz_h, uint16_t, H1_2, DO_UQSUB_H) DO_ZPZZ(sve2_uqsub_zpzz_s, uint32_t, H1_4, DO_UQSUB_S) DO_ZPZZ_D(sve2_uqsub_zpzz_d, uint64_t, do_uqsub_d) -#define DO_SUQADD_B(n, m) \ - do_sat_bhs((int64_t)(int8_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SUQADD_H(n, m) \ - do_sat_bhs((int64_t)(int16_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SUQADD_S(n, m) \ - do_sat_bhs((int64_t)(int32_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SUQADD_B(n, m) do_ssat_b((int64_t)(int8_t)n + m) +#define DO_SUQADD_H(n, m) do_ssat_h((int64_t)(int16_t)n + m) +#define DO_SUQADD_S(n, m) do_ssat_s((int64_t)(int32_t)n + m) static inline int64_t do_suqadd_d(int64_t n, uint64_t m) { @@ -630,12 +632,9 @@ DO_ZPZZ(sve2_suqadd_zpzz_h, uint16_t, H1_2, DO_SUQADD_H) DO_ZPZZ(sve2_suqadd_zpzz_s, uint32_t, H1_4, DO_SUQADD_S) DO_ZPZZ_D(sve2_suqadd_zpzz_d, uint64_t, do_suqadd_d) -#define DO_USQADD_B(n, m) \ - do_sat_bhs((int64_t)n + (int8_t)m, 0, UINT8_MAX) -#define DO_USQADD_H(n, m) \ - do_sat_bhs((int64_t)n + (int16_t)m, 0, UINT16_MAX) -#define DO_USQADD_S(n, m) \ - do_sat_bhs((int64_t)n + (int32_t)m, 0, UINT32_MAX) +#define DO_USQADD_B(n, m) do_usat_b((int64_t)n + (int8_t)m) +#define DO_USQADD_H(n, m) do_usat_h((int64_t)n + (int16_t)m) +#define DO_USQADD_S(n, m) do_usat_s((int64_t)n + (int32_t)m) static inline uint64_t do_usqadd_d(uint64_t n, int64_t m) { @@ -1222,37 +1221,29 @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ } \ } -#define DO_SQXTN_H(n) do_sat_bhs(n, INT8_MIN, INT8_MAX) -#define DO_SQXTN_S(n) do_sat_bhs(n, INT16_MIN, INT16_MAX) -#define DO_SQXTN_D(n) do_sat_bhs(n, INT32_MIN, INT32_MAX) - -DO_XTNB(sve2_sqxtnb_h, int16_t, DO_SQXTN_H) -DO_XTNB(sve2_sqxtnb_s, int32_t, DO_SQXTN_S) -DO_XTNB(sve2_sqxtnb_d, int64_t, DO_SQXTN_D) - -DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, DO_SQXTN_H) -DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, DO_SQXTN_S) -DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, DO_SQXTN_D) +DO_XTNB(sve2_sqxtnb_h, int16_t, do_ssat_b) +DO_XTNB(sve2_sqxtnb_s, int32_t, do_ssat_h) +DO_XTNB(sve2_sqxtnb_d, int64_t, do_ssat_s) -#define DO_UQXTN_H(n) do_sat_bhs(n, 0, UINT8_MAX) -#define DO_UQXTN_S(n) do_sat_bhs(n, 0, UINT16_MAX) -#define DO_UQXTN_D(n) do_sat_bhs(n, 0, UINT32_MAX) +DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, do_ssat_b) +DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, do_ssat_h) +DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, do_ssat_s) -DO_XTNB(sve2_uqxtnb_h, uint16_t, DO_UQXTN_H) -DO_XTNB(sve2_uqxtnb_s, uint32_t, DO_UQXTN_S) -DO_XTNB(sve2_uqxtnb_d, uint64_t, DO_UQXTN_D) +DO_XTNB(sve2_uqxtnb_h, uint16_t, do_usat_b) +DO_XTNB(sve2_uqxtnb_s, uint32_t, do_usat_h) +DO_XTNB(sve2_uqxtnb_d, uint64_t, do_usat_s) -DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, DO_UQXTN_D) +DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, do_usat_b) +DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, do_usat_h) +DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, do_usat_s) -DO_XTNB(sve2_sqxtunb_h, int16_t, DO_UQXTN_H) -DO_XTNB(sve2_sqxtunb_s, int32_t, DO_UQXTN_S) 
-DO_XTNB(sve2_sqxtunb_d, int64_t, DO_UQXTN_D) +DO_XTNB(sve2_sqxtunb_h, int16_t, do_usat_b) +DO_XTNB(sve2_sqxtunb_s, int32_t, do_usat_h) +DO_XTNB(sve2_sqxtunb_d, int64_t, do_usat_s) -DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, DO_UQXTN_D) +DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, do_usat_b) +DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, do_usat_h) +DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, do_usat_s) #undef DO_XTNB #undef DO_XTNT @@ -1829,6 +1820,52 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) #undef DO_VPZ #undef DO_VPZ_D +#define DO_VPQ(NAME, TYPE, H, INIT, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + TYPE tmp[16 / sizeof(TYPE)] = { [0 ... 16 / sizeof(TYPE) - 1] = INIT }; \ + TYPE *n = vn; uint16_t *g = vg; \ + uintptr_t oprsz = simd_oprsz(desc); \ + uintptr_t nseg = oprsz / 16, nsegelt = 16 / sizeof(TYPE); \ + for (uintptr_t s = 0; s < nseg; s++) { \ + uint16_t pg = g[H2(s)]; \ + for (uintptr_t e = 0; e < nsegelt; e++, pg >>= sizeof(TYPE)) { \ + if (pg & 1) { \ + tmp[e] = OP(tmp[H(e)], n[s * nsegelt + H(e)]); \ + } \ + } \ + } \ + memcpy(vd, tmp, 16); \ + clear_tail(vd, 16, simd_maxsz(desc)); \ +} + +DO_VPQ(sve2p1_addqv_b, uint8_t, H1, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_h, uint16_t, H2, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_s, uint32_t, H4, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_d, uint64_t, H8, 0, DO_ADD) + +DO_VPQ(sve2p1_smaxqv_b, int8_t, H1, INT8_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_h, int16_t, H2, INT16_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_s, int32_t, H4, INT32_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_d, int64_t, H8, INT64_MIN, DO_MAX) + +DO_VPQ(sve2p1_sminqv_b, int8_t, H1, INT8_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_h, int16_t, H2, INT16_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_s, int32_t, H4, INT32_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_d, int64_t, H8, INT64_MAX, DO_MIN) + +DO_VPQ(sve2p1_umaxqv_b, uint8_t, H1, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_h, uint16_t, H2, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_s, uint32_t, H4, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_d, uint64_t, H8, 0, DO_MAX) + +DO_VPQ(sve2p1_uminqv_b, uint8_t, H1, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_h, uint16_t, H2, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_s, uint32_t, H4, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_d, uint64_t, H8, -1, DO_MIN) + +#undef DO_VPQ + /* Two vector operand, one scalar operand, unpredicated. 
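 * Sketch of the expansion: each DO_ZZI helper applies OP elementwise
 * between the vector operand and the splatted scalar, with no
 * predication; e.g. sve_umini_d computes d[i] = MIN(n[i], s64)
 * across the whole vector.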
*/ #define DO_ZZI(NAME, TYPE, OP) \ void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ @@ -1869,10 +1906,46 @@ DO_ZZI(sve_umini_d, uint64_t, DO_MIN) #undef DO_ZZI +#define DO_LOGIC_QV(NAME, SUFF, INIT, VOP, POP) \ +void HELPER(NAME ## _ ## SUFF)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + unsigned seg = simd_oprsz(desc) / 16; \ + uint64_t r0 = INIT, r1 = INIT; \ + for (unsigned s = 0; s < seg; s++) { \ + uint64_t p0 = expand_pred_##SUFF(*(uint8_t *)(vg + H1(s * 2))); \ + uint64_t p1 = expand_pred_##SUFF(*(uint8_t *)(vg + H1(s * 2 + 1))); \ + uint64_t v0 = *(uint64_t *)(vn + s * 16); \ + uint64_t v1 = *(uint64_t *)(vn + s * 16 + 8); \ + v0 = POP(v0, p0), v1 = POP(v1, p1); \ + r0 = VOP(r0, v0), r1 = VOP(r1, v1); \ + } \ + *(uint64_t *)(vd + 0) = r0; \ + *(uint64_t *)(vd + 8) = r1; \ + clear_tail(vd, 16, simd_maxsz(desc)); \ +} + +DO_LOGIC_QV(sve2p1_orqv, b, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, h, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, s, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, d, 0, DO_ORR, DO_AND) + +DO_LOGIC_QV(sve2p1_eorqv, b, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, h, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, s, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, d, 0, DO_EOR, DO_AND) + +DO_LOGIC_QV(sve2p1_andqv, b, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, h, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, s, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, d, -1, DO_AND, DO_ORC) + +#undef DO_LOGIC_QV + #undef DO_AND #undef DO_ORR #undef DO_EOR #undef DO_BIC +#undef DO_ORC #undef DO_ADD #undef DO_SUB #undef DO_MAX @@ -2065,27 +2138,6 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ when N is negative, add 2**M-1. */ #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) -static inline uint64_t do_urshr(uint64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else if (sh == 64) { - return x >> 63; - } else { - return 0; - } -} - -static inline int64_t do_srshr(int64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else { - /* Rounding the sign bit always produces 0. */ - return 0; - } -} - DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR) DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR) @@ -2183,10 +2235,9 @@ DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr) DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr) DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr) -#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX) -#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX) -#define DO_SQSHRUN_D(x, sh) \ - do_sat_bhs((int64_t)(x) >> (sh < 64 ? sh : 63), 0, UINT32_MAX) +#define DO_SQSHRUN_H(x, sh) do_usat_b((int64_t)(x) >> sh) +#define DO_SQSHRUN_S(x, sh) do_usat_h((int64_t)(x) >> sh) +#define DO_SQSHRUN_D(x, sh) do_usat_s((int64_t)(x) >> (sh < 64 ? 
sh : 63)) DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H) DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S) @@ -2196,9 +2247,9 @@ DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H) DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S) DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D) -#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX) -#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX) -#define DO_SQRSHRUN_D(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT32_MAX) +#define DO_SQRSHRUN_H(x, sh) do_usat_b(do_srshr(x, sh)) +#define DO_SQRSHRUN_S(x, sh) do_usat_h(do_srshr(x, sh)) +#define DO_SQRSHRUN_D(x, sh) do_usat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H) DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S) @@ -2208,9 +2259,9 @@ DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H) DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S) DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D) -#define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX) -#define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX) -#define DO_SQSHRN_D(x, sh) do_sat_bhs(x >> sh, INT32_MIN, INT32_MAX) +#define DO_SQSHRN_H(x, sh) do_ssat_b(x >> sh) +#define DO_SQSHRN_S(x, sh) do_ssat_h(x >> sh) +#define DO_SQSHRN_D(x, sh) do_ssat_s(x >> sh) DO_SHRNB(sve2_sqshrnb_h, int16_t, uint8_t, DO_SQSHRN_H) DO_SHRNB(sve2_sqshrnb_s, int32_t, uint16_t, DO_SQSHRN_S) @@ -2220,9 +2271,9 @@ DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H) DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S) DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D) -#define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX) -#define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX) -#define DO_SQRSHRN_D(x, sh) do_sat_bhs(do_srshr(x, sh), INT32_MIN, INT32_MAX) +#define DO_SQRSHRN_H(x, sh) do_ssat_b(do_srshr(x, sh)) +#define DO_SQRSHRN_S(x, sh) do_ssat_h(do_srshr(x, sh)) +#define DO_SQRSHRN_D(x, sh) do_ssat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrnb_h, int16_t, uint8_t, DO_SQRSHRN_H) DO_SHRNB(sve2_sqrshrnb_s, int32_t, uint16_t, DO_SQRSHRN_S) @@ -2984,6 +3035,56 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) } } +/* + * TODO: This could use half_shuffle64 and similar bit tricks to + * expand blocks of bits at once. + */ +#define DO_PMOV_PV(NAME, ESIZE) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned idx = simd_data(desc); \ + unsigned elements = vl / ESIZE; \ + ARMPredicateReg *d = vd; \ + ARMVectorReg *s = vs; \ + memset(d, 0, sizeof(*d)); \ + for (unsigned e = 0; e < elements; ++e) { \ + depositn(d->p, e * ESIZE, 1, extractn(s->d, elements * idx + e, 1)); \ + } \ +} + +DO_PMOV_PV(pmov_pv_h, 2) +DO_PMOV_PV(pmov_pv_s, 4) +DO_PMOV_PV(pmov_pv_d, 8) + +#undef DO_PMOV_PV + +/* + * TODO: This could use half_unshuffle64 and similar bit tricks to + * compress blocks of bits at once. 
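+ *
+ * Concrete example, assuming a 256-bit vector (vl == 32): for
+ * ESIZE == 8 there are four doubleword elements, so predicate
+ * bits {0, 8, 16, 24} are packed into vector bits
+ * elements * idx + {0..3}; portion idx of the destination thus
+ * holds one bit per element.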
+ */ +#define DO_PMOV_VP(NAME, ESIZE) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned idx = simd_data(desc); \ + unsigned elements = vl / ESIZE; \ + ARMVectorReg *d = vd; \ + ARMPredicateReg *s = vs; \ + if (idx == 0) { \ + memset(d, 0, vl); \ + } \ + for (unsigned e = 0; e < elements; ++e) { \ + depositn(d->d, elements * idx + e, 1, extractn(s->p, e * ESIZE, 1)); \ + } \ +} + +DO_PMOV_VP(pmov_vp_h, 2) +DO_PMOV_VP(pmov_vp_s, 4) +DO_PMOV_VP(pmov_vp_d, 8) + +#undef DO_PMOV_VP + typedef void tb_impl_fn(void *, void *, void *, void *, uintptr_t, bool); static inline void do_tbl1(void *vd, void *vn, void *vm, uint32_t desc, @@ -3449,6 +3550,45 @@ DO_UZP(sve_uzp_s, uint32_t, H1_4) DO_UZP(sve_uzp_d, uint64_t, H1_8) DO_UZP(sve2_uzp_q, Int128, ) +typedef void perseg_zzz_fn(void *vd, void *vn, void *vm, uint32_t desc); + +static void do_perseg_zzz(void *vd, void *vn, void *vm, + uint32_t desc, perseg_zzz_fn *fn) +{ + intptr_t oprsz = simd_oprsz(desc); + + desc = simd_desc(16, 16, simd_data(desc)); + for (intptr_t i = 0; i < oprsz; i += 16) { + fn(vd + i, vn + i, vm + i, desc); + } +} + +#define DO_PERSEG_ZZZ(NAME, FUNC) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { do_perseg_zzz(vd, vn, vm, desc, FUNC); } + +DO_PERSEG_ZZZ(sve2p1_uzpq_b, helper_sve_uzp_b) +DO_PERSEG_ZZZ(sve2p1_uzpq_h, helper_sve_uzp_h) +DO_PERSEG_ZZZ(sve2p1_uzpq_s, helper_sve_uzp_s) +DO_PERSEG_ZZZ(sve2p1_uzpq_d, helper_sve_uzp_d) + +DO_PERSEG_ZZZ(sve2p1_zipq_b, helper_sve_zip_b) +DO_PERSEG_ZZZ(sve2p1_zipq_h, helper_sve_zip_h) +DO_PERSEG_ZZZ(sve2p1_zipq_s, helper_sve_zip_s) +DO_PERSEG_ZZZ(sve2p1_zipq_d, helper_sve_zip_d) + +DO_PERSEG_ZZZ(sve2p1_tblq_b, helper_sve_tbl_b) +DO_PERSEG_ZZZ(sve2p1_tblq_h, helper_sve_tbl_h) +DO_PERSEG_ZZZ(sve2p1_tblq_s, helper_sve_tbl_s) +DO_PERSEG_ZZZ(sve2p1_tblq_d, helper_sve_tbl_d) + +DO_PERSEG_ZZZ(sve2p1_tbxq_b, helper_sve2_tbx_b) +DO_PERSEG_ZZZ(sve2p1_tbxq_h, helper_sve2_tbx_h) +DO_PERSEG_ZZZ(sve2p1_tbxq_s, helper_sve2_tbx_s) +DO_PERSEG_ZZZ(sve2p1_tbxq_d, helper_sve2_tbx_d) + +#undef DO_PERSEG_ZZZ + #define DO_TRN(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ { \ @@ -3989,15 +4129,6 @@ static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g, return flags; } -static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) -{ - /* It is quicker to zero the whole predicate than loop on OPRSZ. - * The compiler should turn this into 4 64-bit integer stores. 
- */ - memset(d, 0, sizeof(ARMPredicateReg)); - return PREDTEST_INIT; -} - void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { @@ -4005,7 +4136,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, true); } else { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } @@ -4016,7 +4147,8 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, true); } else { - return do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } } @@ -4027,7 +4159,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, false); } else { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } @@ -4038,7 +4170,8 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, false); } else { - return do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } } @@ -4094,35 +4227,30 @@ void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) { intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (!last_active_pred(vn, vg, oprsz)) { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } -/* As if PredTest(Ones(PL), D, esz). */ -static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, - uint64_t esz_mask) -{ - uint32_t flags = PREDTEST_INIT; - intptr_t i; - - for (i = 0; i < oprsz / 8; i++) { - flags = iter_predtest_fwd(d->p[i], esz_mask, flags); - } - if (oprsz & 7) { - uint64_t mask = ~(-1ULL << (8 * (oprsz & 7))); - flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags); - } - return flags; -} - uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) { intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { - return predtest_ones(vd, oprsz, -1); - } else { - return do_zero(vd, oprsz); + ARMPredicateReg *d = vd; + uint32_t flags = PREDTEST_INIT; + intptr_t i; + + /* As if PredTest(Ones(PL), D, MO_8). 
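+ * With an all-ones governing mask this reduces to:
+ * N = bit 0 of D, Z = (D == 0), C = !(last bit of D);
+ * iter_predtest_fwd folds one 64-bit word of D into the
+ * running flags per iteration.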
*/ + for (i = 0; i < oprsz / 8; i++) { + flags = iter_predtest_fwd(d->p[i], -1, flags); + } + if (oprsz & 7) { + uint64_t mask = ~(-1ULL << (8 * (oprsz & 7))); + flags = iter_predtest_fwd(d->p[i], mask, flags); + } + return flags; } + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) @@ -4139,66 +4267,200 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) return sum; } -uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc) +uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc) { - intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); - intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); - uint64_t esz_mask = pred_esz_masks[esz]; - ARMPredicateReg *d = vd; - uint32_t flags; - intptr_t i; + int pl = FIELD_EX32(desc, PREDDESC, OPRSZ); + int vl = pl * 8; + unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ); + int lg2_width = FIELD_EX32(desc, PREDDESC, DATA) + 1; + DecodeCounter p = decode_counter(png, vl, v_esz); + unsigned maxelem = (vl << lg2_width) >> v_esz; + unsigned count = p.count; + + if (p.invert) { + if (count >= maxelem) { + return 0; + } + count = maxelem - count; + } else { + count = MIN(count, maxelem); + } + return count >> p.lg2_stride; +} + +/* C.f. Arm pseudocode EncodePredCount */ +static uint64_t encode_pred_count(uint32_t elements, uint32_t count, + uint32_t esz, bool invert) +{ + uint32_t pred; - /* Begin with a zero predicate register. */ - flags = do_zero(d, oprsz); if (count == 0) { - return flags; + return 0; + } + if (invert) { + count = elements - count; + } else if (count == elements) { + count = 0; + invert = true; } - /* Set all of the requested bits. */ - for (i = 0; i < count / 64; ++i) { - d->p[i] = esz_mask; + pred = (count << 1) | 1; + pred <<= esz; + pred |= invert << 15; + + return pred; +} + +/* C.f. Arm pseudocode PredCountTest */ +static uint32_t pred_count_test(uint32_t elements, uint32_t count, bool invert) +{ + uint32_t flags; + + if (count == 0) { + flags = 1; /* !N, Z, C */ + } else if (!invert) { + flags = (1u << 31) | 2; /* N, !Z */ + flags |= count != elements; /* C */ + } else { + flags = 2; /* !Z, !C */ + flags |= (count == elements) << 31; /* N */ } - if (count & 63) { - d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + return flags; +} + +/* D must be cleared on entry. */ +static void do_whilel(ARMPredicateReg *d, uint64_t esz_mask, + uint32_t count, uint32_t oprbits) +{ + tcg_debug_assert(count <= oprbits); + if (count) { + uint32_t i; + + /* Set all of the requested bits. 
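+ * count is in bits here (the callers pre-scale by esz): e.g.
+ * three active 16-bit elements give count == 6, and with
+ * esz_mask == 0x5555555555555555 word 0 becomes 0x15.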
*/ + for (i = 0; i < count / 64; ++i) { + d->p[i] = esz_mask; + } + if (count & 63) { + d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + } } +} + +uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; - return predtest_ones(d, oprsz, esz_mask); + count <<= esz; + memset(d, 0, sizeof(*d)); + do_whilel(d, esz_mask, count, oprbits); + return pred_count_test(oprbits, count, false); } -uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) +uint32_t HELPER(sve_while2l)(void *vd, uint32_t count, uint32_t pred_desc) { - intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); - intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; uint64_t esz_mask = pred_esz_masks[esz]; ARMPredicateReg *d = vd; - intptr_t i, invcount, oprbits; - uint64_t bits; - if (count == 0) { - return do_zero(d, oprsz); + count <<= esz; + memset(d, 0, 2 * sizeof(*d)); + if (count <= oprbits) { + do_whilel(&d[0], esz_mask, count, oprbits); + } else { + do_whilel(&d[0], esz_mask, oprbits, oprbits); + do_whilel(&d[1], esz_mask, count - oprbits, oprbits); } - oprbits = oprsz * 8; + return pred_count_test(2 * oprbits, count, false); +} + +uint32_t HELPER(sve_whilecl)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t pl = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t scale = FIELD_EX32(pred_desc, PREDDESC, DATA); + uint32_t vl = pl * 8; + uint32_t elements = (vl >> esz) << scale; + ARMPredicateReg *d = vd; + + *d = (ARMPredicateReg) { + .p[0] = encode_pred_count(elements, count, esz, false) + }; + return pred_count_test(elements, count, false); +} + +/* D must be cleared on entry. 
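+ * Unlike do_whilel, which fills from element 0 upwards, this fills
+ * the top of the register: the last `count` bits, i.e. the range
+ * [oprbits - count, oprbits), matching the WHILE_gt patterns that
+ * count down from the final element.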
*/ +static void do_whileg(ARMPredicateReg *d, uint64_t esz_mask, + uint32_t count, uint32_t oprbits) +{ tcg_debug_assert(count <= oprbits); + if (count) { + uint32_t i, invcount = oprbits - count; + uint64_t bits = esz_mask & MAKE_64BIT_MASK(invcount & 63, 64); - bits = esz_mask; - if (oprbits & 63) { - bits &= MAKE_64BIT_MASK(0, oprbits & 63); + for (i = invcount / 64; i < oprbits / 64; ++i) { + d->p[i] = bits; + bits = esz_mask; + } + if (oprbits & 63) { + d->p[i] = bits & MAKE_64BIT_MASK(0, oprbits & 63); + } } +} - invcount = oprbits - count; - for (i = (oprsz - 1) / 8; i > invcount / 64; --i) { - d->p[i] = bits; - bits = esz_mask; - } +uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; - d->p[i] = bits & MAKE_64BIT_MASK(invcount & 63, 64); + count <<= esz; + memset(d, 0, sizeof(*d)); + do_whileg(d, esz_mask, count, oprbits); + return pred_count_test(oprbits, count, true); +} - while (--i >= 0) { - d->p[i] = 0; +uint32_t HELPER(sve_while2g)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; + + count <<= esz; + memset(d, 0, 2 * sizeof(*d)); + if (count <= oprbits) { + do_whileg(&d[1], esz_mask, count, oprbits); + } else { + do_whilel(&d[1], esz_mask, oprbits, oprbits); + do_whileg(&d[0], esz_mask, count - oprbits, oprbits); } - return predtest_ones(d, oprsz, esz_mask); + return pred_count_test(2 * oprbits, count, true); +} + +uint32_t HELPER(sve_whilecg)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t pl = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t scale = FIELD_EX32(pred_desc, PREDDESC, DATA); + uint32_t vl = pl * 8; + uint32_t elements = (vl >> esz) << scale; + ARMPredicateReg *d = vd; + + *d = (ARMPredicateReg) { + .p[0] = encode_pred_count(elements, count, esz, true) + }; + return pred_count_test(elements, count, true); } /* Recursive reduction on a function; @@ -4209,66 +4471,87 @@ uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) * The recursion is bounded to depth 7 (128 fp16 elements), so there's * little to gain with a more complex non-recursive form. 
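 * For example, with n == 4 the reduction computes
 * FUNC(FUNC(data[0], data[1]), FUNC(data[2], data[3])), pairing
 * adjacent elements at each level; inactive lanes are filled with
 * IDENT beforehand so they cannot affect the result.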
*/ -#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ -static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ +#define DO_REDUCE(NAME, SUF, TYPE, H, FUNC, IDENT) \ +static TYPE FUNC##_reduce(TYPE *data, float_status *status, uintptr_t n) \ { \ if (n == 1) { \ return *data; \ } else { \ uintptr_t half = n / 2; \ - TYPE lo = NAME##_reduce(data, status, half); \ - TYPE hi = NAME##_reduce(data + half, status, half); \ + TYPE lo = FUNC##_reduce(data, status, half); \ + TYPE hi = FUNC##_reduce(data + half, status, half); \ return FUNC(lo, hi, status); \ } \ } \ -uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ +uint64_t helper_sve_##NAME##v_##SUF(void *vn, void *vg, \ + float_status *status, uint32_t desc) \ { \ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + TYPE ident = IDENT; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ do { \ TYPE nn = *(TYPE *)(vn + H(i)); \ - *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \ + *(TYPE *)((void *)data + i) = (pg & 1 ? nn : ident); \ i += sizeof(TYPE), pg >>= sizeof(TYPE); \ } while (i & 15); \ } \ for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((void *)data + i) = IDENT; \ + *(TYPE *)((void *)data + i) = ident; \ } \ - return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ + return FUNC##_reduce(data, status, maxsz / sizeof(TYPE)); \ +} \ +void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \ + float_status *status, uint32_t desc) \ +{ \ + unsigned oprsz = simd_oprsz(desc), segments = oprsz / 16; \ + TYPE ident = IDENT; \ + for (unsigned e = 0; e < 16; e += sizeof(TYPE)) { \ + TYPE data[ARM_MAX_VQ]; \ + for (unsigned s = 0; s < segments; s++) { \ + uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \ + TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \ + data[s] = (pg >> e) & 1 ? nn : ident; \ + } \ + *(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \ + } \ + clear_tail(vd, 16, simd_maxsz(desc)); \ } -DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) -DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) +DO_REDUCE(fadd,h, float16, H1_2, float16_add, float16_zero) +DO_REDUCE(fadd,s, float32, H1_4, float32_add, float32_zero) +DO_REDUCE(fadd,d, float64, H1_8, float64_add, float64_zero) -/* Identity is floatN_default_nan, without the function call. */ -DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) -DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) +/* + * We can't avoid the function call for the default NaN value, because + * it changes when FPCR.AH is set. 
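+ * The removed constants (0x7E00, 0x7FC00000, 0x7FF8000000000000)
+ * are only the FPCR.AH == 0 patterns, so IDENT is now evaluated
+ * against the live float_status instead.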
+ */ +DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, float16_default_nan(status)) +DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, float32_default_nan(status)) +DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, float64_default_nan(status)) -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, float16_default_nan(status)) +DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, float32_default_nan(status)) +DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, float64_default_nan(status)) -DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) -DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) +DO_REDUCE(fmin,h, float16, H1_2, float16_min, float16_infinity) +DO_REDUCE(fmin,s, float32, H1_4, float32_min, float32_infinity) +DO_REDUCE(fmin,d, float64, H1_8, float64_min, float64_infinity) -DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) -DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) +DO_REDUCE(fmax,h, float16, H1_2, float16_max, float16_chs(float16_infinity)) +DO_REDUCE(fmax,s, float32, H1_4, float32_max, float32_chs(float32_infinity)) +DO_REDUCE(fmax,d, float64, H1_8, float64_max, float64_chs(float64_infinity)) -DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) -DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) -DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) +DO_REDUCE(ah_fmin,h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) +DO_REDUCE(ah_fmin,s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) +DO_REDUCE(ah_fmin,d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) -DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, +DO_REDUCE(ah_fmax,h, float16, H1_2, helper_vfp_ah_maxh, float16_chs(float16_infinity)) -DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, +DO_REDUCE(ah_fmax,s, float32, H1_4, helper_vfp_ah_maxs, float32_chs(float32_infinity)) -DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, +DO_REDUCE(ah_fmax,d, float64, H1_8, helper_vfp_ah_maxd, float64_chs(float64_infinity)) #undef DO_REDUCE @@ -4351,14 +4634,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ } while (i != 0); \ } +DO_ZPZZ_FP(sve_fadd_b16, uint16_t, H1_2, bfloat16_add) DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add) +DO_ZPZZ_FP(sve_fsub_b16, uint16_t, H1_2, bfloat16_sub) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub) +DO_ZPZZ_FP(sve_fmul_b16, uint16_t, H1_2, bfloat16_mul) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul) @@ -4367,26 +4653,32 @@ DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div) +DO_ZPZZ_FP(sve_fmin_b16, uint16_t, H1_2, bfloat16_min) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) 
DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min) +DO_ZPZZ_FP(sve_fmax_b16, uint16_t, H1_2, bfloat16_max) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) +DO_ZPZZ_FP(sve_ah_fmin_b16, uint16_t, H1_2, helper_sme2_ah_fmin_b16) DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) +DO_ZPZZ_FP(sve_ah_fmax_b16, uint16_t, H1_2, helper_sme2_ah_fmax_b16) DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) +DO_ZPZZ_FP(sve_fminnum_b16, uint16_t, H1_2, bfloat16_minnum) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) +DO_ZPZZ_FP(sve_fmaxnum_b16, uint16_t, H1_2, bfloat16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum) @@ -4550,7 +4842,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, \ * FZ16. When converting from fp16, this affects flushing input denormals; * when converting to fp16, this affects flushing output denormals. */ -static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) +float32 sve_f16_to_f32(float16 f, float_status *fpst) { bool save = get_flush_inputs_to_zero(fpst); float32 ret; @@ -4572,7 +4864,7 @@ static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) return ret; } -static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) +float16 sve_f32_to_f16(float32 f, float_status *fpst) { bool save = get_flush_to_zero(fpst); float16 ret; @@ -4812,6 +5104,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) #undef DO_ZPZ_FP +static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3, int flags) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)(vm + H1_2(i)); + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + r = bfloat16_muladd(e1, e2, e3, flags, status); + *(uint16_t *)(vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); +} + +void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); +} + +void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + 
void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); +} + static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3, int flags) @@ -6001,17 +6362,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6023,13 +6381,13 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, #define DO_LD1_1(NAME, ESZ) \ void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ sve_##NAME##_host, sve_##NAME##_tlb); \ } \ void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ sve_##NAME##_host, sve_##NAME##_tlb); \ @@ -6037,25 +6395,25 @@ void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ #define DO_LD1_2(NAME, ESZ, MSZ) \ void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } \ void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ @@ -6081,18 +6439,21 @@ DO_LD1_2(ld1sds, MO_64, MO_32) DO_LD1_2(ld1dd, MO_64, MO_64) +DO_LD1_2(ld1squ, MO_128, MO_32) +DO_LD1_2(ld1dqu, MO_128, MO_64) + #undef DO_LD1_1 #undef DO_LD1_2 #define DO_LDN_1(N) \ void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ - target_ulong 
addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ sve_ld1bb_host, sve_ld1bb_tlb); \ } \ void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ sve_ld1bb_host, sve_ld1bb_tlb); \ @@ -6100,25 +6461,25 @@ void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ #define DO_LDN_2(N, SUFF, ESZ) \ void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ @@ -6140,6 +6501,10 @@ DO_LDN_2(2, dd, MO_64) DO_LDN_2(3, dd, MO_64) DO_LDN_2(4, dd, MO_64) +DO_LDN_2(2, qq, MO_128) +DO_LDN_2(3, qq, MO_128) +DO_LDN_2(4, qq, MO_128) + #undef DO_LDN_1 #undef DO_LDN_2 @@ -6359,17 +6724,14 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t retaddr, + uint64_t desc, const uintptr_t retaddr, const int esz, const int msz, const SVEContFault fault, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. 
*/ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6382,25 +6744,25 @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, #define DO_LDFF1_LDNF1_1(PART, ESZ) \ void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ @@ -6408,49 +6770,49 @@ void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ #define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), 
ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ @@ -6617,17 +6979,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6639,13 +6998,13 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, #define DO_STN_1(N, NAME, ESZ) \ void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ } \ void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ @@ -6653,25 +7012,25 @@ void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ #define DO_STN_2(N, NAME, ESZ, MSZ) \ void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } \ void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ @@ -6703,6 +7062,13 @@ DO_STN_2(2, dd, MO_64, MO_64) DO_STN_2(3, dd, MO_64, MO_64) DO_STN_2(4, dd, MO_64, MO_64) +DO_STN_2(1, sq, MO_128, MO_32) +DO_STN_2(1, dq, MO_128, MO_64) + +DO_STN_2(2, qq, MO_128, MO_128) +DO_STN_2(3, qq, MO_128, MO_128) +DO_STN_2(4, qq, MO_128, MO_128) + #undef DO_STN_1 #undef DO_STN_2 @@ -6808,14 +7174,12 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. 
*/ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? TODO: For the 32-bit offset extractions, base + ofs cannot @@ -6829,13 +7193,13 @@ void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -6843,18 +7207,32 @@ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ #define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } +#define DO_LD1_ZPZ_Q(MEM, OFS, MSZ) \ +void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 16, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 16, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + DO_LD1_ZPZ_S(bsu, zsu, MO_8) DO_LD1_ZPZ_S(bsu, zss, MO_8) DO_LD1_ZPZ_D(bdu, zsu, MO_8) @@ -6919,6 +7297,9 @@ DO_LD1_ZPZ_D(dd_be, zsu, MO_64) DO_LD1_ZPZ_D(dd_be, zss, MO_64) DO_LD1_ZPZ_D(dd_be, zd, MO_64) +DO_LD1_ZPZ_Q(qq_le, zd, MO_128) +DO_LD1_ZPZ_Q(qq_be, zd, MO_128) + #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D @@ -7017,15 +7398,13 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, const int esz, const int msz, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? 
TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7040,14 +7419,14 @@ void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ldff##MEM##_##OFS) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -7056,14 +7435,14 @@ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ #define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ldff##MEM##_##OFS) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -7222,14 +7601,12 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? 
TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7243,13 +7620,13 @@ void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } \ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ @@ -7257,18 +7634,32 @@ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ #define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } \ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } +#define DO_ST1_ZPZ_Q(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 16, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 16, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} + DO_ST1_ZPZ_S(bs, zsu, MO_8) DO_ST1_ZPZ_S(hs_le, zsu, MO_16) DO_ST1_ZPZ_S(hs_be, zsu, MO_16) @@ -7305,9 +7696,507 @@ DO_ST1_ZPZ_D(sd_be, zd, MO_32) DO_ST1_ZPZ_D(dd_le, zd, MO_64) DO_ST1_ZPZ_D(dd_be, zd, MO_64) +DO_ST1_ZPZ_Q(qq_le, zd, MO_128) +DO_ST1_ZPZ_Q(qq_be, zd, MO_128) + #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D +/* + * SVE2.1 consecutive register load/store + */ + +static unsigned sve2p1_cont_ldst_elements(SVEContLdSt *info, vaddr addr, + uint32_t png, intptr_t reg_max, + int N, int v_esz) +{ + const int esize = 1 << v_esz; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + DecodeCounter p = decode_counter(png, reg_max, v_esz); + unsigned b_count = p.count << v_esz; + unsigned b_stride = 1 << (v_esz + p.lg2_stride); + intptr_t page_split; + + /* Set all of the element indices to -1, and the TLB data to 0. 
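+ * (A memset of -1 sets every byte to 0xff, leaving each signed + * reg_off/mem_off field holding -1, the "not present" marker that + * the consumers below test with < 0 / >= 0.)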
*/ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + if (p.invert) { + if (b_count >= reg_max * N) { + return 0; + } + reg_off_first = b_count; + reg_off_last = reg_max * N - b_stride; + } else { + if (b_count == 0) { + return 0; + } + reg_off_first = 0; + reg_off_last = MIN(b_count - esize, reg_max * N - b_stride); + } + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = reg_off_first; + + page_split = -(addr | TARGET_PAGE_MASK); + if (reg_off_last + esize <= page_split || reg_off_first >= page_split) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return b_stride; + } + + info->page_split = page_split; + reg_off_split = ROUND_DOWN(page_split, esize); + + /* + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. + */ + if (reg_off_split != 0) { + info->reg_off_last[0] = ROUND_DOWN(reg_off_split - esize, b_stride); + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split & (esize - 1)) { + /* It is helpful to know if the split element is active. */ + if ((reg_off_split & (b_stride - 1)) == 0) { + info->reg_off_split = reg_off_split; + info->mem_off_split = reg_off_split; + } + reg_off_split += esize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = ROUND_UP(reg_off_split, b_stride); + if (reg_off_split <= reg_off_last) { + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = reg_off_split; + info->reg_off_last[1] = reg_off_last; + } + return b_stride; +} + +static void sve2p1_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + target_ulong addr, unsigned estride, + int esize, int wp_access, uintptr_t ra) +{ +#ifndef CONFIG_USER_ONLY + intptr_t count_off, count_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; + + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } + + /* Indicate that watchpoints are handled. */ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + count_off = info->reg_off_first[0]; + count_last = info->reg_off_split; + if (count_last < 0) { + count_last = info->reg_off_last[0]; + } + do { + cpu_check_watchpoint(env_cpu(env), addr + count_off, + esize, info->page[0].attrs, wp_access, ra); + count_off += estride; + } while (count_off <= count_last); + } + + count_off = info->reg_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && count_off >= 0) { + count_last = info->reg_off_last[1]; + do { + cpu_check_watchpoint(env_cpu(env), addr + count_off, + esize, info->page[1].attrs, + wp_access, ra); + count_off += estride; + } while (count_off <= count_last); + } +#endif +} + +static void sve2p1_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, + target_ulong addr, unsigned estride, + int esize, uint32_t mtedesc, + uintptr_t ra) +{ + intptr_t count_off, count_last; + + /* + * TODO: estride is always a small power of two, <= 8. + * Manipulate the stride within the loops such that + * - first iteration hits addr + off, as required, + * - second iteration hits ALIGN_UP(addr, 16), + * - other iterations advance addr by 16. + * This will minimize the probing to once per MTE granule. 
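+ * One possible shape (untested): after each check, advance with + * count_off += ROUND_UP(ROUND_UP(addr + count_off + 1, 16) + * - (addr + count_off), estride); + * which keeps count_off on its estride grid while probing each + * 16-byte granule at most once.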
+ */ + + /* Process the page only if MemAttr == Tagged. */ + if (info->page[0].tagged) { + count_off = info->reg_off_first[0]; + count_last = info->reg_off_split; + if (count_last < 0) { + count_last = info->reg_off_last[0]; + } + + do { + mte_check(env, mtedesc, addr + count_off, ra); + count_off += estride; + } while (count_off <= count_last); + } + + count_off = info->reg_off_first[1]; + if (count_off >= 0 && info->page[1].tagged) { + count_last = info->reg_off_last[1]; + do { + mte_check(env, mtedesc, addr + count_off, ra); + count_off += estride; + } while (count_off <= count_last); + } +} + +static inline QEMU_ALWAYS_INLINE +void sve2p1_ld1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, + uint32_t png, uint64_t desc64, + const uintptr_t ra, const MemOp esz, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; + const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; + const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); + const intptr_t reg_max = simd_oprsz(desc); + const unsigned esize = 1 << esz; + intptr_t count_off, count_last; + intptr_t reg_off, reg_last, reg_n; + SVEContLdSt info; + unsigned estride, flags; + void *host; + + estride = sve2p1_cont_ldst_elements(&info, addr, png, reg_max, N, esz); + if (estride == 0) { + /* The entire predicate was false; no load occurs. */ + for (unsigned n = 0; n < N; n++) { + memset(zd + n * rstride, 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra); + + /* Handle watchpoints for all active elements. */ + sve2p1_cont_ldst_watchpoints(&info, env, addr, estride, + esize, BP_MEM_READ, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mtedesc) { + sve2p1_cont_ldst_mte_check(&info, env, addr, estride, + esize, mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. + */ + ARMVectorReg scratch[4] = { }; + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[1]; + if (count_last < 0) { + count_last = info.reg_off_split; + if (count_last < 0) { + count_last = info.reg_off_last[0]; + } + } + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + + do { + reg_last = MIN(count_last - count_off, reg_max - esize); + do { + tlb_fn(env, &scratch[reg_n], reg_off, addr + count_off, ra); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + for (unsigned n = 0; n < N; ++n) { + memcpy(&zd[n * rstride], &scratch[n], reg_max); + } + return; + } + + /* The entire operation is in RAM, on valid pages. 
*/ + + for (unsigned n = 0; n < N; ++n) { + memset(&zd[n * rstride], 0, reg_max); + } + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[0]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[0].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + count_off = info.reg_off_split; + if (unlikely(count_off >= 0)) { + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + } + + count_off = info.reg_off_first[1]; + if (unlikely(count_off >= 0)) { + count_last = info.reg_off_last[1]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[1].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + } +} + +void HELPER(sve2p1_ld1bb_c)(CPUARMState *env, void *vd, target_ulong addr, + uint32_t png, uint64_t desc) +{ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), MO_8, + sve_ld1bb_host, sve_ld1bb_tlb); +} + +#define DO_LD1_2(NAME, ESZ) \ +void HELPER(sve2p1_##NAME##_le_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve2p1_##NAME##_be_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_2(ld1hh, MO_16) +DO_LD1_2(ld1ss, MO_32) +DO_LD1_2(ld1dd, MO_64) + +#undef DO_LD1_2 + +static inline QEMU_ALWAYS_INLINE +void sve2p1_st1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, + uint32_t png, uint64_t desc64, + const uintptr_t ra, const int esz, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; + const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; + const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); + const intptr_t reg_max = simd_oprsz(desc); + const unsigned esize = 1 << esz; + intptr_t count_off, count_last; + intptr_t reg_off, reg_last, reg_n; + SVEContLdSt info; + unsigned estride, flags; + void *host; + + estride = sve2p1_cont_ldst_elements(&info, addr, png, reg_max, N, esz); + if (estride == 0) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra); + + /* Handle watchpoints for all active elements. */ + sve2p1_cont_ldst_watchpoints(&info, env, addr, estride, + esize, BP_MEM_WRITE, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. 
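+ * (mtedesc is simply the high 32 bits of desc64: widening the + * descriptor from uint32_t to uint64_t lets the MTE and SVE + * descriptors travel together without sharing SIMD_DATA bits, + * which is why SVE_MTEDESC_SHIFT is gone.)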
+ */ + if (mtedesc) { + sve2p1_cont_ldst_mte_check(&info, env, addr, estride, + esize, mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. There is no scratch + * buffer for stores, so a fault part way through will leave + * the store incomplete. + */ + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[1]; + if (count_last < 0) { + count_last = info.reg_off_split; + if (count_last < 0) { + count_last = info.reg_off_last[0]; + } + } + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + + do { + reg_last = MIN(count_last - count_off, reg_max - esize); + do { + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + return; + } + + /* The entire operation is in RAM, on valid pages. */ + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[0]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[0].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + count_off = info.reg_off_split; + if (unlikely(count_off >= 0)) { + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + } + + count_off = info.reg_off_first[1]; + if (unlikely(count_off >= 0)) { + count_last = info.reg_off_last[1]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[1].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + } +} + +void HELPER(sve2p1_st1bb_c)(CPUARMState *env, void *vd, target_ulong addr, + uint32_t png, uint64_t desc) +{ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), MO_8, + sve_st1bb_host, sve_st1bb_tlb); +} + +#define DO_ST1_2(NAME, ESZ) \ +void HELPER(sve2p1_##NAME##_le_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve2p1_##NAME##_be_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_ST1_2(st1hh, MO_16) +DO_ST1_2(st1ss, MO_32) +DO_ST1_2(st1dd, MO_64) + +#undef DO_ST1_2 + void HELPER(sve2_eor3)(void *vd, void *vn, void *vm, void *vk, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; @@ -7711,3 +8600,31 @@ DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64) #undef DO_FCVTLT #undef DO_FCVTNT + +void HELPER(pext)(void *vd, uint32_t png, uint32_t desc) +{ + int pl 
= FIELD_EX32(desc, PREDDESC, OPRSZ); + int vl = pl * 8; + unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ); + int part = FIELD_EX32(desc, PREDDESC, DATA); + DecodeCounter p = decode_counter(png, vl, v_esz); + uint64_t mask = pred_esz_masks[v_esz + p.lg2_stride]; + ARMPredicateReg *d = vd; + + /* + * Convert from element count to byte count and adjust + * for the portion of the 4*VL counter to be extracted. + */ + int b_count = (p.count << v_esz) - vl * part; + + memset(d, 0, sizeof(*d)); + if (p.invert) { + if (b_count <= 0) { + do_whilel(vd, mask, vl, vl); + } else if (b_count < vl) { + do_whileg(vd, mask, vl - b_count, vl); + } + } else if (b_count > 0) { + do_whilel(vd, mask, MIN(b_count, vl), vl); + } +} diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h index 4f159ec..c67cda9 100644 --- a/target/arm/tcg/sve_ldst_internal.h +++ b/target/arm/tcg/sve_ldst_internal.h @@ -20,7 +20,7 @@ #ifndef TARGET_ARM_SVE_LDST_INTERNAL_H #define TARGET_ARM_SVE_LDST_INTERNAL_H -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" /* * Load one element into @vd + @reg_off from @host. @@ -116,6 +116,94 @@ DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) +#define DO_LD_PRIM_3(NAME, FUNC) \ + static inline void sve_##NAME##_host(void *vd, \ + intptr_t reg_off, void *host) \ + { sve_##FUNC##_host(vd, reg_off, host); \ + *(uint64_t *)(vd + reg_off + 8) = 0; } \ + static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ + { sve_##FUNC##_tlb(env, vd, reg_off, addr, ra); \ + *(uint64_t *)(vd + reg_off + 8) = 0; } + +DO_LD_PRIM_3(ld1squ_be, ld1sdu_be) +DO_LD_PRIM_3(ld1squ_le, ld1sdu_le) +DO_LD_PRIM_3(ld1dqu_be, ld1dd_be) +DO_LD_PRIM_3(ld1dqu_le, ld1dd_le) + +#define sve_st1sq_be_host sve_st1sd_be_host +#define sve_st1sq_le_host sve_st1sd_le_host +#define sve_st1sq_be_tlb sve_st1sd_be_tlb +#define sve_st1sq_le_tlb sve_st1sd_le_tlb + +#define sve_st1dq_be_host sve_st1dd_be_host +#define sve_st1dq_le_host sve_st1dd_le_host +#define sve_st1dq_be_tlb sve_st1dd_be_tlb +#define sve_st1dq_le_tlb sve_st1dd_le_tlb + +/* + * The ARMVectorReg elements are stored in host-endian 64-bit units. + * For 128-bit quantities, the sequence defined by the Elem[] pseudocode + * corresponds to storing the two 64-bit pieces in little-endian order. + */ +/* FIXME: Nothing in this file makes any effort at atomicity. 
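+ * Each 128-bit access below is implemented as two independent + * 64-bit operations, so a racing access may observe or produce + * a torn quadword.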
*/ + +static inline void sve_ld1qq_be_host(void *vd, intptr_t reg_off, void *host) +{ + sve_ld1dd_be_host(vd, reg_off + 8, host); + sve_ld1dd_be_host(vd, reg_off, host + 8); +} + +static inline void sve_ld1qq_le_host(void *vd, intptr_t reg_off, void *host) +{ + sve_ld1dd_le_host(vd, reg_off, host); + sve_ld1dd_le_host(vd, reg_off + 8, host + 8); +} + +static inline void +sve_ld1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_ld1dd_be_tlb(env, vd, reg_off + 8, addr, ra); + sve_ld1dd_be_tlb(env, vd, reg_off, addr + 8, ra); +} + +static inline void +sve_ld1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_ld1dd_le_tlb(env, vd, reg_off, addr, ra); + sve_ld1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); +} + +static inline void sve_st1qq_be_host(void *vd, intptr_t reg_off, void *host) +{ + sve_st1dd_be_host(vd, reg_off + 8, host); + sve_st1dd_be_host(vd, reg_off, host + 8); +} + +static inline void sve_st1qq_le_host(void *vd, intptr_t reg_off, void *host) +{ + sve_st1dd_le_host(vd, reg_off, host); + sve_st1dd_le_host(vd, reg_off + 8, host + 8); +} + +static inline void +sve_st1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_st1dd_be_tlb(env, vd, reg_off + 8, addr, ra); + sve_st1dd_be_tlb(env, vd, reg_off, addr + 8, ra); +} + +static inline void +sve_st1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_st1dd_le_tlb(env, vd, reg_off, addr, ra); + sve_st1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); +} + #undef DO_LD_TLB #undef DO_ST_TLB #undef DO_LD_HOST @@ -123,6 +211,7 @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) #undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 #undef DO_ST_PRIM_2 +#undef DO_LD_PRIM_3 /* * Resolve the guest virtual address to info->host and info->flags. diff --git a/target/arm/tcg/tlb-insns.c b/target/arm/tcg/tlb-insns.c index 630a481..1a0a332 100644 --- a/target/arm/tcg/tlb-insns.c +++ b/target/arm/tcg/tlb-insns.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "exec/cputlb.h" +#include "exec/target_page.h" #include "cpu.h" #include "internals.h" #include "cpu-features.h" @@ -34,7 +35,6 @@ static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } -#ifdef TARGET_AARCH64 /* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. 
*/ static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -45,7 +45,6 @@ static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, } return CP_ACCESS_OK; } -#endif /* IS variants of TLB operations must affect all cores */ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -150,7 +149,8 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2); + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS); } static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -160,7 +160,8 @@ static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E2); + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -203,7 +204,7 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2); + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS); } static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -211,7 +212,8 @@ static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2); + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } /* @@ -229,12 +231,16 @@ static int vae1_tlbmask(CPUARMState *env) if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; } else { /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */ mask = ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; + ARMMMUIdxBit_E10_1_GCS | + ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS; } return mask; } @@ -247,13 +253,20 @@ static int vae2_tlbmask(CPUARMState *env) if (hcr & HCR_E2H) { mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; } else { - mask = ARMMMUIdxBit_E2; + mask = ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS; } return mask; } +static int vae3_tlbmask(void) +{ + return ARMMMUIdxBit_E3 | ARMMMUIdxBit_E3_GCS; +} + /* Return 56 if TBI is enabled, 64 otherwise. 
*/ static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, uint64_t addr) @@ -326,9 +339,12 @@ static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, static int e2_tlbmask(CPUARMState *env) { return (ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS | ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2); + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -355,7 +371,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); + tlb_flush_by_mmuidx(cs, vae3_tlbmask()); } static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -381,7 +397,7 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); + tlb_flush_by_mmuidx_all_cpus_synced(cs, vae3_tlbmask()); } static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -412,7 +428,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); + tlb_flush_page_by_mmuidx(cs, pageaddr, vae3_tlbmask()); } static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -466,7 +482,7 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E3, bits); + vae3_tlbmask(), bits); } static int ipas2e1_tlbmask(CPUARMState *env, int64_t value) @@ -801,7 +817,6 @@ static const ARMCPRegInfo tlbi_el3_cp_reginfo[] = { .writefn = tlbi_aa64_vae3_write }, }; -#ifdef TARGET_AARCH64 typedef struct { uint64_t base; uint64_t length; @@ -965,7 +980,7 @@ static void tlbi_aa64_rvae3_write(CPUARMState *env, * flush-last-level-only. */ - do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); + do_rvae_write(env, value, vae3_tlbmask(), tlb_force_broadcast(env)); } static void tlbi_aa64_rvae3is_write(CPUARMState *env, @@ -979,7 +994,7 @@ static void tlbi_aa64_rvae3is_write(CPUARMState *env, * flush-last-level-only or inner/outer specific flushes. 
*/ - do_rvae_write(env, value, ARMMMUIdxBit_E3, true); + do_rvae_write(env, value, vae3_tlbmask(), true); } static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1269,8 +1284,6 @@ static const ARMCPRegInfo tlbi_rme_reginfo[] = { .writefn = tlbi_aa64_paallos_write }, }; -#endif - void define_tlb_insn_regs(ARMCPU *cpu) { CPUARMState *env = &cpu->env; @@ -1298,7 +1311,6 @@ void define_tlb_insn_regs(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_EL3)) { define_arm_cp_regs(cpu, tlbi_el3_cp_reginfo); } -#ifdef TARGET_AARCH64 if (cpu_isar_feature(aa64_tlbirange, cpu)) { define_arm_cp_regs(cpu, tlbirange_reginfo); } @@ -1308,5 +1320,4 @@ void define_tlb_insn_regs(ARMCPU *cpu) if (cpu_isar_feature(aa64_rme, cpu)) { define_arm_cp_regs(cpu, tlbi_rme_reginfo); } -#endif } diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c index 8841f03..f1983a5 100644 --- a/target/arm/tcg/tlb_helper.c +++ b/target/arm/tcg/tlb_helper.c @@ -9,9 +9,9 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/helper-proto.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" /* * Returns true if the stage 1 translation regime is using LPAE format page @@ -24,13 +24,13 @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) return regime_using_lpae_format(env, mmu_idx); } -static inline uint32_t merge_syn_data_abort(uint32_t template_syn, +static inline uint64_t merge_syn_data_abort(uint32_t template_syn, ARMMMUFaultInfo *fi, unsigned int target_el, bool same_el, bool is_write, - int fsc) + int fsc, bool gcs) { - uint32_t syn; + uint64_t syn; /* * ISV is only set for stage-2 data aborts routed to EL2 and @@ -75,6 +75,11 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn, /* Merge the runtime syndrome with the template syndrome. */ syn |= template_syn; } + + /* Form ISS2 at the top of the syndrome. */ + syn |= (uint64_t)fi->dirtybit << 37; + syn |= (uint64_t)gcs << 40; + return syn; } @@ -176,7 +181,9 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, int target_el = exception_target_el(env); int current_el = arm_current_el(env); bool same_el; - uint32_t syn, exc, fsr, fsc; + uint32_t exc, fsr, fsc; + uint64_t syn; + /* * We know this must be a data or insn abort, and that * env->exception.syndrome contains the template syndrome set @@ -246,9 +253,10 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc); exc = EXCP_PREFETCH_ABORT; } else { + bool gcs = regime_is_gcs(core_to_arm_mmu_idx(env, mmu_idx)); syn = merge_syn_data_abort(env->exception.syndrome, fi, target_el, same_el, access_type == MMU_DATA_STORE, - fsc); + fsc, gcs); if (access_type == MMU_DATA_STORE && arm_feature(env, ARM_FEATURE_V6)) { fsr |= (1 << 11); @@ -277,7 +285,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); } -void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc) +void helper_exception_pc_alignment(CPUARMState *env, vaddr pc) { ARMMMUFaultInfo fi = { .type = ARMFault_Alignment }; int target_el = exception_target_el(env); diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 3901432..918d5ed 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -17,8 +17,7 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "qemu/osdep.h" - -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "translate.h" #include "translate-a64.h" #include "qemu/log.h" @@ -27,6 +26,7 @@ #include "cpregs.h" static TCGv_i64 cpu_X[32]; +static TCGv_i64 cpu_gcspr[4]; static TCGv_i64 cpu_pc; /* Load/store exclusive handling */ @@ -78,6 +78,10 @@ static int scale_by_log2_tag_granule(DisasContext *s, int x) /* initialize TCG globals. */ void a64_translate_init(void) { + static const char gcspr_names[4][12] = { + "gcspr_el0", "gcspr_el1", "gcspr_el2", "gcspr_el3" + }; + int i; cpu_pc = tcg_global_mem_new_i64(tcg_env, @@ -91,10 +95,17 @@ void a64_translate_init(void) cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, exclusive_high), "exclusive_high"); + + for (i = 0; i < 4; i++) { + cpu_gcspr[i] = + tcg_global_mem_new_i64(tcg_env, + offsetof(CPUARMState, cp15.gcspr_el[i]), + gcspr_names[i]); + } } /* - * Return the core mmu_idx to use for A64 load/store insns which + * Return the full arm mmu_idx to use for A64 load/store insns which * have a "unprivileged load/store" variant. Those insns access * EL0 if executed from an EL which has control over EL0 (usually * EL1) but behave like normal loads and stores if executed from @@ -104,7 +115,7 @@ void a64_translate_init(void) * normal encoding (in which case we will return the same * thing as get_mem_index(). */ -static int get_a64_user_mem_index(DisasContext *s, bool unpriv) +static ARMMMUIdx full_a64_user_mem_index(DisasContext *s, bool unpriv) { /* * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, @@ -131,7 +142,19 @@ static int get_a64_user_mem_index(DisasContext *s, bool unpriv) g_assert_not_reached(); } } - return arm_to_core_mmu_idx(useridx); + return useridx; +} + +/* Return the core mmu_idx per above. */ +static int core_a64_user_mem_index(DisasContext *s, bool unpriv) +{ + return arm_to_core_mmu_idx(full_a64_user_mem_index(s, unpriv)); +} + +/* For a given translation regime, return the core mmu_idx for gcs access. 
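+ * (regime_to_gcs() is presumed to map a regime to its _GCS companion, + * e.g. ARMMMUIdx_E10_1 -> ARMMMUIdx_E10_1_GCS, matching the GCS mmu + * index bits added to the tlbi masks in this series.)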
*/ +static int core_gcs_mem_index(ARMMMUIdx armidx) +{ + return arm_to_core_mmu_idx(regime_to_gcs(armidx)); } static void set_btype_raw(int val) @@ -409,6 +432,39 @@ static MemOp check_ordered_align(DisasContext *s, int rn, int imm, return finalize_memop(s, mop); } +static void gen_add_gcs_record(DisasContext *s, TCGv_i64 value) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + + tcg_gen_addi_i64(addr, gcspr, -8); + tcg_gen_qemu_st_i64(value, clean_data_tbi(s, addr), mmuidx, mop); + tcg_gen_mov_i64(gcspr, addr); +} + +static void gen_load_check_gcs_record(DisasContext *s, TCGv_i64 target, + GCSInstructionType it, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 rec_va = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(rec_va, clean_data_tbi(s, gcspr), mmuidx, mop); + + if (s->gcs_rvcen) { + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(it, rt)); + + tcg_gen_brcond_i64(TCG_COND_NE, rec_va, target, fail_label); + } + + gen_a64_set_pc(s, rec_va); + tcg_gen_addi_i64(gcspr, gcspr, 8); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -434,12 +490,6 @@ static void gen_rebuild_hflags(DisasContext *s) gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); } -static void gen_exception_internal(int excp) -{ - assert(excp_is_internal(excp)); - gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); -} - static void gen_exception_internal_insn(DisasContext *s, int excp) { gen_a64_update_pc(s, 0); @@ -1076,11 +1126,9 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i64 cf_64 = tcg_temp_new_i64(); TCGv_i64 vf_64 = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_extu_i32_i64(cf_64, cpu_CF); - tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); - tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); + tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64); tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); @@ -1094,12 +1142,10 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 t0_32 = tcg_temp_new_i32(); TCGv_i32 t1_32 = tcg_temp_new_i32(); TCGv_i32 tmp = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_extrl_i64_i32(t0_32, t0); tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); @@ -1392,11 +1438,8 @@ static bool fp_access_check_only(DisasContext *s) return true; } -static bool fp_access_check(DisasContext *s) +static bool nonstreaming_check(DisasContext *s) { - if (!fp_access_check_only(s)) { - return false; - } if (s->sme_trap_nonstreaming && s->is_nonstreaming) { gen_exception_insn(s, 0, EXCP_UDEF, syn_smetrap(SME_ET_Streaming, false)); @@ -1405,6 +1448,11 @@ static bool fp_access_check(DisasContext *s) return true; } +static bool fp_access_check(DisasContext *s) +{ + return fp_access_check_only(s) && nonstreaming_check(s); +} + /* * Return <0 for non-supported element sizes, with MO_16 controlled by * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 
@@ -1455,14 +1503,24 @@ static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) */ bool sve_access_check(DisasContext *s) { - if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { + if (dc_isar_feature(aa64_sme, s)) { bool ret; - assert(dc_isar_feature(aa64_sme, s)); - ret = sme_sm_enabled_check(s); + if (s->pstate_sm) { + ret = sme_enabled_check(s); + } else if (dc_isar_feature(aa64_sve, s)) { + goto continue_sve; + } else { + ret = sme_sm_enabled_check(s); + } + if (ret) { + ret = nonstreaming_check(s); + } s->sve_access_checked = (ret ? 1 : -1); return ret; } + + continue_sve: if (s->sve_excp_el) { /* Assert that we only raise one exception per instruction. */ assert(!s->sve_access_checked); @@ -1499,7 +1557,8 @@ bool sme_enabled_check(DisasContext *s) * to be zero when fp_excp_el has priority. This is because we need * sme_excp_el by itself for cpregs access checks. */ - if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { + if (s->sme_excp_el + && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) { bool ret = sme_access_check(s); s->fp_access_checked = (ret ? 1 : -1); return ret; @@ -1640,7 +1699,14 @@ static bool trans_B(DisasContext *s, arg_i *a) static bool trans_BL(DisasContext *s, arg_i *a) { - gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); + TCGv_i64 link = tcg_temp_new_i64(); + + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); + } + tcg_gen_mov_i64(cpu_reg(s, 30), link); + reset_btype(s); gen_goto_tb(s, 0, a->imm); return true; @@ -1737,15 +1803,15 @@ static bool trans_BR(DisasContext *s, arg_r *a) static bool trans_BLR(DisasContext *s, arg_r *a) { - TCGv_i64 dst = cpu_reg(s, a->rn); - TCGv_i64 lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + TCGv_i64 link = tcg_temp_new_i64(); + + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); - gen_a64_set_pc(s, dst); + gen_a64_set_pc(s, cpu_reg(s, a->rn)); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1753,7 +1819,13 @@ static bool trans_BLR(DisasContext *s, arg_r *a) static bool trans_RET(DisasContext *s, arg_r *a) { - gen_a64_set_pc(s, cpu_reg(s, a->rn)); + TCGv_i64 target = cpu_reg(s, a->rn); + + if (s->gcs_en) { + gen_load_check_gcs_record(s, target, GCS_IT_RET_nPauth, a->rn); + } else { + gen_a64_set_pc(s, target); + } s->base.is_jmp = DISAS_JUMP; return true; } @@ -1797,21 +1869,21 @@ static bool trans_BRAZ(DisasContext *s, arg_braz *a) static bool trans_BLRAZ(DisasContext *s, arg_braz *a) { - TCGv_i64 dst, lr; + TCGv_i64 dst, link; if (!dc_isar_feature(aa64_pauth, s)) { return false; } - dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); - lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + + link = tcg_temp_new_i64(); + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); gen_a64_set_pc(s, dst); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1821,8 +1893,17 @@ static bool trans_RETA(DisasContext *s, arg_reta *a) { TCGv_i64 dst; + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); - gen_a64_set_pc(s, dst); + if (s->gcs_en) { + GCSInstructionType it = a->m ? 
GCS_IT_RET_PauthB : GCS_IT_RET_PauthA; + gen_load_check_gcs_record(s, dst, it, 30); + } else { + gen_a64_set_pc(s, dst); + } s->base.is_jmp = DISAS_JUMP; return true; } @@ -1843,20 +1924,21 @@ static bool trans_BRA(DisasContext *s, arg_bra *a) static bool trans_BLRA(DisasContext *s, arg_bra *a) { - TCGv_i64 dst, lr; + TCGv_i64 dst, link; if (!dc_isar_feature(aa64_pauth, s)) { return false; } dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); - lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + + link = tcg_temp_new_i64(); + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); gen_a64_set_pc(s, dst); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1864,6 +1946,9 @@ static bool trans_BLRA(DisasContext *s, arg_bra *a) static bool trans_ERET(DisasContext *s, arg_ERET *a) { +#ifdef CONFIG_USER_ONLY + return false; +#else TCGv_i64 dst; if (s->current_el == 0) { @@ -1883,10 +1968,14 @@ static bool trans_ERET(DisasContext *s, arg_ERET *a) /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; return true; +#endif } static bool trans_ERETA(DisasContext *s, arg_reta *a) { +#ifdef CONFIG_USER_ONLY + return false; +#else TCGv_i64 dst; if (!dc_isar_feature(aa64_pauth, s)) { @@ -1912,6 +2001,7 @@ static bool trans_ERETA(DisasContext *s, arg_reta *a) /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; return true; +#endif } static bool trans_NOP(DisasContext *s, arg_NOP *a) @@ -2054,6 +2144,14 @@ static bool trans_ESB(DisasContext *s, arg_ESB *a) return true; } +static bool trans_GCSB(DisasContext *s, arg_GCSB *a) +{ + if (dc_isar_feature(aa64_gcs, s)) { + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + } + return true; +} + static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) { if (s->pauth_active) { @@ -2118,6 +2216,20 @@ static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) return true; } +static bool trans_CHKFEAT(DisasContext *s, arg_CHKFEAT *a) +{ + uint64_t feat_en = 0; + + if (s->gcs_en) { + feat_en |= 1 << 0; + } + if (feat_en) { + TCGv_i64 x16 = cpu_reg(s, 16); + tcg_gen_andi_i64(x16, x16, ~feat_en); + } + return true; +} + static bool trans_CLREX(DisasContext *s, arg_CLREX *a) { tcg_gen_movi_i64(cpu_exclusive_addr, -1); @@ -2449,6 +2561,195 @@ static void gen_sysreg_undef(DisasContext *s, bool isread, gen_exception_insn(s, 0, EXCP_UDEF, syndrome); } +static void gen_gcspopm(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 value = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPM, rt)); + + /* The value at top-of-stack must have low 2 bits clear. */ + tcg_gen_qemu_ld_i64(value, clean_data_tbi(s, gcspr), mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_TSTNE, value, 3, fail_label); + + /* Complete the pop and return the value. 
*/ + tcg_gen_addi_i64(gcspr, gcspr, 8); + tcg_gen_mov_i64(cpu_reg(s, rt), value); +} + +static void gen_gcspushx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int spsr_idx = aarch64_banked_spsr_index(s->current_el); + int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); + int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addr, gcspr, -8); + tcg_gen_qemu_st_i64(cpu_reg(s, 30), addr, mmuidx, mop); + + tcg_gen_ld_i64(tmp, tcg_env, spsr_off); + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); + + tcg_gen_ld_i64(tmp, tcg_env, elr_off); + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); + + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tcg_constant_i64(0b1001), addr, mmuidx, mop); + + tcg_gen_mov_i64(gcspr, addr); + clear_pstate_bits(PSTATE_EXLOCK); +} + +static void gen_gcspopcx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int spsr_idx = aarch64_banked_spsr_index(s->current_el); + int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); + int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); + int gcscr_off = offsetof(CPUARMState, cp15.gcscr_el[s->current_el]); + int pstate_off = offsetof(CPUARMState, pstate); + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp1 = tcg_temp_new_i64(); + TCGv_i64 tmp2 = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPCX, 31)); + + /* The value at top-of-stack must be an exception token. */ + tcg_gen_qemu_ld_i64(tmp1, gcspr, mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp1, 0b1001, fail_label); + + /* Validate in turn, ELR ... */ + tcg_gen_addi_i64(addr, gcspr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_ld_i64(tmp2, tcg_env, elr_off); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); + + /* ... SPSR ... */ + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_ld_i64(tmp2, tcg_env, spsr_off); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); + + /* ... and LR. */ + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, cpu_reg(s, 30), fail_label); + + /* Writeback stack pointer after pop. */ + tcg_gen_addi_i64(gcspr, addr, 8); + + /* PSTATE.EXLOCK = GetCurrentEXLOCKEN(). */ + tcg_gen_ld_i64(tmp1, tcg_env, gcscr_off); + tcg_gen_ld_i64(tmp2, tcg_env, pstate_off); + tcg_gen_shri_i64(tmp1, tmp1, ctz64(GCSCR_EXLOCKEN)); + tcg_gen_deposit_i64(tmp2, tmp2, tmp1, ctz64(PSTATE_EXLOCK), 1); + tcg_gen_st_i64(tmp2, tcg_env, pstate_off); +} + +static void gen_gcspopx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPX, 31)); + + /* The value at top-of-stack must be an exception token. 
*/ + tcg_gen_qemu_ld_i64(tmp, gcspr, mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0b1001, fail_label); + + /* + * The other three values in the exception return record + * are ignored, but are loaded anyway to raise faults. + */ + tcg_gen_addi_i64(addr, gcspr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(gcspr, addr, 8); +} + +static void gen_gcsss1(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 inptr = cpu_reg(s, rt); + TCGv_i64 cmp = tcg_temp_new_i64(); + TCGv_i64 new = tcg_temp_new_i64(); + TCGv_i64 old = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS1, rt)); + + /* Compute the valid cap entry that the new stack must have. */ + tcg_gen_deposit_i64(cmp, inptr, tcg_constant_i64(1), 0, 12); + /* Compute the in-progress cap entry for the old stack. */ + tcg_gen_deposit_i64(new, gcspr, tcg_constant_i64(5), 0, 3); + + /* Swap the valid cap with the in-progress cap. */ + tcg_gen_atomic_cmpxchg_i64(old, inptr, cmp, new, mmuidx, mop); + tcg_gen_brcond_i64(TCG_COND_NE, old, cmp, fail_label); + + /* The new stack had a valid cap: change gcspr. */ + tcg_gen_andi_i64(gcspr, inptr, ~7); +} + +static void gen_gcsss2(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 outptr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS2, rt)); + + /* Validate that the new stack has an in-progress cap. */ + tcg_gen_qemu_ld_i64(outptr, gcspr, mmuidx, mop); + tcg_gen_andi_i64(tmp, outptr, 7); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 5, fail_label); + + /* Push a valid cap to the old stack. */ + tcg_gen_andi_i64(outptr, outptr, ~7); + tcg_gen_addi_i64(outptr, outptr, -8); + tcg_gen_deposit_i64(tmp, outptr, tcg_constant_i64(1), 0, 12); + tcg_gen_qemu_st_i64(tmp, outptr, mmuidx, mop); + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + + /* Pop the in-progress cap from the new stack. */ + tcg_gen_addi_i64(gcspr, gcspr, 8); + + /* Return a pointer to the old stack cap. */ + tcg_gen_mov_i64(cpu_reg(s, rt), outptr); +} + +/* + * Look up @key, returning the cpreg, which must exist. + * Additionally, the new cpreg must also be accessible. 
+ */ +static const ARMCPRegInfo * +redirect_cpreg(DisasContext *s, uint32_t key, bool isread) +{ + const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); + assert(ri); + assert(cp_access_ok(s->current_el, ri, isread)); + return ri; +} + /* MRS - move from system register * MSR (register) - move to system register * SYS @@ -2460,8 +2761,7 @@ static void handle_sys(DisasContext *s, bool isread, unsigned int op0, unsigned int op1, unsigned int op2, unsigned int crn, unsigned int crm, unsigned int rt) { - uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2); + uint32_t key = ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2); const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); bool need_exit_tb = false; bool nv_trap_to_el2 = false; @@ -2555,6 +2855,27 @@ static void handle_sys(DisasContext *s, bool isread, } } + + if (ri->vhe_redir_to_el2 && s->current_el == 2 && s->e2h) { + /* + * This is one of the FOO_EL1 registers which redirect to FOO_EL2 + * from EL2 when HCR_EL2.E2H is set. + */ + key = ri->vhe_redir_to_el2; + ri = redirect_cpreg(s, key, isread); + } else if (ri->vhe_redir_to_el01 && s->current_el >= 2) { + /* + * This is one of the FOO_EL12 or FOO_EL02 registers. + * With !E2H, they all UNDEF. + * With E2H, from EL2 or EL3, they redirect to FOO_EL1/FOO_EL0. + */ + if (!s->e2h) { + gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); + return; + } + key = ri->vhe_redir_to_el01; + ri = redirect_cpreg(s, key, isread); + } + if (ri->accessfn || (ri->fgt && s->fgt_active)) { /* Emit code to perform further access permissions checks at * runtime; this may result in an exception. @@ -2597,11 +2918,8 @@ static void handle_sys(DisasContext *s, bool isread, * We don't use the EL1 register's access function, and * fine-grained-traps on EL1 also do not apply here. */ - key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, 0, op2); - ri = get_arm_cp_reginfo(s->cp_regs, key); - assert(ri); - assert(cp_access_ok(s->current_el, ri, isread)); + key = ENCODE_AA64_CP_REG(op0, 0, crn, crm, op2); + ri = redirect_cpreg(s, key, isread); /* * We might not have done an update_pc earlier, so check we don't * need it. We could support this in future if necessary. @@ -2725,6 +3043,51 @@ static void handle_sys(DisasContext *s, bool isread, } } return; + case ARM_CP_GCSPUSHM: + if (s->gcs_en) { + gen_add_gcs_record(s, cpu_reg(s, rt)); + } + return; + case ARM_CP_GCSPOPM: + /* Note that X[rt] is unchanged if !GCSEnabled. */ + if (s->gcs_en) { + gen_gcspopm(s, rt); + } + return; + case ARM_CP_GCSPUSHX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspushx(s); + } + return; + case ARM_CP_GCSPOPCX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspopcx(s); + } + return; + case ARM_CP_GCSPOPX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. 
*/ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspopx(s); + } + return; + case ARM_CP_GCSSS1: + if (s->gcs_en) { + gen_gcsss1(s, rt); + } + return; + case ARM_CP_GCSSS2: + if (s->gcs_en) { + gen_gcsss2(s, rt); + } + return; default: g_assert_not_reached(); } @@ -3231,7 +3594,7 @@ static bool trans_LDXP(DisasContext *s, arg_stxr *a) static bool trans_CASP(DisasContext *s, arg_CASP *a) { - if (!dc_isar_feature(aa64_atomics, s)) { + if (!dc_isar_feature(aa64_lse, s)) { return false; } if (((a->rt | a->rs) & 1) != 0) { @@ -3244,7 +3607,7 @@ static bool trans_CASP(DisasContext *s, arg_CASP *a) static bool trans_CAS(DisasContext *s, arg_CAS *a) { - if (!dc_isar_feature(aa64_atomics, s)) { + if (!dc_isar_feature(aa64_lse, s)) { return false; } gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); @@ -3519,7 +3882,7 @@ static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, if (!a->p) { tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } - memidx = get_a64_user_mem_index(s, a->unpriv); + memidx = core_a64_user_mem_index(s, a->unpriv); *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, a->w || a->rn != 31, mop, a->unpriv, memidx); @@ -3540,7 +3903,7 @@ static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = get_a64_user_mem_index(s, a->unpriv); + int memidx = core_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); @@ -3558,7 +3921,7 @@ static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = get_a64_user_mem_index(s, a->unpriv); + int memidx = core_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); @@ -3737,15 +4100,64 @@ static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, return true; } -TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) -TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) -TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) -TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) -TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) -TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) -TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) -TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) -TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) +TRANS_FEAT(LDADD, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) +TRANS_FEAT(LDCLR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) +TRANS_FEAT(LDEOR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) +TRANS_FEAT(LDSET, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) +TRANS_FEAT(LDSMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) +TRANS_FEAT(LDSMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) +TRANS_FEAT(LDUMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 
+TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) +TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) + +typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp); + +static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a, + Atomic128ThreeOpFn *fn, bool invert) +{ + MemOp mop; + int rlo, rhi; + TCGv_i64 clean_addr, tlo, thi; + TCGv_i128 t16; + + if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) { + return false; + } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, MO_128); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + + rlo = (s->be_data == MO_LE ? a->rt : a->rt2); + rhi = (s->be_data == MO_LE ? a->rt2 : a->rt); + + tlo = read_cpu_reg(s, rlo, true); + thi = read_cpu_reg(s, rhi, true); + if (invert) { + tcg_gen_not_i64(tlo, tlo); + tcg_gen_not_i64(thi, thi); + } + /* + * The tcg atomic primitives are all full barriers. Therefore we + * can ignore the Acquire and Release bits of this instruction. + */ + t16 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(t16, tlo, thi); + + fn(t16, clean_addr, t16, get_mem_index(s), mop); + + tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16); + return true; +} + +TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_and_i128, true) +TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_or_i128, false) +TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_xchg_i128, false) static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) { @@ -3753,7 +4165,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) TCGv_i64 clean_addr; MemOp mop; - if (!dc_isar_feature(aa64_atomics, s) || + if (!dc_isar_feature(aa64_lse, s) || !dc_isar_feature(aa64_rcpc_8_3, s)) { return false; } @@ -3876,6 +4288,42 @@ static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) return true; } +static bool trans_GCSSTR(DisasContext *s, arg_GCSSTR *a) +{ + ARMMMUIdx armidx; + + if (!dc_isar_feature(aa64_gcs, s)) { + return false; + } + + /* + * The pseudocode for GCSSTTR is + * + * effective_el = AArch64.IsUnprivAccessPriv() ? PSTATE.EL : EL0; + * if (effective_el == PSTATE.EL) CheckGCSSTREnabled(); + * + * We have cached the result of IsUnprivAccessPriv in DisasContext, + * but since we need the result of full_a64_user_mem_index anyway, + * use the mmu_idx test as a proxy for the effective_el test. 
+ */ + armidx = full_a64_user_mem_index(s, a->unpriv); + if (armidx == s->mmu_idx && s->gcsstr_el != 0) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_gcs_gcsstr(a->rn, a->rt), + s->gcsstr_el); + return true; + } + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + tcg_gen_qemu_st_i64(cpu_reg(s, a->rt), + clean_data_tbi(s, cpu_reg_sp(s, a->rn)), + core_gcs_mem_index(armidx), + finalize_memop(s, MO_64 | MO_ALIGN)); + return true; +} + static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) { TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; @@ -4407,7 +4855,7 @@ static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, return false; } - memidx = get_a64_user_mem_index(s, a->unpriv); + memidx = core_a64_user_mem_index(s, a->unpriv); /* * We pass option_a == true, matching our implementation; @@ -4461,8 +4909,8 @@ static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) return false; } - rmemidx = get_a64_user_mem_index(s, runpriv); - wmemidx = get_a64_user_mem_index(s, wunpriv); + rmemidx = core_a64_user_mem_index(s, runpriv); + wmemidx = core_a64_user_mem_index(s, wunpriv); /* * We pass option_a == true, matching our implementation; @@ -4547,6 +4995,50 @@ TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) /* + * Min/Max (immediate) + */ + +static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(t1, n); + tcg_gen_extrl_i64_i32(t2, m); + fn(t1, t1, t2); + tcg_gen_extu_i32_i64(d, t1); +} + +static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_smax_i32); +} + +static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_smin_i32); +} + +static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_umax_i32); +} + +static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_umin_i32); +} + +TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) +TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) +TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) +TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? 
tcg_gen_umin_i64 : gen_umin32_i64) + +/* * Add/subtract (immediate, with tags) */ @@ -6108,9 +6600,9 @@ static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, return true; } -TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) -TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) -TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) +TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b) +TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b) TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) @@ -6870,12 +7362,12 @@ static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, return true; } -TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) -TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) +TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b) +TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b) TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, - gen_helper_gvec_sudot_idx_b) + gen_helper_gvec_sudot_idx_4b) TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, - gen_helper_gvec_usdot_idx_b) + gen_helper_gvec_usdot_idx_4b) TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, gen_helper_gvec_bfdot_idx) @@ -8151,6 +8643,28 @@ static bool trans_PACGA(DisasContext *s, arg_rrr *a) return false; } +static bool gen_rrr(DisasContext *s, arg_rrr_sf *a, ArithTwoOp fn) +{ + TCGv_i64 tcg_rm = cpu_reg(s, a->rm); + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + + fn(tcg_rd, tcg_rn, tcg_rm); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + return true; +} + +TRANS_FEAT(SMAX, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) +TRANS_FEAT(SMIN, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) +TRANS_FEAT(UMAX, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) +TRANS_FEAT(UMIN, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_umin_i64 : gen_umin32_i64) + typedef void ArithOneOp(TCGv_i64, TCGv_i64); static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) @@ -8159,13 +8673,22 @@ static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) return true; } -static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +/* + * Perform 32-bit operation fn on the low half of n; + * the high half of the output is zeroed. + */ +static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn) { - TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t32, tcg_rn); - gen_helper_rbit(t32, t32); - tcg_gen_extu_i32_i64(tcg_rd, t32); + tcg_gen_extrl_i64_i32(t, n); + fn(t, t); + tcg_gen_extu_i32_i64(d, t); +} + +static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, gen_helper_rbit); } static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) @@ -8221,15 +8744,42 @@ static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_clrsb_i32); +} + +TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) +TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) + +static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ TCGv_i32 t32 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t32, tcg_rn); - tcg_gen_clrsb_i32(t32, t32); + tcg_gen_ctzi_i32(t32, t32, 32); tcg_gen_extu_i32_i64(tcg_rd, t32); } -TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) -TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32) +static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64); +} + +static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_ctpop_i32); +} + +static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_abs_i32); +} + +TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? gen_ctz64 : gen_ctz32) +TRANS_FEAT(CNT, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? tcg_gen_ctpop_i64 : gen_cnt32) +TRANS_FEAT(ABS, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? tcg_gen_abs_i64 : gen_abs32) static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) { @@ -8600,7 +9150,7 @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a) tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); nzcv = a->nzcv; - has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); + has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); if (nzcv & 8) { /* N */ tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); } else { @@ -10133,8 +10683,10 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); + dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL); dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; + dc->max_svl = arm_cpu->sme_max_vq * 16; dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); dc->bt = EX_TBFLAG_A64(tb_flags, BT); dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); @@ -10147,13 +10699,17 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); dc->naa = EX_TBFLAG_A64(tb_flags, NAA); + dc->e2h = EX_TBFLAG_A64(tb_flags, E2H); dc->nv = EX_TBFLAG_A64(tb_flags, NV); dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); - dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); + dc->nv2_mem_e20 = dc->nv2 && dc->e2h; dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); + dc->gcs_en = EX_TBFLAG_A64(tb_flags, GCS_EN); + dc->gcs_rvcen = EX_TBFLAG_A64(tb_flags, GCS_RVCEN); + dc->gcsstr_el = EX_TBFLAG_A64(tb_flags, GCSSTR_EL); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -10247,7 +10803,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * start of the TB. 
*/ assert(s->base.num_insns == 1); - gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc)); s->base.is_jmp = DISAS_NORETURN; s->base.pc_next = QEMU_ALIGN_UP(pc, 4); return; @@ -10380,6 +10936,8 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; } } + + emit_delayed_exceptions(dc); } const TranslatorOps aarch64_translator_ops = { diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index b2420f5..9c45f89 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -28,7 +28,7 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, bool sve_access_check(DisasContext *s); bool sme_enabled_check(DisasContext *s); bool sme_enabled_check_with_svcr(DisasContext *s, unsigned); -uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, +uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, uint32_t msz, bool is_write, uint32_t data); /* This function corresponds to CheckStreamingSVEEnabled. */ @@ -225,7 +225,13 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); -void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); -void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); +void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, + int len, int rn, int imm, MemOp align); +void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, + int len, int rn, int imm, MemOp align); #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c index c4fecb8..844d2e2 100644 --- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -271,7 +271,7 @@ static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_sdot_b); + gen_helper_gvec_sdot_4b); } static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) @@ -280,7 +280,7 @@ static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_udot_b); + gen_helper_gvec_udot_4b); } static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) @@ -289,7 +289,7 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_usdot_b); + gen_helper_gvec_usdot_4b); } static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) @@ -356,7 +356,7 @@ static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_sdot_idx_b); + gen_helper_gvec_sdot_idx_4b); } static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) @@ -365,7 +365,7 @@ static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_udot_idx_b); + gen_helper_gvec_udot_idx_4b); } static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) @@ -374,7 +374,7 @@ static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - 
gen_helper_gvec_usdot_idx_b); + gen_helper_gvec_usdot_idx_4b); } static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) @@ -383,7 +383,7 @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_sudot_idx_b); + gen_helper_gvec_sudot_idx_4b); } static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) @@ -1010,8 +1010,8 @@ DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) -DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) -DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) +DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_nf_s, gen_helper_gvec_fmla_nf_h) +DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_nf_s, gen_helper_gvec_fmls_nf_h) DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index fcbb350..091c56d 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -27,16 +27,25 @@ #include "decode-sme.c.inc" +static bool sme2_zt0_enabled_check(DisasContext *s) +{ + if (!sme_za_enabled_check(s)) { + return false; + } + if (s->zt0_excp_el) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_InaccessibleZT0, false), + s->zt0_excp_el); + return false; + } + return true; +} -/* - * Resolve tile.size[index] to a host pointer, where tile and index - * are always decoded together, dependent on the element size. - */ +/* Resolve tile.size[rs+imm] to a host pointer. */ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, - int tile_index, bool vertical) + int tile, int imm, int div_len, + int vec_mod, bool vertical) { - int tile = tile_index >> (4 - esz); - int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); int pos, len, offset; TCGv_i32 tmp; TCGv_ptr addr; @@ -44,10 +53,23 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, /* Compute the final index, which is Rs+imm. */ tmp = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); - tcg_gen_addi_i32(tmp, tmp, index); + /* + * Round the vector index down to a multiple of vec_mod if necessary. + * We do this before adding the offset, to handle cases like + * MOVA (tile to vector, 2 registers) where we want to call this + * several times in a loop with an increasing offset. We rely on + * the instruction encodings always forcing the initial offset in + * [rs + offset] to be a multiple of vec_mod. The pseudocode usually + * does the round-down after adding the offset rather than before, + * but MOVA is an exception. + */ + if (vec_mod > 1) { + tcg_gen_andc_i32(tmp, tmp, tcg_constant_i32(vec_mod - 1)); + } + tcg_gen_addi_i32(tmp, tmp, imm); /* Prepare a power-of-two modulo via extraction of @len bits. */ - len = ctz32(streaming_vec_reg_size(s)) - esz; + len = ctz32(streaming_vec_reg_size(s) / div_len) - esz; if (!len) { /* @@ -92,7 +114,7 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, offset = tile * sizeof(ARMVectorReg); /* Include the byte offset of zarray to make this relative to env. 
*/ - offset += offsetof(CPUARMState, zarray); + offset += offsetof(CPUARMState, za_state.za); tcg_gen_addi_i32(tmp, tmp, offset); /* Add the byte offset to env to produce the final pointer. */ @@ -103,6 +125,14 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, return addr; } +/* Resolve ZArray[rs+imm] to a host pointer. */ +static TCGv_ptr get_zarray(DisasContext *s, int rs, int imm, + int div_len, int vec_mod) +{ + /* ZA[n] equates to ZA0H.B[n]. */ + return get_tile_rowcol(s, MO_8, rs, 0, imm, div_len, vec_mod, false); +} + /* * Resolve tile.size[0] to a host pointer. * Used by e.g. outer product insns where we require the entire tile. @@ -112,7 +142,7 @@ static TCGv_ptr get_tile(DisasContext *s, int esz, int tile) TCGv_ptr addr = tcg_temp_new_ptr(); int offset; - offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray); + offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, za_state.za); tcg_gen_addi_ptr(addr, tcg_env, offset); return addr; @@ -130,7 +160,40 @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a) return true; } -static bool trans_MOVA(DisasContext *s, arg_MOVA *a) +static bool trans_ZERO_zt0(DisasContext *s, arg_ZERO_zt0 *a) +{ + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (sme_enabled_check(s) && sme2_zt0_enabled_check(s)) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), 0); + } + return true; +} + +static bool trans_ZERO_za(DisasContext *s, arg_ZERO_za *a) +{ + if (!dc_isar_feature(aa64_sme2p1, s)) { + return false; + } + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / a->ngrp; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, a->ngrp, a->nvec); + + for (int r = 0; r < a->ngrp; ++r) { + for (int i = 0; i < a->nvec; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + tcg_gen_gvec_dup_imm_var(MO_64, t_za, o_za, svl, svl, 0); + } + } + } + return true; +} + +static bool do_mova_tile(DisasContext *s, arg_mova_p *a, bool to_vec) { static gen_helper_gvec_4 * const h_fns[5] = { gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, @@ -152,14 +215,11 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) TCGv_i32 t_desc; int svl; - if (!dc_isar_feature(aa64_sme, s)) { - return false; - } if (!sme_smza_enabled_check(s)) { return true; } - t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, a->off, 1, 0, a->v); t_zr = vec_full_reg_ptr(s, a->zr); t_pg = pred_full_reg_ptr(s, a->pg); @@ -168,14 +228,14 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) if (a->v) { /* Vertical slice -- use sme mova helpers. */ - if (a->to_vec) { + if (to_vec) { zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); } else { cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); } } else { /* Horizontal slice -- reuse sve sel helpers. 
*/ - if (a->to_vec) { + if (to_vec) { h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); } else { h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); @@ -184,9 +244,150 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) return true; } +TRANS_FEAT(MOVA_tz, aa64_sme, do_mova_tile, a, false) +TRANS_FEAT(MOVA_zt, aa64_sme, do_mova_tile, a, true) + +static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, + bool to_vec, bool zero) +{ + static gen_helper_gvec_2 * const cz_fns[] = { + gen_helper_sme2_mova_cz_b, gen_helper_sme2_mova_cz_h, + gen_helper_sme2_mova_cz_s, gen_helper_sme2_mova_cz_d, + }; + static gen_helper_gvec_2 * const zc_fns[] = { + gen_helper_sme2_mova_zc_b, gen_helper_sme2_mova_zc_h, + gen_helper_sme2_mova_zc_s, gen_helper_sme2_mova_zc_d, + }; + static gen_helper_gvec_2 * const zc_z_fns[] = { + gen_helper_sme2p1_movaz_zc_b, gen_helper_sme2p1_movaz_zc_h, + gen_helper_sme2p1_movaz_zc_s, gen_helper_sme2p1_movaz_zc_d, + gen_helper_sme2p1_movaz_zc_q, + }; + TCGv_ptr t_za; + int svl, bytes_per_op = n << a->esz; + + /* + * The MaxImplementedSVL check happens in the decode pseudocode, + * before the SM+ZA enabled check in the operation pseudocode. + * This will (currently) only fail for NREG=4, ESZ=MO_64. + */ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + return true; + } + + assert(a->esz <= MO_64 + zero); + + if (!sme_smza_enabled_check(s)) { + return true; + } + + svl = streaming_vec_reg_size(s); + + /* + * The CurrentVL check happens in the operation pseudocode, + * after the SM+ZA enabled check. + */ + if (svl < bytes_per_op) { + unallocated_encoding(s); + return true; + } + + if (a->v) { + TCGv_i32 t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); + + for (int i = 0; i < n; ++i) { + TCGv_ptr t_zr = vec_full_reg_ptr(s, a->zr * n + i); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, + a->off * n + i, 1, n, a->v); + if (zero) { + zc_z_fns[a->esz](t_zr, t_za, t_desc); + } else if (to_vec) { + zc_fns[a->esz](t_zr, t_za, t_desc); + } else { + cz_fns[a->esz](t_za, t_zr, t_desc); + } + } + } else { + for (int i = 0; i < n; ++i) { + int o_zr = vec_full_reg_offset(s, a->zr * n + i); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, + a->off * n + i, 1, n, a->v); + if (to_vec) { + tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, 0, svl, svl); + if (zero) { + tcg_gen_gvec_dup_imm_var(MO_8, t_za, 0, svl, svl, 0); + } + } else { + tcg_gen_gvec_mov_var(MO_8, t_za, 0, tcg_env, o_zr, svl, svl); + } + } + } + return true; +} + +TRANS_FEAT(MOVA_tz2, aa64_sme2, do_mova_tile_n, a, 2, false, false) +TRANS_FEAT(MOVA_tz4, aa64_sme2, do_mova_tile_n, a, 4, false, false) +TRANS_FEAT(MOVA_zt2, aa64_sme2, do_mova_tile_n, a, 2, true, false) +TRANS_FEAT(MOVA_zt4, aa64_sme2, do_mova_tile_n, a, 4, true, false) + +TRANS_FEAT(MOVAZ_zt, aa64_sme2p1, do_mova_tile_n, a, 1, true, true) +TRANS_FEAT(MOVAZ_zt2, aa64_sme2p1, do_mova_tile_n, a, 2, true, true) +TRANS_FEAT(MOVAZ_zt4, aa64_sme2p1, do_mova_tile_n, a, 4, true, true) + +static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n, + bool to_vec, bool zero) +{ + TCGv_ptr t_za; + int svl; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + svl = streaming_vec_reg_size(s); + t_za = get_zarray(s, a->rv, a->off, n, 0); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zr = vec_full_reg_offset(s, a->zr * n + i); + + if (to_vec) { + tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, o_za, svl, svl); + if (zero) { + tcg_gen_gvec_dup_imm_var(MO_8, t_za, o_za, svl, svl, 0); + } + } else { + 
tcg_gen_gvec_mov_var(MO_8, t_za, o_za, tcg_env, o_zr, svl, svl); + } + } + return true; +} + +TRANS_FEAT(MOVA_az2, aa64_sme2, do_mova_array_n, a, 2, false, false) +TRANS_FEAT(MOVA_az4, aa64_sme2, do_mova_array_n, a, 4, false, false) +TRANS_FEAT(MOVA_za2, aa64_sme2, do_mova_array_n, a, 2, true, false) +TRANS_FEAT(MOVA_za4, aa64_sme2, do_mova_array_n, a, 4, true, false) + +TRANS_FEAT(MOVAZ_za2, aa64_sme2p1, do_mova_array_n, a, 2, true, true) +TRANS_FEAT(MOVAZ_za4, aa64_sme2p1, do_mova_array_n, a, 4, true, true) + +static bool do_movt(DisasContext *s, arg_MOVT_rzt *a, + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) +{ + if (sme2_zt0_enabled_check(s)) { + func(cpu_reg(s, a->rt), tcg_env, + offsetof(CPUARMState, za_state.zt0) + a->off * 8); + } + return true; +} + +TRANS_FEAT(MOVT_rzt, aa64_sme2, do_movt, a, tcg_gen_ld_i64) +TRANS_FEAT(MOVT_ztr, aa64_sme2, do_movt, a, tcg_gen_st_i64) + static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) { - typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32); + typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i64); /* * Indexed by [esz][be][v][mte][st], which is (except for load/store) @@ -214,7 +415,7 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) TCGv_ptr t_za, t_pg; TCGv_i64 addr; - uint32_t desc; + uint64_t desc; bool be = s->be_data == MO_BE; bool mte = s->mte_active[0]; @@ -225,7 +426,7 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) return true; } - t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, a->off, 1, 0, a->v); t_pg = pred_full_reg_ptr(s, a->pg); addr = tcg_temp_new_i64(); @@ -239,32 +440,41 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0); fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr, - tcg_constant_i32(desc)); + tcg_constant_i64(desc)); return true; } -typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int); +typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int, MemOp); static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) { - int svl = streaming_vec_reg_size(s); - int imm = a->imm; - TCGv_ptr base; + if (sme_za_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int imm = a->imm; + TCGv_ptr base = get_zarray(s, a->rv, imm, 1, 0); - if (!sme_za_enabled_check(s)) { - return true; + fn(s, base, 0, svl, a->rn, imm * svl, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } - - /* ZA[n] equates to ZA0H.B[n]. */ - base = get_tile_rowcol(s, MO_8, a->rv, imm, false); - - fn(s, base, 0, svl, a->rn, imm * svl); return true; } TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) +static bool do_ldst_zt0(DisasContext *s, arg_ldstzt0 *a, GenLdStR *fn) +{ + if (sme2_zt0_enabled_check(s)) { + fn(s, tcg_env, offsetof(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), a->rn, 0, + s->align_mem ? 
MO_ALIGN_16 : MO_UNALN); + } + return true; +} + +TRANS_FEAT(LDR_zt0, aa64_sme2, do_ldst_zt0, a, gen_sve_ldr) +TRANS_FEAT(STR_zt0, aa64_sme2, do_ldst_zt0, a, gen_sve_str) + static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, gen_helper_gvec_4 *fn) { @@ -316,7 +526,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); - uint32_t desc = simd_desc(svl, svl, a->sub); + uint32_t desc = simd_desc(svl, svl, 0); TCGv_ptr za, zn, zm, pn, pm, fpst; if (!sme_smza_enabled_check(s)) { @@ -338,7 +548,7 @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); - uint32_t desc = simd_desc(svl, svl, a->sub); + uint32_t desc = simd_desc(svl, svl, 0); TCGv_ptr za, zn, zm, pn, pm; if (!sme_smza_enabled_check(s)) { @@ -355,14 +565,32 @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, return true; } -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, - MO_32, gen_helper_sme_fmopa_h) -TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, - MO_32, FPST_A64, gen_helper_sme_fmopa_s) -TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, - MO_64, FPST_A64, gen_helper_sme_fmopa_d) +TRANS_FEAT(FMOPA_w_h, aa64_sme, do_outprod_env, a, MO_32, + !a->sub ? gen_helper_sme_fmopa_w_h + : !s->fpcr_ah ? gen_helper_sme_fmops_w_h + : gen_helper_sme_ah_fmops_w_h) +TRANS_FEAT(FMOPA_h, aa64_sme_f16f16, do_outprod_fpst, a, MO_16, FPST_ZA_F16, + !a->sub ? gen_helper_sme_fmopa_h + : !s->fpcr_ah ? gen_helper_sme_fmops_h + : gen_helper_sme_ah_fmops_h) +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, FPST_ZA, + !a->sub ? gen_helper_sme_fmopa_s + : !s->fpcr_ah ? gen_helper_sme_fmops_s + : gen_helper_sme_ah_fmops_s) +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, FPST_ZA, + !a->sub ? gen_helper_sme_fmopa_d + : !s->fpcr_ah ? gen_helper_sme_fmops_d + : gen_helper_sme_ah_fmops_d) + +TRANS_FEAT(BFMOPA, aa64_sme_b16b16, do_outprod_fpst, a, MO_16, FPST_ZA, + !a->sub ? gen_helper_sme_bfmopa + : !s->fpcr_ah ? gen_helper_sme_bfmops + : gen_helper_sme_ah_bfmops) -TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa) +TRANS_FEAT(BFMOPA_w, aa64_sme, do_outprod_env, a, MO_32, + !a->sub ? gen_helper_sme_bfmopa_w + : !s->fpcr_ah ? 
gen_helper_sme_bfmops_w + : gen_helper_sme_ah_bfmops_w) TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) @@ -373,3 +601,1173 @@ TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_ TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d) TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d) TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d) + +TRANS_FEAT(BMOPA, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_bmopa_s) +TRANS_FEAT(SMOPA2_s, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_smopa2_s) +TRANS_FEAT(UMOPA2_s, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_umopa2_s) + +static bool do_z2z_n1(DisasContext *s, arg_z2z_en *a, GVecGen3Fn *fn) +{ + int esz, dn, vsz, mofs, n; + bool overlap = false; + + if (!sme_sm_enabled_check(s)) { + return true; + } + + esz = a->esz; + n = a->n; + dn = a->zdn; + mofs = vec_full_reg_offset(s, a->zm); + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + if (dofs == mofs) { + overlap = true; + } else { + fn(esz, dofs, dofs, mofs, vsz, vsz); + } + } + if (overlap) { + fn(esz, mofs, mofs, mofs, vsz, vsz); + } + return true; +} + +static void gen_sme2_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_srshl_b, gen_helper_sme2_srshl_h, + gen_helper_sme2_srshl_s, gen_helper_sme2_srshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +static void gen_sme2_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_urshl_b, gen_helper_sme2_urshl_h, + gen_helper_sme2_urshl_s, gen_helper_sme2_urshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +TRANS_FEAT(ADD_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_add) +TRANS_FEAT(SMAX_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_smax) +TRANS_FEAT(SMIN_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_smin) +TRANS_FEAT(UMAX_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_umax) +TRANS_FEAT(UMIN_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_umin) +TRANS_FEAT(SRSHL_n1, aa64_sme2, do_z2z_n1, a, gen_sme2_srshl) +TRANS_FEAT(URSHL_n1, aa64_sme2, do_z2z_n1, a, gen_sme2_urshl) +TRANS_FEAT(SQDMULH_n1, aa64_sme2, do_z2z_n1, a, gen_gvec_sve2_sqdmulh) + +static bool do_z2z_nn(DisasContext *s, arg_z2z_en *a, GVecGen3Fn *fn) +{ + int esz, dn, dm, vsz, n; + + if (!sme_sm_enabled_check(s)) { + return true; + } + + esz = a->esz; + n = a->n; + dn = a->zdn; + dm = a->zm; + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + int mofs = vec_full_reg_offset(s, dm + i); + + fn(esz, dofs, dofs, mofs, vsz, vsz); + } + return true; +} + +TRANS_FEAT(SMAX_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_smax) +TRANS_FEAT(SMIN_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_smin) +TRANS_FEAT(UMAX_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_umax) +TRANS_FEAT(UMIN_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_umin) +TRANS_FEAT(SRSHL_nn, aa64_sme2, do_z2z_nn, a, gen_sme2_srshl) +TRANS_FEAT(URSHL_nn, aa64_sme2, do_z2z_nn, a, gen_sme2_urshl) +TRANS_FEAT(SQDMULH_nn, 
aa64_sme2, do_z2z_nn, a, gen_gvec_sve2_sqdmulh) + +static bool do_z2z_n1_fpst(DisasContext *s, arg_z2z_en *a, + gen_helper_gvec_3_ptr * const fns[4]) +{ + int esz = a->esz, n, dn, vsz, mofs; + bool overlap = false; + gen_helper_gvec_3_ptr *fn; + TCGv_ptr fpst; + + /* These insns use MO_8 to encode BFloat16. */ + if (esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); + fn = fns[esz]; + n = a->n; + dn = a->zdn; + mofs = vec_full_reg_offset(s, a->zm); + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + if (dofs == mofs) { + overlap = true; + } else { + tcg_gen_gvec_3_ptr(dofs, dofs, mofs, fpst, vsz, vsz, 0, fn); + } + } + if (overlap) { + tcg_gen_gvec_3_ptr(mofs, mofs, mofs, fpst, vsz, vsz, 0, fn); + } + return true; +} + +static bool do_z2z_nn_fpst(DisasContext *s, arg_z2z_en *a, + gen_helper_gvec_3_ptr * const fns[4]) +{ + int esz = a->esz, n, dn, dm, vsz; + gen_helper_gvec_3_ptr *fn; + TCGv_ptr fpst; + + if (esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); + fn = fns[esz]; + n = a->n; + dn = a->zdn; + dm = a->zm; + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + int mofs = vec_full_reg_offset(s, dm + i); + + tcg_gen_gvec_3_ptr(dofs, dofs, mofs, fpst, vsz, vsz, 0, fn); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_fmax[2][4] = { + { gen_helper_gvec_fmax_b16, + gen_helper_gvec_fmax_h, + gen_helper_gvec_fmax_s, + gen_helper_gvec_fmax_d }, + { gen_helper_gvec_ah_fmax_b16, + gen_helper_gvec_ah_fmax_h, + gen_helper_gvec_ah_fmax_s, + gen_helper_gvec_ah_fmax_d }, +}; +TRANS_FEAT(FMAX_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmax[s->fpcr_ah]) +TRANS_FEAT(FMAX_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmax[s->fpcr_ah]) + +static gen_helper_gvec_3_ptr * const f_vector_fmin[2][4] = { + { gen_helper_gvec_fmin_b16, + gen_helper_gvec_fmin_h, + gen_helper_gvec_fmin_s, + gen_helper_gvec_fmin_d }, + { gen_helper_gvec_ah_fmin_b16, + gen_helper_gvec_ah_fmin_h, + gen_helper_gvec_ah_fmin_s, + gen_helper_gvec_ah_fmin_d }, +}; +TRANS_FEAT(FMIN_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmin[s->fpcr_ah]) +TRANS_FEAT(FMIN_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmin[s->fpcr_ah]) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[4] = { + gen_helper_gvec_fmaxnum_b16, + gen_helper_gvec_fmaxnum_h, + gen_helper_gvec_fmaxnum_s, + gen_helper_gvec_fmaxnum_d, +}; +TRANS_FEAT(FMAXNM_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmaxnm) +TRANS_FEAT(FMAXNM_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmaxnm) + +static gen_helper_gvec_3_ptr * const f_vector_fminnm[4] = { + gen_helper_gvec_fminnum_b16, + gen_helper_gvec_fminnum_h, + gen_helper_gvec_fminnum_s, + gen_helper_gvec_fminnum_d, +}; +TRANS_FEAT(FMINNM_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fminnm) +TRANS_FEAT(FMINNM_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fminnm) + +/* Add/Sub vector Z[m] to each Z[n*N] with result in ZA[d*N]. 
*/ +static bool do_azz_n1(DisasContext *s, arg_azz_n *a, int esz, + GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n, o_zm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 0); + o_zm = vec_full_reg_offset(s, a->zm); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zn = vec_full_reg_offset(s, (a->zn + i) % 32); + + fn(esz, t_za, o_za, tcg_env, o_zn, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_sub_var) + +/* Add/Sub each vector Z[m*N] to each Z[n*N] with result in ZA[d*N]. */ +static bool do_azz_nn(DisasContext *s, arg_azz_n *a, int esz, + GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 1); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zn = vec_full_reg_offset(s, a->zn + i); + int o_zm = vec_full_reg_offset(s, a->zm + i); + + fn(esz, t_za, o_za, tcg_env, o_zn, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_sub_var) + +/* Add/Sub each ZA[d*N] += Z[m*N] */ +static bool do_aaz(DisasContext *s, arg_az_n *a, int esz, GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 0); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zm = vec_full_reg_offset(s, a->zm + i); + + fn(esz, t_za, o_za, t_za, o_za, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_aaz_s, aa64_sme2, do_aaz, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_aaz_s, aa64_sme2, do_aaz, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_aaz_d, aa64_sme2_i16i64, do_aaz, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_aaz_d, aa64_sme2_i16i64, do_aaz, a, MO_64, tcg_gen_gvec_sub_var) + +/* + * Expand array multi-vector single (n1), array multi-vector (nn), + * and array multi-vector indexed (nx), for floating-point accumulate. + * multi: true for nn, false for n1. + * fpst: >= 0 to set ptr argument for FPST_*, < 0 for ENV. + * data: stuff for simd_data, including any index. 
+ */ +#define FPST_ENV -1 + +static bool do_azz_fp(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, int fpst, + gen_helper_gvec_3_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t, ptr; + + if (fpst >= 0) { + ptr = fpstatus_ptr(fpst); + } else { + ptr = tcg_env; + } + t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, ptr, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. + */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_azz_acc_fp(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, int fpst, + gen_helper_gvec_4_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t, ptr; + + if (fpst >= 0) { + ptr = fpstatus_ptr(fpst); + } else { + ptr = tcg_env; + } + t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, ptr, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. + */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_fmlal(DisasContext *s, arg_azz_n *a, bool sub, bool multi) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + (1 << 2) | sub, 1, + multi, FPST_ENV, gen_helper_sve2_fmlal_zzzw_s); +} + +TRANS_FEAT(FMLAL_n1, aa64_sme2, do_fmlal, a, false, false) +TRANS_FEAT(FMLSL_n1, aa64_sme2, do_fmlal, a, true, false) +TRANS_FEAT(FMLAL_nn, aa64_sme2, do_fmlal, a, false, true) +TRANS_FEAT(FMLSL_nn, aa64_sme2, do_fmlal, a, true, true) + +static bool do_fmlal_nx(DisasContext *s, arg_azx_n *a, bool sub) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + (a->idx << 3) | (1 << 2) | sub, 1, + false, FPST_ENV, gen_helper_sve2_fmlal_zzxw_s); +} + +TRANS_FEAT(FMLAL_nx, aa64_sme2, do_fmlal_nx, a, false) +TRANS_FEAT(FMLSL_nx, aa64_sme2, do_fmlal_nx, a, true) + +static bool do_bfmlal(DisasContext *s, arg_azz_n *a, bool sub, bool multi) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, FPST_ZA, + (!sub ? gen_helper_gvec_bfmlal + : s->fpcr_ah ? gen_helper_gvec_ah_bfmlsl + : gen_helper_gvec_bfmlsl)); +} + +TRANS_FEAT(BFMLAL_n1, aa64_sme2, do_bfmlal, a, false, false) +TRANS_FEAT(BFMLSL_n1, aa64_sme2, do_bfmlal, a, true, false) +TRANS_FEAT(BFMLAL_nn, aa64_sme2, do_bfmlal, a, false, true) +TRANS_FEAT(BFMLSL_nn, aa64_sme2, do_bfmlal, a, true, true) + +static bool do_bfmlal_nx(DisasContext *s, arg_azx_n *a, bool sub) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + a->idx << 1, 0, false, FPST_ZA, + !sub ? 
gen_helper_gvec_bfmlal_idx + : s->fpcr_ah ? gen_helper_gvec_ah_bfmlsl_idx + : gen_helper_gvec_bfmlsl_idx); +} + +TRANS_FEAT(BFMLAL_nx, aa64_sme2, do_bfmlal_nx, a, false) +TRANS_FEAT(BFMLSL_nx, aa64_sme2, do_bfmlal_nx, a, true) + +static bool do_fdot(DisasContext *s, arg_azz_n *a, bool multi) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, 1, 0, + multi, FPST_ENV, gen_helper_sme2_fdot_h); +} + +TRANS_FEAT(FDOT_n1, aa64_sme2, do_fdot, a, false) +TRANS_FEAT(FDOT_nn, aa64_sme2, do_fdot, a, true) + +static bool do_fdot_nx(DisasContext *s, arg_azx_n *a) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx | (1 << 2), 0, false, FPST_ENV, + gen_helper_sme2_fdot_idx_h); +} + +TRANS_FEAT(FDOT_nx, aa64_sme2, do_fdot_nx, a) + +static bool do_bfdot(DisasContext *s, arg_azz_n *a, bool multi) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, 0, 0, + multi, FPST_ENV, gen_helper_gvec_bfdot); +} + +TRANS_FEAT(BFDOT_n1, aa64_sme2, do_bfdot, a, false) +TRANS_FEAT(BFDOT_nn, aa64_sme2, do_bfdot, a, true) + +static bool do_bfdot_nx(DisasContext *s, arg_azx_n *a) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, a->idx, 0, + false, FPST_ENV, gen_helper_gvec_bfdot_idx); +} + +TRANS_FEAT(BFDOT_nx, aa64_sme2, do_bfdot_nx, a) + +static bool do_vdot(DisasContext *s, arg_azx_n *a, gen_helper_gvec_4_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / 2; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, 2, 1); + TCGv_ptr t_zn = vec_full_reg_ptr(s, a->zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, a->zm); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int i = 0; i < 2; ++i) { + int o_za = i * vstride * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, a->idx | (i << 2)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, tcg_env, tcg_constant_i32(desc)); + } + } + return true; +} + +TRANS_FEAT(FVDOT, aa64_sme, do_vdot, a, gen_helper_sme2_fvdot_idx_h) +TRANS_FEAT(BFVDOT, aa64_sme, do_vdot, a, gen_helper_sme2_bfvdot_idx) + +static bool do_fmla(DisasContext *s, arg_azz_n *a, bool multi, + ARMFPStatusFlavour fpst, gen_helper_gvec_3_ptr *fn) +{ + return do_azz_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fpst, fn); +} + +TRANS_FEAT(FMLA_n1_h, aa64_sme_f16f16, do_fmla, a, false, FPST_ZA_F16, + gen_helper_gvec_vfma_h) +TRANS_FEAT(FMLS_n1_h, aa64_sme_f16f16, do_fmla, a, false, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h) +TRANS_FEAT(FMLA_nn_h, aa64_sme_f16f16, do_fmla, a, true, FPST_ZA_F16, + gen_helper_gvec_vfma_h) +TRANS_FEAT(FMLS_nn_h, aa64_sme_f16f16, do_fmla, a, true, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h) + +TRANS_FEAT(FMLA_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_vfma_s) +TRANS_FEAT(FMLS_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s) +TRANS_FEAT(FMLA_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_vfma_s) +TRANS_FEAT(FMLS_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s) + +TRANS_FEAT(FMLA_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_vfma_d) +TRANS_FEAT(FMLS_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? 
gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d) +TRANS_FEAT(FMLA_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_vfma_d) +TRANS_FEAT(FMLS_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d) + +TRANS_FEAT(BFMLA_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_bfmla) +TRANS_FEAT(BFMLS_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls) +TRANS_FEAT(BFMLA_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_bfmla) +TRANS_FEAT(BFMLS_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls) + +static bool do_fmla_nx(DisasContext *s, arg_azx_n *a, + ARMFPStatusFlavour fpst, gen_helper_gvec_4_ptr *fn) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx, 0, false, fpst, fn); +} + +TRANS_FEAT(FMLA_nx_h, aa64_sme_f16f16, do_fmla_nx, a, FPST_ZA_F16, + gen_helper_gvec_fmla_idx_h) +TRANS_FEAT(FMLS_nx_h, aa64_sme_f16f16, do_fmla_nx, a, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_h : gen_helper_gvec_fmls_idx_h) +TRANS_FEAT(FMLA_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_fmla_idx_s) +TRANS_FEAT(FMLS_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_s : gen_helper_gvec_fmls_idx_s) +TRANS_FEAT(FMLA_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_fmla_idx_d) +TRANS_FEAT(FMLS_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_d : gen_helper_gvec_fmls_idx_d) + +TRANS_FEAT(BFMLA_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_bfmla_idx) +TRANS_FEAT(BFMLS_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls_idx : gen_helper_gvec_bfmls_idx) + +static bool do_faddsub(DisasContext *s, arg_az_n *a, ARMFPStatusFlavour fpst, + gen_helper_gvec_3_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int n = a->n; + int zm = a->zm; + int vstride = svl / n; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, n, 0); + TCGv_ptr ptr = fpstatus_ptr(fpst); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int r = 0; r < n; ++r) { + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm + r); + int o_za = r * vstride * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, 0); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t, t_zm, ptr, tcg_constant_i32(desc)); + } + } + return true; +} + +TRANS_FEAT(FADD_nn_h, aa64_sme_f16f16, do_faddsub, a, + FPST_ZA_F16, gen_helper_gvec_fadd_h) +TRANS_FEAT(FSUB_nn_h, aa64_sme_f16f16, do_faddsub, a, + FPST_ZA_F16, gen_helper_gvec_fsub_h) + +TRANS_FEAT(FADD_nn_s, aa64_sme2, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fadd_s) +TRANS_FEAT(FSUB_nn_s, aa64_sme2, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fsub_s) + +TRANS_FEAT(FADD_nn_d, aa64_sme2_f64f64, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fadd_d) +TRANS_FEAT(FSUB_nn_d, aa64_sme2_f64f64, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fsub_d) + +TRANS_FEAT(BFADD_nn, aa64_sme_b16b16, do_faddsub, a, + FPST_ZA, gen_helper_gvec_bfadd) +TRANS_FEAT(BFSUB_nn, aa64_sme_b16b16, do_faddsub, a, + FPST_ZA, gen_helper_gvec_bfsub) + +/* + * Expand array multi-vector single (n1), array multi-vector (nn), + * and array multi-vector indexed (nx), for integer accumulate. + * multi: true for nn, false for n1. + * data: stuff for simd_data, including any index. 
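+ * nreg: number of vector registers in the multi-vector group.
+ * nsel: number of interleaved results per source register, i.e. the
+ *       number of consecutive ZA rows written on each iteration.
+ * shsel: bit position at which the result selector i is inserted
+ *        into the simd_data value.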
+ */ +static bool do_azz_acc(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, + gen_helper_gvec_4 *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. + */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_dot(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +static void gen_helper_gvec_sudot_4b(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, + TCGv_ptr a, TCGv_i32 desc) +{ + gen_helper_gvec_usdot_4b(d, m, n, a, desc); +} + +TRANS_FEAT(USDOT_n1, aa64_sme2, do_dot, a, false, gen_helper_gvec_usdot_4b) +TRANS_FEAT(SUDOT_n1, aa64_sme2, do_dot, a, false, gen_helper_gvec_sudot_4b) +TRANS_FEAT(SDOT_n1_2h, aa64_sme2, do_dot, a, false, gen_helper_gvec_sdot_2h) +TRANS_FEAT(UDOT_n1_2h, aa64_sme2, do_dot, a, false, gen_helper_gvec_udot_2h) +TRANS_FEAT(SDOT_n1_4b, aa64_sme2, do_dot, a, false, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_n1_4b, aa64_sme2, do_dot, a, false, gen_helper_gvec_udot_4b) +TRANS_FEAT(SDOT_n1_4h, aa64_sme2_i16i64, do_dot, a, false, gen_helper_gvec_sdot_4h) +TRANS_FEAT(UDOT_n1_4h, aa64_sme2_i16i64, do_dot, a, false, gen_helper_gvec_udot_4h) + +TRANS_FEAT(USDOT_nn, aa64_sme2, do_dot, a, true, gen_helper_gvec_usdot_4b) +TRANS_FEAT(SDOT_nn_2h, aa64_sme2, do_dot, a, true, gen_helper_gvec_sdot_2h) +TRANS_FEAT(UDOT_nn_2h, aa64_sme2, do_dot, a, true, gen_helper_gvec_udot_2h) +TRANS_FEAT(SDOT_nn_4b, aa64_sme2, do_dot, a, true, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_nn_4b, aa64_sme2, do_dot, a, true, gen_helper_gvec_udot_4b) +TRANS_FEAT(SDOT_nn_4h, aa64_sme2_i16i64, do_dot, a, true, gen_helper_gvec_sdot_4h) +TRANS_FEAT(UDOT_nn_4h, aa64_sme2_i16i64, do_dot, a, true, gen_helper_gvec_udot_4h) + +static bool do_dot_nx(DisasContext *s, arg_azx_n *a, gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx, 0, false, fn); +} + +TRANS_FEAT(USDOT_nx, aa64_sme2, do_dot_nx, a, gen_helper_gvec_usdot_idx_4b) +TRANS_FEAT(SUDOT_nx, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sudot_idx_4b) +TRANS_FEAT(SDOT_nx_2h, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sdot_idx_2h) +TRANS_FEAT(UDOT_nx_2h, aa64_sme2, do_dot_nx, a, gen_helper_gvec_udot_idx_2h) +TRANS_FEAT(SDOT_nx_4b, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sdot_idx_4b) +TRANS_FEAT(UDOT_nx_4b, aa64_sme2, do_dot_nx, a, gen_helper_gvec_udot_idx_4b) +TRANS_FEAT(SDOT_nx_4h, aa64_sme2_i16i64, do_dot_nx, a, gen_helper_gvec_sdot_idx_4h) +TRANS_FEAT(UDOT_nx_4h, aa64_sme2_i16i64, do_dot_nx, a, gen_helper_gvec_udot_idx_4h) + +static bool do_vdot_nx(DisasContext *s, arg_azx_n *a, gen_helper_gvec_3 *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + fn(get_zarray(s, a->rv, a->off, a->n, 0), + vec_full_reg_ptr(s, a->zn), + vec_full_reg_ptr(s, a->zm), + tcg_constant_i32(simd_desc(svl, svl, a->idx))); + } + return 
true; +} + +TRANS_FEAT(SVDOT_nx_2h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_2h) +TRANS_FEAT(SVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_4b) +TRANS_FEAT(SVDOT_nx_4h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_4h) + +TRANS_FEAT(UVDOT_nx_2h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_2h) +TRANS_FEAT(UVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_4b) +TRANS_FEAT(UVDOT_nx_4h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_4h) + +TRANS_FEAT(SUVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_suvdot_idx_4b) +TRANS_FEAT(USVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_usvdot_idx_4b) + +static bool do_smlal(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +TRANS_FEAT(SMLAL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_smlal_zzzw_s) +TRANS_FEAT(SMLSL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_smlsl_zzzw_s) +TRANS_FEAT(UMLAL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_umlal_zzzw_s) +TRANS_FEAT(UMLSL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_umlsl_zzzw_s) + +TRANS_FEAT(SMLAL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_smlal_zzzw_s) +TRANS_FEAT(SMLSL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_smlsl_zzzw_s) +TRANS_FEAT(UMLAL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_umlal_zzzw_s) +TRANS_FEAT(UMLSL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_umlsl_zzzw_s) + +static bool do_smlal_nx(DisasContext *s, arg_azx_n *a, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + a->idx << 1, 0, false, fn); +} + +TRANS_FEAT(SMLAL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_smlal_idx_s) +TRANS_FEAT(SMLSL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_smlsl_idx_s) +TRANS_FEAT(UMLAL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_umlal_idx_s) +TRANS_FEAT(UMLSL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_umlsl_idx_s) + +static bool do_smlall(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +static void gen_helper_sme2_sumlall_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, + TCGv_ptr a, TCGv_i32 desc) +{ + gen_helper_sme2_usmlall_s(d, m, n, a, desc); +} + +TRANS_FEAT(SMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_smlall_s) +TRANS_FEAT(SMLSLL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_smlsll_s) +TRANS_FEAT(UMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_umlall_s) +TRANS_FEAT(UMLSLL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_umlsll_s) +TRANS_FEAT(USMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_usmlall_s) +TRANS_FEAT(SUMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_sumlall_s) + +TRANS_FEAT(SMLALL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_smlall_d) +TRANS_FEAT(SMLSLL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_smlsll_d) +TRANS_FEAT(UMLALL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_umlall_d) +TRANS_FEAT(UMLSLL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_umlsll_d) + +TRANS_FEAT(SMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_smlall_s) +TRANS_FEAT(SMLSLL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_smlsll_s) +TRANS_FEAT(UMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_umlall_s) +TRANS_FEAT(UMLSLL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_umlsll_s) 
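+/*
+ * There is no SUMLALL_nn: with two multi-vector operands, swapping the
+ * signedness assignment is the same as swapping Zn and Zm, which
+ * USMLALL_nn already expresses.  The n1 form instead reuses the USMLALL
+ * helper via the gen_helper_sme2_sumlall_s wrapper above, since the
+ * mixed signed-by-unsigned product does not depend on operand order.
+ */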
+TRANS_FEAT(USMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_usmlall_s) + +TRANS_FEAT(SMLALL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_smlall_d) +TRANS_FEAT(SMLSLL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_smlsll_d) +TRANS_FEAT(UMLALL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_umlall_d) +TRANS_FEAT(UMLSLL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_umlsll_d) + +static bool do_smlall_nx(DisasContext *s, arg_azx_n *a, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm, + a->idx << 2, 0, false, fn); +} + +TRANS_FEAT(SMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_smlall_idx_s) +TRANS_FEAT(SMLSLL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_s) +TRANS_FEAT(UMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_umlall_idx_s) +TRANS_FEAT(UMLSLL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_s) +TRANS_FEAT(USMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_usmlall_idx_s) +TRANS_FEAT(SUMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_sumlall_idx_s) + +TRANS_FEAT(SMLALL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_smlall_idx_d) +TRANS_FEAT(SMLSLL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_d) +TRANS_FEAT(UMLALL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_umlall_idx_d) +TRANS_FEAT(UMLSLL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_d) + +static bool do_zz_fpst(DisasContext *s, arg_zz_n *a, int data, + ARMFPStatusFlavour type, gen_helper_gvec_2_ptr *fn) +{ + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + TCGv_ptr fpst = fpstatus_ptr(type); + + for (int i = 0, n = a->n; i < n; ++i) { + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->zd + i), + vec_full_reg_offset(s, a->zn + i), + fpst, svl, svl, data, fn); + } + } + return true; +} + +TRANS_FEAT(BFCVT, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_bfcvt) +TRANS_FEAT(BFCVTN, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_bfcvtn) +TRANS_FEAT(FCVT_n, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_fcvt_n) +TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_fcvtn) + +TRANS_FEAT(FCVT_w, aa64_sme_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvt_w) +TRANS_FEAT(FCVTL, aa64_sme_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvtl) + +TRANS_FEAT(FCVTZS, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_gvec_vcvt_rz_fs) +TRANS_FEAT(FCVTZU, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_gvec_vcvt_rz_fu) + +TRANS_FEAT(SCVTF, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_scvtf) +TRANS_FEAT(UCVTF, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_ucvtf) + +TRANS_FEAT(FRINTN, aa64_sme2, do_zz_fpst, a, float_round_nearest_even, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTP, aa64_sme2, do_zz_fpst, a, float_round_up, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTM, aa64_sme2, do_zz_fpst, a, float_round_down, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTA, aa64_sme2, do_zz_fpst, a, float_round_ties_away, + FPST_A64, gen_helper_gvec_vrint_rm_s) + +static bool do_zz(DisasContext *s, arg_zz_n *a, int data, + gen_helper_gvec_2 *fn) +{ + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + + for (int i = 0, n = a->n; i < n; ++i) { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd + i), + vec_full_reg_offset(s, a->zn + i), + svl, svl, data, fn); + } + } + 
return true; +} + +TRANS_FEAT(SQCVT_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_sh) +TRANS_FEAT(UQCVT_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_sh) +TRANS_FEAT(SQCVTU_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_sh) + +TRANS_FEAT(SQCVT_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_sb) +TRANS_FEAT(UQCVT_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_sb) +TRANS_FEAT(SQCVTU_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_sb) + +TRANS_FEAT(SQCVT_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_dh) +TRANS_FEAT(UQCVT_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_dh) +TRANS_FEAT(SQCVTU_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_dh) + +TRANS_FEAT(SQCVTN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtn_sb) +TRANS_FEAT(UQCVTN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvtn_sb) +TRANS_FEAT(SQCVTUN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_sb) + +TRANS_FEAT(SQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtn_dh) +TRANS_FEAT(UQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvtn_dh) +TRANS_FEAT(SQCVTUN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_dh) + +TRANS_FEAT(SUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_bh) +TRANS_FEAT(SUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_hs) +TRANS_FEAT(SUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_sd) + +TRANS_FEAT(SUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_bh) +TRANS_FEAT(SUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_hs) +TRANS_FEAT(SUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_sd) + +TRANS_FEAT(UUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_bh) +TRANS_FEAT(UUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_hs) +TRANS_FEAT(UUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_sd) + +TRANS_FEAT(UUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_bh) +TRANS_FEAT(UUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_hs) +TRANS_FEAT(UUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_sd) + +static bool do_zipuzp_4(DisasContext *s, arg_zz_e *a, + gen_helper_gvec_2 * const fn[5]) +{ + int bytes_per_op = 4 << a->esz; + + /* Both MO_64 and MO_128 can fail the size test. 
*/ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + } else if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + if (svl < bytes_per_op) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + svl, svl, 0, fn[a->esz]); + } + } + return true; +} + +static gen_helper_gvec_2 * const zip4_fns[] = { + gen_helper_sme2_zip4_b, + gen_helper_sme2_zip4_h, + gen_helper_sme2_zip4_s, + gen_helper_sme2_zip4_d, + gen_helper_sme2_zip4_q, +}; +TRANS_FEAT(ZIP_4, aa64_sme2, do_zipuzp_4, a, zip4_fns) + +static gen_helper_gvec_2 * const uzp4_fns[] = { + gen_helper_sme2_uzp4_b, + gen_helper_sme2_uzp4_h, + gen_helper_sme2_uzp4_s, + gen_helper_sme2_uzp4_d, + gen_helper_sme2_uzp4_q, +}; +TRANS_FEAT(UZP_4, aa64_sme2, do_zipuzp_4, a, uzp4_fns) + +static bool do_zz_rshr(DisasContext *s, arg_rshr *a, gen_helper_gvec_2 *fn) +{ + if (sve_access_check(s)) { + int vl = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vl, vl, a->shift, fn); + } + return true; +} + +TRANS_FEAT(SQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sh) +TRANS_FEAT(UQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sh) +TRANS_FEAT(SQRSHRU_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sh) + +TRANS_FEAT(SQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sb) +TRANS_FEAT(SQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_dh) +TRANS_FEAT(UQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sb) +TRANS_FEAT(UQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_dh) +TRANS_FEAT(SQRSHRU_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sb) +TRANS_FEAT(SQRSHRU_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_dh) + +TRANS_FEAT(SQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sh) +TRANS_FEAT(UQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sh) +TRANS_FEAT(SQRSHRUN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sh) + +TRANS_FEAT(SQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sb) +TRANS_FEAT(SQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_dh) +TRANS_FEAT(UQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sb) +TRANS_FEAT(UQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_dh) +TRANS_FEAT(SQRSHRUN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sb) +TRANS_FEAT(SQRSHRUN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_dh) + +static bool do_zipuzp_2(DisasContext *s, arg_zzz_e *a, + gen_helper_gvec_3 * const fn[5]) +{ + int bytes_per_op = 2 << a->esz; + + /* MO_128 can fail the size test. 
*/ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + } else if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + if (svl < bytes_per_op) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + svl, svl, 0, fn[a->esz]); + } + } + return true; +} + +static gen_helper_gvec_3 * const zip2_fns[] = { + gen_helper_sme2_zip2_b, + gen_helper_sme2_zip2_h, + gen_helper_sme2_zip2_s, + gen_helper_sme2_zip2_d, + gen_helper_sme2_zip2_q, +}; +TRANS_FEAT(ZIP_2, aa64_sme2, do_zipuzp_2, a, zip2_fns) + +static gen_helper_gvec_3 * const uzp2_fns[] = { + gen_helper_sme2_uzp2_b, + gen_helper_sme2_uzp2_h, + gen_helper_sme2_uzp2_s, + gen_helper_sme2_uzp2_d, + gen_helper_sme2_uzp2_q, +}; +TRANS_FEAT(UZP_2, aa64_sme2, do_zipuzp_2, a, uzp2_fns) + +static bool trans_FCLAMP(DisasContext *s, arg_zzz_en *a) +{ + static gen_helper_gvec_3_ptr * const fn[] = { + gen_helper_sme2_bfclamp, + gen_helper_sme2_fclamp_h, + gen_helper_sme2_fclamp_s, + gen_helper_sme2_fclamp_d, + }; + TCGv_ptr fpst; + int vl; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + /* This insn uses MO_8 to encode BFloat16. */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); + vl = vec_full_reg_size(s); + + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + fpst, vl, vl, a->n, fn[a->esz]); + return true; +} + +static bool do_clamp(DisasContext *s, arg_zzz_en *a, + gen_helper_gvec_3 * const fn[4]) +{ + int vl; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + /* + * Clamp is just a min+max, easily supported by most host + * vector operations -- we already have such an expansion in + * translate-sve.c for a single output. + * TODO: Add support in gvec for multiple simultaneous output, + * and/or copy to temporary upon overlap. 
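+ * (Per element this is the usual clamp identity,
+ * Zd = MIN(MAX(Zd, Zn), Zm): Zn supplies the lower bound and
+ * Zm the upper bound.)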
+ */ + vl = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + vl, vl, a->n, fn[a->esz]); + return true; +} + +static gen_helper_gvec_3 * const sclamp_fns[] = { + gen_helper_sme2_sclamp_b, + gen_helper_sme2_sclamp_h, + gen_helper_sme2_sclamp_s, + gen_helper_sme2_sclamp_d, +}; +TRANS(SCLAMP, do_clamp, a, sclamp_fns) + +static gen_helper_gvec_3 * const uclamp_fns[] = { + gen_helper_sme2_uclamp_b, + gen_helper_sme2_uclamp_h, + gen_helper_sme2_uclamp_s, + gen_helper_sme2_uclamp_d, +}; +TRANS(UCLAMP, do_clamp, a, uclamp_fns) + +static bool trans_SEL(DisasContext *s, arg_SEL *a) +{ + typedef void sme_sel_fn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32); + static sme_sel_fn * const fns[4] = { + gen_helper_sme2_sel_b, gen_helper_sme2_sel_h, + gen_helper_sme2_sel_s, gen_helper_sme2_sel_d + }; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->n); + TCGv_ptr t_d = tcg_temp_new_ptr(); + TCGv_ptr t_n = tcg_temp_new_ptr(); + TCGv_ptr t_m = tcg_temp_new_ptr(); + TCGv_i32 png = tcg_temp_new_i32(); + + tcg_gen_addi_ptr(t_d, tcg_env, vec_full_reg_offset(s, a->zd)); + tcg_gen_addi_ptr(t_n, tcg_env, vec_full_reg_offset(s, a->zn)); + tcg_gen_addi_ptr(t_m, tcg_env, vec_full_reg_offset(s, a->zm)); + + tcg_gen_ld16u_i32(png, tcg_env, pred_full_reg_offset(s, a->pg) + ^ (HOST_BIG_ENDIAN ? 6 : 0)); + + fns[a->esz](t_d, t_n, t_m, png, tcg_constant_i32(desc)); + } + return true; +} + +static bool do_lut(DisasContext *s, arg_lut *a, + gen_helper_gvec_2_ptr *fn, bool strided) +{ + if (sme_sm_enabled_check(s) && sme2_zt0_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + tcg_env, svl, svl, strided | (a->idx << 1), fn); + } + return true; +} + +TRANS_FEAT(LUTI2_c_1b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1b, false) +TRANS_FEAT(LUTI2_c_1h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1h, false) +TRANS_FEAT(LUTI2_c_1s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1s, false) + +TRANS_FEAT(LUTI2_c_2b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2b, false) +TRANS_FEAT(LUTI2_c_2h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2h, false) +TRANS_FEAT(LUTI2_c_2s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2s, false) + +TRANS_FEAT(LUTI2_c_4b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4b, false) +TRANS_FEAT(LUTI2_c_4h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4h, false) +TRANS_FEAT(LUTI2_c_4s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4s, false) + +TRANS_FEAT(LUTI4_c_1b, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1b, false) +TRANS_FEAT(LUTI4_c_1h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1h, false) +TRANS_FEAT(LUTI4_c_1s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1s, false) + +TRANS_FEAT(LUTI4_c_2b, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2b, false) +TRANS_FEAT(LUTI4_c_2h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2h, false) +TRANS_FEAT(LUTI4_c_2s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2s, false) + +TRANS_FEAT(LUTI4_c_4h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4h, false) +TRANS_FEAT(LUTI4_c_4s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4s, false) + +static bool do_lut_s4(DisasContext *s, arg_lut *a, gen_helper_gvec_2_ptr *fn) +{ + return !(a->zd & 0b01100) && do_lut(s, a, fn, true); +} + +static bool do_lut_s8(DisasContext *s, arg_lut *a, gen_helper_gvec_2_ptr *fn) +{ + return !(a->zd & 
0b01000) && do_lut(s, a, fn, true); +} + +TRANS_FEAT(LUTI2_s_2b, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti2_2b) +TRANS_FEAT(LUTI2_s_2h, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti2_2h) + +TRANS_FEAT(LUTI2_s_4b, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti2_4b) +TRANS_FEAT(LUTI2_s_4h, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti2_4h) + +TRANS_FEAT(LUTI4_s_2b, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti4_2b) +TRANS_FEAT(LUTI4_s_2h, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti4_2h) + +TRANS_FEAT(LUTI4_s_4h, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti4_4h) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index d23be47..07b827f 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -31,9 +31,9 @@ typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); +typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_i64, TCGv_i32); + TCGv_ptr, TCGv_i64, TCGv_i64); /* * Helpers for extracting complex instruction fields. @@ -89,7 +89,7 @@ static inline int expand_imm_sh8u(DisasContext *s, int x) */ static inline int msz_dtype(DisasContext *s, int msz) { - static const uint8_t dtype[4] = { 0, 5, 10, 15 }; + static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; return dtype[msz]; } @@ -190,6 +190,10 @@ static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, arg_rrr_esz *a, int data) { + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -403,6 +407,10 @@ static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, arg_rprr_esz *a) { + /* These insns use MO_8 to encode BFloat16. */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -629,7 +637,7 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) * = | ~(m | k) */ tcg_gen_and_i64(n, n, k); - if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { tcg_gen_or_i64(m, m, k); tcg_gen_orc_i64(d, n, m); } else { @@ -778,6 +786,9 @@ DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) DO_ZPZ(ABS, aa64_sve, sve_abs) DO_ZPZ(NEG, aa64_sve, sve_neg) DO_ZPZ(RBIT, aa64_sve, sve_rbit) +DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv) +DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv) +DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv) static gen_helper_gvec_3 * const fabs_fns[4] = { NULL, gen_helper_sve_fabs_h, @@ -828,6 +839,41 @@ TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, a->esz == 3 ? 
gen_helper_sve_uxtw_d : NULL, a, 0) +static gen_helper_gvec_3 * const addqv_fns[4] = { + gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h, + gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d, +}; +TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const smaxqv_fns[4] = { + gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h, + gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d, +}; +TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const sminqv_fns[4] = { + gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h, + gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d, +}; +TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const umaxqv_fns[4] = { + gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h, + gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d, +}; +TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const uminqv_fns[4] = { + gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h, + gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d, +}; +TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0) + /* *** SVE Integer Reduction Group */ @@ -1679,6 +1725,22 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) +static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a) +{ + if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) { + return false; + } + if (sve_access_check(s)) { + /* Canonical TRUE is 0 count, invert bit, plus element size. */ + int val = (1 << 15) | (1 << a->esz); + + /* Write val to the first uint64_t; clear all of the rest. */ + tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd), + 8, size_for_gvec(pred_full_reg_size(s)), val); + } + return true; +} + /* Note pat == 31 is #all, to set all elements. */ TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false) @@ -2148,6 +2210,55 @@ static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) +static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a) +{ + unsigned vl, dofs, sofs0, sofs1, sofs2, imm; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + imm = a->imm; + if (imm == 0) { + /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. 
*/ + return true; + } + + vl = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + sofs2 = vec_full_reg_offset(s, a->rn); + + if (imm & 8) { + sofs0 = dofs + 8; + sofs1 = sofs2; + sofs2 += 8; + } else { + sofs0 = dofs; + sofs1 = dofs + 8; + } + imm = (imm & 7) << 3; + + for (unsigned i = 0; i < vl; i += 16) { + TCGv_i64 s0 = tcg_temp_new_i64(); + TCGv_i64 s1 = tcg_temp_new_i64(); + TCGv_i64 s2 = tcg_temp_new_i64(); + + tcg_gen_ld_i64(s0, tcg_env, sofs0 + i); + tcg_gen_ld_i64(s1, tcg_env, sofs1 + i); + tcg_gen_ld_i64(s2, tcg_env, sofs2 + i); + + tcg_gen_extract2_i64(s0, s0, s1, imm); + tcg_gen_extract2_i64(s1, s1, s2, imm); + + tcg_gen_st_i64(s0, tcg_env, dofs + i); + tcg_gen_st_i64(s1, tcg_env, dofs + i + 8); + } + return true; +} + /* *** SVE Permute - Unpredicated Group */ @@ -2195,6 +2306,27 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) return true; } +static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a) +{ + unsigned vl, dofs, nofs; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + nofs = vec_reg_offset(s, a->rn, a->imm, a->esz); + + for (unsigned i = 0; i < vl; i += 16) { + tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16); + } + return true; +} + static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) { typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); @@ -2256,12 +2388,124 @@ static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) +static gen_helper_gvec_3 * const tblq_fns[4] = { + gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h, + gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d +}; +TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + tblq_fns[a->esz], a, 0) + static gen_helper_gvec_3 * const tbx_fns[4] = { gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d }; TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) +static gen_helper_gvec_3 * const tbxq_fns[4] = { + gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h, + gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d +}; +TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + tbxq_fns[a->esz], a, 0) + +static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv *a) +{ + static gen_helper_gvec_2 * const fns[4] = { + NULL, gen_helper_pmov_pv_h, + gen_helper_pmov_pv_s, gen_helper_pmov_pv_d + }; + unsigned vl, pl, vofs, pofs; + TCGv_i64 tmp; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + if (a->esz != MO_8) { + tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vl, vl, a->imm, fns[a->esz]); + return true; + } + + /* + * Copy the low PL bytes from vector Zn, zero-extending to a + * multiple of 8 bytes, so that Pd is properly cleared. + */ + + pl = vl / 8; + pofs = pred_full_reg_offset(s, a->rd); + vofs = vec_full_reg_offset(s, a->rn); + + QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32); + for (unsigned i = 32; i >= 8; i >>= 1) { + if (pl & i) { + tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i); + pofs += i; + vofs += i; + } + } + switch (pl & 7) { + case 0: + return true; + case 2: + tmp = tcg_temp_new_i64(); + tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 
6 : 0)); + break; + case 4: + tmp = tcg_temp_new_i64(); + tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0)); + break; + case 6: + tmp = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmp, tcg_env, vofs); + tcg_gen_extract_i64(tmp, tmp, 0, 48); + break; + default: + g_assert_not_reached(); + } + tcg_gen_st_i64(tmp, tcg_env, pofs); + return true; +} + +static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a) +{ + static gen_helper_gvec_2 * const fns[4] = { + NULL, gen_helper_pmov_vp_h, + gen_helper_pmov_vp_s, gen_helper_pmov_vp_d + }; + unsigned vl; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + + if (a->esz == MO_8) { + /* + * The low PL bytes are copied from Pn to Zd unchanged. + * We know that the unused portion of Pn is zero, and + * that imm == 0, so the balance of Zd must be zeroed. + */ + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->rn), + size_for_gvec(vl / 8), vl); + } else { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->rn), + vl, vl, a->imm, fns[a->esz]); + } + return true; +} + static bool trans_UNPK(DisasContext *s, arg_UNPK *a) { static gen_helper_gvec_2 * const fns[4][2] = { @@ -2352,6 +2596,23 @@ TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) *** SVE Permute - Interleaving Group */ +static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rrr_esz *a, int data) +{ + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + if (vsz < 32) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vsz, vsz, data, fn); + } + } + return true; +} + static gen_helper_gvec_3 * const zip_fns[4] = { gen_helper_sve_zip_b, gen_helper_sve_zip_h, gen_helper_sve_zip_s, gen_helper_sve_zip_d, @@ -2361,26 +2622,43 @@ TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, zip_fns[a->esz], a, vec_full_reg_size(s) / 2) -TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, 0) -TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, - QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) +TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, 0) +TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, + QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) + +static gen_helper_gvec_3 * const zipq_fns[4] = { + gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h, + gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d, +}; +TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + zipq_fns[a->esz], a, 0) +TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + zipq_fns[a->esz], a, 16 / 2) static gen_helper_gvec_3 * const uzp_fns[4] = { gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, }; - TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, uzp_fns[a->esz], a, 0) TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, uzp_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 0) -TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 16) +TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 0) 
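+/*
+ * As with UZP2_z above, the data argument selects the odd-numbered
+ * elements; here the element is a 16-byte quadword, hence the
+ * byte offset of 16.
+ */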
+TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 16) + +static gen_helper_gvec_3 * const uzpq_fns[4] = { + gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h, + gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d, +}; +TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + uzpq_fns[a->esz], a, 0) +TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + uzpq_fns[a->esz], a, 1 << a->esz) static gen_helper_gvec_3 * const trn_fns[4] = { gen_helper_sve_trn_b, gen_helper_sve_trn_h, @@ -2392,10 +2670,10 @@ TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, trn_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 0) -TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 16) +TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 0) +TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 16) /* *** SVE Permute Vector - Predicated Group @@ -2981,6 +3259,36 @@ static bool trans_CNTP(DisasContext *s, arg_CNTP *a) return true; } +static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a) +{ + TCGv_i32 t_png; + uint32_t desc = 0; + + if (dc_isar_feature(aa64_sve2p1, s)) { + if (!sve_access_check(s)) { + return true; + } + } else if (dc_isar_feature(aa64_sme2, s)) { + if (!sme_sm_enabled_check(s)) { + return true; + } + } else { + return false; + } + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, a->rn) ^ + (HOST_BIG_ENDIAN ? 6 : 0)); + + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl); + + gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc)); + return true; +} + static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) { if (!dc_isar_feature(aa64_sve, s)) { @@ -3091,7 +3399,9 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a) return true; } -static bool trans_WHILE(DisasContext *s, arg_WHILE *a) +typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +static bool do_WHILE(DisasContext *s, arg_while *a, + bool lt, int scale, int data, gen_while_fn *fn) { TCGv_i64 op0, op1, t0, t1, tmax; TCGv_i32 t2; @@ -3101,14 +3411,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) TCGCond cond; uint64_t maxval; /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ - bool eq = a->eq == a->lt; + bool eq = a->eq == lt; - /* The greater-than conditions are all SVE2. */ - if (a->lt - ? !dc_isar_feature(aa64_sve, s) - : !dc_isar_feature(aa64_sve2, s)) { - return false; - } if (!sve_access_check(s)) { return true; } @@ -3132,7 +3436,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - if (a->lt) { + if (lt) { tcg_gen_sub_i64(t0, op1, op0); if (a->u) { maxval = a->sf ? UINT64_MAX : UINT32_MAX; @@ -3152,7 +3456,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) } } - tmax = tcg_constant_i64(vsz >> a->esz); + tmax = tcg_constant_i64((vsz << scale) >> a->esz); if (eq) { /* Equality means one more iteration. */ tcg_gen_addi_i64(t0, t0, 1); @@ -3181,24 +3485,38 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) t2 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t2, t0); - /* Scale elements to bits. 
*/ - tcg_gen_shli_i32(t2, t2, a->esz); - desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, data); ptr = tcg_temp_new_ptr(); tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); - if (a->lt) { - gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); - } else { - gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); - } + fn(t2, ptr, t2, tcg_constant_i32(desc)); + do_pred_flags(t2); return true; } +TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE, + a, true, 0, 0, gen_helper_sve_whilel) +TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE, + a, false, 0, 0, gen_helper_sve_whileg) + +TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 1, 0, gen_helper_sve_while2l) +TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 1, 0, gen_helper_sve_while2g) + +TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 1, 1, gen_helper_sve_whilecl) +TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 2, 2, gen_helper_sve_whilecl) +TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 1, 1, gen_helper_sve_whilecg) +TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 2, 2, gen_helper_sve_whilecg) + static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) { TCGv_i64 op0, op1, diff, t1, tmax; @@ -3217,7 +3535,7 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) op0 = read_cpu_reg(s, a->rn, 1); op1 = read_cpu_reg(s, a->rm, 1); - tmax = tcg_constant_i64(vsz); + tmax = tcg_constant_i64(vsz >> a->esz); diff = tcg_temp_new_i64(); if (a->rw) { @@ -3227,15 +3545,15 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) tcg_gen_sub_i64(diff, op0, op1); tcg_gen_sub_i64(t1, op1, op0); tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); - /* Round down to a multiple of ESIZE. */ - tcg_gen_andi_i64(diff, diff, -1 << a->esz); + /* Divide, rounding down, by ESIZE. */ + tcg_gen_shri_i64(diff, diff, a->esz); /* If op1 == op0, diff == 0, and the condition is always true. */ tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); } else { /* WHILEWR */ tcg_gen_sub_i64(diff, op1, op0); - /* Round down to a multiple of ESIZE. */ - tcg_gen_andi_i64(diff, diff, -1 << a->esz); + /* Divide, rounding down, by ESIZE. */ + tcg_gen_shri_i64(diff, diff, a->esz); /* If op0 >= op1, diff <= 0, the condition is always true. */ tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); } @@ -3258,6 +3576,42 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) return true; } +static bool do_pext(DisasContext *s, arg_pext *a, int n) +{ + TCGv_i32 t_png; + TCGv_ptr t_pd; + int pl; + + if (!sve_access_check(s)) { + return true; + } + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, a->rn) ^ + (HOST_BIG_ENDIAN ? 
6 : 0)); + + t_pd = tcg_temp_new_ptr(); + pl = pred_full_reg_size(s); + + for (int i = 0; i < n; ++i) { + int rd = (a->rd + i) % 16; + int part = a->imm * n + i; + unsigned desc = 0; + + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, part); + + tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd)); + gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc)); + } + return true; +} + +TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1) +TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2) + /* *** SVE Integer Wide Immediate - Unpredicated Group */ @@ -3385,8 +3739,8 @@ DO_ZZI(UMIN, umin) #undef DO_ZZI static gen_helper_gvec_4 * const dot_fns[2][2] = { - { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, - { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } + { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h }, + { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h } }; TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) @@ -3395,19 +3749,24 @@ TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, * SVE Multiply - Indexed */ -TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sdot_idx_b, a) -TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sdot_idx_h, a) -TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_udot_idx_b, a) -TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_udot_idx_h, a) - -TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sudot_idx_b, a) -TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_usdot_idx_b, a) +TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_4b, a) +TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_4h, a) +TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_4b, a) +TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_4h, a) + +TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sudot_idx_4b, a) +TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_usdot_idx_4b, a) + +TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_2h, a) +TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_2h, a) #define DO_SVE2_RRX(NAME, FUNC) \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ @@ -3524,31 +3883,38 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) *** SVE Floating Point Multiply-Add Indexed Group */ +static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { - NULL, gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h, gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d }; -TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz]) static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { - { NULL, NULL }, + { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx }, { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, }; -TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmls_idx_fns[a->esz][s->fpcr_ah], - a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a, + fmls_idx_fns[a->esz][s->fpcr_ah]) /* *** SVE Floating Point Multiply Indexed Group */ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { - NULL, gen_helper_gvec_fmul_idx_h, + gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h, gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, }; TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, @@ -3621,6 +3987,54 @@ DO_VPZ_AH(FMAXV, fmaxv) #undef DO_VPZ +static gen_helper_gvec_3_ptr * const faddqv_fns[4] = { + NULL, gen_helper_sve2p1_faddqv_h, + gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d, +}; +TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + faddqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = { + NULL, gen_helper_sve2p1_fmaxnmqv_h, + gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d, +}; +TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + fmaxnmqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = { + NULL, gen_helper_sve2p1_fminnmqv_h, + gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d, +}; +TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + fminnmqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = { + NULL, gen_helper_sve2p1_fmaxqv_h, + gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d, +}; +static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = { + NULL, gen_helper_sve2p1_ah_fmaxqv_h, + gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d, +}; +TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fminqv_fns[4] = { + NULL, gen_helper_sve2p1_fminqv_h, + gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d, +}; +static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = { + NULL, gen_helper_sve2p1_ah_fminqv_h, + gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d, +}; +TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + /* *** SVE Floating Point Unary Operations - Unpredicated Group */ @@ -3747,7 +4161,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) #define DO_FP3(NAME, name) \ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ - NULL, gen_helper_gvec_##name##_h, \ + gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ }; \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) @@ -3803,13 +4217,34 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, s->fpcr_ah ? 
name##_ah_zpzz_fns[a->esz] : \ name##_zpzz_fns[a->esz], a) -DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) -DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) -DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) -DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) -DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) -DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) -DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) +/* Similar, but for insns where sz == 0 encodes bfloat16 */ +#define DO_ZPZZ_FP_B16(NAME, FEAT, name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) + +#define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ + gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \ + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ + name##_zpzz_fns[a->esz], a) + +DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd) +DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub) +DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul) +DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) +DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) +DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum) +DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) @@ -3940,19 +4375,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + #define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ - NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ - s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ - a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \ + s->fpcr_ah ? 
name##_ah_fns[a->esz] : name##_fns[a->esz]) /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) @@ -4143,7 +4587,7 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, */ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, - int len, int rn, int imm) + int len, int rn, int imm, MemOp align) { int len_align = QEMU_ALIGN_DOWN(len, 16); int len_remain = len % 16; @@ -4172,12 +4616,15 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, for (i = 0; i < len_align; i += 16) { tcg_gen_qemu_ld_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_extr_i128_i64(t0, t1, t16); tcg_gen_st_i64(t0, base, vofs + i); tcg_gen_st_i64(t1, base, vofs + i + 8); tcg_gen_addi_i64(clean_addr, clean_addr, 16); } + if (len_align) { + align = MO_UNALN; + } } else { TCGLabel *loop = gen_new_label(); TCGv_ptr tp, i = tcg_temp_new_ptr(); @@ -4187,7 +4634,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, t16 = tcg_temp_new_i128(); tcg_gen_qemu_ld_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 16); tp = tcg_temp_new_ptr(); @@ -4202,6 +4649,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_st_i64(t1, tp, vofs + 8); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); + align = MO_UNALN; } /* @@ -4210,7 +4658,9 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, */ if (len_remain >= 8) { t0 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, + MO_LEUQ | MO_ATOM_NONE | align); + align = MO_UNALN; tcg_gen_st_i64(t0, base, vofs + len_align); len_remain -= 8; len_align += 8; @@ -4225,12 +4675,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_ld_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain) | MO_ATOM_NONE); + MO_LE | ctz32(len_remain) + | MO_ATOM_NONE | align); break; case 6: t1 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, + MO_LEUL | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); tcg_gen_deposit_i64(t0, t0, t1, 32, 32); @@ -4245,7 +4697,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, /* Similarly for stores. */ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, - int len, int rn, int imm) + int len, int rn, int imm, MemOp align) { int len_align = QEMU_ALIGN_DOWN(len, 16); int len_remain = len % 16; @@ -4277,9 +4729,12 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_ld_i64(t1, base, vofs + i + 8); tcg_gen_concat_i64_i128(t16, t0, t1); tcg_gen_qemu_st_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 16); } + if (len_align) { + align = MO_UNALN; + } } else { TCGLabel *loop = gen_new_label(); TCGv_ptr tp, i = tcg_temp_new_ptr(); @@ -4303,13 +4758,16 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_addi_i64(clean_addr, clean_addr, 16); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); + align = MO_UNALN; } /* Predicate register stores can be any multiple of 2. 
*/ if (len_remain >= 8) { t0 = tcg_temp_new_i64(); tcg_gen_ld_i64(t0, base, vofs + len_align); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LEUQ | MO_ATOM_NONE | align); + align = MO_UNALN; len_remain -= 8; len_align += 8; if (len_remain) { @@ -4325,11 +4783,13 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_st_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain) | MO_ATOM_NONE); + MO_LE | ctz32(len_remain) + | MO_ATOM_NONE | align); break; case 6: - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LEUL | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); @@ -4349,7 +4809,8 @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } return true; } @@ -4362,7 +4823,8 @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_2 : MO_UNALN); } return true; } @@ -4375,7 +4837,8 @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } return true; } @@ -4388,7 +4851,8 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_2 : MO_UNALN); } return true; } @@ -4398,34 +4862,37 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) */ /* The memory mode of the dtype. */ -static const MemOp dtype_mop[16] = { +static const MemOp dtype_mop[19] = { MO_UB, MO_UB, MO_UB, MO_UB, MO_SL, MO_UW, MO_UW, MO_UW, MO_SW, MO_SW, MO_UL, MO_UL, - MO_SB, MO_SB, MO_SB, MO_UQ + MO_SB, MO_SB, MO_SB, MO_UQ, + /* Artificial values used by decode */ + MO_UL, MO_UQ, MO_128, }; #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) /* The vector element size of dtype. */ -static const uint8_t dtype_esz[16] = { +static const uint8_t dtype_esz[19] = { 0, 1, 2, 3, 3, 1, 2, 3, 3, 2, 2, 3, - 3, 2, 1, 3 + 3, 2, 1, 3, + /* Artificial values used by decode */ + 4, 4, 4, }; -uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, +uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, uint32_t msz, bool is_write, uint32_t data) { uint32_t sizem1; - uint32_t desc = 0; + uint64_t desc = 0; /* Assert all of the data fits, with or without MTE enabled. 
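make_svemte_desc now returns a uint64_t and, rather than squeezing the MTE fields in above SVE_MTEDESC_SHIFT, parks them in the upper 32 bits (desc <<= 32), leaving the entire low word to simd_desc(); helpers correspondingly receive the descriptor as an i64 constant. A sketch of that split, with hypothetical accessor names:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical packing: SIMD desc in the low word, MTE desc high. */
    static uint64_t pack_desc(uint32_t simd_desc, uint32_t mte_desc)
    {
        return ((uint64_t)mte_desc << 32) | simd_desc;
    }

    static uint32_t desc_simd(uint64_t desc) { return (uint32_t)desc; }
    static uint32_t desc_mte(uint64_t desc)  { return (uint32_t)(desc >> 32); }

    int main(void)
    {
        uint64_t d = pack_desc(0x12345678u, 0x9abcdef0u);
        assert(desc_simd(d) == 0x12345678u);
        assert(desc_mte(d) == 0x9abcdef0u);
        return 0;
    }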
*/ assert(nregs >= 1 && nregs <= 4); sizem1 = (nregs << msz) - 1; assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); - assert(data < 1u << SVE_MTEDESC_SHIFT); if (s->mte_active[0]) { desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); @@ -4433,9 +4900,9 @@ uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); - desc <<= SVE_MTEDESC_SHIFT; + desc <<= 32; } - return simd_desc(vsz, vsz, desc | data); + return simd_desc(vsz, vsz, data) | desc; } static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, @@ -4443,7 +4910,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_gvec_mem *fn) { TCGv_ptr t_pg; - uint32_t desc; + uint64_t desc; if (!s->mte_active[0]) { addr = clean_data_tbi(s, addr); @@ -4459,11 +4926,11 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); } /* Indexed by [mte][be][dtype][nreg] */ -static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { +static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = { { /* mte inactive, little-endian */ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, @@ -4487,7 +4954,13 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r }, + + { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_le_r, + gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r }, + }, /* mte inactive, big-endian */ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, @@ -4512,7 +4985,14 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r }, + + { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_be_r, + gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r }, + }, + }, { /* mte active, little-endian */ { { gen_helper_sve_ld1bb_r_mte, @@ -4545,7 +5025,15 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1dd_le_r_mte, gen_helper_sve_ld2dd_le_r_mte, gen_helper_sve_ld3dd_le_r_mte, - gen_helper_sve_ld4dd_le_r_mte } }, + gen_helper_sve_ld4dd_le_r_mte }, + + { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_le_r_mte, + gen_helper_sve_ld3qq_le_r_mte, + gen_helper_sve_ld4qq_le_r_mte }, + }, /* mte active, big-endian */ { { gen_helper_sve_ld1bb_r_mte, @@ -4578,7 +5066,16 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1dd_be_r_mte, gen_helper_sve_ld2dd_be_r_mte, gen_helper_sve_ld3dd_be_r_mte, - gen_helper_sve_ld4dd_be_r_mte } } }, + 
gen_helper_sve_ld4dd_be_r_mte }, + + { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_be_r_mte, + gen_helper_sve_ld3qq_be_r_mte, + gen_helper_sve_ld4qq_be_r_mte }, + }, + }, }; static void do_ld_zpa(DisasContext *s, int zt, int pg, @@ -4597,9 +5094,32 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg, static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) { - if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { + if (a->rm == 31) { return false; } + + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case 16: case 17: + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); @@ -4611,9 +5131,28 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case 16: case 17: + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); } + if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> dtype_esz[a->dtype]; @@ -4839,7 +5378,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; int poff; - uint32_t desc; + uint64_t desc; /* Load the first quadword using the normal predicated load helpers. */ if (!s->mte_active[0]) { @@ -4870,7 +5409,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); /* Replicate that first quadword. */ if (vsz > 16) { @@ -4913,7 +5452,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz_r32; TCGv_ptr t_pg; int poff, doff; - uint32_t desc; + uint64_t desc; if (vsz < 32) { /* @@ -4954,7 +5493,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); /* * Replicate that first octaword. 
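dtype values 16-18 never appear in the architectural 4-bit field; decode synthesizes them so that the existing [dtype]-indexed tables can simply grow three rows for the 128-bit loads instead of needing a parallel lookup path. A sketch of that convention, with hypothetical names:

    #include <assert.h>
    #include <stdio.h>

    #define DTYPE_REAL  16   /* architectural dtype field is 4 bits */
    #define DTYPE_COUNT 19   /* plus 3 artificial entries from decode */

    typedef int load_bytes_fn(void);
    static int ld_byte(void) { return 1; }
    static int ld_quad(void) { return 16; }

    static load_bytes_fn * const ld_fns[DTYPE_COUNT] = {
        [0]  = ld_byte,                     /* real encodings 0..15 ... */
        [16] = ld_quad, [17] = ld_quad,     /* artificial: 128-bit forms */
        [18] = ld_quad,
    };

    int main(void)
    {
        int dtype = 16;                     /* synthesized by decode */
        assert(dtype < DTYPE_COUNT && ld_fns[dtype]);
        printf("element bytes: %d\n", ld_fns[dtype]());
        return 0;
    }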
@@ -5060,7 +5599,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + static gen_helper_gvec_mem * const fn_single[2][2][4][5] = { { { { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, gen_helper_sve_st1bs_r, @@ -5071,9 +5610,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_le_r }, { NULL, NULL, gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, + gen_helper_sve_st1sd_le_r, + gen_helper_sve_st1sq_le_r, }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, + gen_helper_sve_st1dd_le_r, + gen_helper_sve_st1dq_le_r, } }, { { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, gen_helper_sve_st1bs_r, @@ -5084,9 +5625,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_be_r }, { NULL, NULL, gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, + gen_helper_sve_st1sd_be_r, + gen_helper_sve_st1sq_be_r }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } } }, + gen_helper_sve_st1dd_be_r, + gen_helper_sve_st1dq_be_r } } }, { { { gen_helper_sve_st1bb_r_mte, gen_helper_sve_st1bh_r_mte, @@ -5098,9 +5641,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_le_r_mte }, { NULL, NULL, gen_helper_sve_st1ss_le_r_mte, - gen_helper_sve_st1sd_le_r_mte }, + gen_helper_sve_st1sd_le_r_mte, + gen_helper_sve_st1sq_le_r_mte }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r_mte } }, + gen_helper_sve_st1dd_le_r_mte, + gen_helper_sve_st1dq_le_r_mte } }, { { gen_helper_sve_st1bb_r_mte, gen_helper_sve_st1bh_r_mte, gen_helper_sve_st1bs_r_mte, @@ -5111,59 +5656,73 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_be_r_mte }, { NULL, NULL, gen_helper_sve_st1ss_be_r_mte, - gen_helper_sve_st1sd_be_r_mte }, + gen_helper_sve_st1sd_be_r_mte, + gen_helper_sve_st1sq_be_r_mte }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r_mte } } }, + gen_helper_sve_st1dd_be_r_mte, + gen_helper_sve_st1dq_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = { { { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_le_r, gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, + gen_helper_sve_st2dd_le_r, + gen_helper_sve_st2qq_le_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_le_r, gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, + gen_helper_sve_st3dd_le_r, + gen_helper_sve_st3qq_le_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_le_r, gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, + gen_helper_sve_st4dd_le_r, + gen_helper_sve_st4qq_le_r } }, { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_be_r, gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, + gen_helper_sve_st2dd_be_r, + gen_helper_sve_st2qq_be_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_be_r, gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, + gen_helper_sve_st3dd_be_r, + gen_helper_sve_st3qq_be_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_be_r, gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } } }, + gen_helper_sve_st4dd_be_r, + gen_helper_sve_st4qq_be_r } } }, { { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_le_r_mte, gen_helper_sve_st2ss_le_r_mte, - gen_helper_sve_st2dd_le_r_mte }, + gen_helper_sve_st2dd_le_r_mte, + gen_helper_sve_st2qq_le_r_mte }, { 
gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_le_r_mte, gen_helper_sve_st3ss_le_r_mte, - gen_helper_sve_st3dd_le_r_mte }, + gen_helper_sve_st3dd_le_r_mte, + gen_helper_sve_st3qq_le_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_le_r_mte, gen_helper_sve_st4ss_le_r_mte, - gen_helper_sve_st4dd_le_r_mte } }, + gen_helper_sve_st4dd_le_r_mte, + gen_helper_sve_st4qq_le_r_mte } }, { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_be_r_mte, gen_helper_sve_st2ss_be_r_mte, - gen_helper_sve_st2dd_be_r_mte }, + gen_helper_sve_st2dd_be_r_mte, + gen_helper_sve_st2qq_be_r_mte }, { gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_be_r_mte, gen_helper_sve_st3ss_be_r_mte, - gen_helper_sve_st3dd_be_r_mte }, + gen_helper_sve_st3dd_be_r_mte, + gen_helper_sve_st3qq_be_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_be_r_mte, gen_helper_sve_st4ss_be_r_mte, - gen_helper_sve_st4dd_be_r_mte } } }, + gen_helper_sve_st4dd_be_r_mte, + gen_helper_sve_st4qq_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; @@ -5182,12 +5741,32 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; - } if (a->rm == 31 || a->msz > a->esz) { return false; } + switch (a->esz) { + case MO_8 ... MO_64: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case MO_128: + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); @@ -5199,12 +5778,32 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; - } if (a->msz > a->esz) { return false; } + switch (a->esz) { + case MO_8 ... MO_64: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case MO_128: + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> a->esz; @@ -5228,14 +5827,14 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_zm = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); - uint32_t desc; + uint64_t desc; tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); - fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); + fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i64(desc)); } /* Indexed by [mte][be][ff][xs][u][msz]. 
*/ @@ -5566,6 +6165,14 @@ gather_load_fn64[2][2][2][3][2][4] = { gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; +static gen_helper_gvec_mem_scatter * const +gather_load_fn128[2][2] = { + { gen_helper_sve_ldqq_le_zd, + gen_helper_sve_ldqq_be_zd }, + { gen_helper_sve_ldqq_le_zd_mte, + gen_helper_sve_ldqq_be_zd_mte } +}; + static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5587,6 +6194,8 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) case MO_64: fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; + default: + g_assert_not_reached(); } assert(fn != NULL); @@ -5595,6 +6204,32 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) return true; } +static bool trans_LD1Q(DisasContext *s, arg_LD1Q *a) +{ + gen_helper_gvec_mem_scatter *fn = NULL; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + + fn = gather_load_fn128[mte][be]; + assert(fn != NULL); + + /* + * Unlike LD1_zprz, a->rm is the scalar register and it can be XZR, not XSP. + * a->rn is the vector register. + */ + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), MO_128, false, fn); + return true; +} + static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5754,6 +6389,14 @@ static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { gen_helper_sve_stdd_be_zd_mte, } } }, }; +static gen_helper_gvec_mem_scatter * const +scatter_store_fn128[2][2] = { + { gen_helper_sve_stqq_le_zd, + gen_helper_sve_stqq_be_zd }, + { gen_helper_sve_stqq_le_zd_mte, + gen_helper_sve_stqq_be_zd_mte } +}; + static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn; @@ -5785,6 +6428,29 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) return true; } +static bool trans_ST1Q(DisasContext *s, arg_ST1Q *a) +{ + gen_helper_gvec_mem_scatter *fn; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + fn = scatter_store_fn128[mte][be]; + /* + * Unlike ST1_zprz, a->rm is the scalar register, and it + * can be XZR, not XSP. a->rn is the vector register. 
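As the comment in trans_LD1Q notes, LD1Q swaps the usual operand roles: the vector register supplies the per-element bases and the scalar register one shared offset (zero when Rm is XZR). A sketch contrasting the two effective-address forms, assuming 64-bit elements:

    #include <stdint.h>
    #include <stdio.h>

    /* LD1_zprz-style: scalar base plus vector offsets. */
    static void ea_scalar_base(uint64_t *ea, uint64_t xn,
                               const uint64_t *zm, int n)
    {
        for (int i = 0; i < n; i++) {
            ea[i] = xn + zm[i];
        }
    }

    /* LD1Q-style: vector bases plus scalar offset. */
    static void ea_vector_base(uint64_t *ea, const uint64_t *zn,
                               uint64_t xm, int n)
    {
        for (int i = 0; i < n; i++) {
            ea[i] = zn[i] + xm;
        }
    }

    int main(void)
    {
        uint64_t off[2] = { 4, 8 }, bases[2] = { 0x1000, 0x2000 }, ea[2];
        ea_scalar_base(ea, 0x1000, off, 2);
        ea_vector_base(ea, bases, 0x10, 2);
        printf("%#llx %#llx\n", (unsigned long long)ea[0],
               (unsigned long long)ea[1]);
        return 0;
    }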
+ */ + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), MO_128, true, fn); + return true; +} + static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5911,6 +6577,7 @@ TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) */ TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) +TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a) static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, @@ -5929,13 +6596,6 @@ TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, gen_helper_gvec_pmul_b, a, 0) -static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { - gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, - gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, -}; -TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, - sqdmulh_zzz_fns[a->esz], a, 0) - static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, @@ -7008,17 +7668,26 @@ DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) +static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + if (sve_access_check(s)) { + if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) { + unallocated_encoding(s); + } else { + gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64); + } + } + return true; +} + +TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s) +TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d) + /* * SVE Integer Multiply-Add (unpredicated) */ -TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, - gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, - 0, FPST_A64) -TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, - gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, - 0, FPST_A64) - static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { NULL, gen_helper_sve2_sqdmlal_zzzw_h, gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, @@ -7111,8 +7780,13 @@ static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) -TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, - a->esz == 2 ? 
gen_helper_gvec_usdot_b : NULL, a, 0) +TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_usdot_4b, a, 0) + +TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_sdot_2h, a, 0) +TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_udot_2h, a, 0) TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, gen_helper_crypto_aesmc, a->rd, a->rd, 0) @@ -7174,7 +7848,7 @@ static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) { return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, a->rd, a->rn, a->rm, a->ra, - (a->index << 2) | (sel << 1) | sub, tcg_env); + (a->index << 3) | (sel << 1) | sub, tcg_env); } TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) @@ -7189,6 +7863,11 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, gen_helper_gvec_ummla_b, a, 0) +TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz, + gen_helper_sme2_fdot_h, a, 0) +TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz, + gen_helper_sme2_fdot_idx_h, a) + TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz, gen_helper_gvec_bfdot, a, 0) TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz, @@ -7218,6 +7897,36 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) +static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) +{ + if (s->fpcr_ah) { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl, + a->rd, a->rn, a->rm, a->ra, sel, FPST_AH); + } else { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl, + a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); + } +} + +TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false) +TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true) + +static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) +{ + if (s->fpcr_ah) { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx, + a->rd, a->rn, a->rm, a->ra, + (a->index << 1) | sel, FPST_AH); + } else { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx, + a->rd, a->rn, a->rm, a->ra, + (a->index << 1) | sel, FPST_A64); + } +} + +TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false) +TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true) + static bool trans_PSEL(DisasContext *s, arg_psel *a) { int vl = vec_full_reg_size(s); @@ -7226,7 +7935,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) TCGv_i64 tmp, didx, dbit; TCGv_ptr ptr; - if (!dc_isar_feature(aa64_sme, s)) { + if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) { return false; } if (!sve_access_check(s)) { @@ -7265,6 +7974,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) tcg_gen_neg_i64(tmp, tmp); /* Apply to either copy the source, or write zeros. 
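The do_FMLAL_zzxw change widens the packed immediate: sub stays at bit 0 and sel at bit 1, but the index now starts at bit 3 rather than bit 2, presumably leaving bit 2 free for another flag. A sketch of the pack/unpack convention, with hypothetical field names:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical layout of the descriptor data word:
     * bit 0 = sub, bit 1 = sel, index from bit 3 (was bit 2). */
    static uint32_t pack_data(unsigned index, unsigned sel, unsigned sub)
    {
        return (index << 3) | (sel << 1) | sub;
    }

    int main(void)
    {
        uint32_t d = pack_data(5, 1, 0);
        assert((d & 1) == 0);           /* sub */
        assert(((d >> 1) & 1) == 1);    /* sel */
        assert((d >> 3) == 5);          /* index */
        return 0;
    }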
*/ + pl = size_for_gvec(pl); tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), pred_full_reg_offset(s, a->pn), tmp, pl, pl); return true; @@ -7319,7 +8029,7 @@ static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) +TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a) static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) { @@ -7370,4 +8080,137 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) +TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a) + +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) +{ + static gen_helper_gvec_3_ptr * const fn[] = { + gen_helper_sme2_bfclamp, + gen_helper_sme2_fclamp_h, + gen_helper_sme2_fclamp_s, + gen_helper_sme2_fclamp_d, + }; + + /* This insn uses MO_8 to encode BFloat16. */ + if (a->esz == MO_8 + ? !dc_isar_feature(aa64_sve_b16b16, s) + : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) { + return false; + } + + /* So far we never optimize rda with MOVPRFX */ + assert(a->rd == a->ra); + return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + +TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0) +TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0) +TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0) + +static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png, + MemOp esz, bool is_write, int n, bool strided) +{ + typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64, + TCGv_i32, TCGv_i64); + static ldst_c_fn * const f_ldst[2][2][4] = { + { { gen_helper_sve2p1_ld1bb_c, + gen_helper_sve2p1_ld1hh_le_c, + gen_helper_sve2p1_ld1ss_le_c, + gen_helper_sve2p1_ld1dd_le_c, }, + { gen_helper_sve2p1_ld1bb_c, + gen_helper_sve2p1_ld1hh_be_c, + gen_helper_sve2p1_ld1ss_be_c, + gen_helper_sve2p1_ld1dd_be_c, } }, + + { { gen_helper_sve2p1_st1bb_c, + gen_helper_sve2p1_st1hh_le_c, + gen_helper_sve2p1_st1ss_le_c, + gen_helper_sve2p1_st1dd_le_c, }, + { gen_helper_sve2p1_st1bb_c, + gen_helper_sve2p1_st1hh_be_c, + gen_helper_sve2p1_st1ss_be_c, + gen_helper_sve2p1_st1dd_be_c, } } + }; + + TCGv_i32 t_png; + TCGv_i64 t_desc; + TCGv_ptr t_zd; + uint64_t desc, lg2_rstride = 0; + bool be = s->be_data == MO_BE; + + assert(n == 2 || n == 4); + if (strided) { + lg2_rstride = 3; + if (n == 4) { + /* Validate ZD alignment. */ + if (zd & 4) { + return false; + } + lg2_rstride = 2; + } + /* Ignore non-temporal bit */ + zd &= ~8; + } + + if (strided || !dc_isar_feature(aa64_sve2p1, s) + ? !sme_sm_enabled_check(s) + : !sve_access_check(s)) { + return true; + } + + if (!s->mte_active[0]) { + addr = clean_data_tbi(s, addr); + } + + desc = n == 2 ? 0 : 1; + desc = desc | (lg2_rstride << 1); + desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc); + t_desc = tcg_constant_i64(desc); + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, png) ^ + (HOST_BIG_ENDIAN ? 
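gen_ldst_c fetches the predicate-as-counter register as a 16-bit load from a 64-bit slot; the offset XOR picks byte 6 on a big-endian host so the same architectural low bits are read either way. A standalone sketch of that trick, detecting host order at run time instead of via HOST_BIG_ENDIAN:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Fetch the architectural low 16 bits of a 64-bit register slot
     * regardless of host byte order, mirroring the "offset ^ 6" trick. */
    static uint16_t load_low16(const uint64_t *slot)
    {
        const union { uint16_t u16; uint8_t u8[2]; } probe = { .u16 = 1 };
        int big_endian = (probe.u8[0] == 0);
        uint16_t out;

        memcpy(&out, (const uint8_t *)slot + (big_endian ? 6 : 0),
               sizeof(out));
        return out;
    }

    int main(void)
    {
        uint64_t slot = 0xABCD;         /* architectural low bits */
        assert(load_low16(&slot) == 0xABCD);
        return 0;
    }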
6 : 0)); + + t_zd = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); + + f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc); + return true; +} + +static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a, + bool is_write, bool strided) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); + tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); + return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, + a->nreg, strided); +} + +static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a, + bool is_write, bool strided) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), + a->imm * a->nreg * vec_full_reg_size(s)); + return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, + a->nreg, strided); +} + +TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false) +TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false) +TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false) +TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false) + +TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true) +TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true) +TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true) +TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true) diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index d280018..3df0bbc 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -27,6 +27,7 @@ #include "semihosting/semihost.h" #include "cpregs.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" #define HELPER_H "helper.h" #include "exec/helper-info.c.inc" @@ -43,8 +44,6 @@ #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7) #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8) -/* These are TCG temporaries used only by the legacy iwMMXt decoder */ -static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; /* These are TCG globals which alias CPUARMState fields */ static TCGv_i32 cpu_R[16]; TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; @@ -371,7 +370,7 @@ static void gen_rebuild_hflags(DisasContext *s, bool new_el) } } -static void gen_exception_internal(int excp) +void gen_exception_internal(int excp) { assert(excp_is_internal(excp)); gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); @@ -493,20 +492,9 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp = tcg_temp_new_i32(); - if (tcg_op_supported(INDEX_op_add2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_movi_i32(tmp, 0); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp); - } else { - TCGv_i64 q0 = tcg_temp_new_i64(); - TCGv_i64 q1 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(q0, t0); - tcg_gen_extu_i32_i64(q1, t1); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extu_i32_i64(q1, cpu_CF); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); - } + + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0, t1, cpu_CF); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); @@ -1100,6 +1088,57 @@ void gen_exception_insn(DisasContext *s, target_long pc_diff, s->base.is_jmp = DISAS_NORETURN; } +TCGLabel *delay_exception_el(DisasContext *s, int excp, + uint32_t syn, uint32_t target_el) +{ + /* Use tcg_malloc for automatic release on longjmp out 
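In translate.c, gen_adc_CC collapses the old add2/i64 fallback pair into a single tcg_gen_addcio_i32, which yields both the sum and the carry-out of t0 + t1 + carry-in; the V flag is still derived from the usual same-sign rule. A plain-C sketch of the flag arithmetic being expressed:

    #include <assert.h>
    #include <stdint.h>

    /* Add-with-carry producing NF source, carry-out and overflow,
     * the operation the single addcio op now expresses. */
    static uint32_t adc_flags(uint32_t t0, uint32_t t1, uint32_t cin,
                              uint32_t *cf, uint32_t *vf)
    {
        uint64_t wide = (uint64_t)t0 + t1 + cin;
        uint32_t res = (uint32_t)wide;

        *cf = (uint32_t)(wide >> 32);              /* carry out */
        *vf = ((res ^ t0) & ~(t0 ^ t1)) >> 31;     /* signed overflow */
        return res;                                /* NF/ZF source */
    }

    int main(void)
    {
        uint32_t cf, vf;
        uint32_t r = adc_flags(0xFFFFFFFFu, 0, 1, &cf, &vf);
        assert(r == 0 && cf == 1 && vf == 0);      /* -1 + 0 + 1 == 0 */
        return 0;
    }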
of translation. */ + DisasDelayException *e = tcg_malloc(sizeof(DisasDelayException)); + + memset(e, 0, sizeof(*e)); + + /* Save enough of the current state to satisfy gen_exception_insn. */ + e->pc_curr = s->pc_curr; + e->pc_save = s->pc_save; + if (!s->aarch64) { + e->condexec_cond = s->condexec_cond; + e->condexec_mask = s->condexec_mask; + } + + e->excp = excp; + e->syn = syn; + e->target_el = target_el; + + e->next = s->delay_excp_list; + s->delay_excp_list = e; + + e->lab = gen_new_label(); + return e->lab; +} + +TCGLabel *delay_exception(DisasContext *s, int excp, uint32_t syn) +{ + return delay_exception_el(s, excp, syn, 0); +} + +void emit_delayed_exceptions(DisasContext *s) +{ + for (DisasDelayException *e = s->delay_excp_list; e ; e = e->next) { + gen_set_label(e->lab); + + /* Restore the insn state to satisfy gen_exception_insn. */ + s->pc_curr = e->pc_curr; + s->pc_save = e->pc_save; + s->condexec_cond = e->condexec_cond; + s->condexec_mask = e->condexec_mask; + + if (e->target_el) { + gen_exception_insn_el(s, 0, e->excp, e->syn, e->target_el); + } else { + gen_exception_insn(s, 0, e->excp, e->syn); + } + } +} + static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn) { gen_set_condexec(s); @@ -1262,1263 +1301,6 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop) } } -#define ARM_CP_RW_BIT (1 << 20) - -static inline void iwmmxt_load_reg(TCGv_i64 var, int reg) -{ - tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); -} - -static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) -{ - tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); -} - -static inline TCGv_i32 iwmmxt_load_creg(int reg) -{ - TCGv_i32 var = tcg_temp_new_i32(); - tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); - return var; -} - -static inline void iwmmxt_store_creg(int reg, TCGv_i32 var) -{ - tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); -} - -static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) -{ - iwmmxt_store_reg(cpu_M0, rn); -} - -static inline void gen_op_iwmmxt_movq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_M0, rn); -} - -static inline void gen_op_iwmmxt_orq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline void gen_op_iwmmxt_andq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1); -} - -#define IWMMXT_OP(name) \ -static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ -{ \ - iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \ -} - -#define IWMMXT_OP_ENV(name) \ -static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ -{ \ - iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \ -} - -#define IWMMXT_OP_ENV_SIZE(name) \ -IWMMXT_OP_ENV(name##b) \ -IWMMXT_OP_ENV(name##w) \ -IWMMXT_OP_ENV(name##l) - -#define IWMMXT_OP_ENV1(name) \ -static inline void gen_op_iwmmxt_##name##_M0(void) \ -{ \ - gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \ -} - -IWMMXT_OP(maddsq) -IWMMXT_OP(madduq) -IWMMXT_OP(sadb) -IWMMXT_OP(sadw) -IWMMXT_OP(mulslw) -IWMMXT_OP(mulshw) -IWMMXT_OP(mululw) -IWMMXT_OP(muluhw) -IWMMXT_OP(macsw) -IWMMXT_OP(macuw) - -IWMMXT_OP_ENV_SIZE(unpackl) -IWMMXT_OP_ENV_SIZE(unpackh) - -IWMMXT_OP_ENV1(unpacklub) -IWMMXT_OP_ENV1(unpackluw) -IWMMXT_OP_ENV1(unpacklul) 
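delay_exception_el records just enough translator state in a node chained off the DisasContext and hands back a label to branch to; emit_delayed_exceptions then replays each recorded exception out of line at the end of the TB. A generic sketch of that deferred-work pattern, with plain malloc standing in for the tcg_malloc arena (nodes are never freed, as with the arena):

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical deferred-exception record; not the QEMU API. */
    struct delayed {
        int excp;
        unsigned pc;
        struct delayed *next;
    };

    static struct delayed *pending;

    static struct delayed *delay(int excp, unsigned pc)
    {
        struct delayed *e = malloc(sizeof(*e));
        e->excp = excp;
        e->pc = pc;
        e->next = pending;      /* prepend, as the real list does */
        pending = e;
        return e;               /* caller branches here on failure */
    }

    static void emit_all(void)
    {
        for (struct delayed *e = pending; e; e = e->next) {
            printf("out-of-line: excp %d at pc %#x\n", e->excp, e->pc);
        }
    }

    int main(void)
    {
        delay(1, 0x1000);
        delay(2, 0x1004);
        emit_all();             /* emitted in reverse registration order */
        return 0;
    }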
-IWMMXT_OP_ENV1(unpackhub) -IWMMXT_OP_ENV1(unpackhuw) -IWMMXT_OP_ENV1(unpackhul) -IWMMXT_OP_ENV1(unpacklsb) -IWMMXT_OP_ENV1(unpacklsw) -IWMMXT_OP_ENV1(unpacklsl) -IWMMXT_OP_ENV1(unpackhsb) -IWMMXT_OP_ENV1(unpackhsw) -IWMMXT_OP_ENV1(unpackhsl) - -IWMMXT_OP_ENV_SIZE(cmpeq) -IWMMXT_OP_ENV_SIZE(cmpgtu) -IWMMXT_OP_ENV_SIZE(cmpgts) - -IWMMXT_OP_ENV_SIZE(mins) -IWMMXT_OP_ENV_SIZE(minu) -IWMMXT_OP_ENV_SIZE(maxs) -IWMMXT_OP_ENV_SIZE(maxu) - -IWMMXT_OP_ENV_SIZE(subn) -IWMMXT_OP_ENV_SIZE(addn) -IWMMXT_OP_ENV_SIZE(subu) -IWMMXT_OP_ENV_SIZE(addu) -IWMMXT_OP_ENV_SIZE(subs) -IWMMXT_OP_ENV_SIZE(adds) - -IWMMXT_OP_ENV(avgb0) -IWMMXT_OP_ENV(avgb1) -IWMMXT_OP_ENV(avgw0) -IWMMXT_OP_ENV(avgw1) - -IWMMXT_OP_ENV(packuw) -IWMMXT_OP_ENV(packul) -IWMMXT_OP_ENV(packuq) -IWMMXT_OP_ENV(packsw) -IWMMXT_OP_ENV(packsl) -IWMMXT_OP_ENV(packsq) - -static void gen_op_iwmmxt_set_mup(void) -{ - TCGv_i32 tmp; - tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); - tcg_gen_ori_i32(tmp, tmp, 2); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); -} - -static void gen_op_iwmmxt_set_cup(void) -{ - TCGv_i32 tmp; - tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); - tcg_gen_ori_i32(tmp, tmp, 1); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); -} - -static void gen_op_iwmmxt_setpsr_nz(void) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]); -} - -static inline void gen_op_iwmmxt_addl_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_ext32u_i64(cpu_V1, cpu_V1); - tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, - TCGv_i32 dest) -{ - int rd; - uint32_t offset; - TCGv_i32 tmp; - - rd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - - offset = (insn & 0xff) << ((insn >> 7) & 2); - if (insn & (1 << 24)) { - /* Pre indexed */ - if (insn & (1 << 23)) - tcg_gen_addi_i32(tmp, tmp, offset); - else - tcg_gen_addi_i32(tmp, tmp, -offset); - tcg_gen_mov_i32(dest, tmp); - if (insn & (1 << 21)) { - store_reg(s, rd, tmp); - } - } else if (insn & (1 << 21)) { - /* Post indexed */ - tcg_gen_mov_i32(dest, tmp); - if (insn & (1 << 23)) - tcg_gen_addi_i32(tmp, tmp, offset); - else - tcg_gen_addi_i32(tmp, tmp, -offset); - store_reg(s, rd, tmp); - } else if (!(insn & (1 << 23))) - return 1; - return 0; -} - -static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest) -{ - int rd = (insn >> 0) & 0xf; - TCGv_i32 tmp; - - if (insn & (1 << 8)) { - if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) { - return 1; - } else { - tmp = iwmmxt_load_creg(rd); - } - } else { - tmp = tcg_temp_new_i32(); - iwmmxt_load_reg(cpu_V0, rd); - tcg_gen_extrl_i64_i32(tmp, cpu_V0); - } - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_mov_i32(dest, tmp); - return 0; -} - -/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred - (ie. an undefined instruction). 
*/ -static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) -{ - int rd, wrd; - int rdhi, rdlo, rd0, rd1, i; - TCGv_i32 addr; - TCGv_i32 tmp, tmp2, tmp3; - - if ((insn & 0x0e000e00) == 0x0c000000) { - if ((insn & 0x0fe00ff0) == 0x0c400000) { - wrd = insn & 0xf; - rdlo = (insn >> 12) & 0xf; - rdhi = (insn >> 16) & 0xf; - if (insn & ARM_CP_RW_BIT) { /* TMRRC */ - iwmmxt_load_reg(cpu_V0, wrd); - tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); - tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0); - } else { /* TMCRR */ - tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); - iwmmxt_store_reg(cpu_V0, wrd); - gen_op_iwmmxt_set_mup(); - } - return 0; - } - - wrd = (insn >> 12) & 0xf; - addr = tcg_temp_new_i32(); - if (gen_iwmmxt_address(s, insn, addr)) { - return 1; - } - if (insn & ARM_CP_RW_BIT) { - if ((insn >> 28) == 0xf) { /* WLDRW wCx */ - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - iwmmxt_store_creg(wrd, tmp); - } else { - i = 1; - if (insn & (1 << 8)) { - if (insn & (1 << 22)) { /* WLDRD */ - gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s)); - i = 0; - } else { /* WLDRW wRd */ - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - } - } else { - tmp = tcg_temp_new_i32(); - if (insn & (1 << 22)) { /* WLDRH */ - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - } else { /* WLDRB */ - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - } - } - if (i) { - tcg_gen_extu_i32_i64(cpu_M0, tmp); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - } - } else { - if ((insn >> 28) == 0xf) { /* WSTRW wCx */ - tmp = iwmmxt_load_creg(wrd); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - } else { - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = tcg_temp_new_i32(); - if (insn & (1 << 8)) { - if (insn & (1 << 22)) { /* WSTRD */ - gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s)); - } else { /* WSTRW wRd */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - } - } else { - if (insn & (1 << 22)) { /* WSTRH */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - } else { /* WSTRB */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - } - } - } - } - return 0; - } - - if ((insn & 0x0f000000) != 0x0e000000) - return 1; - - switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) { - case 0x000: /* WOR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_orq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x011: /* TMCR */ - if (insn & 0xf) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - switch (wrd) { - case ARM_IWMMXT_wCID: - case ARM_IWMMXT_wCASF: - break; - case ARM_IWMMXT_wCon: - gen_op_iwmmxt_set_cup(); - /* Fall through. 
*/ - case ARM_IWMMXT_wCSSF: - tmp = iwmmxt_load_creg(wrd); - tmp2 = load_reg(s, rd); - tcg_gen_andc_i32(tmp, tmp, tmp2); - iwmmxt_store_creg(wrd, tmp); - break; - case ARM_IWMMXT_wCGR0: - case ARM_IWMMXT_wCGR1: - case ARM_IWMMXT_wCGR2: - case ARM_IWMMXT_wCGR3: - gen_op_iwmmxt_set_cup(); - tmp = load_reg(s, rd); - iwmmxt_store_creg(wrd, tmp); - break; - default: - return 1; - } - break; - case 0x100: /* WXOR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_xorq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x111: /* TMRC */ - if (insn & 0xf) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = iwmmxt_load_creg(wrd); - store_reg(s, rd, tmp); - break; - case 0x300: /* WANDN */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tcg_gen_neg_i64(cpu_M0, cpu_M0); - gen_op_iwmmxt_andq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x200: /* WAND */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_andq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x810: case 0xa10: /* WMADD */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) - gen_op_iwmmxt_maddsq_M0_wRn(rd1); - else - gen_op_iwmmxt_madduq_M0_wRn(rd1); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_unpacklb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_unpacklw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_unpackll_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_unpackhb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_unpackhw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_unpackhl_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 22)) - gen_op_iwmmxt_sadw_M0_wRn(rd1); - else - gen_op_iwmmxt_sadb_M0_wRn(rd1); - if (!(insn & (1 << 20))) - gen_op_iwmmxt_addl_M0_wRn(wrd); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) { - if (insn & (1 << 20)) - gen_op_iwmmxt_mulshw_M0_wRn(rd1); - else - 
gen_op_iwmmxt_mulslw_M0_wRn(rd1); - } else { - if (insn & (1 << 20)) - gen_op_iwmmxt_muluhw_M0_wRn(rd1); - else - gen_op_iwmmxt_mululw_M0_wRn(rd1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) - gen_op_iwmmxt_macsw_M0_wRn(rd1); - else - gen_op_iwmmxt_macuw_M0_wRn(rd1); - if (!(insn & (1 << 20))) { - iwmmxt_load_reg(cpu_V1, wrd); - tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_cmpeqb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_cmpeqw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_cmpeql_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 22)) { - if (insn & (1 << 20)) - gen_op_iwmmxt_avgw1_M0_wRn(rd1); - else - gen_op_iwmmxt_avgw0_M0_wRn(rd1); - } else { - if (insn & (1 << 20)) - gen_op_iwmmxt_avgb1_M0_wRn(rd1); - else - gen_op_iwmmxt_avgb0_M0_wRn(rd1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3)); - tcg_gen_andi_i32(tmp, tmp, 7); - iwmmxt_load_reg(cpu_V1, rd1); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */ - if (((insn >> 6) & 3) == 3) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - gen_op_iwmmxt_movq_M0_wRn(wrd); - switch ((insn >> 6) & 3) { - case 0: - tmp2 = tcg_constant_i32(0xff); - tmp3 = tcg_constant_i32((insn & 7) << 3); - break; - case 1: - tmp2 = tcg_constant_i32(0xffff); - tmp3 = tcg_constant_i32((insn & 3) << 4); - break; - case 2: - tmp2 = tcg_constant_i32(0xffffffff); - tmp3 = tcg_constant_i32((insn & 1) << 5); - break; - default: - g_assert_not_reached(); - } - gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */ - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - if (rd == 15 || ((insn >> 22) & 3) == 3) - return 1; - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 0: - tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - if (insn & 8) { - tcg_gen_ext8s_i32(tmp, tmp); - } else { - tcg_gen_andi_i32(tmp, tmp, 0xff); - } - break; - case 1: - tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - if (insn & 8) { - tcg_gen_ext16s_i32(tmp, tmp); - } else { - tcg_gen_andi_i32(tmp, tmp, 0xffff); - } - break; - case 2: - 
tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - break; - } - store_reg(s, rd, tmp); - break; - case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */ - if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - switch ((insn >> 22) & 3) { - case 0: - tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0); - break; - case 1: - tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4); - break; - case 2: - tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12); - break; - } - tcg_gen_shli_i32(tmp, tmp, 28); - gen_set_nzcv(tmp); - break; - case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ - if (((insn >> 6) & 3) == 3) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - switch ((insn >> 6) & 3) { - case 0: - gen_helper_iwmmxt_bcstb(cpu_M0, tmp); - break; - case 1: - gen_helper_iwmmxt_bcstw(cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_bcstl(cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */ - if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - switch ((insn >> 22) & 3) { - case 0: - for (i = 0; i < 7; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 4); - tcg_gen_and_i32(tmp, tmp, tmp2); - } - break; - case 1: - for (i = 0; i < 3; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 8); - tcg_gen_and_i32(tmp, tmp, tmp2); - } - break; - case 2: - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - } - gen_set_nzcv(tmp); - break; - case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0); - break; - case 1: - gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0); - break; - case 2: - gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */ - if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - switch ((insn >> 22) & 3) { - case 0: - for (i = 0; i < 7; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 4); - tcg_gen_or_i32(tmp, tmp, tmp2); - } - break; - case 1: - for (i = 0; i < 3; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 8); - tcg_gen_or_i32(tmp, tmp, tmp2); - } - break; - case 2: - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - } - gen_set_nzcv(tmp); - break; - case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ - rd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3) - return 1; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 0: - gen_helper_iwmmxt_msbb(tmp, cpu_M0); - break; - case 1: - gen_helper_iwmmxt_msbw(tmp, cpu_M0); - break; - case 2: - gen_helper_iwmmxt_msbl(tmp, cpu_M0); - break; - } - store_reg(s, rd, tmp); - break; - case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */ - case 0x906: case 0xb06: case 0xd06: case 0xf06: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - 
gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */ - case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsb_M0(); - else - gen_op_iwmmxt_unpacklub_M0(); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsw_M0(); - else - gen_op_iwmmxt_unpackluw_M0(); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsl_M0(); - else - gen_op_iwmmxt_unpacklul_M0(); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */ - case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsb_M0(); - else - gen_op_iwmmxt_unpackhub_M0(); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsw_M0(); - else - gen_op_iwmmxt_unpackhuw_M0(); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsl_M0(); - else - gen_op_iwmmxt_unpackhul_M0(); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */ - case 0x214: case 0x614: case 0xa14: case 0xe14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */ - case 0x014: case 0x414: case 0x814: case 0xc14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */ - case 0x114: case 0x514: case 0x914: case 0xd14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - 
gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */ - case 0x314: case 0x714: case 0xb14: case 0xf14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 1: - if (gen_iwmmxt_shift(insn, 0xf, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */ - case 0x916: case 0xb16: case 0xd16: case 0xf16: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsb_M0_wRn(rd1); - else - gen_op_iwmmxt_minub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsw_M0_wRn(rd1); - else - gen_op_iwmmxt_minuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsl_M0_wRn(rd1); - else - gen_op_iwmmxt_minul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */ - case 0x816: case 0xa16: case 0xc16: case 0xe16: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsb_M0_wRn(rd1); - else - gen_op_iwmmxt_maxub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsw_M0_wRn(rd1); - else - gen_op_iwmmxt_maxuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsl_M0_wRn(rd1); - else - gen_op_iwmmxt_maxul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */ - case 0x402: case 0x502: case 0x602: case 0x702: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - iwmmxt_load_reg(cpu_V1, rd1); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, - tcg_constant_i32((insn >> 20) & 3)); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */ - case 0x41a: case 0x51a: case 0x61a: case 0x71a: - case 0x81a: case 0x91a: case 0xa1a: case 0xb1a: - case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 20) & 0xf) { - case 0x0: - gen_op_iwmmxt_subnb_M0_wRn(rd1); - break; - case 
0x1: - gen_op_iwmmxt_subub_M0_wRn(rd1); - break; - case 0x3: - gen_op_iwmmxt_subsb_M0_wRn(rd1); - break; - case 0x4: - gen_op_iwmmxt_subnw_M0_wRn(rd1); - break; - case 0x5: - gen_op_iwmmxt_subuw_M0_wRn(rd1); - break; - case 0x7: - gen_op_iwmmxt_subsw_M0_wRn(rd1); - break; - case 0x8: - gen_op_iwmmxt_subnl_M0_wRn(rd1); - break; - case 0x9: - gen_op_iwmmxt_subul_M0_wRn(rd1); - break; - case 0xb: - gen_op_iwmmxt_subsl_M0_wRn(rd1); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */ - case 0x41e: case 0x51e: case 0x61e: case 0x71e: - case 0x81e: case 0x91e: case 0xa1e: case 0xb1e: - case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); - gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */ - case 0x418: case 0x518: case 0x618: case 0x718: - case 0x818: case 0x918: case 0xa18: case 0xb18: - case 0xc18: case 0xd18: case 0xe18: case 0xf18: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 20) & 0xf) { - case 0x0: - gen_op_iwmmxt_addnb_M0_wRn(rd1); - break; - case 0x1: - gen_op_iwmmxt_addub_M0_wRn(rd1); - break; - case 0x3: - gen_op_iwmmxt_addsb_M0_wRn(rd1); - break; - case 0x4: - gen_op_iwmmxt_addnw_M0_wRn(rd1); - break; - case 0x5: - gen_op_iwmmxt_adduw_M0_wRn(rd1); - break; - case 0x7: - gen_op_iwmmxt_addsw_M0_wRn(rd1); - break; - case 0x8: - gen_op_iwmmxt_addnl_M0_wRn(rd1); - break; - case 0x9: - gen_op_iwmmxt_addul_M0_wRn(rd1); - break; - case 0xb: - gen_op_iwmmxt_addsl_M0_wRn(rd1); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */ - case 0x408: case 0x508: case 0x608: case 0x708: - case 0x808: case 0x908: case 0xa08: case 0xb08: - case 0xc08: case 0xd08: case 0xe08: case 0xf08: - if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsw_M0_wRn(rd1); - else - gen_op_iwmmxt_packuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsl_M0_wRn(rd1); - else - gen_op_iwmmxt_packul_M0_wRn(rd1); - break; - case 3: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsq_M0_wRn(rd1); - else - gen_op_iwmmxt_packuq_M0_wRn(rd1); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x201: case 0x203: case 0x205: case 0x207: - case 0x209: case 0x20b: case 0x20d: case 0x20f: - case 0x211: case 0x213: case 0x215: case 0x217: - case 0x219: case 0x21b: case 0x21d: case 0x21f: - wrd = (insn >> 5) & 0xf; - rd0 = (insn >> 12) & 0xf; - rd1 = (insn >> 0) & 0xf; - if (rd0 == 0xf || rd1 == 0xf) - return 1; - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = load_reg(s, rd0); - tmp2 = load_reg(s, rd1); - switch ((insn >> 16) & 0xf) { - case 0x0: /* TMIA */ - gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0x8: /* TMIAPH */ - 
gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */ - if (insn & (1 << 16)) - tcg_gen_shri_i32(tmp, tmp, 16); - if (insn & (1 << 17)) - tcg_gen_shri_i32(tmp2, tmp2, 16); - gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - default: - return 1; - } - - return 0; -} - -/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred - (ie. an undefined instruction). */ -static int disas_dsp_insn(DisasContext *s, uint32_t insn) -{ - int acc, rd0, rd1, rdhi, rdlo; - TCGv_i32 tmp, tmp2; - - if ((insn & 0x0ff00f10) == 0x0e200010) { - /* Multiply with Internal Accumulate Format */ - rd0 = (insn >> 12) & 0xf; - rd1 = insn & 0xf; - acc = (insn >> 5) & 7; - - if (acc != 0) - return 1; - - tmp = load_reg(s, rd0); - tmp2 = load_reg(s, rd1); - switch ((insn >> 16) & 0xf) { - case 0x0: /* MIA */ - gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0x8: /* MIAPH */ - gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0xc: /* MIABB */ - case 0xd: /* MIABT */ - case 0xe: /* MIATB */ - case 0xf: /* MIATT */ - if (insn & (1 << 16)) - tcg_gen_shri_i32(tmp, tmp, 16); - if (insn & (1 << 17)) - tcg_gen_shri_i32(tmp2, tmp2, 16); - gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); - break; - default: - return 1; - } - - gen_op_iwmmxt_movq_wRn_M0(acc); - return 0; - } - - if ((insn & 0x0fe00ff8) == 0x0c400000) { - /* Internal Accumulator Access Format */ - rdhi = (insn >> 16) & 0xf; - rdlo = (insn >> 12) & 0xf; - acc = insn & 7; - - if (acc != 0) - return 1; - - if (insn & ARM_CP_RW_BIT) { /* MRA */ - iwmmxt_load_reg(cpu_V0, acc); - tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); - tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0); - tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); - } else { /* MAR */ - tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); - iwmmxt_store_reg(cpu_V0, acc); - } - return 0; - } - - return 1; -} - static void gen_goto_ptr(void) { tcg_gen_lookup_and_goto_ptr(); @@ -2992,21 +1774,11 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, if (maskbit != 4 && maskbit != 14) { /* T4 and T14 are RES0 so never cause traps */ - TCGv_i32 t; - DisasLabel over = gen_disas_label(s); - - t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2)); - tcg_gen_andi_i32(t, t, 1u << maskbit); - tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label); + TCGLabel *fail = delay_exception_el(s, EXCP_UDEF, syndrome, 2); + TCGv_i32 t = + load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2)); - gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); - /* - * gen_exception_insn() will set is_jmp to DISAS_NORETURN, - * but since we're conditionally branching over it, we want - * to assume continue-to-next-instruction. - */ - s->base.is_jmp = DISAS_NEXT; - set_disas_label(s, over); + tcg_gen_brcondi_i32(TCG_COND_TSTNE, t, 1u << maskbit, fail); } } @@ -3058,13 +1830,10 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, } if ((s->hstr_active && s->current_el == 0) || ri->accessfn || - (ri->fgt && s->fgt_active) || - (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) { + (ri->fgt && s->fgt_active)) { /* * Emit code to perform further access permissions checks at * runtime; this may result in an exception. - * Note that on XScale all cp0..c13 registers do an access check - * call in order to handle c15_cpar. 
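Earlier in this hunk, the HSTR_EL2 trap check in do_coproc_insn() is rewritten to branch to an out-of-line exception stub instead of branching over an inline one. A schematic before/after, simplified from the surrounding code rather than quoted verbatim:

    /* Old shape: mask, compare, skip over the inline exception on the
     * common path, then undo the DISAS_NORETURN that
     * gen_exception_insn_el() sets. */
    DisasLabel over = gen_disas_label(s);
    tcg_gen_andi_i32(t, t, 1u << maskbit);
    tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
    gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
    s->base.is_jmp = DISAS_NEXT;
    set_disas_label(s, over);

    /* New shape: one conditional branch forward to a label whose
     * exception code is emitted once at the end of the TB;
     * TCG_COND_TSTNE folds the and+compare into a single test, and
     * the fall-through path needs no is_jmp fixup. */
    TCGLabel *fail = delay_exception_el(s, EXCP_UDEF, syndrome, 2);
    tcg_gen_brcondi_i32(TCG_COND_TSTNE, t, 1u << maskbit, fail);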
*/ gen_set_condexec(s); gen_update_pc(s, 0); @@ -3202,24 +1971,6 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, } } -/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */ -static void disas_xscale_insn(DisasContext *s, uint32_t insn) -{ - int cpnum = (insn >> 8) & 0xf; - - if (extract32(s->c15_cpar, cpnum, 1) == 0) { - unallocated_encoding(s); - } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { - if (disas_iwmmxt_insn(s, insn)) { - unallocated_encoding(s); - } - } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { - if (disas_dsp_insn(s, insn)) { - unallocated_encoding(s); - } - } -} - /* Store a 64-bit value to a register pair. Clobbers val. */ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) { @@ -3579,14 +2330,7 @@ static bool valid_cp(DisasContext *s, int cp) * only cp14 and cp15 are valid, and other values aren't considered * to be in the coprocessor-instruction space at all. v8M still * permits coprocessors 0..7. - * For XScale, we must not decode the XScale cp0, cp1 space as - * a standard coprocessor insn, because we want to fall through to - * the legacy disas_xscale_insn() decoder after decodetree is done. */ - if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) { - return false; - } - if (arm_dc_feature(s, ARM_FEATURE_V8) && !arm_dc_feature(s, ARM_FEATURE_M)) { return cp >= 14; @@ -6854,11 +5598,10 @@ static bool trans_LE(DisasContext *s, arg_LE *a) if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) { /* Need to do a runtime check for LTPSIZE != 4 */ - DisasLabel skipexc = gen_disas_label(s); + TCGLabel *fail = delay_exception(s, EXCP_INVSTATE, syn_uncategorized()); + tmp = load_cpu_field(v7m.ltpsize); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label); - gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); - set_disas_label(s, skipexc); + tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 4, fail); } if (a->f) { @@ -7353,18 +6096,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) disas_neon_shared(s, insn)) { return; } - /* fall back to legacy decoder */ - - if ((insn & 0x0e000f00) == 0x0c000100) { - if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { - /* iWMMXt register transfer. */ - if (extract32(s->c15_cpar, 1, 1)) { - if (!disas_iwmmxt_insn(s, insn)) { - return; - } - } - } - } goto illegal_op; } if (cond != 0xe) { @@ -7378,16 +6109,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) disas_vfp(s, insn)) { return; } - /* fall back to legacy decoder */ - /* TODO: convert xscale/iwmmxt decoder to decodetree ?? 
*/ - if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { - if (((insn & 0x0c000e00) == 0x0c000000) - && ((insn & 0x03000000) != 0x03000000)) { - /* Coprocessor insn, coprocessor 0 or 1 */ - disas_xscale_insn(s, insn); - return; - } - } + /* We didn't match anything in the decoder: UNDEF */ illegal_op: unallocated_encoding(s); @@ -7616,12 +6338,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE); dc->ns = EX_TBFLAG_A32(tb_flags, NS); dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN); - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR); - } else { - dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); - dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); - } + dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); + dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); dc->sme_trap_nonstreaming = EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); } @@ -7661,10 +6379,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; dc->base.max_insns = MIN(dc->base.max_insns, bound); } - - cpu_V0 = tcg_temp_new_i64(); - cpu_V1 = tcg_temp_new_i64(); - cpu_M0 = tcg_temp_new_i64(); } static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) @@ -7770,7 +6484,8 @@ static bool arm_check_ss_active(DisasContext *dc) static void arm_post_translate_insn(DisasContext *dc) { - if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) { + if (dc->condjmp && + (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY)) { if (dc->pc_save != dc->condlabel.pc_save) { gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save); } @@ -7800,7 +6515,7 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * be possible after an indirect branch, at the start of the TB. */ assert(dc->base.num_insns == 1); - gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc)); dc->base.is_jmp = DISAS_NORETURN; dc->base.pc_next = QEMU_ALIGN_UP(pc, 4); return; @@ -8116,6 +6831,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, curr_insn_len(dc)); } } + + emit_delayed_exceptions(dc); } static const TranslatorOps arm_translator_ops = { diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index 53e485d..9a85ea7 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -4,7 +4,6 @@ #include "cpu.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" -#include "exec/exec-all.h" #include "exec/translator.h" #include "exec/translation-block.h" #include "exec/helper-gen.h" @@ -22,9 +21,25 @@ typedef struct DisasLabel { target_ulong pc_save; } DisasLabel; +/* + * Emit an exception call out of line. + */ +typedef struct DisasDelayException { + struct DisasDelayException *next; + TCGLabel *lab; + target_long pc_curr; + target_long pc_save; + int condexec_mask; + int condexec_cond; + uint32_t excp; + uint32_t syn; + uint32_t target_el; +} DisasDelayException; + typedef struct DisasContext { DisasContextBase base; const ARMISARegisters *isar; + DisasDelayException *delay_excp_list; /* The address of the current instruction being translated. 
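The DisasDelayException record above captures everything needed to materialize an exception after the fact: the label the call site branches to, the PC bookkeeping (pc_curr/pc_save), the IT-block state (condexec_mask/condexec_cond), and the exception parameters themselves (excp/syn/target_el). The emitter's body is not part of this excerpt, so the following is only a sketch of the assumed shape of draining the list at the end of a TB:

    /* Sketch only; the real emit_delayed_exceptions() is not shown in
     * this diff and may differ in detail. */
    void emit_delayed_exceptions(DisasContext *s)
    {
        for (DisasDelayException *e = s->delay_excp_list; e; e = e->next) {
            gen_set_label(e->lab);
            /* Restore the state captured by delay_exception_el() so
             * the exception helpers see the faulting insn's context. */
            s->pc_curr = e->pc_curr;
            s->pc_save = e->pc_save;
            s->condexec_mask = e->condexec_mask;
            s->condexec_cond = e->condexec_cond;
            gen_exception_insn_el(s, 0, e->excp, e->syn, e->target_el);
        }
    }

The call to emit_delayed_exceptions(dc) added to arm_tr_tb_stop() later in this patch is what actually flushes the list.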
*/ target_ulong pc_curr; @@ -71,8 +86,10 @@ typedef struct DisasContext { int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ int sme_excp_el; /* SME exception EL or 0 if enabled */ + int zt0_excp_el; /* ZT0 exception EL or 0 if enabled */ int vl; /* current vector length in bytes */ int svl; /* current streaming vector length in bytes */ + int max_svl; /* maximum implemented streaming vector length */ bool vfp_enabled; /* FP enabled via FPSCR.EN */ int vec_len; int vec_stride; @@ -149,6 +166,8 @@ typedef struct DisasContext { bool trap_eret; /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */ bool naa; + /* True if HCR_EL2.E2H is set */ + bool e2h; /* True if FEAT_NV HCR_EL2.NV is enabled */ bool nv; /* True if NV enabled and HCR_EL2.NV1 is set */ @@ -163,6 +182,12 @@ typedef struct DisasContext { bool fpcr_ah; /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ bool fpcr_nep; + /* True if GCSEnabled. */ + bool gcs_en; + /* True if GCSReturnValueCheckEnabled. */ + bool gcs_rvcen; + /* GCSSTR exception EL or 0 if enabled */ + uint8_t gcsstr_el; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. @@ -174,8 +199,6 @@ typedef struct DisasContext { uint8_t gm_blocksize; /* True if the current insn_start has been updated. */ bool insn_start_updated; - /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ - int c15_cpar; /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */ uint32_t nv2_redirect_offset; } DisasContext; @@ -209,6 +232,11 @@ static inline int plus_2(DisasContext *s, int x) return x + 2; } +static inline int plus_8(DisasContext *s, int x) +{ + return x + 8; +} + static inline int plus_12(DisasContext *s, int x) { return x + 12; @@ -348,10 +376,15 @@ void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); MemOp pow2_align(unsigned i); void unallocated_encoding(DisasContext *s); +void gen_exception_internal(int excp); void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp, uint32_t syn, uint32_t target_el); void gen_exception_insn(DisasContext *s, target_long pc_diff, int excp, uint32_t syn); +TCGLabel *delay_exception_el(DisasContext *s, int excp, + uint32_t syn, uint32_t target_el); +TCGLabel *delay_exception(DisasContext *s, int excp, uint32_t syn); +void emit_delayed_exceptions(DisasContext *s); /* Return state of Alternate Half-precision flag, caller frees result */ static inline TCGv_i32 get_ahp_flag(void) @@ -365,27 +398,27 @@ static inline TCGv_i32 get_ahp_flag(void) } /* Set bits within PSTATE. */ -static inline void set_pstate_bits(uint32_t bits) +static inline void set_pstate_bits(uint64_t bits) { - TCGv_i32 p = tcg_temp_new_i32(); + TCGv_i64 p = tcg_temp_new_i64(); tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); - tcg_gen_ori_i32(p, p, bits); - tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i64(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ori_i64(p, p, bits); + tcg_gen_st_i64(p, tcg_env, offsetof(CPUARMState, pstate)); } /* Clear bits within PSTATE. 
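One detail worth flagging in the set_pstate_bits() widening just above (clear_pstate_bits() follows suit right after): moving the accessors to uint64_t with i64 loads and stores only works if CPUARMState.pstate itself is now a 64-bit field, a change outside this excerpt. Callers then set architected PSTATE bits above bit 31 exactly as before, e.g. (hypothetical bit position, purely for illustration):

    set_pstate_bits(1ull << 34);    /* some hypothetical high PSTATE bit */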
*/ -static inline void clear_pstate_bits(uint32_t bits) +static inline void clear_pstate_bits(uint64_t bits) { - TCGv_i32 p = tcg_temp_new_i32(); + TCGv_i64 p = tcg_temp_new_i64(); tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); - tcg_gen_andi_i32(p, p, ~bits); - tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i64(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_andi_i64(p, p, ~bits); + tcg_gen_st_i64(p, tcg_env, offsetof(CPUARMState, pstate)); } /* If the singlestep state is Active-not-pending, advance to Active-pending. */ @@ -636,6 +669,8 @@ typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen3FnVar(unsigned, TCGv_ptr, uint32_t, TCGv_ptr, uint32_t, + TCGv_ptr, uint32_t, uint32_t, uint32_t); /* Function prototype for gen_ functions for calling Neon helpers */ typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32); diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 986eaf8..33a136b 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -825,11 +825,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } -DO_DOT(gvec_sdot_b, int32_t, int8_t, int8_t) -DO_DOT(gvec_udot_b, uint32_t, uint8_t, uint8_t) -DO_DOT(gvec_usdot_b, uint32_t, uint8_t, int8_t) -DO_DOT(gvec_sdot_h, int64_t, int16_t, int16_t) -DO_DOT(gvec_udot_h, uint64_t, uint16_t, uint16_t) +DO_DOT(gvec_sdot_4b, int32_t, int8_t, int8_t) +DO_DOT(gvec_udot_4b, uint32_t, uint8_t, uint8_t) +DO_DOT(gvec_usdot_4b, uint32_t, uint8_t, int8_t) +DO_DOT(gvec_sdot_4h, int64_t, int16_t, int16_t) +DO_DOT(gvec_udot_4h, uint64_t, uint16_t, uint16_t) #define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ @@ -865,12 +865,63 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } -DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4) -DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4) -DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4) -DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4) -DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8) -DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8) +DO_DOT_IDX(gvec_sdot_idx_4b, int32_t, int8_t, int8_t, H4) +DO_DOT_IDX(gvec_udot_idx_4b, uint32_t, uint8_t, uint8_t, H4) +DO_DOT_IDX(gvec_sudot_idx_4b, int32_t, int8_t, uint8_t, H4) +DO_DOT_IDX(gvec_usdot_idx_4b, int32_t, uint8_t, int8_t, H4) +DO_DOT_IDX(gvec_sdot_idx_4h, int64_t, int16_t, int16_t, H8) +DO_DOT_IDX(gvec_udot_idx_4h, uint64_t, uint16_t, uint16_t, H8) + +#undef DO_DOT +#undef DO_DOT_IDX + +/* Similar for 2-way dot product */ +#define DO_DOT(NAME, TYPED, TYPEN, TYPEM) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd, *a = va; \ + TYPEN *n = vn; \ + TYPEM *m = vm; \ + for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \ + d[i] = (a[i] + \ + (TYPED)n[i * 2 + 0] * m[i * 2 + 0] + \ + (TYPED)n[i * 2 + 1] * m[i * 2 + 1]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i = 0, opr_sz = 
simd_oprsz(desc); \ + intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ + intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ + intptr_t index = simd_data(desc); \ + TYPED *d = vd, *a = va; \ + TYPEN *n = vn; \ + TYPEM *m_indexed = (TYPEM *)vm + HD(index) * 2; \ + do { \ + TYPED m0 = m_indexed[i * 2 + 0]; \ + TYPED m1 = m_indexed[i * 2 + 1]; \ + do { \ + d[i] = (a[i] + \ + n[i * 2 + 0] * m0 + \ + n[i * 2 + 1] * m1); \ + } while (++i < segend); \ + segend = i + (16 / sizeof(TYPED)); \ + } while (i < opr_sz_n); \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_DOT(gvec_sdot_2h, int32_t, int16_t, int16_t) +DO_DOT(gvec_udot_2h, uint32_t, uint16_t, uint16_t) + +DO_DOT_IDX(gvec_sdot_idx_2h, int32_t, int16_t, int16_t, H4) +DO_DOT_IDX(gvec_udot_idx_2h, uint32_t, uint16_t, uint16_t, H4) + +#undef DO_DOT +#undef DO_DOT_IDX void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, float_status *fpst, uint32_t desc) @@ -1416,14 +1467,19 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } +DO_3OP(gvec_fadd_b16, bfloat16_add, float16) DO_3OP(gvec_fadd_h, float16_add, float16) DO_3OP(gvec_fadd_s, float32_add, float32) DO_3OP(gvec_fadd_d, float64_add, float64) +DO_3OP(gvec_bfadd, bfloat16_add, bfloat16) +DO_3OP(gvec_fsub_b16, bfloat16_sub, float16) DO_3OP(gvec_fsub_h, float16_sub, float16) DO_3OP(gvec_fsub_s, float32_sub, float32) DO_3OP(gvec_fsub_d, float64_sub, float64) +DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16) +DO_3OP(gvec_fmul_b16, bfloat16_mul, float16) DO_3OP(gvec_fmul_h, float16_mul, float16) DO_3OP(gvec_fmul_s, float32_mul, float32) DO_3OP(gvec_fmul_d, float64_mul, float64) @@ -1515,6 +1571,13 @@ DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) +DO_3OP(gvec_fmax_b16, bfloat16_max, bfloat16) +DO_3OP(gvec_fmin_b16, bfloat16_min, bfloat16) +DO_3OP(gvec_fmaxnum_b16, bfloat16_maxnum, bfloat16) +DO_3OP(gvec_fminnum_b16, bfloat16_minnum, bfloat16) +DO_3OP(gvec_ah_fmax_b16, helper_sme2_ah_fmax_b16, bfloat16) +DO_3OP(gvec_ah_fmin_b16, helper_sme2_ah_fmin_b16, bfloat16) + #endif #undef DO_3OP @@ -1550,6 +1613,12 @@ static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2, return float16_muladd(op1, op2, dest, 0, stat); } +static bfloat16 bfloat16_muladd_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(op1, op2, dest, 0, stat); +} + static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1568,6 +1637,12 @@ static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2, return float16_muladd(float16_chs(op1), op2, dest, 0, stat); } +static bfloat16 bfloat16_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(bfloat16_chs(op1), op2, dest, 0, stat); +} + static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1586,6 +1661,12 @@ static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); } +static bfloat16 bfloat16_ah_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1610,23 +1691,28 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ clear_tail(d, oprsz, 
simd_maxsz(desc)); \ } -DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16) -DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32) +DO_MULADD(gvec_fmla_nf_h, float16_muladd_nf, float16) +DO_MULADD(gvec_fmla_nf_s, float32_muladd_nf, float32) -DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16) -DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32) +DO_MULADD(gvec_fmls_nf_h, float16_mulsub_nf, float16) +DO_MULADD(gvec_fmls_nf_s, float32_mulsub_nf, float32) DO_MULADD(gvec_vfma_h, float16_muladd_f, float16) DO_MULADD(gvec_vfma_s, float32_muladd_f, float32) DO_MULADD(gvec_vfma_d, float64_muladd_f, float64) +DO_MULADD(gvec_bfmla, bfloat16_muladd_f, bfloat16) DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) +DO_MULADD(gvec_bfmls, bfloat16_mulsub_f, bfloat16) DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) +DO_MULADD(gvec_ah_bfmls, bfloat16_ah_mulsub_f, bfloat16) + +#undef DO_MULADD /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. @@ -1699,6 +1785,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ #define nop(N, M, S) (M) +DO_FMUL_IDX(gvec_fmul_idx_b16, nop, bfloat16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4) DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8) @@ -1745,14 +1832,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) +DO_FMLA_IDX(gvec_bfmla_idx, bfloat16, H2, 0, 0) DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) +DO_FMLA_IDX(gvec_bfmls_idx, bfloat16, H2, INT16_MIN, 0) DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) +DO_FMLA_IDX(gvec_ah_bfmls_idx, bfloat16, H2, 0, float_muladd_negate_product) #undef DO_FMLA_IDX @@ -2184,7 +2274,8 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, intptr_t i, oprsz = simd_oprsz(desc); bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); - float_status *status = &env->vfp.fp_status[FPST_A64]; + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status *status = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; bool fz16 = env->vfp.fpcr & FPCR_FZ16; int negx = 0, negf = 0; @@ -2267,8 +2358,9 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, intptr_t i, j, oprsz = simd_oprsz(desc); bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); - intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); - float_status *status = &env->vfp.fp_status[FPST_A64]; + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 3, 3) * sizeof(float16); + float_status *status = &env->vfp.fp_status[za ? 
FPST_ZA : FPST_A64]; bool fz16 = env->vfp.fpcr & FPCR_FZ16; int negx = 0, negf = 0; @@ -2989,31 +3081,62 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst, float_status *fpst_odd) { - /* - * Compare f16_dotadd() in sme_helper.c, but here we have - * bfloat16 inputs. In particular that means that we do not - * want the FPCR.FZ16 flush semantics, so we use the normal - * float_status for the input handling here. - */ - float64 e1r = float32_to_float64(e1 << 16, fpst); - float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst); - float64 e2r = float32_to_float64(e2 << 16, fpst); - float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst); - float64 t64; + float32 s1r = e1 << 16; + float32 s1c = e1 & 0xffff0000u; + float32 s2r = e2 << 16; + float32 s2c = e2 & 0xffff0000u; float32 t32; - /* - * The ARM pseudocode function FPDot performs both multiplies - * and the add with a single rounding operation. Emulate this - * by performing the first multiply in round-to-odd, then doing - * the second multiply as fused multiply-add, and rounding to - * float32 all in one step. - */ - t64 = float64_mul(e1r, e2r, fpst_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); + /* C.f. FPProcessNaNs4 */ + if (float32_is_any_nan(s1r) || float32_is_any_nan(s1c) || + float32_is_any_nan(s2r) || float32_is_any_nan(s2c)) { + if (float32_is_signaling_nan(s1r, fpst)) { + t32 = s1r; + } else if (float32_is_signaling_nan(s1c, fpst)) { + t32 = s1c; + } else if (float32_is_signaling_nan(s2r, fpst)) { + t32 = s2r; + } else if (float32_is_signaling_nan(s2c, fpst)) { + t32 = s2c; + } else if (float32_is_any_nan(s1r)) { + t32 = s1r; + } else if (float32_is_any_nan(s1c)) { + t32 = s1c; + } else if (float32_is_any_nan(s2r)) { + t32 = s2r; + } else { + t32 = s2c; + } + /* + * FPConvertNaN(FPProcessNaN(t32)) will be done as part + * of the final addition below. + */ + } else { + /* + * Compare f16_dotadd() in sme_helper.c, but here we have + * bfloat16 inputs. In particular that means that we do not + * want the FPCR.FZ16 flush semantics, so we use the normal + * float_status for the input handling here. + */ + float64 e1r = float32_to_float64(s1r, fpst); + float64 e1c = float32_to_float64(s1c, fpst); + float64 e2r = float32_to_float64(s2r, fpst); + float64 e2c = float32_to_float64(s2c, fpst); + float64 t64; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, fpst_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); - /* This conversion is exact, because we've already rounded. */ - t32 = float64_to_float32(t64, fpst); + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, fpst); + } /* The final accumulation step is not fused. 
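A note on the single-rounding trick in the comment retained above: the bfloat16-to-float64 conversions are exact, and a product of two 8-bit significands needs at most 16 bits, so the first float64_mul cannot actually round for these inputs; float64's exponent range also comfortably covers any bfloat16 product. The round-to-odd status therefore serves the general FPDot recipe (and, presumably, the pseudocode's exception-flag behaviour): as I recall from Boldo and Melquiond's work on rounding to odd, rounding an intermediate to odd in a format with at least 2p+2 significand bits and then rounding to nearest at p bits gives the same result as a single round-to-nearest, and float64's 53 bits exceed 2*24+2 = 50 for the float32 target here. The float32_add below is then the one deliberately non-fused step, matching the "final accumulation step is not fused" wording.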
*/ return float32_add(sum, t32, fpst); @@ -3070,6 +3193,45 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(sme2_bfvdot_idx)(void *vd, void *vn, void *vm, + void *va, CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT, 2); + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + float32 *d = vd, *a = va; + uint16_t *n0 = vn; + uint16_t *n1 = vn + sizeof(ARMVectorReg); + uint32_t *m = vm; + float_status fpst, fpst_odd; + + if (is_ebf(env, &fpst, &fpst_odd)) { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(idx)]; + + for (j = 0; j < eltspersegment; j++) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = bfdotadd_ebf(a[i + H4(j)], nn, m_idx, + &fpst, &fpst_odd); + } + } + } else { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(idx)]; + + for (j = 0; j < eltspersegment; j++) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = bfdotadd(a[i + H4(j)], nn, m_idx, &fpst); + } + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, CPUARMState *env, uint32_t desc) { @@ -3146,44 +3308,76 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, clear_tail(d, opr_sz, simd_maxsz(desc)); } -void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, - float_status *stat, uint32_t desc) +static void do_bfmlal(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a, + float_status *stat, uint32_t desc, int negx, int negf) { intptr_t i, opr_sz = simd_oprsz(desc); - intptr_t sel = simd_data(desc); - float32 *d = vd, *a = va; - bfloat16 *n = vn, *m = vm; + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); for (i = 0; i < opr_sz / 4; ++i) { - float32 nn = n[H2(i * 2 + sel)] << 16; + float32 nn = (negx ^ n[H2(i * 2 + sel)]) << 16; float32 mm = m[H2(i * 2 + sel)] << 16; - d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat); + d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], negf, stat); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, - void *va, float_status *stat, uint32_t desc) +void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0, 0); +} + +void HELPER(gvec_bfmlsl)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0x8000, 0); +} + +void HELPER(gvec_ah_bfmlsl)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product); +} + +static void do_bfmlal_idx(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a, + float_status *stat, uint32_t desc, int negx, int negf) { intptr_t i, j, opr_sz = simd_oprsz(desc); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3); intptr_t elements = opr_sz / 4; intptr_t eltspersegment = MIN(16 / 4, elements); - float32 *d = vd, *a = va; - bfloat16 *n = vn, *m = vm; for (i = 0; i < elements; i += eltspersegment) { float32 m_idx = m[H2(2 * i + index)] << 16; for (j = i; j < i + eltspersegment; j++) { - float32 n_j = n[H2(2 * j + sel)] << 16; - d[H4(j)] = 
float32_muladd(n_j, m_idx, a[H4(j)], 0, stat); + float32 n_j = (negx ^ n[H2(2 * j + sel)]) << 16; + d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], negf, stat); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, 0); +} + +void HELPER(gvec_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0x8000, 0); +} + +void HELPER(gvec_ah_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product); +} + #define DO_CLAMP(NAME, TYPE) \ void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \ { \ @@ -3253,3 +3447,90 @@ void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +static inline void do_lut_b(void *zd, uint64_t *indexes, uint64_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint8_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H1(e)] = extractn(table, index * tsize, 8); + } + } +} + +static inline void do_lut_h(void *zd, uint64_t *indexes, uint64_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint16_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H2(e)] = extractn(table, index * tsize, 16); + } + } +} + +static inline void do_lut_s(void *zd, uint64_t *indexes, uint32_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint32_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H4(e)] = table[H4(index)]; + } + } +} + +#define DO_SME2_LUT(ISIZE, NREG, SUFF, ESIZE) \ +void helper_sme2_luti##ISIZE##_##NREG##SUFF \ + (void *zd, void *zn, CPUARMState *env, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned strided = extract32(desc, SIMD_DATA_SHIFT, 1); \ + unsigned idx = extract32(desc, SIMD_DATA_SHIFT + 1, 4); \ + unsigned elements = vl / ESIZE; \ + unsigned dstride = (!strided ? 1 : NREG == 4 ? 
4 : 8); \ + unsigned segments = (ESIZE * 8) / (ISIZE * NREG); \ + unsigned segment = idx & (segments - 1); \ + ARMVectorReg indexes; \ + memcpy(&indexes, zn, vl); \ + do_lut_##SUFF(zd, indexes.d, (void *)env->za_state.zt0, elements, \ + segment * NREG * elements, \ + dstride * sizeof(ARMVectorReg), ISIZE, 32, NREG); \ +} + +DO_SME2_LUT(2,1,b, 1) +DO_SME2_LUT(2,1,h, 2) +DO_SME2_LUT(2,1,s, 4) +DO_SME2_LUT(2,2,b, 1) +DO_SME2_LUT(2,2,h, 2) +DO_SME2_LUT(2,2,s, 4) +DO_SME2_LUT(2,4,b, 1) +DO_SME2_LUT(2,4,h, 2) +DO_SME2_LUT(2,4,s, 4) + +DO_SME2_LUT(4,1,b, 1) +DO_SME2_LUT(4,1,h, 2) +DO_SME2_LUT(4,1,s, 4) +DO_SME2_LUT(4,2,b, 1) +DO_SME2_LUT(4,2,h, 2) +DO_SME2_LUT(4,2,s, 4) +DO_SME2_LUT(4,4,h, 2) +DO_SME2_LUT(4,4,s, 4) + +#undef DO_SME2_LUT diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index 6b93b5a..cf41b03 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -22,6 +22,8 @@ #include "fpu/softfloat.h" +typedef struct CPUArchState CPUARMState; + /* * Note that vector data is stored in host-endian 64-bit chunks, * so addressing units smaller than that needs a host-endian fixup. @@ -221,6 +223,34 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *); int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *); int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool); +#define do_ssat_b(val) MIN(MAX(val, INT8_MIN), INT8_MAX) +#define do_ssat_h(val) MIN(MAX(val, INT16_MIN), INT16_MAX) +#define do_ssat_s(val) MIN(MAX(val, INT32_MIN), INT32_MAX) +#define do_usat_b(val) MIN(MAX(val, 0), UINT8_MAX) +#define do_usat_h(val) MIN(MAX(val, 0), UINT16_MAX) +#define do_usat_s(val) MIN(MAX(val, 0), UINT32_MAX) + +static inline uint64_t do_urshr(uint64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else if (sh == 64) { + return x >> 63; + } else { + return 0; + } +} + +static inline int64_t do_srshr(int64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else { + /* Rounding the sign bit always produces 0. */ + return 0; + } +} + /** * bfdotadd: * @sum: addend @@ -270,6 +300,11 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); /* * Negate as for FPCR.AH=1 -- do not negate NaNs. */ +static inline float16 bfloat16_ah_chs(float16 a) +{ + return bfloat16_is_any_nan(a) ? a : bfloat16_chs(a); +} + static inline float16 float16_ah_chs(float16 a) { return float16_is_any_nan(a) ? a : float16_chs(a); @@ -300,4 +335,119 @@ static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); } +/* Not actually called directly as a helper, but uses similar machinery. */ +bfloat16 helper_sme2_ah_fmax_b16(bfloat16 a, bfloat16 b, float_status *fpst); +bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst); + +float32 sve_f16_to_f32(float16 f, float_status *fpst); +float16 sve_f32_to_f16(float32 f, float_status *fpst); + +/* + * Decode helper functions for predicate as counter. + */ + +typedef struct { + unsigned count; + unsigned lg2_stride; + bool invert; +} DecodeCounter; + +static inline DecodeCounter +decode_counter(unsigned png, unsigned vl, unsigned v_esz) +{ + DecodeCounter ret = { }; + + /* C.f. Arm pseudocode CounterToPredicate. 
*/ + if (likely(png & 0xf)) { + unsigned p_esz = ctz32(png); + + /* + * maxbit = log2(pl(bits) * 4) + * = log2(vl(bytes) * 4) + * = log2(vl) + 2 + * maxbit_mask = ones<maxbit:0> + * = (1 << (maxbit + 1)) - 1 + * = (1 << (log2(vl) + 2 + 1)) - 1 + * = (1 << (log2(vl) + 3)) - 1 + * = (pow2ceil(vl) << 3) - 1 + */ + ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1); + ret.count >>= p_esz + 1; + + ret.invert = (png >> 15) & 1; + + /* + * The Arm pseudocode for CounterToPredicate expands the count to + * a set of bits, and then the operation proceeds as for the original + * interpretation of predicates as a set of bits. + * + * We can avoid the expansion by adjusting the count and supplying + * an element stride. + */ + if (unlikely(p_esz != v_esz)) { + if (p_esz < v_esz) { + /* + * For predicate esz < vector esz, the expanded predicate + * will have more bits set than will be consumed. + * Adjust the count down, rounding up. + * Consider p_esz = MO_8, v_esz = MO_64, count 14: + * The expanded predicate would be + * 0011 1111 1111 1111 + * The significant bits are + * ...1 ...1 ...1 ...1 + */ + unsigned shift = v_esz - p_esz; + unsigned trunc = ret.count >> shift; + ret.count = trunc + (ret.count != (trunc << shift)); + } else { + /* + * For predicate esz > vector esz, the expanded predicate + * will have bits set only at power-of-two multiples of + * the vector esz. Bits at other multiples will all be + * false. Adjust the count up, and supply the caller + * with a stride of elements to skip. + */ + unsigned shift = p_esz - v_esz; + ret.count <<= shift; + ret.lg2_stride = shift; + } + } + } + return ret; +} + +/* Extract @len bits from an array of uint64_t at offset @pos bits. */ +static inline uint64_t extractn(uint64_t *p, unsigned pos, unsigned len) +{ + uint64_t x; + + p += pos / 64; + pos = pos % 64; + + x = p[0]; + if (pos + len > 64) { + x = (x >> pos) | (p[1] << (-pos & 63)); + pos = 0; + } + return extract64(x, pos, len); +} + +/* Deposit @len bits into an array of uint64_t at offset @pos bits. */ +static inline void depositn(uint64_t *p, unsigned pos, + unsigned len, uint64_t val) +{ + p += pos / 64; + pos = pos % 64; + + if (pos + len <= 64) { + p[0] = deposit64(p[0], pos, len, val); + } else { + unsigned len0 = 64 - pos; + unsigned len1 = len - len0; + + p[0] = deposit64(p[0], pos, len0, val); + p[1] = deposit64(p[1], 0, len1, val >> len0); + } +} + #endif /* TARGET_ARM_VEC_INTERNAL_H */ diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c index b32e2f4..e156e37 100644 --- a/target/arm/tcg/vfp_helper.c +++ b/target/arm/tcg/vfp_helper.c @@ -19,12 +19,14 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/helper-proto.h" #include "internals.h" #include "cpu-features.h" #include "fpu/softfloat.h" #include "qemu/log.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + /* * Set the float_status behaviour to match the Arm defaults: * * tininess-before-rounding @@ -121,7 +123,7 @@ uint32_t vfp_get_fpsr_from_host(CPUARMState *env) a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); /* - * We do not merge in flags from FPST_AH or FPST_AH_F16, because + * We do not merge in flags from FPST_{AH,ZA} or FPST_{AH,ZA}_F16, because * they are used for insns that must not set the cumulative exception bits. 
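Stepping back to the extractn()/depositn() helpers added to vec_internal.h above, a worked example of the word-boundary-crossing case (illustrative values only):

    uint64_t p[2] = { 0xf000000000000000ull, 0x0000000000000001ull };

    /* Bits 60..67 straddle p[0] and p[1]: the low four bits of the
     * result come from the top nibble of p[0] (0xf), the next four
     * from the bottom of p[1] (0x1). */
    uint64_t v = extractn(p, 60, 8);    /* v == 0x1f */

    /* A deposit splits the same way: 0xa lands in p[0] bits 60..63,
     * 0x2 in p[1] bits 0..3. */
    depositn(p, 60, 8, 0x2a);   /* p[0] top nibble becomes 0xa,
                                   p[1] low nibble becomes 0x2 */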
*/ @@ -194,6 +196,8 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA_F16]); } if (changed & FPCR_FZ16) { bool ftz_enabled = val & FPCR_FZ16; @@ -201,15 +205,18 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]); } if (changed & FPCR_FZ) { bool ftz_enabled = val & FPCR_FZ; set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA]); /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); } @@ -221,6 +228,7 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) bool fitz_enabled = (val & FPCR_FIZ) || (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_ZA]); } if (changed & FPCR_DN) { bool dnan_enabled = val & FPCR_DN; @@ -238,9 +246,13 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) /* Change behaviours for A64 FP operations */ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); } else { arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); } } /* diff --git a/target/arm/trace-events b/target/arm/trace-events index 4438dce..72a2c7d 100644 --- a/target/arm/trace-events +++ b/target/arm/trace-events @@ -13,3 +13,13 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d" # kvm.c kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64 + +# cpu.c +arm_cpu_reset(uint64_t mp_aff) "cpu %" PRIu64 +arm_emulate_firmware_reset(uint64_t mp_aff, unsigned target_el) "cpu %" PRIu64 " @EL%u" + +# arm-powerctl.c +arm_powerctl_set_cpu_on(uint64_t mp_aff, unsigned target_el, const char *mode, uint64_t entry, uint64_t context_id) "cpu %" PRIu64 " (EL %u, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 +arm_powerctl_set_cpu_on_and_reset(uint64_t mp_aff) "cpu %" PRIu64 +arm_powerctl_set_cpu_off(uint64_t mp_aff) "cpu %" PRIu64 +arm_powerctl_reset_cpu(uint64_t mp_aff) "cpu %" PRIu64 diff --git 
a/target/avr/cpu-param.h b/target/avr/cpu-param.h index 81f3f49..f74bfc25 100644 --- a/target/avr/cpu-param.h +++ b/target/avr/cpu-param.h @@ -21,16 +21,10 @@ #ifndef AVR_CPU_PARAM_H #define AVR_CPU_PARAM_H -/* - * TARGET_PAGE_BITS cannot be more than 8 bits because - * 1. all IO registers occupy [0x0000 .. 0x00ff] address range, and they - * should be implemented as a device and not memory - * 2. SRAM starts at the address 0x0100 - */ -#define TARGET_PAGE_BITS 8 +#define TARGET_PAGE_BITS 10 #define TARGET_PHYS_ADDR_SPACE_BITS 24 #define TARGET_VIRT_ADDR_SPACE_BITS 24 -#define TCG_GUEST_DEFAULT_MO 0 +#define TARGET_INSN_START_EXTRA_WORDS 0 #endif diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 834c708..a6df71d 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -21,12 +21,13 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/qemu-print.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" +#include "system/address-spaces.h" #include "cpu.h" #include "disas/dis-asm.h" #include "tcg/debug-assert.h" #include "hw/qdev-properties.h" +#include "accel/tcg/cpu-ops.h" static void avr_cpu_set_pc(CPUState *cs, vaddr value) { @@ -44,7 +45,7 @@ static vaddr avr_cpu_get_pc(CPUState *cs) static bool avr_cpu_has_work(CPUState *cs) { - return (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET)) + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET) && cpu_interrupts_enabled(cpu_env(cs)); } @@ -53,6 +54,21 @@ static int avr_cpu_mmu_index(CPUState *cs, bool ifetch) return ifetch ? MMU_CODE_IDX : MMU_DATA_IDX; } +static TCGTBCPUState avr_get_tb_cpu_state(CPUState *cs) +{ + CPUAVRState *env = cpu_env(cs); + uint32_t flags = 0; + + if (env->fullacc) { + flags |= TB_FLAGS_FULL_ACCESS; + } + if (env->skip) { + flags |= TB_FLAGS_SKIP; + } + + return (TCGTBCPUState){ .pc = env->pc_w * 2, .flags = flags }; +} + static void avr_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -110,6 +126,8 @@ static void avr_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) static void avr_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); + CPUAVRState *env = cpu_env(cs); + AVRCPU *cpu = env_archcpu(env); AVRCPUClass *mcc = AVR_CPU_GET_CLASS(dev); Error *local_err = NULL; @@ -122,6 +140,19 @@ static void avr_cpu_realizefn(DeviceState *dev, Error **errp) cpu_reset(cs); mcc->parent_realize(dev, errp); + + /* + * Two blocks in the low data space loop back into cpu registers. 
+ */ + memory_region_init_io(&cpu->cpu_reg1, OBJECT(cpu), &avr_cpu_reg1, env, + "avr-cpu-reg1", 32); + memory_region_add_subregion(get_system_memory(), + OFFSET_DATA, &cpu->cpu_reg1); + + memory_region_init_io(&cpu->cpu_reg2, OBJECT(cpu), &avr_cpu_reg2, env, + "avr-cpu-reg2", 8); + memory_region_add_subregion(get_system_memory(), + OFFSET_DATA + 0x58, &cpu->cpu_reg2); } static void avr_cpu_set_int(void *opaque, int irq, int level) @@ -205,20 +236,29 @@ static const struct SysemuCPUOps avr_sysemu_ops = { .get_phys_page_debug = avr_cpu_get_phys_page_debug, }; -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps avr_tcg_ops = { + .guest_default_memory_order = 0, + .mttcg_supported = false, .initialize = avr_cpu_tcg_init, .translate_code = avr_cpu_translate_code, + .get_tb_cpu_state = avr_get_tb_cpu_state, .synchronize_from_tb = avr_cpu_synchronize_from_tb, .restore_state_to_opc = avr_restore_state_to_opc, + .mmu_index = avr_cpu_mmu_index, .cpu_exec_interrupt = avr_cpu_exec_interrupt, .cpu_exec_halt = avr_cpu_has_work, + .cpu_exec_reset = cpu_reset, .tlb_fill = avr_cpu_tlb_fill, .do_interrupt = avr_cpu_do_interrupt, + /* + * TODO: code and data wrapping are different, but for the most part + * AVR only references bytes or aligned code fetches. But we use + * non-aligned MO_16 accesses for stack push/pop. + */ + .pointer_wrap = cpu_pointer_wrap_uint32, }; -static void avr_cpu_class_init(ObjectClass *oc, void *data) +static void avr_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -234,7 +274,6 @@ static void avr_cpu_class_init(ObjectClass *oc, void *data) cc->class_by_name = avr_cpu_class_by_name; - cc->mmu_index = avr_cpu_mmu_index; cc->dump_state = avr_cpu_dump_state; cc->set_pc = avr_cpu_set_pc; cc->get_pc = avr_cpu_get_pc; diff --git a/target/avr/cpu.h b/target/avr/cpu.h index 06f5ae4..518e243 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -22,7 +22,10 @@ #define QEMU_AVR_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" +#include "system/memory.h" #ifdef CONFIG_USER_ONLY #error "AVR 8-bit does not support user mode" @@ -44,8 +47,16 @@ /* Number of CPU registers */ #define NUMBER_OF_CPU_REGISTERS 32 -/* Number of IO registers accessible by ld/st/in/out */ -#define NUMBER_OF_IO_REGISTERS 64 + +/* CPU registers mapped into i/o ports 0x38-0x3f. */ +#define REG_38_RAMPD 0 +#define REG_38_RAMPX 1 +#define REG_38_RAMPY 2 +#define REG_38_RAMPZ 3 +#define REG_38_EIDN 4 +#define REG_38_SPL 5 +#define REG_38_SPH 6 +#define REG_38_SREG 7 /* * Offsets of AVR memory regions in host memory space. 
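For orientation, combining the two subregions registered in avr_cpu_realizefn() above with OFFSET_DATA (0x00800000, defined just below), the looped-back CPU state ends up at these system-memory addresses:

    0x00800000 .. 0x0080001f    avr-cpu-reg1: r0..r31 (32 bytes)
    0x00800058 .. 0x0080005f    avr-cpu-reg2: RAMPD..SREG (8 bytes)

The 0x58 offset agrees with the REG_38_* numbering above: i/o port N appears at data address N + NUMBER_OF_CPU_REGISTERS, so ports 0x38..0x3f map to 0x38 + 0x20 = 0x58 onward, which is also why the case labels in helper_fullwr() later in this patch are spelled REG_38_x + 0x38 + NUMBER_OF_CPU_REGISTERS.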
@@ -60,8 +71,6 @@ #define OFFSET_CODE 0x00000000 /* CPU registers, IO registers, and SRAM */ #define OFFSET_DATA 0x00800000 -/* CPU registers specifically, these are mapped at the start of data */ -#define OFFSET_CPU_REGISTERS OFFSET_DATA /* * IO registers, including status register, stack pointer, and memory * mapped peripherals, mapped just after CPU registers @@ -144,6 +153,9 @@ struct ArchCPU { CPUAVRState env; + MemoryRegion cpu_reg1; + MemoryRegion cpu_reg2; + /* Initial value of stack pointer */ uint32_t init_sp; }; @@ -193,24 +205,6 @@ enum { TB_FLAGS_SKIP = 2, }; -static inline void cpu_get_tb_cpu_state(CPUAVRState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - uint32_t flags = 0; - - *pc = env->pc_w * 2; - *cs_base = 0; - - if (env->fullacc) { - flags |= TB_FLAGS_FULL_ACCESS; - } - if (env->skip) { - flags |= TB_FLAGS_SKIP; - } - - *pflags = flags; -} - static inline int cpu_interrupts_enabled(CPUAVRState *env) { return env->sregI != 0; @@ -244,6 +238,7 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); -#include "exec/cpu-all.h" +extern const MemoryRegionOps avr_cpu_reg1; +extern const MemoryRegionOps avr_cpu_reg2; #endif /* QEMU_AVR_CPU_H */ diff --git a/target/avr/helper.c b/target/avr/helper.c index 3412312..4b29ab3 100644 --- a/target/avr/helper.c +++ b/target/avr/helper.c @@ -25,8 +25,8 @@ #include "accel/tcg/cpu-ops.h" #include "exec/cputlb.h" #include "exec/page-protection.h" -#include "exec/cpu_ldst.h" -#include "exec/address-spaces.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) @@ -47,7 +47,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) cs->exception_index = EXCP_RESET; avr_cpu_do_interrupt(cs); - cs->interrupt_request &= ~CPU_INTERRUPT_RESET; + cpu_reset_interrupt(cs, CPU_INTERRUPT_RESET); return true; } } @@ -59,7 +59,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) env->intsrc &= env->intsrc - 1; /* clear the interrupt */ if (!env->intsrc) { - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } return true; } @@ -67,6 +67,11 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) return false; } +static void do_stb(CPUAVRState *env, uint32_t addr, uint8_t data, uintptr_t ra) +{ + cpu_stb_mmuidx_ra(env, addr, data, MMU_DATA_IDX, ra); +} + void avr_cpu_do_interrupt(CPUState *cs) { CPUAVRState *env = cpu_env(cs); @@ -83,14 +88,14 @@ void avr_cpu_do_interrupt(CPUState *cs) } if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) { - cpu_stb_data(env, env->sp--, (ret & 0x0000ff)); - cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8); - cpu_stb_data(env, env->sp--, (ret & 0xff0000) >> 16); + do_stb(env, env->sp--, ret, 0); + do_stb(env, env->sp--, ret >> 8, 0); + do_stb(env, env->sp--, ret >> 16, 0); } else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) { - cpu_stb_data(env, env->sp--, (ret & 0x0000ff)); - cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8); + do_stb(env, env->sp--, ret, 0); + do_stb(env, env->sp--, ret >> 8, 0); } else { - cpu_stb_data(env, env->sp--, (ret & 0x0000ff)); + do_stb(env, env->sp--, ret, 0); } env->pc_w = base + vector * size; @@ -108,7 +113,7 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr) { - int prot, page_size = TARGET_PAGE_SIZE; + int prot; uint32_t paddr; 
address &= TARGET_PAGE_MASK; @@ -133,23 +138,9 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, /* Access to memory. */ paddr = OFFSET_DATA + address; prot = PAGE_READ | PAGE_WRITE; - if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) { - /* - * Access to CPU registers, exit and rebuilt this TB to use - * full access in case it touches specially handled registers - * like SREG or SP. For probing, set page_size = 1, in order - * to force tlb_fill to be called for the next access. - */ - if (probe) { - page_size = 1; - } else { - cpu_env(cs)->fullacc = 1; - cpu_loop_exit_restore(cs, retaddr); - } - } } - tlb_set_page(cs, address, paddr, prot, mmu_idx, page_size); + tlb_set_page(cs, address, paddr, prot, mmu_idx, TARGET_PAGE_SIZE); return true; } @@ -203,156 +194,129 @@ void helper_wdr(CPUAVRState *env) } /* - * This function implements IN instruction - * - * It does the following - * a. if an IO register belongs to CPU, its value is read and returned - * b. otherwise io address is translated to mem address and physical memory - * is read. - * c. it caches the value for sake of SBI, SBIC, SBIS & CBI implementation - * + * The first 32 bytes of the data space are mapped to the cpu regs. + * We cannot write these from normal store operations because TCG + * does not expect global temps to be modified -- a global may be + * live in a host cpu register across the store. We can however + * read these, as TCG does make sure the global temps are saved + * in case the load operation traps. */ -target_ulong helper_inb(CPUAVRState *env, uint32_t port) + +static uint64_t avr_cpu_reg1_read(void *opaque, hwaddr addr, unsigned size) { - target_ulong data = 0; + CPUAVRState *env = opaque; - switch (port) { - case 0x38: /* RAMPD */ - data = 0xff & (env->rampD >> 16); - break; - case 0x39: /* RAMPX */ - data = 0xff & (env->rampX >> 16); - break; - case 0x3a: /* RAMPY */ - data = 0xff & (env->rampY >> 16); - break; - case 0x3b: /* RAMPZ */ - data = 0xff & (env->rampZ >> 16); - break; - case 0x3c: /* EIND */ - data = 0xff & (env->eind >> 16); - break; - case 0x3d: /* SPL */ - data = env->sp & 0x00ff; - break; - case 0x3e: /* SPH */ - data = env->sp >> 8; - break; - case 0x3f: /* SREG */ - data = cpu_get_sreg(env); - break; - default: - /* not a special register, pass to normal memory access */ - data = address_space_ldub(&address_space_memory, - OFFSET_IO_REGISTERS + port, - MEMTXATTRS_UNSPECIFIED, NULL); + assert(addr < 32); + return env->r[addr]; +} + +/* + * The range 0x38-0x3f of the i/o space is mapped to cpu regs. + * As above, we cannot write these from normal store operations. 
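The full write path implied by this patch, pieced together from the hunks in this excerpt (the translate.c side is not shown here, so the final step is an assumption):

    /* 1. A normal store hitting avr-cpu-reg1/reg2 reaches
     *    avr_cpu_trap_write(), which sets env->fullacc and unwinds
     *    with cpu_loop_exit_restore().
     * 2. On re-entry, avr_get_tb_cpu_state() sees env->fullacc and
     *    adds TB_FLAGS_FULL_ACCESS to the TB flags, forcing a
     *    retranslation of the block.
     * 3. The full-access TB presumably compiles stores into
     *    helper_fullwr() calls, which clear fullacc, update CPU
     *    registers or i/o ports directly, and otherwise fall back to
     *    do_stb(). Loads never need this dance: per the comment
     *    above, TCG saves global temps before a load that may trap,
     *    so the read callbacks can simply return register values. */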
+ */ + +static uint64_t avr_cpu_reg2_read(void *opaque, hwaddr addr, unsigned size) +{ + CPUAVRState *env = opaque; + + switch (addr) { + case REG_38_RAMPD: + return 0xff & (env->rampD >> 16); + case REG_38_RAMPX: + return 0xff & (env->rampX >> 16); + case REG_38_RAMPY: + return 0xff & (env->rampY >> 16); + case REG_38_RAMPZ: + return 0xff & (env->rampZ >> 16); + case REG_38_EIDN: + return 0xff & (env->eind >> 16); + case REG_38_SPL: + return env->sp & 0x00ff; + case REG_38_SPH: + return 0xff & (env->sp >> 8); + case REG_38_SREG: + return cpu_get_sreg(env); } + g_assert_not_reached(); +} - return data; +static void avr_cpu_trap_write(void *opaque, hwaddr addr, + uint64_t data64, unsigned size) +{ + CPUAVRState *env = opaque; + CPUState *cs = env_cpu(env); + + env->fullacc = true; + cpu_loop_exit_restore(cs, cs->mem_io_pc); } +const MemoryRegionOps avr_cpu_reg1 = { + .read = avr_cpu_reg1_read, + .write = avr_cpu_trap_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 1, +}; + +const MemoryRegionOps avr_cpu_reg2 = { + .read = avr_cpu_reg2_read, + .write = avr_cpu_trap_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 1, +}; + /* - * This function implements OUT instruction - * - * It does the following - * a. if an IO register belongs to CPU, its value is written into the register - * b. otherwise io address is translated to mem address and physical memory - * is written. - * c. it caches the value for sake of SBI, SBIC, SBIS & CBI implementation - * + * this function implements ST instruction when there is a possibility to write + * into a CPU register */ -void helper_outb(CPUAVRState *env, uint32_t port, uint32_t data) +void helper_fullwr(CPUAVRState *env, uint32_t data, uint32_t addr) { - data &= 0x000000ff; + env->fullacc = false; - switch (port) { - case 0x38: /* RAMPD */ + switch (addr) { + case 0 ... 
31: + /* CPU registers */ + env->r[addr] = data; + break; + + case REG_38_RAMPD + 0x38 + NUMBER_OF_CPU_REGISTERS: if (avr_feature(env, AVR_FEATURE_RAMPD)) { - env->rampD = (data & 0xff) << 16; + env->rampD = data << 16; } break; - case 0x39: /* RAMPX */ + case REG_38_RAMPX + 0x38 + NUMBER_OF_CPU_REGISTERS: if (avr_feature(env, AVR_FEATURE_RAMPX)) { - env->rampX = (data & 0xff) << 16; + env->rampX = data << 16; } break; - case 0x3a: /* RAMPY */ + case REG_38_RAMPY + 0x38 + NUMBER_OF_CPU_REGISTERS: if (avr_feature(env, AVR_FEATURE_RAMPY)) { - env->rampY = (data & 0xff) << 16; + env->rampY = data << 16; } break; - case 0x3b: /* RAMPZ */ + case REG_38_RAMPZ + 0x38 + NUMBER_OF_CPU_REGISTERS: if (avr_feature(env, AVR_FEATURE_RAMPZ)) { - env->rampZ = (data & 0xff) << 16; + env->rampZ = data << 16; } break; - case 0x3c: /* EIDN */ - env->eind = (data & 0xff) << 16; + case REG_38_EIDN + 0x38 + NUMBER_OF_CPU_REGISTERS: + env->eind = data << 16; break; - case 0x3d: /* SPL */ - env->sp = (env->sp & 0xff00) | (data); + case REG_38_SPL + 0x38 + NUMBER_OF_CPU_REGISTERS: + env->sp = (env->sp & 0xff00) | data; break; - case 0x3e: /* SPH */ + case REG_38_SPH + 0x38 + NUMBER_OF_CPU_REGISTERS: if (avr_feature(env, AVR_FEATURE_2_BYTE_SP)) { env->sp = (env->sp & 0x00ff) | (data << 8); } break; - case 0x3f: /* SREG */ + case REG_38_SREG + 0x38 + NUMBER_OF_CPU_REGISTERS: cpu_set_sreg(env, data); break; - default: - /* not a special register, pass to normal memory access */ - address_space_stb(&address_space_memory, OFFSET_IO_REGISTERS + port, - data, MEMTXATTRS_UNSPECIFIED, NULL); - } -} - -/* - * this function implements LD instruction when there is a possibility to read - * from a CPU register - */ -target_ulong helper_fullrd(CPUAVRState *env, uint32_t addr) -{ - uint8_t data; - - env->fullacc = false; - - if (addr < NUMBER_OF_CPU_REGISTERS) { - /* CPU registers */ - data = env->r[addr]; - } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) { - /* IO registers */ - data = helper_inb(env, addr - NUMBER_OF_CPU_REGISTERS); - } else { - /* memory */ - data = address_space_ldub(&address_space_memory, OFFSET_DATA + addr, - MEMTXATTRS_UNSPECIFIED, NULL); - } - return data; -} -/* - * this function implements ST instruction when there is a possibility to write - * into a CPU register - */ -void helper_fullwr(CPUAVRState *env, uint32_t data, uint32_t addr) -{ - env->fullacc = false; - - /* Following logic assumes this: */ - assert(OFFSET_CPU_REGISTERS == OFFSET_DATA); - assert(OFFSET_IO_REGISTERS == OFFSET_CPU_REGISTERS + - NUMBER_OF_CPU_REGISTERS); - - if (addr < NUMBER_OF_CPU_REGISTERS) { - /* CPU registers */ - env->r[addr] = data; - } else if (addr < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) { - /* IO registers */ - helper_outb(env, addr - NUMBER_OF_CPU_REGISTERS, data); - } else { - /* memory */ - address_space_stb(&address_space_memory, OFFSET_DATA + addr, data, - MEMTXATTRS_UNSPECIFIED, NULL); + default: + do_stb(env, addr, data, GETPC()); + break; } } diff --git a/target/avr/helper.h b/target/avr/helper.h index 4d02e64..e8d13e9 100644 --- a/target/avr/helper.h +++ b/target/avr/helper.h @@ -23,7 +23,4 @@ DEF_HELPER_1(debug, noreturn, env) DEF_HELPER_1(break, noreturn, env) DEF_HELPER_1(sleep, noreturn, env) DEF_HELPER_1(unsupported, noreturn, env) -DEF_HELPER_3(outb, void, env, i32, i32) -DEF_HELPER_2(inb, tl, env, i32) DEF_HELPER_3(fullwr, void, env, i32, i32) -DEF_HELPER_2(fullrd, tl, env, i32) diff --git a/target/avr/insn.decode b/target/avr/insn.decode index 482c23a..cc30224 100644 --- 
a/target/avr/insn.decode +++ b/target/avr/insn.decode @@ -118,11 +118,8 @@ BRBC 1111 01 ....... ... @op_bit_imm @io_rd_imm .... . .. ..... .... &rd_imm rd=%rd imm=%io_imm @ldst_d .. . . .. . rd:5 . ... &rd_imm imm=%ldst_d_imm -# The 16-bit immediate is completely in the next word. -# Fields cannot be defined with no bits, so we cannot play -# the same trick and append to a zero-bit value. -# Defer reading the immediate until trans_{LDS,STS}. -@ldst_s .... ... rd:5 .... imm=0 +%ldst_imm !function=next_word +@ldst_s .... ... rd:5 .... imm=%ldst_imm MOV 0010 11 . ..... .... @op_rd_rr MOVW 0000 0001 .... .... &rd_rr rd=%rd_d rr=%rr_d diff --git a/target/avr/translate.c b/target/avr/translate.c index 4ab71d8..804b0b2 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -22,13 +22,13 @@ #include "qemu/qemu-print.h" #include "tcg/tcg.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/log.h" #include "exec/translator.h" +#include "exec/target_page.h" #define HELPER_H "helper.h" #include "exec/helper-info.c.inc" @@ -194,6 +194,9 @@ static bool avr_have_feature(DisasContext *ctx, int feature) static bool decode_insn(DisasContext *ctx, uint16_t insn); #include "decode-insn.c.inc" +static void gen_inb(DisasContext *ctx, TCGv data, int port); +static void gen_outb(DisasContext *ctx, TCGv data, int port); + /* * Arithmetic Instructions */ @@ -1293,9 +1296,8 @@ static bool trans_SBRS(DisasContext *ctx, arg_SBRS *a) static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a) { TCGv data = tcg_temp_new_i32(); - TCGv port = tcg_constant_i32(a->reg); - gen_helper_inb(data, tcg_env, port); + gen_inb(ctx, data, a->reg); tcg_gen_andi_tl(data, data, 1 << a->bit); ctx->skip_cond = TCG_COND_EQ; ctx->skip_var0 = data; @@ -1311,9 +1313,8 @@ static bool trans_SBIC(DisasContext *ctx, arg_SBIC *a) static bool trans_SBIS(DisasContext *ctx, arg_SBIS *a) { TCGv data = tcg_temp_new_i32(); - TCGv port = tcg_constant_i32(a->reg); - gen_helper_inb(data, tcg_env, port); + gen_inb(ctx, data, a->reg); tcg_gen_andi_tl(data, data, 1 << a->bit); ctx->skip_cond = TCG_COND_NE; ctx->skip_var0 = data; @@ -1502,11 +1503,18 @@ static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr) static void gen_data_load(DisasContext *ctx, TCGv data, TCGv addr) { - if (ctx->base.tb->flags & TB_FLAGS_FULL_ACCESS) { - gen_helper_fullrd(data, tcg_env, addr); - } else { - tcg_gen_qemu_ld_tl(data, addr, MMU_DATA_IDX, MO_UB); - } + tcg_gen_qemu_ld_tl(data, addr, MMU_DATA_IDX, MO_UB); +} + +static void gen_inb(DisasContext *ctx, TCGv data, int port) +{ + gen_data_load(ctx, data, tcg_constant_i32(port + NUMBER_OF_CPU_REGISTERS)); +} + +static void gen_outb(DisasContext *ctx, TCGv data, int port) +{ + gen_helper_fullwr(tcg_env, data, + tcg_constant_i32(port + NUMBER_OF_CPU_REGISTERS)); } /* @@ -1578,7 +1586,6 @@ static bool trans_LDS(DisasContext *ctx, arg_LDS *a) TCGv Rd = cpu_r[a->rd]; TCGv addr = tcg_temp_new_i32(); TCGv H = cpu_rampD; - a->imm = next_word(ctx); tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ tcg_gen_shli_tl(addr, addr, 16); @@ -1783,7 +1790,6 @@ static bool trans_STS(DisasContext *ctx, arg_STS *a) TCGv Rd = cpu_r[a->rd]; TCGv addr = tcg_temp_new_i32(); TCGv H = cpu_rampD; - a->imm = next_word(ctx); tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ tcg_gen_shli_tl(addr, addr, 16); @@ -2128,9 +2134,8 @@ static bool trans_SPMX(DisasContext *ctx, arg_SPMX *a) static bool trans_IN(DisasContext *ctx, 
arg_IN *a) { TCGv Rd = cpu_r[a->rd]; - TCGv port = tcg_constant_i32(a->imm); - gen_helper_inb(Rd, tcg_env, port); + gen_inb(ctx, Rd, a->imm); return true; } @@ -2141,9 +2146,8 @@ static bool trans_IN(DisasContext *ctx, arg_IN *a) static bool trans_OUT(DisasContext *ctx, arg_OUT *a) { TCGv Rd = cpu_r[a->rd]; - TCGv port = tcg_constant_i32(a->imm); - gen_helper_outb(tcg_env, port, Rd); + gen_outb(ctx, Rd, a->imm); return true; } @@ -2409,11 +2413,10 @@ static bool trans_SWAP(DisasContext *ctx, arg_SWAP *a) static bool trans_SBI(DisasContext *ctx, arg_SBI *a) { TCGv data = tcg_temp_new_i32(); - TCGv port = tcg_constant_i32(a->reg); - gen_helper_inb(data, tcg_env, port); + gen_inb(ctx, data, a->reg); tcg_gen_ori_tl(data, data, 1 << a->bit); - gen_helper_outb(tcg_env, port, data); + gen_outb(ctx, data, a->reg); return true; } @@ -2424,11 +2427,10 @@ static bool trans_SBI(DisasContext *ctx, arg_SBI *a) static bool trans_CBI(DisasContext *ctx, arg_CBI *a) { TCGv data = tcg_temp_new_i32(); - TCGv port = tcg_constant_i32(a->reg); - gen_helper_inb(data, tcg_env, port); + gen_inb(ctx, data, a->reg); tcg_gen_andi_tl(data, data, ~(1 << a->bit)); - gen_helper_outb(tcg_env, port, data); + gen_outb(ctx, data, a->reg); return true; } diff --git a/target/hexagon/cpu-param.h b/target/hexagon/cpu-param.h index 45ee7b4..635d509e7 100644 --- a/target/hexagon/cpu-param.h +++ b/target/hexagon/cpu-param.h @@ -23,4 +23,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 36 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +#define TARGET_INSN_START_EXTRA_WORDS 0 + #endif diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 766b678..a5a0417 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -19,13 +19,13 @@ #include "qemu/qemu-print.h" #include "cpu.h" #include "internal.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "qapi/error.h" #include "hw/qdev-properties.h" #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" #include "exec/gdbstub.h" +#include "accel/tcg/cpu-ops.h" static void hexagon_v66_cpu_init(Object *obj) { } static void hexagon_v67_cpu_init(Object *obj) { } @@ -255,6 +255,22 @@ static vaddr hexagon_cpu_get_pc(CPUState *cs) return cpu_env(cs)->gpr[HEX_REG_PC]; } +static TCGTBCPUState hexagon_get_tb_cpu_state(CPUState *cs) +{ + CPUHexagonState *env = cpu_env(cs); + vaddr pc = env->gpr[HEX_REG_PC]; + uint32_t hex_flags = 0; + + if (pc == env->gpr[HEX_REG_SA0]) { + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1); + } + if (pc & PCALIGN_MASK) { + hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0); + } + + return (TCGTBCPUState){ .pc = pc, .flags = hex_flags }; +} + static void hexagon_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -313,20 +329,28 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) mcc->parent_realize(dev, errp); } -static void hexagon_cpu_init(Object *obj) +static int hexagon_cpu_mmu_index(CPUState *cs, bool ifetch) { + return MMU_USER_IDX; } -#include "accel/tcg/cpu-ops.h" +static void hexagon_cpu_init(Object *obj) +{ +} static const TCGCPUOps hexagon_tcg_ops = { + /* MTTCG not yet supported: require strict ordering */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = false, .initialize = hexagon_translate_init, .translate_code = hexagon_translate_code, + .get_tb_cpu_state = hexagon_get_tb_cpu_state, .synchronize_from_tb = hexagon_cpu_synchronize_from_tb, .restore_state_to_opc = hexagon_restore_state_to_opc, + .mmu_index = hexagon_cpu_mmu_index, }; -static void 
hexagon_cpu_class_init(ObjectClass *c, void *data) +static void hexagon_cpu_class_init(ObjectClass *c, const void *data) { HexagonCPUClass *mcc = HEXAGON_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index f78c8f9..43a854f 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -21,6 +21,7 @@ #include "fpu/softfloat-types.h" #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" #include "hex_regs.h" #include "mmvec/mmvec.h" @@ -136,27 +137,10 @@ G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc); -static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - uint32_t hex_flags = 0; - *pc = env->gpr[HEX_REG_PC]; - *cs_base = 0; - if (*pc == env->gpr[HEX_REG_SA0]) { - hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1); - } - *flags = hex_flags; - if (*pc & PCALIGN_MASK) { - hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0); - } -} - typedef HexagonCPU ArchCPU; void hexagon_translate_init(void); void hexagon_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); -#include "exec/cpu-all.h" - #endif /* HEXAGON_CPU_H */ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 2c5e15c..08fc541 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -329,14 +329,14 @@ void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) static inline void gen_load_locked4u(TCGv dest, TCGv vaddr, int mem_index) { - tcg_gen_qemu_ld_tl(dest, vaddr, mem_index, MO_TEUL); + tcg_gen_qemu_ld_tl(dest, vaddr, mem_index, MO_LE | MO_UL); tcg_gen_mov_tl(hex_llsc_addr, vaddr); tcg_gen_mov_tl(hex_llsc_val, dest); } static inline void gen_load_locked8u(TCGv_i64 dest, TCGv vaddr, int mem_index) { - tcg_gen_qemu_ld_i64(dest, vaddr, mem_index, MO_TEUQ); + tcg_gen_qemu_ld_i64(dest, vaddr, mem_index, MO_LE | MO_UQ); tcg_gen_mov_tl(hex_llsc_addr, vaddr); tcg_gen_mov_i64(hex_llsc_val_i64, dest); } @@ -756,7 +756,7 @@ static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA) { Insn *insn = ctx->insn; /* Needed for CHECK_NOSHUF */ CHECK_NOSHUF(EA, 8); - tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_LE | MO_UQ); } #ifndef CONFIG_HEXAGON_IDEF_PARSER @@ -1230,7 +1230,7 @@ static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, tcg_gen_andi_tl(src, src, ~((int32_t)sizeof(MMVector) - 1)); } for (int i = 0; i < sizeof(MMVector) / 8; i++) { - tcg_gen_qemu_ld_i64(tmp, src, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_i64(tmp, src, ctx->mem_idx, MO_LE | MO_UQ); tcg_gen_addi_tl(src, src, 8); tcg_gen_st_i64(tmp, tcg_env, dstoff + i * 8); } diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index a7dcd85..542af8d 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1761,7 +1761,7 @@ void gen_load(Context *c, YYLTYPE *locp, HexValue *width, if (signedness == SIGNED) { OUT(c, locp, " | MO_SIGN"); } - OUT(c, locp, " | MO_TE);\n"); + OUT(c, locp, " | MO_LE);\n"); } void gen_store(Context *c, YYLTYPE *locp, HexValue *width, HexValue *ea, diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index ee3d4c8..9ba9be4 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -21,6 +21,7 @@ #include "cpu.h" #include "hex_regs.h" #include "reg_fields.h" +#include 
"accel/tcg/getpc.h" #define GET_FIELD(FIELD, REGIN) \ fEXTRACTU_BITS(REGIN, reg_field_info[FIELD].width, \ @@ -115,27 +116,27 @@ #define MEM_LOAD2s(DST, VA) \ do { \ CHECK_NOSHUF(VA, 2); \ - tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TESW); \ + tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_LE | MO_SW); \ } while (0) #define MEM_LOAD2u(DST, VA) \ do { \ CHECK_NOSHUF(VA, 2); \ - tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TEUW); \ + tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_LE | MO_UW); \ } while (0) #define MEM_LOAD4s(DST, VA) \ do { \ CHECK_NOSHUF(VA, 4); \ - tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TESL); \ + tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_LE | MO_SL); \ } while (0) #define MEM_LOAD4u(DST, VA) \ do { \ CHECK_NOSHUF(VA, 4); \ - tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TEUL); \ + tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_LE | MO_UL); \ } while (0) #define MEM_LOAD8u(DST, VA) \ do { \ CHECK_NOSHUF(VA, 8); \ - tcg_gen_qemu_ld_i64(DST, VA, ctx->mem_idx, MO_TEUQ); \ + tcg_gen_qemu_ld_i64(DST, VA, ctx->mem_idx, MO_LE | MO_UQ); \ } while (0) #define MEM_STORE1_FUNC(X) \ diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index bcd4a1e..c7840fb 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -21,6 +21,8 @@ #include "qemu/host-utils.h" #include "arch.h" #include "mmvec/system_ext_mmvec.h" +#include "accel/tcg/getpc.h" +#include "accel/tcg/probe.h" #ifndef QEMU_GENERATE #define VdV (*(MMVector *restrict)(VdV_void)) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 6da8db8..444799d 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -17,8 +17,8 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "cpu.h" diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index fe78587..02fd40c 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -23,7 +23,7 @@ #include "exec/helper-gen.h" #include "exec/helper-proto.h" #include "exec/translation-block.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/log.h" #include "internal.h" #include "attribs.h" @@ -656,17 +656,17 @@ void process_store(DisasContext *ctx, int slot_num) case 2: tcg_gen_qemu_st_tl(hex_store_val32[slot_num], hex_store_addr[slot_num], - ctx->mem_idx, MO_TEUW); + ctx->mem_idx, MO_LE | MO_UW); break; case 4: tcg_gen_qemu_st_tl(hex_store_val32[slot_num], hex_store_addr[slot_num], - ctx->mem_idx, MO_TEUL); + ctx->mem_idx, MO_LE | MO_UL); break; case 8: tcg_gen_qemu_st_i64(hex_store_val64[slot_num], hex_store_addr[slot_num], - ctx->mem_idx, MO_TEUQ); + ctx->mem_idx, MO_LE | MO_UQ); break; default: { diff --git a/target/hppa/cpu-param.h b/target/hppa/cpu-param.h index 7ed6b57..9bf7ac7 100644 --- a/target/hppa/cpu-param.h +++ b/target/hppa/cpu-param.h @@ -19,12 +19,6 @@ #define TARGET_PAGE_BITS 12 -/* PA-RISC 1.x processors have a strong memory model. */ -/* - * ??? While we do not yet implement PA-RISC 2.0, those processors have - * a weak memory model, but with TLB bits that force ordering on a per-page - * basis. It's probably easier to fall back to a strong memory model. 
- */ -#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL +#define TARGET_INSN_START_EXTRA_WORDS 2 #endif diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 2a85495..0ca79ee 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -24,11 +24,12 @@ #include "qemu/timer.h" #include "cpu.h" #include "qemu/module.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "fpu/softfloat.h" #include "tcg/tcg.h" #include "hw/hppa/hppa_hardware.h" +#include "accel/tcg/cpu-ops.h" static void hppa_cpu_set_pc(CPUState *cs, vaddr value) { @@ -50,11 +51,12 @@ static vaddr hppa_cpu_get_pc(CPUState *cs) env->iaoq_f & -4); } -void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, - uint64_t *pcsbase, uint32_t *pflags) +static TCGTBCPUState hppa_get_tb_cpu_state(CPUState *cs) { + CPUHPPAState *env = cpu_env(cs); uint32_t flags = 0; uint64_t cs_base = 0; + vaddr pc; /* * TB lookup assumes that PC contains the complete virtual address. @@ -62,7 +64,7 @@ void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, * incomplete virtual address. This also means that we must separate * out current cpu privilege from the low bits of IAOQ_F. */ - *pc = hppa_cpu_get_pc(env_cpu(env)); + pc = hppa_cpu_get_pc(env_cpu(env)); flags |= (env->iaoq_f & 3) << TB_FLAG_PRIV_SHIFT; /* @@ -98,8 +100,7 @@ void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, } #endif - *pcsbase = cs_base; - *pflags = flags; + return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base }; } static void hppa_cpu_synchronize_from_tb(CPUState *cs, @@ -134,7 +135,7 @@ static void hppa_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool hppa_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } #endif /* !CONFIG_USER_ONLY */ @@ -249,25 +250,36 @@ static const struct SysemuCPUOps hppa_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps hppa_tcg_ops = { + /* PA-RISC 1.x processors have a strong memory model. */ + /* + * ??? While we do not yet implement PA-RISC 2.0, those processors have + * a weak memory model, but with TLB bits that force ordering on a per-page + * basis. It's probably easier to fall back to a strong memory model. 
+ */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = true, + .initialize = hppa_translate_init, .translate_code = hppa_translate_code, + .get_tb_cpu_state = hppa_get_tb_cpu_state, .synchronize_from_tb = hppa_cpu_synchronize_from_tb, .restore_state_to_opc = hppa_restore_state_to_opc, + .mmu_index = hppa_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill_align = hppa_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = hppa_cpu_exec_interrupt, .cpu_exec_halt = hppa_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = hppa_cpu_do_interrupt, .do_unaligned_access = hppa_cpu_do_unaligned_access, .do_transaction_failed = hppa_cpu_do_transaction_failed, #endif /* !CONFIG_USER_ONLY */ }; -static void hppa_cpu_class_init(ObjectClass *oc, void *data) +static void hppa_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -281,7 +293,6 @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data) &acc->parent_phases); cc->class_by_name = hppa_cpu_class_by_name; - cc->mmu_index = hppa_cpu_mmu_index; cc->dump_state = hppa_cpu_dump_state; cc->set_pc = hppa_cpu_set_pc; cc->get_pc = hppa_cpu_get_pc; diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index 8b36642..672ab37 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -21,32 +21,33 @@ #define HPPA_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" +#include "system/memory.h" #include "qemu/cpu-float.h" #include "qemu/interval-tree.h" #include "hw/registerfields.h" -#define MMU_ABS_W_IDX 6 -#define MMU_ABS_IDX 7 -#define MMU_KERNEL_IDX 8 -#define MMU_KERNEL_P_IDX 9 -#define MMU_PL1_IDX 10 -#define MMU_PL1_P_IDX 11 -#define MMU_PL2_IDX 12 -#define MMU_PL2_P_IDX 13 -#define MMU_USER_IDX 14 -#define MMU_USER_P_IDX 15 - -#define MMU_IDX_MMU_DISABLED(MIDX) ((MIDX) < MMU_KERNEL_IDX) -#define MMU_IDX_TO_PRIV(MIDX) (((MIDX) - MMU_KERNEL_IDX) / 2) -#define MMU_IDX_TO_P(MIDX) (((MIDX) - MMU_KERNEL_IDX) & 1) -#define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P) + MMU_KERNEL_IDX) +#define MMU_KERNEL_IDX 0 +#define MMU_KERNEL_P_IDX 1 +#define MMU_PL1_IDX 2 +#define MMU_PL1_P_IDX 3 +#define MMU_PL2_IDX 4 +#define MMU_PL2_P_IDX 5 +#define MMU_USER_IDX 6 +#define MMU_USER_P_IDX 7 +#define MMU_ABS_IDX 8 +#define MMU_ABS_W_IDX 9 + +#define MMU_IDX_MMU_DISABLED(MIDX) ((MIDX) >= MMU_ABS_IDX) +#define MMU_IDX_TO_PRIV(MIDX) ((MIDX) / 2) +#define MMU_IDX_TO_P(MIDX) ((MIDX) & 1) +#define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P)) #define PRIV_KERNEL 0 #define PRIV_USER 3 -#define TARGET_INSN_START_EXTRA_WORDS 2 - /* No need to flush MMU_ABS*_IDX */ #define HPPA_MMU_FLUSH_MASK \ (1 << MMU_KERNEL_IDX | 1 << MMU_KERNEL_P_IDX | \ @@ -303,8 +304,6 @@ struct HPPACPUClass { ResettablePhases parent_phases; }; -#include "exec/cpu-all.h" - static inline bool hppa_is_pa20(const CPUHPPAState *env) { return env->is_pa20; @@ -352,9 +351,6 @@ hwaddr hppa_abs_to_phys_pa2_w1(vaddr addr); #define CS_BASE_DIFFPAGE (1 << 12) #define CS_BASE_DIFFSPACE (1 << 13) -void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags); - target_ulong cpu_hppa_get_psw(CPUHPPAState *env); void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong); void update_gva_offset_mask(CPUHPPAState *env); diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index a62d9d3..4535320 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -19,7 +19,6 @@ #include 
"qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" @@ -95,7 +94,8 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) { uint32_t soft_exp = get_float_exception_flags(&env->fp_status); uint32_t hard_exp = 0; - uint32_t shadow = env->fr0_shadow; + uint32_t shadow = env->fr0_shadow & 0x3ffffff; + uint32_t fr1 = 0; if (likely(soft_exp == 0)) { env->fr[0] = (uint64_t)shadow << 32; @@ -108,9 +108,22 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, R_FPSR_ENA_O_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, R_FPSR_ENA_Z_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, R_FPSR_ENA_V_MASK); - shadow |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + if (hard_exp & shadow) { + shadow = FIELD_DP32(shadow, FPSR, T, 1); + /* fill exception register #1, which is lower 32-bits of fr[0] */ +#if !defined(CONFIG_USER_ONLY) + if (hard_exp & (R_FPSR_ENA_O_MASK | R_FPSR_ENA_U_MASK)) { + /* over- and underflow both set overflow flag only */ + fr1 = FIELD_DP32(fr1, FPSR, C, 1); + fr1 = FIELD_DP32(fr1, FPSR, FLG_O, 1); + } else +#endif + { + fr1 |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + } + } env->fr0_shadow = shadow; - env->fr[0] = (uint64_t)shadow << 32; + env->fr[0] = (uint64_t)shadow << 32 | fr1; if (hard_exp & shadow) { hppa_dynamic_excp(env, EXCP_ASSIST, ra); diff --git a/target/hppa/helper.c b/target/hppa/helper.c index ac7f58f..d7f8495 100644 --- a/target/hppa/helper.c +++ b/target/hppa/helper.c @@ -21,7 +21,6 @@ #include "qemu/log.h" #include "cpu.h" #include "fpu/softfloat.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "qemu/qemu-print.h" #include "hw/hppa/hppa_hardware.h" diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 7d48643..191ae19 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -177,6 +177,10 @@ void hppa_cpu_do_interrupt(CPUState *cs) } } env->cr[CR_IIR] = ldl_phys(cs->as, paddr); + if (i == EXCP_ASSIST) { + /* stuff insn code into bits of FP exception register #1 */ + env->fr[0] |= (env->cr[CR_IIR] & 0x03ffffff); + } } break; diff --git a/target/hppa/machine.c b/target/hppa/machine.c index bb47a2e..13e5551 100644 --- a/target/hppa/machine.c +++ b/target/hppa/machine.c @@ -216,7 +216,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, HPPACPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, HPPACPU, 1, vmstate_env, CPUHPPAState), VMSTATE_END_OF_LIST() }; diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index fb1d93e..9bdd0a6 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -20,9 +20,11 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" +#include "accel/tcg/cpu-mmu-index.h" +#include "accel/tcg/probe.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "exec/helper-proto.h" #include "hw/core/cpu.h" #include "trace.h" diff --git a/target/hppa/op_helper.c b/target/hppa/op_helper.c index beb8f88..0458378 100644 --- a/target/hppa/op_helper.c +++ b/target/hppa/op_helper.c @@ -20,9 +20,9 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" 
#include "qemu/timer.h" #include "trace.h" #ifdef CONFIG_USER_ONLY diff --git a/target/hppa/sys_helper.c b/target/hppa/sys_helper.c index 052a6a8..6e65fad 100644 --- a/target/hppa/sys_helper.c +++ b/target/hppa/sys_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "qemu/timer.h" #include "system/runstate.h" diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 0d0d1bc..7a81cfc 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" @@ -28,6 +27,7 @@ #include "exec/helper-gen.h" #include "exec/translator.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "exec/log.h" #define HELPER_H "helper.h" @@ -1208,10 +1208,10 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, cb_msb = tcg_temp_new_i64(); cb = tcg_temp_new_i64(); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); if (is_c) { - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, - get_psw_carry(ctx, d), ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, in2, get_psw_carry(ctx, d)); + } else { + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); } tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); @@ -1307,9 +1307,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, if (is_b) { /* DEST,C = IN1 + ~IN2 + C. */ tcg_gen_not_i64(cb, in2); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, - get_psw_carry(ctx, d), ctx->zero); - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, cb, ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, cb, get_psw_carry(ctx, d)); tcg_gen_xor_i64(cb, cb, in1); tcg_gen_xor_i64(cb, cb, dest); } else { @@ -3007,9 +3005,7 @@ static bool trans_ds(DisasContext *ctx, arg_rrr_cf *a) tcg_gen_xor_i64(add2, in2, addc); tcg_gen_andi_i64(addc, addc, 1); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, add1, ctx->zero, add2, ctx->zero); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, dest, cpu_psw_cb_msb, - addc, ctx->zero); + tcg_gen_addcio_i64(dest, cpu_psw_cb_msb, add1, add2, addc); /* Write back the result register. 
*/ save_gpr(ctx, a->t, dest); @@ -3552,8 +3548,7 @@ static bool do_addb(DisasContext *ctx, unsigned r, TCGv_i64 in1, TCGv_i64 cb = tcg_temp_new_i64(); TCGv_i64 cb_msb = tcg_temp_new_i64(); - tcg_gen_movi_i64(cb_msb, 0); - tcg_gen_add2_i64(dest, cb_msb, in1, cb_msb, in2, cb_msb); + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); cb_cond = get_carry(ctx, d, cb, cb_msb); diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index ced1998..560f468 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "system/memory_mapping.h" +#include "system/memory.h" /* PAE Paging or IA-32e Paging */ static void walk_pte(MemoryMappingList *list, AddressSpace *as, @@ -34,7 +35,7 @@ static void walk_pte(MemoryMappingList *list, AddressSpace *as, } start_paddr = (pte & ~0xfff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -64,7 +65,7 @@ static void walk_pte2(MemoryMappingList *list, AddressSpace *as, } start_paddr = pte & ~0xfff; - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -99,7 +100,7 @@ static void walk_pde(MemoryMappingList *list, AddressSpace *as, if (pde & PG_PSE_MASK) { /* 2 MB page */ start_paddr = (pde & ~0x1fffff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -141,7 +142,7 @@ static void walk_pde2(MemoryMappingList *list, AddressSpace *as, */ high_paddr = ((hwaddr)(pde & 0x1fe000) << 19); start_paddr = (pde & ~0x3fffff) | high_paddr; - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -202,7 +203,7 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as, if (pdpe & PG_PSE_MASK) { /* 1 GB page */ start_paddr = (pdpe & ~0x3fffffff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c index b372784..cfb71bf 100644 --- a/target/i386/confidential-guest.c +++ b/target/i386/confidential-guest.c @@ -20,7 +20,7 @@ OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, X86_CONFIDENTIAL_GUEST, CONFIDENTIAL_GUEST_SUPPORT) -static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +static void x86_confidential_guest_class_init(ObjectClass *oc, const void *data) { } diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be76..48b88db 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass { /* <public> */ int (*kvm_type)(X86ConfidentialGuest *cg); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void 
x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values and + * return the adjusted value. Bits may be removed because they are not + * supported by the confidential computing firmware, or added because the + * firmware forcibly exposes them to the guest. */ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/cpu-apic.c b/target/i386/cpu-apic.c index c1708b0..242a05f 100644 --- a/target/i386/cpu-apic.c +++ b/target/i386/cpu-apic.c @@ -14,7 +14,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "system/xen.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/i386/apic_internal.h" #include "cpu-internal.h" diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h index b0e884c..ebb844b 100644 --- a/target/i386/cpu-param.h +++ b/target/i386/cpu-param.h @@ -22,7 +22,6 @@ #endif #define TARGET_PAGE_BITS 12 -/* The x86 has a strong memory model with some store-after-load re-ordering */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) +#define TARGET_INSN_START_EXTRA_WORDS 1 #endif diff --git a/target/i386/cpu-system.c b/target/i386/cpu-system.c index 55f192e..b1494aa 100644 --- a/target/i386/cpu-system.c +++ b/target/i386/cpu-system.c @@ -24,7 +24,7 @@ #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qom/qom-qobject.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-internal.h" diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1b64cea..455caff 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -28,6 +28,7 @@ #include "system/hvf.h" #include "hvf/hvf-i386.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "sev.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -35,13 +36,17 @@ #include "standard-headers/asm-x86/kvm_para.h" #include "hw/qdev-properties.h" #include "hw/i386/topology.h" +#include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" -#include
"qapi/qapi-commands-machine-target.h" -#include "exec/address-spaces.h" +#include "qapi/qapi-commands-machine.h" +#include "system/address-spaces.h" #include "hw/boards.h" #include "hw/i386/sgx-epc.h" #endif +#include "system/qtest.h" +#include "tcg/tcg-cpu.h" #include "disas/capstone.h" #include "cpu-internal.h" @@ -63,6 +68,7 @@ struct CPUID2CacheDescriptorInfo { /* * Known CPUID 2 cache descriptors. + * TLB, prefetch and sectored cache related descriptors are not included. * From Intel SDM Volume 2A, CPUID instruction */ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { @@ -84,18 +90,29 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 2, .line_size = 64, }, [0x21] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, .associativity = 8, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x22, 0x23 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x22, 0x23 are not included + */ [0x24] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 16, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x25, 0x20 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x25, 0x29 are not included + */ [0x2C] = { .level = 1, .type = DATA_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, [0x30] = { .level = 1, .type = INSTRUCTION_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, + /* + * Newer Intel CPUs (having the cores without L3, e.g., Intel MTL, ARL) + * use CPUID 0x4 leaf to describe cache topology, by encoding CPUID 0x2 + * leaf with 0xFF. For older CPUs (without 0x4 leaf), it's also valid + * to just ignore L3's code if there's no L3. + * + * This already covers all the cases in QEMU, so code 0x40 is not + * included. + */ [0x41] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB, .associativity = 4, .line_size = 32, }, [0x42] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, @@ -112,7 +129,18 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 8, .line_size = 64, }, [0x48] = { .level = 2, .type = UNIFIED_CACHE, .size = 3 * MiB, .associativity = 12, .line_size = 64, }, - /* Descriptor 0x49 depends on CPU family/model, so it is not included */ + /* + * Descriptor 0x49 has 2 cases: + * - 2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size. + * - 3rd-level cache: 4MB, 16-way set associative, 64-byte line size + * (Intel Xeon processor MP, Family 0FH, Model 06H). + * + * When it represents L3, then it depends on CPU family/model. Fortunately, + * the legacy cache/CPU models don't have such special L3. So, just add it + * to represent the general L2 case. + */ + [0x49] = { .level = 2, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 16, .line_size = 64, }, [0x4A] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB, .associativity = 12, .line_size = 64, }, [0x4B] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, @@ -133,9 +161,10 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 4, .line_size = 64, }, [0x78] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 4, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. 
- */ + /* + * lines per sector is not supported by cpuid2_cache_descriptor(), + * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. + */ [0x7D] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, .associativity = 8, .line_size = 64, }, [0x7F] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, @@ -196,7 +225,7 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { * Return a CPUID 2 cache descriptor for a given cache. * If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE */ -static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) +static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache, bool *unmatched) { int i; @@ -213,9 +242,46 @@ } } + *unmatched |= true; return CACHE_DESCRIPTOR_UNAVAILABLE; } +static const CPUCaches legacy_intel_cpuid2_cache_info; + +/* Encode cache info for CPUID[2] */ +static void encode_cache_cpuid2(X86CPU *cpu, + const CPUCaches *caches, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + CPUX86State *env = &cpu->env; + int l1d, l1i, l2, l3; + bool unmatched = false; + + *eax = 1; /* Number of CPUID[EAX=2] calls required */ + *ebx = *ecx = *edx = 0; + + l1d = cpuid2_cache_descriptor(caches->l1d_cache, &unmatched); + l1i = cpuid2_cache_descriptor(caches->l1i_cache, &unmatched); + l2 = cpuid2_cache_descriptor(caches->l2_cache, &unmatched); + l3 = cpuid2_cache_descriptor(caches->l3_cache, &unmatched); + + if (!cpu->consistent_cache || + (env->cpuid_min_level < 0x4 && !unmatched)) { + /* + * Though the SDM defines code 0x40 for cases with no L2 or L3, it's + * also valid to just ignore L3's code if there's no L2. + */ + if (cpu->enable_l3_cache) { + *ecx = l3; + } + *edx = (l1d << 16) | (l1i << 8) | l2; + } else { + *ecx = 0; + *edx = CACHE_DESCRIPTOR_UNAVAILABLE; + } +} + /* CPUID Leaf 4 constants: */ /* EAX: */ @@ -283,11 +349,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache, assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); + /* + * The following fields have bit-width limitations, so consider the + * maximum values to avoid overflow: + * Bits 25-14: maximum 4095. + * Bits 31-26: maximum 63. + */ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | - (max_core_ids_in_package(topo_info) << 26) | - (max_thread_ids_for_cache(topo_info, cache->share_level) << 14); + (MIN(max_core_ids_in_package(topo_info), 63) << 26) | + (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -427,7 +499,6 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count, static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) { assert(cache->size % 1024 == 0); - assert(cache->lines_per_tag > 0); assert(cache->associativity > 0); assert(cache->line_size > 0); return ((cache->size / 1024) << 24) | (cache->associativity << 16) | @@ -436,8 +507,8 @@ #define ASSOC_FULL 0xFF -/* AMD associativity encoding used on CPUID Leaf 0x80000006: */ -#define AMD_ENC_ASSOC(a) (a <= 1 ? a : \ +/* x86 associativity encoding used on CPUID Leaf 0x80000006: */ +#define X86_ENC_ASSOC(a) (a <= 1 ? a : \ a == 2 ? 0x2 : \ a == 4 ? 0x4 : \ a == 8 ?
0x6 : \ @@ -460,19 +531,18 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, { assert(l2->size % 1024 == 0); assert(l2->associativity > 0); - assert(l2->lines_per_tag > 0); assert(l2->line_size > 0); *ecx = ((l2->size / 1024) << 16) | - (AMD_ENC_ASSOC(l2->associativity) << 12) | + (X86_ENC_ASSOC(l2->associativity) << 12) | (l2->lines_per_tag << 8) | (l2->line_size); + /* For Intel, EDX is reserved. */ if (l3) { assert(l3->size % (512 * 1024) == 0); assert(l3->associativity > 0); - assert(l3->lines_per_tag > 0); assert(l3->line_size > 0); *edx = ((l3->size / (512 * 1024)) << 18) | - (AMD_ENC_ASSOC(l3->associativity) << 12) | + (X86_ENC_ASSOC(l3->associativity) << 12) | (l3->lines_per_tag << 8) | (l3->line_size); } else { *edx = 0; @@ -490,7 +560,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; + /* Bits 25:14 - NumSharingCache: maximum 4095. */ + *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -570,117 +641,172 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * These are legacy cache values. If there is a need to change any * of these values please use builtin_x86_defs */ - -/* L1 data cache: */ -static CPUCacheInfo legacy_l1d_cache = { - .type = DATA_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1d_cache_amd = { - .type = DATA_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* L1 instruction cache: */ -static CPUCacheInfo legacy_l1i_cache = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1i_cache_amd = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* Level 2 unified cache: */ -static CPUCacheInfo legacy_l2_cache = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 4 * MiB, - .self_init = 1, - .line_size = 64, - .associativity = 16, - .sets = 4096, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -static CPUCacheInfo legacy_l2_cache_cpuid2 = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 2 * MiB, - .line_size = 64, - .associativity = 8, - .share_level = CPU_TOPOLOGY_LEVEL_INVALID, +static const CPUCaches legacy_amd_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + 
.lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .lines_per_tag = 1, + .associativity = 16, + .sets = 512, + .partitions = 1, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; - -/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l2_cache_amd = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 512 * KiB, - .line_size = 64, - .lines_per_tag = 1, - .associativity = 16, - .sets = 512, - .partitions = 1, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, +/* + * Only used for CPU models with CPUID level < 4. These CPUs only use + * CPUID leaf 2 to present cache information. + * + * Note: This cache model is just a default one, and is not + * guaranteed to match real hardware. + */ +static const CPUCaches legacy_intel_cpuid2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 2 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; -/* Level 3 unified cache: */ -static CPUCacheInfo legacy_l3_cache = { - .type = UNIFIED_CACHE, - .level = 3, - .size = 16 * MiB, - .line_size = 64, - .associativity = 16, - .sets = 16384, - .partitions = 1, - .lines_per_tag = 1, - .self_init = true, - .inclusive = true, - .complex_indexing = true, - .share_level = CPU_TOPOLOGY_LEVEL_DIE, +static const CPUCaches legacy_intel_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + },
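/*
 * Illustration, not part of the patch: how encode_cache_cpuid2() maps the
 * legacy_intel_cpuid2_cache_info model above onto leaf 2 descriptors.
 * From cpuid2_cache_descriptors[], the 32 KiB/8-way L1D matches 0x2C, the
 * 32 KiB/8-way L1I matches 0x30, and the 2 MiB/8-way L2 matches 0x7D, so
 * the legacy reply is:
 *
 *   EAX = 0x01;                               one iteration required
 *   EDX = (0x2C << 16) | (0x30 << 8) | 0x7D;
 *
 * The 16 MiB L3 is reported in ECX only when enable_l3_cache is set; per
 * the SDM it would be descriptor 0x4D, in a part of the table not shown
 * here.
 */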
+ .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 4 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; /* TLB definitions: */ @@ -774,11 +900,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \ CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \ - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE) + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \ + CPUID_HT) /* partly implemented: CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: - CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_TM, CPUID_PBE */ /* * Kernel-only features that can be shown to usermode programs even if @@ -846,7 +973,8 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ - CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES | \ + CPUID_EXT3_CMP_LEG) #define TCG_EXT4_FEATURES 0 @@ -895,6 +1023,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_ECX_FEATURES 0 #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 @@ -920,6 +1049,17 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_KERNEL_FEATURES) +#if defined CONFIG_USER_ONLY +#define CPUID_8000_0021_EAX_KERNEL_FEATURES CPUID_8000_0021_EAX_AUTO_IBRS +#else +#define CPUID_8000_0021_EAX_KERNEL_FEATURES 0 +#endif + +#define TCG_8000_0021_EAX_FEATURES ( \ + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | \ + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | \ + CPUID_8000_0021_EAX_KERNEL_FEATURES) + FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { .type = CPUID_FEATURE_WORD, @@ -1134,6 +1274,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "msr-imm", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_ECX, + }, + .tcg_features = TCG_7_1_ECX_FEATURES, + }, [FEAT_7_1_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1237,17 +1396,17 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, - NULL, NULL, "null-sel-clr-base", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, + NULL, 
"verw-clear", "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, - .tcg_features = 0, + .tcg_features = TCG_8000_0021_EAX_FEATURES, .unmigratable_flags = 0, }, [FEAT_8000_0021_EBX] = { @@ -1256,6 +1415,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, "tsa-sq-no", "tsa-l1-no", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000021, .reg = R_ECX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_8000_0022_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1370,6 +1545,14 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "bhi-no", NULL, NULL, NULL, "pbrsb-no", NULL, "gds-no", "rfds-no", "rfds-clear", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, "its-no", NULL, }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, @@ -1654,14 +1837,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} static FeatureDep feature_dependencies[] = { { @@ -1773,10 +1963,6 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, { - .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, - { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, }, @@ -1831,9 +2017,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -1899,7 +2082,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted) static inline bool accel_uses_host_cpuid(void) { - return kvm_enabled() || hvf_enabled(); + return !tcg_enabled() && !qtest_enabled(); } static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) @@ -2183,6 +2366,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + 
.associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2291,6 +2528,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2399,6 +2690,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2453,6 +2798,486 @@ 
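/*
 * [Editorial annotation, not part of the patch] Every CPUCacheInfo model in
 * this series keeps the standard geometry identity
 *     size == line_size * associativity * partitions * sets,
 * e.g. the EPYC-v5 L2 above: 64 * 8 * 1 * 1024 == 512 KiB.  A self-check
 * sketch under that assumption (cache_geom and assert_cache_geometry are
 * illustrative names, not QEMU types):
 */
#include <assert.h>
#include <stdint.h>

struct cache_geom {
    uint64_t size;
    uint32_t line_size, associativity, partitions, sets;
};

static void assert_cache_geometry(const struct cache_geom *c)
{
    /* multiply in 64 bits so large L3 sizes (e.g. 288 MiB) cannot wrap */
    assert((uint64_t)c->line_size * c->associativity *
           c->partitions * c->sets == c->size);
}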
static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + } +}; + +static const CPUCaches xeon_spr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX 
*/ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 15, + + /* CPUID 0x4.0x3.ECX */ + .sets = 65536, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 60 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_gnr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 294912, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 288 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_srf_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 128, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 4096, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = 
false, + .inclusive = false, + .complex_indexing = false, + + .size = 4 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_MODULE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x3.ECX */ + .sets = 147456, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 108 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches yongfeng_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.ECX */ + .lines_per_tag = 1, + .size = 32 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.EDX */ + .lines_per_tag = 1, + .size = 64 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x2.ECX */ + .sets = 512, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = true, + .complex_indexing = false, + + /* CPUID 0x80000006.ECX */ + .size = 256 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 8192, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = true, + .inclusive = true, + .complex_indexing = false, + + .size = 8 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -2705,6 +3530,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { I486_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium", @@ -2717,6 +3543,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium2", @@ -2729,6 +3556,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM2_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium3", @@ -2741,6 +3569,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM3_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "athlon", @@ -4273,6 +5102,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .note = "with spr-sp cache model and 0x1f leaf", + .cache_info = &xeon_spr_cache_info, + 
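/*
 * [Editorial annotation, not part of the patch] The "CPUID 0x4.0xN.EBX"
 * groupings in the Xeon/Yongfeng models above mirror the SDM leaf-4 EBX
 * layout, in which ways, partitions and line size are each stored minus
 * one.  An illustrative encoder (cpuid4_ebx() is a sketch, not the patch's
 * encode_cache_cpuid4()):
 */
#include <stdint.h>

static inline uint32_t cpuid4_ebx(uint32_t line_size, uint32_t partitions,
                                  uint32_t ways)
{
    return ((ways - 1) << 22) |        /* EBX[31:22]: ways minus 1 */
           ((partitions - 1) << 12) |  /* EBX[21:12]: partitions minus 1 */
           (line_size - 1);            /* EBX[11:0]: line size minus 1 */
}
/* Yongfeng L3 above: cpuid4_ebx(64, 1, 16) == 0x03C0003F. */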
.props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, @@ -4426,6 +5264,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with gnr-sp cache model and 0x1f leaf", + .cache_info = &xeon_gnr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -4571,6 +5418,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with srf-sp cache model and 0x1f leaf", + .cache_info = &xeon_srf_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -5210,6 +6066,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, @@ -5348,6 +6223,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, @@ -5423,6 +6317,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, @@ -5497,6 +6410,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", @@ -5621,6 +6559,110 @@ static const X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, .model_id = "Zhaoxin YongFeng Processor", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + 
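/*
 * [Editorial annotation, not part of the patch] Each versioned model entry
 * pairs an optional cache_info override with a NULL-terminated PropValue
 * list that is layered on top of the base definition when that version is
 * selected, so a "-v4"/"-v3" alias of the models above ends up applying,
 * e.g., { "x-force-cpuid-0x1f", "on" } before realize.  Roughly how such a
 * list is walked -- a sketch assuming QEMU's object_property_parse(); the
 * helper name is hypothetical:
 */
static void apply_version_props(Object *obj, const PropValue *props)
{
    for (const PropValue *pv = props; pv->prop; pv++) {
        if (!pv->value) {
            continue;   /* entries without a value are skipped */
        }
        /* parse the string into the property's actual type (bool, etc.) */
        object_property_parse(obj, pv->prop, pv->value, &error_abort);
    }
}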
.note = "with the correct model number", + .props = (PropValue[]) { + { "model", "0x5b" }, + { /* end of list */ } + } + }, + { + .version = 3, + .note = "with the cache model and 0x1f leaf", + .cache_info = &yongfeng_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, + { /* end of list */ } + } + }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC 
| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, }, }; @@ -5689,13 +6731,14 @@ static void max_x86_cpu_realize(DeviceState *dev, Error **errp) x86_cpu_realizefn(dev, errp); } -static void max_x86_cpu_class_init(ObjectClass *oc, void *data) +static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); X86CPUClass *xcc = X86_CPU_CLASS(oc); xcc->ordering = 9; + xcc->max_features = true; xcc->model_description = "Enables all features supported by the accelerator in the current host"; @@ -5706,22 +6749,21 @@ static void max_x86_cpu_class_init(ObjectClass *oc, void *data) static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); - - /* We can't fill the features array here because we don't know yet if - * "migratable" is true or false. - */ - cpu->max_features = true; - object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); + CPUX86State *env = &cpu->env; /* - * these defaults are used for TCG and all other accelerators - * besides KVM and HVF, which overwrite these values + * these defaults are used for TCG, other accelerators have overwritten + * these values */ - object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, - &error_abort); - object_property_set_str(OBJECT(cpu), "model-id", - "QEMU TCG CPU version " QEMU_HW_VERSION, - &error_abort); + if (!env->cpuid_vendor1) { + object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, + &error_abort); + } + if (!env->cpuid_model[0]) { + object_property_set_str(OBJECT(cpu), "model-id", + "QEMU TCG CPU version " QEMU_HW_VERSION, + &error_abort); + } } static const TypeInfo max_x86_cpu_type_info = { @@ -5731,7 +6773,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5740,11 +6782,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } @@ -5764,12 +6810,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5782,7 +6829,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 
64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? f->feat_names[i] : "", i); + } + } +} + +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, @@ -5800,10 +6875,7 @@ static void x86_cpuid_version_get_family(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 8) & 0xf; - if (value == 0xf) { - value += (env->cpuid_version >> 20) & 0xff; - } + value = x86_cpu_family(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5841,8 +6913,7 @@ static void x86_cpuid_version_get_model(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 4) & 0xf; - value |= ((env->cpuid_version >> 16) & 0xf) << 4; + value = x86_cpu_model(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5876,7 +6947,7 @@ static void x86_cpuid_version_get_stepping(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = env->cpuid_version & 0xf; + value = x86_cpu_stepping(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5944,11 +7015,11 @@ static char *x86_cpuid_get_model_id(Object *obj, Error **errp) char *value; int i; - value = g_malloc(48 + 1); - for (i = 0; i < 48; i++) { + value = g_malloc(CPUID_MODEL_ID_SZ + 1); + for (i = 0; i < CPUID_MODEL_ID_SZ; i++) { value[i] = env->cpuid_model[i >> 2] >> (8 * (i & 3)); } - value[48] = '\0'; + value[CPUID_MODEL_ID_SZ] = '\0'; return value; } @@ -5963,7 +7034,7 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id, model_id = ""; } len = strlen(model_id); - memset(env->cpuid_model, 0, 48); + memset(env->cpuid_model, 0, CPUID_MODEL_ID_SZ); for (i = 0; i < 48; i++) { if (i >= len) { c = '\0'; @@ -6226,7 +7297,7 @@ static void listflags(GList *features) } /* Sort alphabetically by type name, respecting X86CPUClass::ordering. 
*/ -static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) +static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b, gpointer d) { ObjectClass *class_a = (ObjectClass *)a; ObjectClass *class_b = (ObjectClass *)b; @@ -6247,7 +7318,7 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) static GSList *get_sorted_cpu_model_list(void) { GSList *list = object_class_get_list(TYPE_X86_CPU, false); - list = g_slist_sort(list, x86_cpu_list_compare); + list = g_slist_sort_with_data(list, x86_cpu_list_compare, NULL); return list; } @@ -6304,8 +7375,13 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) qemu_printf(" %-20s %s\n", name, desc); } +static gint strcmp_wrap(gconstpointer a, gconstpointer b, gpointer d) +{ + return strcmp(a, b); +} + /* list available CPU models and flags */ -void x86_cpu_list(void) +static void x86_cpu_list(void) { int i, j; GSList *list; @@ -6326,7 +7402,7 @@ void x86_cpu_list(void) } } - names = g_list_sort(names, (GCompareFunc)strcmp); + names = g_list_sort_with_data(names, strcmp_wrap, NULL); qemu_printf("\nRecognized CPUID flags:\n"); listflags(names); @@ -6479,6 +7555,20 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w) #endif break; + case FEAT_7_0_EDX: + /* + * Windows does not like ARCH_CAPABILITIES on AMD machines at all. + * Do not show the fake ARCH_CAPABILITIES MSR that KVM sets up, + * except if needed for migration. + * + * When arch_cap_always_on is removed, this tweak can move to + * kvm_arch_get_supported_cpuid. + */ + if (cpu && IS_AMD_CPU(&cpu->env) && !cpu->arch_cap_always_on) { + unavail = CPUID_7_0_EDX_ARCH_CAPABILITIES; + } + break; + default: break; } @@ -6688,7 +7778,7 @@ static const gchar *x86_gdb_arch_name(CPUState *cs) #endif } -static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data) +static void x86_cpu_cpudef_class_init(ObjectClass *oc, const void *data) { const X86CPUModel *model = data; X86CPUClass *xcc = X86_CPU_CLASS(oc); @@ -6818,14 +7908,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *edx = env->features[FEAT_1_EDX]; if (threads_per_pkg > 1) { - *ebx |= threads_per_pkg << 16; + uint32_t num; + + /* + * For CPUID.01H.EBX[Bits 23-16], AMD requires logical processor + * count, but Intel needs maximum number of addressable IDs for + * logical processors per package. + */ + if ((IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + num = 1 << apicid_pkg_offset(topo_info); + } else { + num = threads_per_pkg; + } + + /* Fixup overflow: max value for bits 23-16 is 255. 
*/ + *ebx |= MIN(num, 255) << 16; } - if (!cpu->enable_pmu) { - *ecx &= ~CPUID_EXT_PDCM; + if (cpu->pdcm_on_even_without_pmu) { + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } } break; - case 2: - /* cache info: needed for Pentium Pro compatibility */ + case 2: { /* cache info: needed for Pentium Pro compatibility */ + const CPUCaches *caches; + + if (env->enable_legacy_cpuid2_cache) { + caches = &legacy_intel_cpuid2_cache_info; + } else if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; @@ -6833,18 +7948,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = *ebx = *ecx = *edx = 0; break; } - *eax = 1; /* Number of CPUID[EAX=2] calls required */ - *ebx = 0; - if (!cpu->enable_l3_cache) { - *ecx = 0; + encode_cache_cpuid2(cpu, caches, eax, ebx, ecx, edx); + break; + } + case 4: { + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; } else { - *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache); + caches = &env->cache_info; } - *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache)); - break; - case 4: + /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); @@ -6856,13 +7971,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(topo_info) << 26; + *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; /* Share the cache at package level. 
*/ - *eax |= max_thread_ids_for_cache(topo_info, - CPU_TOPOLOGY_LEVEL_SOCKET) << 14; + *eax |= MIN(max_thread_ids_for_cache(topo_info, + CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { @@ -6872,30 +7987,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - topo_info, + encode_cache_cpuid4(caches->l1d_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 1: /* L1 icache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - topo_info, + encode_cache_cpuid4(caches->l1i_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 2: /* L2 cache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - topo_info, + encode_cache_cpuid4(caches->l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ if (cpu->enable_l3_cache) { - encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - topo_info, + encode_cache_cpuid4(caches->l3_cache, topo_info, eax, ebx, ecx, edx); break; } @@ -6906,6 +8017,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } break; + } case 5: /* MONITOR/MWAIT Leaf */ *eax = cpu->mwait.eax; /* Smallest monitor-line size in bytes */ @@ -6933,9 +8045,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *ecx = env->features[FEAT_7_1_ECX]; *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; - *ecx = 0; } else if (count == 2) { *edx = env->features[FEAT_7_2_EDX]; *eax = 0; @@ -6996,21 +8108,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. 
*/ break; - case 0x1C: - if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { - x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); - *edx = 0; - } - break; - case 0x1F: - /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { - *eax = *ebx = *ecx = *edx = 0; - break; - } - - encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); - break; case 0xD: { /* Processor Extended State */ *eax = 0; @@ -7151,6 +8248,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1C: + if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; case 0x1D: { /* AMX TILE, for now hardcoded for Sapphire Rapids*/ *eax = 0; @@ -7188,6 +8291,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1F: + /* V2 Extended Topology Enumeration Leaf */ + if (!x86_has_cpuid_0x1f(cpu)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); + break; case 0x24: { *eax = 0; *ebx = 0; @@ -7224,9 +8336,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000000: *eax = env->cpuid_xlevel; - *ebx = env->cpuid_vendor1; - *edx = env->cpuid_vendor2; - *ecx = env->cpuid_vendor3; + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ebx = *ecx = *edx = 0; + } else { + *ebx = env->cpuid_vendor1; + *edx = env->cpuid_vendor2; + *ecx = env->cpuid_vendor3; + } break; case 0x80000001: *eax = env->cpuid_version; @@ -7234,7 +8352,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->features[FEAT_8000_0001_ECX]; *edx = env->features[FEAT_8000_0001_EDX]; - if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + if (tcg_enabled() && IS_INTEL_CPU(env) && !(env->hflags & HF_LMA_MASK)) { *edx &= ~CPUID_EXT2_SYSCALL; } @@ -7247,41 +8365,78 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2]; *edx = env->cpuid_model[(index - 0x80000002) * 4 + 3]; break; - case 0x80000005: - /* cache info (L1 cache) */ + case 0x80000005: { + /* cache info (L1 cache/TLB Associativity Field) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } + + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); - *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); + *ecx = encode_cache_cpuid80000005(caches->l1d_cache); + *edx = encode_cache_cpuid80000005(caches->l1i_cache); break; - case 0x80000006: - /* cache info (L2 cache) */ + } + case 0x80000006: { /* cache info (L2 cache/TLB/L3 cache) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, 
edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *eax = *ebx = 0; + encode_cache_cpuid80000006(caches->l2_cache, + NULL, ecx, edx); + break; + } + + *eax = (X86_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | + *ebx = (X86_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); - encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, + + encode_cache_cpuid80000006(caches->l2_cache, cpu->enable_l3_cache ? - env->cache_info_amd.l3_cache : NULL, + caches->l3_cache : NULL, ecx, edx); break; + } case 0x80000007: *eax = 0; - *ebx = env->features[FEAT_8000_0007_EBX]; + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *ebx = 0; + } else { + *ebx = env->features[FEAT_8000_0007_EBX]; + } *ecx = 0; *edx = env->features[FEAT_8000_0007_EDX]; break; @@ -7294,6 +8449,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; + + /* + * Don't emulate Bits [7:0] & Bits [15:12] for Intel/Zhaoxin, since + * they're using 0x1f leaf. + */ + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ecx = *edx = 0; + break; + } + if (threads_per_pkg > 1) { /* * Bits 15:12 is "The number of bits in the initial @@ -7329,19 +8495,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, + encode_cache_cpuid8000001d(env->cache_info.l1d_cache, topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, + encode_cache_cpuid8000001d(env->cache_info.l1i_cache, topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, + encode_cache_cpuid8000001d(env->cache_info.l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, + encode_cache_cpuid8000001d(env->cache_info.l3_cache, topo_info, eax, ebx, ecx, edx); break; default: /* end of info */ @@ -7362,6 +8528,22 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0x8000001F: + *eax = *ebx = *ecx = *edx = 0; + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; + *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + case 0x80000021: + *eax = *ebx = *ecx = *edx = 0; + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = env->features[FEAT_8000_0021_EBX]; + *ecx = env->features[FEAT_8000_0021_ECX]; + break; case 0x80000022: *eax = *ebx = *ecx = *edx = 0; /* AMD Extended Performance Monitoring and Debug */ @@ -7394,21 +8576,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; - case 0x8000001F: - *eax = *ebx = *ecx = *edx = 0; - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 0x8 : 0; - *eax |= sev_snp_enabled() ? 
0x10 : 0; - *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - case 0x80000021: - *eax = *ebx = *ecx = *edx = 0; - *eax = env->features[FEAT_8000_0021_EAX]; - *ebx = env->features[FEAT_8000_0021_EBX]; - break; default: /* reserved values: zero */ *eax = 0; @@ -7482,7 +8649,11 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) env->idt.limit = 0xffff; env->gdt.limit = 0xffff; +#if defined(CONFIG_USER_ONLY) + env->ldt.limit = 0; +#else env->ldt.limit = 0xffff; +#endif env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT); env->tr.limit = 0xffff; env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT); @@ -7628,7 +8799,7 @@ static void mce_init(X86CPU *cpu) CPUX86State *cenv = &cpu->env; unsigned int bank; - if (((cenv->cpuid_version >> 8) & 0xf) >= 6 + if (x86_cpu_family(cenv->cpuid_version) >= 6 && (cenv->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF | @@ -7756,6 +8927,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) */ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; FeatureWord w; int i; @@ -7775,12 +8947,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - /*TODO: Now cpu->max_features doesn't overwrite features + /* TODO: Now xcc->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->max_features) { + if (xcc->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { /* Override only features that weren't set explicitly * by the user. @@ -7812,6 +8984,13 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } + if (!cpu->pdcm_on_even_without_pmu) { + /* PDCM is fixed1 bit for TDX */ + if (!cpu->enable_pmu && !is_tdx_vm()) { + env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; + } + } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { FeatureDep *d = &feature_dependencies[i]; if (!(env->features[d->from.index] & d->from.mask)) { @@ -7840,6 +9019,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); @@ -7868,7 +9048,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. 
*/ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8040,46 +9220,34 @@ static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu, level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1d_cache->share_level = level; - env->cache_info_amd.l1d_cache->share_level = level; + env->cache_info.l1d_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_cpuid4.l1d_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_amd.l1d_cache->share_level); + env->cache_info.l1d_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1i_cache->share_level = level; - env->cache_info_amd.l1i_cache->share_level = level; + env->cache_info.l1i_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_cpuid4.l1i_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_amd.l1i_cache->share_level); + env->cache_info.l1i_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l2_cache->share_level = level; - env->cache_info_amd.l2_cache->share_level = level; + env->cache_info.l2_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_cpuid4.l2_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_amd.l2_cache->share_level); + env->cache_info.l2_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l3_cache->share_level = level; - env->cache_info_amd.l3_cache->share_level = level; + env->cache_info.l3_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_cpuid4.l3_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_amd.l3_cache->share_level); + env->cache_info.l3_cache->share_level); } if (!machine_check_smp_cache(ms, errp)) { @@ -8103,6 +9271,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) { tcg_cflags_set(cs, CF_PCREL); #endif + /* + * x-vendor-cpuid-only and v2 should be internal only, but + * QEMU doesn't support an "internal" property.
+ */ + if (!cpu->vendor_cpuid_only && cpu->vendor_cpuid_only_v2) { + error_setg(errp, "x-vendor-cpuid-only-v2 property " + "depends on x-vendor-cpuid-only"); + return; + } + if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; @@ -8306,24 +9484,22 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) "CPU model '%s' doesn't support legacy-cache=off", name); return; } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *cache_info; + env->cache_info = *cache_info; } else { /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2; - env->cache_info_cpuid2.l3_cache = &legacy_l3_cache; + if (!cpu->consistent_cache) { + env->enable_legacy_cpuid2_cache = true; + } - env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid4.l2_cache = &legacy_l2_cache; - env->cache_info_cpuid4.l3_cache = &legacy_l3_cache; + if (!cpu->vendor_cpuid_only_v2) { + env->enable_legacy_vendor_cache = true; + } - env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd; - env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd; - env->cache_info_amd.l2_cache = &legacy_l2_cache_amd; - env->cache_info_amd.l3_cache = &legacy_l3_cache; + if (IS_AMD_CPU(env)) { + env->cache_info = legacy_amd_cache_info; + } else { + env->cache_info = legacy_intel_cache_info; + } } #ifndef CONFIG_USER_ONLY @@ -8482,6 +9658,16 @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, static void x86_cpu_post_initfn(Object *obj) { +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif +} + +static void x86_cpu_init_xsave(void) +{ static bool first = true; uint64_t supported_xcr0; int i; @@ -8501,8 +9687,6 @@ static void x86_cpu_post_initfn(Object *obj) } } } - - accel_cpu_instance_init(CPU(obj)); } static void x86_cpu_init_default_topo(X86CPU *cpu) @@ -8571,6 +9755,13 @@ static void x86_cpu_initfn(Object *obj) if (xcc->model) { x86_cpu_load_model(cpu, xcc->model); } + + /* + * accel's cpu_instance_init may have the xsave check, + * so x86_ext_save_areas[] must be initialized before this. + */ + x86_cpu_init_xsave(); + accel_cpu_instance_init(CPU(obj)); } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -8651,39 +9842,6 @@ static bool x86_cpu_has_work(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -int x86_mmu_index_pl(CPUX86State *env, unsigned pl) -{ - int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; - int mmu_index_base = - pl == 3 ? MMU_USER64_IDX : - !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : - (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; - - return mmu_index_base + mmu_index_32; -} - -static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - CPUX86State *env = cpu_env(cs); - return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK); -} - -static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl) -{ - int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; - int mmu_index_base = - !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : - (pl < 3 && (env->eflags & AC_MASK) - ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX); - - return mmu_index_base + mmu_index_32; -} - -int cpu_mmu_index_kernel(CPUX86State *env) -{ - return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK); -} - static void x86_disas_set_info(CPUState *cs, disassemble_info *info) { X86CPU *cpu = X86_CPU(cs); @@ -8850,6 +10008,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only-v2", X86CPU, vendor_cpuid_only_v2, true), DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), @@ -8864,6 +10023,7 @@ static const Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("x-consistent-cache", X86CPU, consistent_cache, true), DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false), DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), @@ -8885,6 +10045,12 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), + DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false), + + DEFINE_PROP_BOOL("x-arch-cap-always-on", X86CPU, + arch_cap_always_on, false), + DEFINE_PROP_BOOL("x-pdcm-on-even-without-pmu", X86CPU, + pdcm_on_even_without_pmu, false), }; #ifndef CONFIG_USER_ONLY @@ -8905,7 +10071,7 @@ static const struct SysemuCPUOps i386_sysemu_ops = { }; #endif -static void x86_cpu_common_class_init(ObjectClass *oc, void *data) +static void x86_cpu_common_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -8924,8 +10090,8 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->reset_dump_flags = CPU_DUMP_FPU | CPU_DUMP_CCOP; cc->class_by_name = x86_cpu_class_by_name; + cc->list_cpus = x86_cpu_list; cc->parse_features = x86_cpu_parse_featurestr; - cc->mmu_index = x86_cpu_mmu_index; cc->dump_state = x86_cpu_dump_state; cc->set_pc = x86_cpu_set_pc; cc->get_pc = x86_cpu_get_pc; @@ -8936,6 +10102,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) #ifndef CONFIG_USER_ONLY cc->sysemu_ops = &i386_sysemu_ops; #endif /* !CONFIG_USER_ONLY */ +#ifdef CONFIG_TCG + cc->tcg_ops = &x86_tcg_ops; +#endif /* CONFIG_TCG */ cc->gdb_arch_name = x86_gdb_arch_name; #ifdef TARGET_X86_64 @@ -9002,7 +10171,7 @@ static const TypeInfo x86_cpu_type_info = { }; /* "base" CPU model, used by query-cpu-model-expansion */ -static void x86_cpu_base_class_init(ObjectClass *oc, void *data) +static void x86_cpu_base_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 76f2444..ce94886 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -23,7 +23,9 @@ #include "system/tcg.h" #include "cpu-qom.h" #include "kvm/hyperv-proto.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "exec/memop.h" #include "hw/i386/topology.h" #include "qapi/qapi-types-common.h" @@ -33,12 +35,6 @@ #define XEN_NR_VIRQS 24 -#define KVM_HAVE_MCE_INJECTION 1 - -/* 
support for self modifying code even if the modified instruction is - close to the modifying instruction */ -#define TARGET_HAS_PRECISE_SMC - #ifdef TARGET_X86_64 #define I386_ELF_MACHINE EM_X86_64 #define ELF_MACHINE_UNAME "x86_64" @@ -439,9 +435,11 @@ typedef enum X86Seg { #define MSR_SMI_COUNT 0x34 #define MSR_CORE_THREAD_COUNT 0x35 #define MSR_MTRRcap 0xfe +#define MSR_MTRR_MEM_TYPE_WB 0x06 #define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) #define MSR_MTRRcap_WC_SUPPORTED (1 << 10) +#define MSR_MTRR_ENABLE (1 << 11) #define MSR_IA32_SYSENTER_CS 0x174 #define MSR_IA32_SYSENTER_ESP 0x175 @@ -588,6 +586,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -601,6 +600,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -623,6 +623,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -638,6 +643,7 @@ typedef enum FeatureWord { FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_8000_0021_EBX, /* CPUID[8000_0021].EBX */ + FEAT_8000_0021_ECX, /* CPUID[8000_0021].ECX */ FEAT_8000_0022_EAX, /* CPUID[8000_0022].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ @@ -665,12 +671,22 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); @@ -903,6 +919,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -924,6 +942,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define 
CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ @@ -961,6 +981,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ @@ -982,6 +1004,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Linear Address Masking */ #define CPUID_7_1_EAX_LAM (1U << 26) +/* The immediate form of MSR access instructions */ +#define CPUID_7_1_ECX_MSR_IMM (1U << 5) + /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ @@ -1005,6 +1030,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_2_EDX_DDPD_U (1U << 3) /* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ #define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) @@ -1074,12 +1100,18 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Memory form of VERW mitigates TSA */ +#define CPUID_8000_0021_EAX_VERW_CLEAR (1U << 5) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Security */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ @@ -1097,6 +1129,11 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); */ #define CPUID_8000_0021_EBX_RAPSIZE (8U << 16) +/* CPU is not vulnerable to TSA-SQ attack */ +#define CPUID_8000_0021_ECX_TSA_SQ_NO (1U << 1) +/* CPU is not vulnerable to TSA-L1 attack */ +#define CPUID_8000_0021_ECX_TSA_L1_NO (1U << 2) + /* Performance Monitoring Version 2 */ #define CPUID_8000_0022_EAX_PERFMON_V2 (1U << 0) @@ -1104,6 +1141,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) @@ -1131,7 +1169,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* PMM enabled */ #define CPUID_C000_0001_EDX_PMM_EN (1U << 13) -#define CPUID_VENDOR_SZ 12 +#define CPUID_VENDOR_SZ 12 +#define CPUID_MODEL_ID_SZ 48 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ @@ -1610,8 +1649,6 @@ typedef struct { #define MAX_FIXED_COUNTERS 3 #define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0) -#define TARGET_INSN_START_EXTRA_WORDS 1 - #define NB_OPMASK_REGS 8 /* CPU can't have 0xFFFFFFFF APIC ID, use that value to distinguish @@ -1747,12 +1784,6 @@ typedef enum TPRAccess { /* Cache information data structures: */ -enum CacheType { - DATA_CACHE, - 
INSTRUCTION_CACHE, - UNIFIED_CACHE -}; - typedef struct CPUCacheInfo { enum CacheType type; uint8_t level; @@ -1811,11 +1842,6 @@ typedef struct CPUCaches { CPUCacheInfo *l3_cache; } CPUCaches; -typedef struct HVFX86LazyFlags { - target_ulong result; - target_ulong auxbits; -} HVFX86LazyFlags; - typedef struct CPUArchState { /* standard registers */ target_ulong regs[CPU_NB_REGS]; @@ -2057,11 +2083,14 @@ typedef struct CPUArchState { /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; - /* Cache information for CPUID. When legacy-cache=on, the cache data + /* + * Cache information for CPUID. When legacy-cache=on, the cache data * on each CPUID leaf will be different, because we keep compatibility * with old QEMU versions. */ - CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd; + CPUCaches cache_info; + bool enable_legacy_cpuid2_cache; + bool enable_legacy_vendor_cache; /* MTRRs */ uint64_t mtrr_fixed[11]; @@ -2107,9 +2136,8 @@ typedef struct CPUArchState { QEMUTimer *xen_periodic_timer; QemuMutex xen_timers_lock; #endif -#if defined(CONFIG_HVF) - HVFX86LazyFlags hvf_lflags; - void *hvf_mmio_buf; +#if defined(CONFIG_HVF) || defined(CONFIG_MSHV) + void *emu_mmio_buf; #endif uint64_t mcg_cap; @@ -2182,7 +2210,6 @@ struct ArchCPU { bool expose_tcg; bool migratable; bool migrate_smi_count; - bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to @@ -2204,6 +2231,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2242,6 +2272,13 @@ struct ArchCPU { */ bool legacy_cache; + /* + * Compatibility bits for old machine types. + * If true, use the same cache model in CPUID leaf 0x2 + * and 0x4. + */ + bool consistent_cache; + /* Compatibility bits for old machine types. * If true decode the CPUID Function 0x8000001E_ECX to support multiple * nodes per processor @@ -2251,12 +2288,24 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool force_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; - /* Only advertise CPUID leaves defined by the vendor */ + /* + * Compatibility bits for old machine types (PC machine v6.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ bool vendor_cpuid_only; + /* + * Compatibility bits for old machine types (PC machine v10.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ + bool vendor_cpuid_only_v2; + /* Only advertise TOPOEXT features that AMD defines */ bool amd_topoext_features_only; @@ -2275,6 +2324,18 @@ struct ArchCPU { /* Forcefully disable KVM PV features not exposed in guest CPUIDs */ bool kvm_pv_enforce_cpuid; + /* + * Expose arch-capabilities unconditionally even on AMD models, for backwards + * compatibility with QEMU <10.1. + */ + bool arch_cap_always_on; + + /* + * Backwards compatibility with QEMU <10.1. 
The PDCM feature is now disabled when + * PMU is not available, but prior to 10.1 it was enabled even if PMU is off. + */ + bool pdcm_on_even_without_pmu; + /* Number of physical address bits supported */ uint32_t phys_bits; @@ -2329,6 +2390,7 @@ struct X86CPUClass { */ const X86CPUModel *model; + bool max_features; /* Enable all supported features automatically */ bool host_cpuid_required; int ordering; bool migration_safe; @@ -2367,7 +2429,6 @@ int x86_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int x86_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void x86_cpu_gdb_init(CPUState *cs); -void x86_cpu_list(void); int cpu_x86_support_mca_broadcast(CPUX86State *env); #ifndef CONFIG_USER_ONLY @@ -2398,7 +2459,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, SegmentCache *sc; unsigned int new_hflags; - sc = &env->segs[seg_reg]; + if (seg_reg == R_LDTR) { + sc = &env->ldt; + } else if (seg_reg == R_TR) { + sc = &env->tr; + } else { + sc = &env->segs[seg_reg]; + } + sc->selector = selector; sc->base = base; sc->limit = limit; @@ -2512,11 +2580,27 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); + +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->force_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); void cpu_sync_avx_hflag(CPUX86State *env); +typedef enum X86ASIdx { + X86ASIdx_MEM = 0, + X86ASIdx_SMM = 1, +} X86ASIdx; + #ifndef CONFIG_USER_ONLY static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs) { @@ -2561,8 +2645,6 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("qemu32") #endif -#define cpu_list x86_cpu_list - /* MMU modes definitions */ #define MMU_KSMAP64_IDX 0 #define MMU_KSMAP32_IDX 1 @@ -2597,35 +2679,17 @@ static inline bool is_mmu_index_32(int mmu_index) return mmu_index & 1; } -int x86_mmu_index_pl(CPUX86State *env, unsigned pl); -int cpu_mmu_index_kernel(CPUX86State *env); - #define CC_DST (env->cc_dst) #define CC_SRC (env->cc_src) #define CC_SRC2 (env->cc_src2) #define CC_OP (env->cc_op) -#include "exec/cpu-all.h" #include "svm.h" #if !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif -static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *flags = env->hflags | - (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); - if (env->hflags & HF_CS64_MASK) { - *cs_base = 0; - *pc = env->eip; - } else { - *cs_base = env->segs[R_CS].base; - *pc = (uint32_t)(*cs_base + env->eip); - } -} - void do_cpu_init(X86CPU *cpu); #define MCE_INJECT_BROADCAST 1 @@ -2660,6 +2724,36 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env) } } +static inline uint32_t x86_cpu_family(uint32_t eax) +{ + uint32_t family = (eax >> 8) & 0xf; + + if (family == 0xf) { + family += (eax >> 20) & 0xff; + } + + return family; +} + +static inline uint32_t x86_cpu_model(uint32_t eax) +{ + uint32_t family, model; + + family = x86_cpu_family(eax); + model = (eax >> 4) & 0xf; + + if (family >= 
0x6) { + model += ((eax >> 16) & 0xf) << 4; + } + + return model; +} + +static inline uint32_t x86_cpu_stepping(uint32_t eax) +{ + return eax & 0xf; +} + static inline bool cpu_has_vmx(CPUX86State *env) { return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; @@ -2843,4 +2937,29 @@ static inline bool ctl_has_irq(CPUX86State *env) # define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20) #endif +/* majority(NOT a, b, c) = (a ^ b) ? b : c */ +#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c)) + +/* + * ADD_COUT_VEC(x, y) = majority((x + y) ^ x ^ y, x, y) + * + * If two corresponding bits in x and y are the same, that's the carry + * independent of the value (x+y)^x^y. Hence x^y can be replaced with + * 1 in (x+y)^x^y, resulting in majority(NOT (x+y), x, y) + */ +#define ADD_COUT_VEC(op1, op2, result) \ + MAJ_INV1(result, op1, op2) + +/* + * SUB_COUT_VEC(x, y) = NOT majority(x, NOT y, (x - y) ^ x ^ NOT y) + * = majority(NOT x, y, (x - y) ^ x ^ y) + * + * Note that the carry out is actually a borrow, i.e. it is inverted. + * If two corresponding bits in x and y are different, the value of the + * bit in (x-y)^x^y likewise does not matter. Hence, x^y can be replaced + * with 0 in (x-y)^x^y, resulting in majority(NOT x, y, x-y) + */ +#define SUB_COUT_VEC(op1, op2, result) \ + MAJ_INV1(op1, op2, result) + #endif /* I386_CPU_H */ diff --git a/target/i386/emulate/meson.build b/target/i386/emulate/meson.build new file mode 100644 index 0000000..b6dafb6 --- /dev/null +++ b/target/i386/emulate/meson.build @@ -0,0 +1,8 @@ +emulator_files = files( + 'x86_decode.c', + 'x86_emu.c', + 'x86_flags.c', +) + +i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: emulator_files) +i386_system_ss.add(when: 'CONFIG_MSHV', if_true: emulator_files) diff --git a/target/i386/emulate/panic.h b/target/i386/emulate/panic.h new file mode 100644 index 0000000..71c2487 --- /dev/null +++ b/target/i386/emulate/panic.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2016 Veertu Inc, + * Copyright (C) 2017 Google Inc, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef X86_EMU_PANIC_H +#define X86_EMU_PANIC_H + +#define VM_PANIC(x) {\ + printf("%s\n", x); \ + abort(); \ +} + +#define VM_PANIC_ON(x) {\ + if (x) { \ + printf("%s\n", #x); \ + abort(); \ + } \ +} + +#define VM_PANIC_EX(...) {\ + printf(__VA_ARGS__); \ + abort(); \ +} + +#define VM_PANIC_ON_EX(x, ...) {\ + if (x) { \ + printf(__VA_ARGS__); \ + abort(); \ + } \ +} + +#endif diff --git a/target/i386/hvf/x86.h b/target/i386/emulate/x86.h index 063cd0b..73edccf 100644 --- a/target/i386/hvf/x86.h +++ b/target/i386/emulate/x86.h @@ -16,8 +16,8 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
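The x86_cpu_family()/x86_cpu_model()/x86_cpu_stepping() helpers added to cpu.h above compose CPUID leaf 1 EAX in the standard way: the extended family is additive once the base family saturates at 0xf, and for family >= 6 the extended model supplies the high nibble. A small standalone check of that arithmetic (illustrative only, not part of the patch; the EAX values are made up and do not refer to real parts):

#include <assert.h>
#include <stdint.h>

/* Same composition as the x86_cpu_family()/x86_cpu_model() helpers above. */
static uint32_t family_of(uint32_t eax)
{
    uint32_t family = (eax >> 8) & 0xf;
    if (family == 0xf) {
        family += (eax >> 20) & 0xff;   /* extended family is additive */
    }
    return family;
}

static uint32_t model_of(uint32_t eax)
{
    uint32_t model = (eax >> 4) & 0xf;
    if (family_of(eax) >= 0x6) {
        model += ((eax >> 16) & 0xf) << 4;   /* extended model fills the high nibble */
    }
    return model;
}

int main(void)
{
    /* Hypothetical Intel-style EAX: base family 6, extended model 8. */
    assert(family_of(0x000806ec) == 0x06);
    assert(model_of(0x000806ec) == 0x8e);
    assert((0x000806ec & 0xf) == 0xc);       /* stepping is the low nibble */

    /* Hypothetical AMD-style EAX: base family 0xf plus extended family 0xa. */
    assert(family_of(0x00a40f41) == 0x19);
    assert(model_of(0x00a40f41) == 0x44);
    return 0;
}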
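The ADD_COUT_VEC()/SUB_COUT_VEC() macros introduced in cpu.h above pack the carry (for SUB, the borrow) out of every bit position into a single vector, so CF is the top bit of the vector and AF can be read from bit 3. A quick standalone sanity check of that property, reusing the same macro definitions (test scaffolding for illustration, not QEMU code):

#include <assert.h>
#include <stdint.h>

#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
#define ADD_COUT_VEC(op1, op2, result) MAJ_INV1(result, op1, op2)
#define SUB_COUT_VEC(op1, op2, result) MAJ_INV1(op1, op2, result)

int main(void)
{
    /* 0xf0 + 0x20 = 0x110: carry out of bit 7 (CF) set, none out of bit 3 (AF). */
    uint8_t v = ADD_COUT_VEC(0xf0, 0x20, (uint8_t)(0xf0 + 0x20));
    assert((v >> 7) & 1);
    assert(!((v >> 3) & 1));

    /* 0x0f + 0x01 = 0x10: AF set, CF clear. */
    v = ADD_COUT_VEC(0x0f, 0x01, (uint8_t)(0x0f + 0x01));
    assert(!((v >> 7) & 1) && ((v >> 3) & 1));

    /* 0x10 - 0x20 borrows: the folded-in inversion means bit 7 is set directly. */
    v = SUB_COUT_VEC(0x10, 0x20, (uint8_t)(0x10 - 0x20));
    assert((v >> 7) & 1);
    return 0;
}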
*/ -#ifndef HVF_X86_H -#define HVF_X86_H +#ifndef X86_EMU_DEFS_H +#define X86_EMU_DEFS_H typedef struct x86_register { union { diff --git a/target/i386/hvf/x86_decode.c b/target/i386/emulate/x86_decode.c index 5fea2dd..97bd6f1 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/emulate/x86_decode.c @@ -20,15 +20,13 @@ #include "panic.h" #include "x86_decode.h" -#include "vmx.h" -#include "x86_mmu.h" -#include "x86_descr.h" +#include "x86_emu.h" #define OPCODE_ESCAPE 0xf static void decode_invalid(CPUX86State *env, struct x86_decode *decode) { - printf("%llx: failed to decode instruction ", env->eip); + printf(TARGET_FMT_lx ": failed to decode instruction ", env->eip); for (int i = 0; i < decode->opcode_len; i++) { printf("%x ", decode->opcode[i]); } @@ -73,10 +71,16 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode, VM_PANIC_EX("%s invalid size %d\n", __func__, size); break; } - target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len; - vmx_read_mem(env_cpu(env), &val, va, size); + + /* copy the bytes from the instruction stream, if available */ + if (decode->stream && decode->len + size <= decode->stream->len) { + memcpy(&val, decode->stream->bytes + decode->len, size); + } else { + target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len; + emul_ops->fetch_instruction(env_cpu(env), &val, va, size); + } decode->len += size; - + return val; } @@ -111,8 +115,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,8 +125,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, op->type = X86_VAR_REG; op->reg = R_EAX; /* Since reg is always AX, REX prefix has no impact. 
*/ - op->ptr = get_reg_ref(env, op->reg, false, 0, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, false, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -264,16 +268,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -289,16 +293,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -379,16 +383,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -396,15 +400,15 @@ static void fetch_moffs(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op) { op->type = X86_VAR_OFFSET; - op->ptr = decode_bytes(env, decode, decode->addressing_size); + op->addr = decode_bytes(env, decode, decode->addressing_size); } static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, 
&decode->op[1], decode->operand_size); } @@ -413,8 +417,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, - decode->operand_size); + op->regptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -431,7 +435,6 @@ struct decode_tbl { void (*decode_op4)(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op4); void (*decode_postfix)(CPUX86State *env, struct x86_decode *decode); - uint32_t flags_mask; }; struct decode_x87_tbl { @@ -447,7 +450,6 @@ struct decode_x87_tbl { void (*decode_op2)(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op2); void (*decode_postfix)(CPUX86State *env, struct x86_decode *decode); - uint32_t flags_mask; }; struct decode_tbl invl_inst = {0x0, 0, 0, false, NULL, NULL, NULL, NULL, @@ -472,7 +474,6 @@ static void decode_x87_ins(CPUX86State *env, struct x86_decode *decode) if (decoder->operand_size) { decode->operand_size = decoder->operand_size; } - decode->flags_mask = decoder->flags_mask; decode->fpop_stack = decoder->pop; decode->frev = decoder->rev; @@ -505,9 +506,6 @@ static void decode_ffgroup(CPUX86State *env, struct x86_decode *decode) X86_DECODE_CMD_INVL }; decode->cmd = group[decode->modrm.reg]; - if (decode->modrm.reg > 2) { - decode->flags_mask = 0; - } } static void decode_sldtgroup(CPUX86State *env, struct x86_decode *decode) @@ -639,8 +637,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, - decode->rex.b, decode->operand_size); + decode->op[0].regptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -695,941 +693,927 @@ static void decode_db_4(CPUX86State *env, struct x86_decode *decode) } -#define RFLAGS_MASK_NONE 0 -#define RFLAGS_MASK_OSZAPC (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C) -#define RFLAGS_MASK_LAHF (CC_S | CC_Z | CC_A | CC_P | CC_C) -#define RFLAGS_MASK_CF (CC_C) -#define RFLAGS_MASK_IF (IF_MASK) -#define RFLAGS_MASK_TF (TF_MASK) -#define RFLAGS_MASK_DF (DF_MASK) -#define RFLAGS_MASK_ZF (CC_Z) - struct decode_tbl _1op_inst[] = { {0x0, X86_DECODE_CMD_ADD, 1, true, decode_modrm_rm, decode_modrm_reg, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0x1, X86_DECODE_CMD_ADD, 0, true, decode_modrm_rm, decode_modrm_reg, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0x2, X86_DECODE_CMD_ADD, 1, true, decode_modrm_reg, decode_modrm_rm, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0x3, X86_DECODE_CMD_ADD, 0, true, decode_modrm_reg, decode_modrm_rm, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0x4, X86_DECODE_CMD_ADD, 1, false, decode_rax, decode_imm8, NULL, NULL, - NULL, RFLAGS_MASK_OSZAPC}, + NULL}, {0x5, X86_DECODE_CMD_ADD, 0, false, decode_rax, decode_imm, NULL, NULL, - NULL, RFLAGS_MASK_OSZAPC}, + NULL}, {0x6, X86_DECODE_CMD_PUSH_SEG, 0, false, false, NULL, NULL, NULL, - decode_pushseg, RFLAGS_MASK_NONE}, + decode_pushseg}, {0x7, X86_DECODE_CMD_POP_SEG, 0, false, false, NULL, NULL, NULL, - decode_popseg, RFLAGS_MASK_NONE}, + decode_popseg}, {0x8, X86_DECODE_CMD_OR, 1, true, decode_modrm_rm, decode_modrm_reg, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0x9, 
X86_DECODE_CMD_OR, 0, true, decode_modrm_rm, decode_modrm_reg, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0xa, X86_DECODE_CMD_OR, 1, true, decode_modrm_reg, decode_modrm_rm, NULL, - NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL}, {0xb, X86_DECODE_CMD_OR, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xc, X86_DECODE_CMD_OR, 1, false, decode_rax, decode_imm8, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xd, X86_DECODE_CMD_OR, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xe, X86_DECODE_CMD_PUSH_SEG, 0, false, false, - NULL, NULL, NULL, decode_pushseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_pushseg}, {0xf, X86_DECODE_CMD_POP_SEG, 0, false, false, - NULL, NULL, NULL, decode_popseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_popseg}, {0x10, X86_DECODE_CMD_ADC, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x11, X86_DECODE_CMD_ADC, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x12, X86_DECODE_CMD_ADC, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x13, X86_DECODE_CMD_ADC, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x14, X86_DECODE_CMD_ADC, 1, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x15, X86_DECODE_CMD_ADC, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x16, X86_DECODE_CMD_PUSH_SEG, 0, false, false, - NULL, NULL, NULL, decode_pushseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_pushseg}, {0x17, X86_DECODE_CMD_POP_SEG, 0, false, false, - NULL, NULL, NULL, decode_popseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_popseg}, {0x18, X86_DECODE_CMD_SBB, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x19, X86_DECODE_CMD_SBB, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x1a, X86_DECODE_CMD_SBB, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x1b, X86_DECODE_CMD_SBB, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x1c, X86_DECODE_CMD_SBB, 1, false, decode_rax, decode_imm8, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x1d, X86_DECODE_CMD_SBB, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x1e, X86_DECODE_CMD_PUSH_SEG, 0, false, false, - NULL, NULL, NULL, decode_pushseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_pushseg}, {0x1f, X86_DECODE_CMD_POP_SEG, 0, false, false, - NULL, NULL, NULL, decode_popseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_popseg}, {0x20, X86_DECODE_CMD_AND, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x21, X86_DECODE_CMD_AND, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x22, X86_DECODE_CMD_AND, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x23, X86_DECODE_CMD_AND, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x24, 
X86_DECODE_CMD_AND, 1, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x25, X86_DECODE_CMD_AND, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x28, X86_DECODE_CMD_SUB, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x29, X86_DECODE_CMD_SUB, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x2a, X86_DECODE_CMD_SUB, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x2b, X86_DECODE_CMD_SUB, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x2c, X86_DECODE_CMD_SUB, 1, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x2d, X86_DECODE_CMD_SUB, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x2f, X86_DECODE_CMD_DAS, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, NULL}, {0x30, X86_DECODE_CMD_XOR, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x31, X86_DECODE_CMD_XOR, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x32, X86_DECODE_CMD_XOR, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x33, X86_DECODE_CMD_XOR, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x34, X86_DECODE_CMD_XOR, 1, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x35, X86_DECODE_CMD_XOR, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x38, X86_DECODE_CMD_CMP, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x39, X86_DECODE_CMD_CMP, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x3a, X86_DECODE_CMD_CMP, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x3b, X86_DECODE_CMD_CMP, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x3c, X86_DECODE_CMD_CMP, 1, false, decode_rax, decode_imm8, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x3d, X86_DECODE_CMD_CMP, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x3f, X86_DECODE_CMD_AAS, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, NULL}, {0x40, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x41, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x42, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x43, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x44, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x45, 
X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x46, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x47, X86_DECODE_CMD_INC, 0, false, - NULL, NULL, NULL, NULL, decode_incgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_incgroup}, {0x48, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x49, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4a, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4b, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4c, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4d, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4e, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x4f, X86_DECODE_CMD_DEC, 0, false, - NULL, NULL, NULL, NULL, decode_decgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_decgroup}, {0x50, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x51, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x52, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x53, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x54, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x55, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x56, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x57, X86_DECODE_CMD_PUSH, 0, false, - NULL, NULL, NULL, NULL, decode_pushgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_pushgroup}, {0x58, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x59, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5a, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5b, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5c, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5d, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, 
RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5e, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x5f, X86_DECODE_CMD_POP, 0, false, - NULL, NULL, NULL, NULL, decode_popgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_popgroup}, {0x60, X86_DECODE_CMD_PUSHA, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x61, X86_DECODE_CMD_POPA, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x68, X86_DECODE_CMD_PUSH, 0, false, decode_imm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x6a, X86_DECODE_CMD_PUSH, 0, false, decode_imm8_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x69, X86_DECODE_CMD_IMUL_3, 0, true, decode_modrm_reg, - decode_modrm_rm, decode_imm, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_modrm_rm, decode_imm, NULL, NULL}, {0x6b, X86_DECODE_CMD_IMUL_3, 0, true, decode_modrm_reg, decode_modrm_rm, - decode_imm8_signed, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_imm8_signed, NULL, NULL}, {0x6c, X86_DECODE_CMD_INS, 1, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x6d, X86_DECODE_CMD_INS, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x6e, X86_DECODE_CMD_OUTS, 1, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x6f, X86_DECODE_CMD_OUTS, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x70, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x71, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x72, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x73, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x74, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x75, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x76, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x77, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x78, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x79, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7a, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7b, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7c, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7d, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7e, X86_DECODE_CMD_JXX, 1, 
false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x7f, X86_DECODE_CMD_JXX, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x80, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, decode_imm8, - NULL, NULL, decode_addgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_addgroup}, {0x81, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_imm, - NULL, NULL, decode_addgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_addgroup}, {0x82, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, decode_imm8, - NULL, NULL, decode_addgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_addgroup}, {0x83, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_imm8_signed, - NULL, NULL, decode_addgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_addgroup}, {0x84, X86_DECODE_CMD_TST, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x85, X86_DECODE_CMD_TST, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0x86, X86_DECODE_CMD_XCHG, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x87, X86_DECODE_CMD_XCHG, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x88, X86_DECODE_CMD_MOV, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x89, X86_DECODE_CMD_MOV, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x8a, X86_DECODE_CMD_MOV, 1, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x8b, X86_DECODE_CMD_MOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x8c, X86_DECODE_CMD_MOV_FROM_SEG, 0, true, decode_modrm_rm, - decode_modrm_reg, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_reg, NULL, NULL, NULL}, {0x8d, X86_DECODE_CMD_LEA, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x8e, X86_DECODE_CMD_MOV_TO_SEG, 0, true, decode_modrm_reg, - decode_modrm_rm, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_rm, NULL, NULL, NULL}, {0x8f, X86_DECODE_CMD_POP, 0, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x90, X86_DECODE_CMD_NOP, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x91, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x92, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x93, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x94, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x95, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x96, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, decode_xchgroup}, {0x97, X86_DECODE_CMD_XCHG, 0, false, NULL, decode_rax, - NULL, NULL, decode_xchgroup, RFLAGS_MASK_NONE}, + NULL, NULL, 
decode_xchgroup}, {0x98, X86_DECODE_CMD_CBW, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x99, X86_DECODE_CMD_CWD, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x9a, X86_DECODE_CMD_CALL_FAR, 0, false, NULL, - NULL, NULL, NULL, decode_farjmp, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_farjmp}, {0x9c, X86_DECODE_CMD_PUSHF, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, /*{0x9d, X86_DECODE_CMD_POPF, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_POPF},*/ + NULL, NULL, NULL},*/ {0x9e, X86_DECODE_CMD_SAHF, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x9f, X86_DECODE_CMD_LAHF, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_LAHF}, + NULL, NULL, NULL}, {0xa0, X86_DECODE_CMD_MOV, 1, false, decode_rax, fetch_moffs, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa1, X86_DECODE_CMD_MOV, 0, false, decode_rax, fetch_moffs, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa2, X86_DECODE_CMD_MOV, 1, false, fetch_moffs, decode_rax, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa3, X86_DECODE_CMD_MOV, 0, false, fetch_moffs, decode_rax, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa4, X86_DECODE_CMD_MOVS, 1, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa5, X86_DECODE_CMD_MOVS, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa6, X86_DECODE_CMD_CMPS, 1, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xa7, X86_DECODE_CMD_CMPS, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xaa, X86_DECODE_CMD_STOS, 1, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xab, X86_DECODE_CMD_STOS, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xac, X86_DECODE_CMD_LODS, 1, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xad, X86_DECODE_CMD_LODS, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xae, X86_DECODE_CMD_SCAS, 1, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xaf, X86_DECODE_CMD_SCAS, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xa8, X86_DECODE_CMD_TST, 1, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xa9, X86_DECODE_CMD_TST, 0, false, decode_rax, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xb0, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb1, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb2, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb3, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb4, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb5, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb6, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, 
NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb7, X86_DECODE_CMD_MOV, 1, false, NULL, - NULL, NULL, NULL, decode_movgroup8, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup8}, {0xb8, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xb9, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xba, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xbb, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xbc, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xbd, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xbe, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xbf, X86_DECODE_CMD_MOV, 0, false, NULL, - NULL, NULL, NULL, decode_movgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_movgroup}, {0xc0, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, decode_imm8, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xc1, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_imm8, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xc2, X86_DECODE_RET_NEAR, 0, false, decode_imm16, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xc3, X86_DECODE_RET_NEAR, 0, false, NULL, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xc4, X86_DECODE_CMD_LES, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xc5, X86_DECODE_CMD_LDS, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xc6, X86_DECODE_CMD_MOV, 1, true, decode_modrm_rm, decode_imm8, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xc7, X86_DECODE_CMD_MOV, 0, true, decode_modrm_rm, decode_imm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xc8, X86_DECODE_CMD_ENTER, 0, false, decode_imm16, decode_imm8, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xc9, X86_DECODE_CMD_LEAVE, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xca, X86_DECODE_RET_FAR, 0, false, decode_imm16, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xcb, X86_DECODE_RET_FAR, 0, false, decode_imm_0, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xcd, X86_DECODE_CMD_INT, 0, false, decode_imm8, NULL, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, /*{0xcf, X86_DECODE_CMD_IRET, 0, false, NULL, NULL, - NULL, NULL, NULL, RFLAGS_MASK_IRET},*/ + NULL, NULL, NULL},*/ {0xd0, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, decode_imm_1, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xd1, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_imm_1, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xd2, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, decode_rcx, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xd3, 
X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_rcx, - NULL, NULL, decode_rotgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_rotgroup}, {0xd4, X86_DECODE_CMD_AAM, 0, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL}, {0xd5, X86_DECODE_CMD_AAD, 0, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL}, {0xd7, X86_DECODE_CMD_XLAT, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xd8, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xd9, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xda, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xdb, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xdc, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xdd, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xde, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xdf, X86_DECODE_CMD_INVL, 0, true, NULL, - NULL, NULL, NULL, decode_x87_ins, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_x87_ins}, {0xe0, X86_DECODE_CMD_LOOP, 0, false, decode_imm8_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe1, X86_DECODE_CMD_LOOP, 0, false, decode_imm8_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe2, X86_DECODE_CMD_LOOP, 0, false, decode_imm8_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe3, X86_DECODE_CMD_JCXZ, 1, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0xe4, X86_DECODE_CMD_IN, 1, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe5, X86_DECODE_CMD_IN, 0, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe6, X86_DECODE_CMD_OUT, 1, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe7, X86_DECODE_CMD_OUT, 0, false, decode_imm8, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe8, X86_DECODE_CMD_CALL_NEAR, 0, false, decode_imm_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xe9, X86_DECODE_CMD_JMP_NEAR, 0, false, decode_imm_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xea, X86_DECODE_CMD_JMP_FAR, 0, false, - NULL, NULL, NULL, NULL, decode_farjmp, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_farjmp}, {0xeb, X86_DECODE_CMD_JMP_NEAR, 1, false, decode_imm8_signed, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xec, X86_DECODE_CMD_IN, 1, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xed, X86_DECODE_CMD_IN, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xee, X86_DECODE_CMD_OUT, 1, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xef, X86_DECODE_CMD_OUT, 0, false, - NULL, 
NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xf4, X86_DECODE_CMD_HLT, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xf5, X86_DECODE_CMD_CMC, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_CF}, + NULL, NULL, NULL, NULL, NULL}, {0xf6, X86_DECODE_CMD_INVL, 1, true, - NULL, NULL, NULL, NULL, decode_f7group, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_f7group}, {0xf7, X86_DECODE_CMD_INVL, 0, true, - NULL, NULL, NULL, NULL, decode_f7group, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, NULL, decode_f7group}, {0xf8, X86_DECODE_CMD_CLC, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_CF}, + NULL, NULL, NULL, NULL, NULL}, {0xf9, X86_DECODE_CMD_STC, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_CF}, + NULL, NULL, NULL, NULL, NULL}, {0xfa, X86_DECODE_CMD_CLI, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_IF}, + NULL, NULL, NULL, NULL, NULL}, {0xfb, X86_DECODE_CMD_STI, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_IF}, + NULL, NULL, NULL, NULL, NULL}, {0xfc, X86_DECODE_CMD_CLD, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_DF}, + NULL, NULL, NULL, NULL, NULL}, {0xfd, X86_DECODE_CMD_STD, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_DF}, + NULL, NULL, NULL, NULL, NULL}, {0xfe, X86_DECODE_CMD_INVL, 1, true, decode_modrm_rm, - NULL, NULL, NULL, decode_incgroup2, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, decode_incgroup2}, {0xff, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, - NULL, NULL, NULL, decode_ffgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL, decode_ffgroup}, }; struct decode_tbl _2op_inst[] = { {0x0, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, - NULL, NULL, NULL, decode_sldtgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_sldtgroup}, {0x1, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, - NULL, NULL, NULL, decode_lidtgroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_lidtgroup}, {0x6, X86_DECODE_CMD_CLTS, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_TF}, + NULL, NULL, NULL, NULL, NULL}, {0x9, X86_DECODE_CMD_WBINVD, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x18, X86_DECODE_CMD_PREFETCH, 0, true, - NULL, NULL, NULL, NULL, decode_x87_general, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_x87_general}, {0x1f, X86_DECODE_CMD_NOP, 0, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x20, X86_DECODE_CMD_MOV_FROM_CR, 0, true, decode_modrm_rm, - decode_modrm_reg, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_reg, NULL, NULL, NULL}, {0x21, X86_DECODE_CMD_MOV_FROM_DR, 0, true, decode_modrm_rm, - decode_modrm_reg, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_reg, NULL, NULL, NULL}, {0x22, X86_DECODE_CMD_MOV_TO_CR, 0, true, decode_modrm_reg, - decode_modrm_rm, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_rm, NULL, NULL, NULL}, {0x23, X86_DECODE_CMD_MOV_TO_DR, 0, true, decode_modrm_reg, - decode_modrm_rm, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + decode_modrm_rm, NULL, NULL, NULL}, {0x30, X86_DECODE_CMD_WRMSR, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x31, X86_DECODE_CMD_RDTSC, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x32, X86_DECODE_CMD_RDMSR, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0x40, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, 
RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x41, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x42, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x43, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x44, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x45, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x46, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x47, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x48, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x49, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4a, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4b, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4c, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4d, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4e, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x4f, X86_DECODE_CMD_CMOV, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0x77, X86_DECODE_CMD_EMMS, 0, false, - NULL, NULL, NULL, NULL, decode_x87_general, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_x87_general}, {0x82, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x83, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x84, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x85, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x86, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x87, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x88, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x89, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8a, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8b, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8c, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, 
NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8d, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8e, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x8f, X86_DECODE_CMD_JXX, 0, false, - NULL, NULL, NULL, NULL, decode_jxx, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_jxx}, {0x90, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x91, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x92, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x93, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x94, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x95, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x96, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x97, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x98, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x99, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9a, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9b, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9c, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9d, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9e, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0x9f, X86_DECODE_CMD_SETXX, 1, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL}, {0xb0, X86_DECODE_CMD_CMPXCHG, 1, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xb1, X86_DECODE_CMD_CMPXCHG, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xb6, X86_DECODE_CMD_MOVZX, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xb7, X86_DECODE_CMD_MOVZX, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xb8, X86_DECODE_CMD_POPCNT, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xbe, X86_DECODE_CMD_MOVSX, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xbf, X86_DECODE_CMD_MOVSX, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xa0, X86_DECODE_CMD_PUSH_SEG, 0, false, false, - NULL, NULL, NULL, decode_pushseg, RFLAGS_MASK_NONE}, 
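/*
 * Illustrative note (not from the patch): the sixteen near-identical rows for
 * CMOVcc (0f 40+cc), Jcc (0f 80+cc) and SETcc (0f 90+cc) exist because x86
 * encodes the condition test in the low opcode nibble, so a shared handler
 * can recover it as:
 */
int cc = decode->opcode[decode->opcode_len - 1] & 0xf;  /* 0 = O ... 0xf = G */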
+ NULL, NULL, NULL, decode_pushseg}, {0xa1, X86_DECODE_CMD_POP_SEG, 0, false, false, - NULL, NULL, NULL, decode_popseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_popseg}, {0xa2, X86_DECODE_CMD_CPUID, 0, false, - NULL, NULL, NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, NULL}, {0xa3, X86_DECODE_CMD_BT, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_CF}, + NULL, NULL, NULL}, {0xa4, X86_DECODE_CMD_SHLD, 0, true, decode_modrm_rm, decode_modrm_reg, - decode_imm8, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_imm8, NULL, NULL}, {0xa5, X86_DECODE_CMD_SHLD, 0, true, decode_modrm_rm, decode_modrm_reg, - decode_rcx, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_rcx, NULL, NULL}, {0xa8, X86_DECODE_CMD_PUSH_SEG, 0, false, false, - NULL, NULL, NULL, decode_pushseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_pushseg}, {0xa9, X86_DECODE_CMD_POP_SEG, 0, false, false, - NULL, NULL, NULL, decode_popseg, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_popseg}, {0xab, X86_DECODE_CMD_BTS, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_CF}, + NULL, NULL, NULL}, {0xac, X86_DECODE_CMD_SHRD, 0, true, decode_modrm_rm, decode_modrm_reg, - decode_imm8, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_imm8, NULL, NULL}, {0xad, X86_DECODE_CMD_SHRD, 0, true, decode_modrm_rm, decode_modrm_reg, - decode_rcx, NULL, NULL, RFLAGS_MASK_OSZAPC}, + decode_rcx, NULL, NULL}, {0xae, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, - NULL, NULL, NULL, decode_aegroup, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, decode_aegroup}, {0xaf, X86_DECODE_CMD_IMUL_2, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xb2, X86_DECODE_CMD_LSS, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_NONE}, + NULL, NULL, NULL}, {0xb3, X86_DECODE_CMD_BTR, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xba, X86_DECODE_CMD_INVL, 0, true, decode_modrm_rm, decode_imm8, - NULL, NULL, decode_btgroup, RFLAGS_MASK_OSZAPC}, + NULL, NULL, decode_btgroup}, {0xbb, X86_DECODE_CMD_BTC, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xbc, X86_DECODE_CMD_BSF, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xbd, X86_DECODE_CMD_BSR, 0, true, decode_modrm_reg, decode_modrm_rm, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xc1, X86_DECODE_CMD_XADD, 0, true, decode_modrm_rm, decode_modrm_reg, - NULL, NULL, NULL, RFLAGS_MASK_OSZAPC}, + NULL, NULL, NULL}, {0xc7, X86_DECODE_CMD_CMPXCHG8B, 0, true, decode_modrm_rm, - NULL, NULL, NULL, NULL, RFLAGS_MASK_ZF}, + NULL, NULL, NULL, NULL}, {0xc8, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xc9, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xca, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xcb, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xcc, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xcd, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, 
RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xce, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, {0xcf, X86_DECODE_CMD_BSWAP, 0, false, - NULL, NULL, NULL, NULL, decode_bswap, RFLAGS_MASK_NONE}, + NULL, NULL, NULL, NULL, decode_bswap}, }; struct decode_x87_tbl invl_inst_x87 = {0x0, 0, 0, 0, 0, false, false, NULL, - NULL, decode_invalid, 0}; + NULL, decode_invalid}; struct decode_x87_tbl _x87_inst[] = { {0xd8, 0, 3, X86_DECODE_CMD_FADD, 10, false, false, - decode_x87_modrm_st0, decode_decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_decode_x87_modrm_st0, NULL}, {0xd8, 0, 0, X86_DECODE_CMD_FADD, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd8, 1, 3, X86_DECODE_CMD_FMUL, 10, false, false, decode_x87_modrm_st0, - decode_decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_decode_x87_modrm_st0, NULL}, {0xd8, 1, 0, X86_DECODE_CMD_FMUL, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd8, 4, 3, X86_DECODE_CMD_FSUB, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xd8, 4, 0, X86_DECODE_CMD_FSUB, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd8, 5, 3, X86_DECODE_CMD_FSUB, 10, true, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xd8, 5, 0, X86_DECODE_CMD_FSUB, 4, true, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd8, 6, 3, X86_DECODE_CMD_FDIV, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xd8, 6, 0, X86_DECODE_CMD_FDIV, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd8, 7, 3, X86_DECODE_CMD_FDIV, 10, true, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xd8, 7, 0, X86_DECODE_CMD_FDIV, 4, true, false, decode_x87_modrm_st0, - decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL}, {0xd9, 0, 3, X86_DECODE_CMD_FLD, 10, false, false, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xd9, 0, 0, X86_DECODE_CMD_FLD, 4, false, false, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xd9, 1, 3, X86_DECODE_CMD_FXCH, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xd9, 1, 0, X86_DECODE_CMD_INVL, 10, false, false, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xd9, 2, 3, X86_DECODE_CMD_INVL, 10, false, false, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xd9, 2, 0, X86_DECODE_CMD_FST, 4, false, false, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xd9, 3, 3, X86_DECODE_CMD_INVL, 10, false, false, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xd9, 3, 0, X86_DECODE_CMD_FST, 4, false, true, - decode_x87_modrm_floatp, NULL, NULL, 
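/*
 * Illustrative sketch (not from the patch): x87 rows are keyed by the escape
 * opcode (0xd8..0xdf), the modrm.reg field, and whether modrm.mod selects the
 * register (3) or memory (here 0) form; operand_size is in bytes (2/4/8, and
 * 10 for the 80-bit extended format). Field names below are assumptions.
 * An equivalent lookup:
 */
const struct decode_x87_tbl *x87_lookup(uint8_t opcode, uint8_t reg, uint8_t mod)
{
    for (size_t i = 0; i < ARRAY_SIZE(_x87_inst); i++) {
        const struct decode_x87_tbl *e = &_x87_inst[i];
        if (e->opcode == opcode && e->modrm_reg == reg &&
            e->modrm_mod == (mod == 3 ? 3 : 0)) {
            return e;
        }
    }
    return &invl_inst_x87;   /* routes to decode_invalid */
}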
RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xd9, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, - decode_x87_modrm_st0, NULL, decode_d9_4, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, decode_d9_4}, {0xd9, 4, 0, X86_DECODE_CMD_INVL, 4, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, - {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, + {0xd9, 5, 3, X86_DECODE_CMD_FLDxx, 10, false, false, NULL, NULL, NULL}, {0xd9, 5, 0, X86_DECODE_CMD_FLDCW, 2, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xd9, 7, 3, X86_DECODE_CMD_FNSTCW, 2, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xd9, 7, 0, X86_DECODE_CMD_FNSTCW, 2, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xda, 0, 3, X86_DECODE_CMD_FCMOV, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xda, 0, 0, X86_DECODE_CMD_FADD, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, {0xda, 1, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0, - decode_decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_decode_x87_modrm_st0, NULL}, {0xda, 1, 0, X86_DECODE_CMD_FMUL, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, {0xda, 2, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xda, 3, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, - {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, + {0xda, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 4, 0, X86_DECODE_CMD_FSUB, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, {0xda, 5, 3, X86_DECODE_CMD_FUCOM, 10, false, true, decode_x87_modrm_st0, - decode_decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_decode_x87_modrm_st0, NULL}, {0xda, 5, 0, X86_DECODE_CMD_FSUB, 4, true, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, - {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, + {0xda, 6, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 6, 0, X86_DECODE_CMD_FDIV, 4, false, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, - {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, + {0xda, 7, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xda, 7, 0, X86_DECODE_CMD_FDIV, 4, true, false, decode_x87_modrm_st0, - decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL}, {0xdb, 0, 3, X86_DECODE_CMD_FCMOV, 10, false, false, decode_x87_modrm_st0, - decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL}, {0xdb, 0, 0, X86_DECODE_CMD_FLD, 4, false, false, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdb, 1, 3, 
X86_DECODE_CMD_FCMOV, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdb, 2, 3, X86_DECODE_CMD_FCMOV, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdb, 2, 0, X86_DECODE_CMD_FST, 4, false, false, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdb, 3, 3, X86_DECODE_CMD_FCMOV, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdb, 3, 0, X86_DECODE_CMD_FST, 4, false, true, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdb, 4, 3, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, - decode_db_4, RFLAGS_MASK_NONE}, - {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL, - RFLAGS_MASK_NONE}, + decode_db_4}, + {0xdb, 4, 0, X86_DECODE_CMD_INVL, 10, false, false, NULL, NULL, NULL}, {0xdb, 5, 3, X86_DECODE_CMD_FUCOMI, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdb, 5, 0, X86_DECODE_CMD_FLD, 10, false, false, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xdb, 7, 0, X86_DECODE_CMD_FST, 10, false, true, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xdc, 0, 3, X86_DECODE_CMD_FADD, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 0, 0, X86_DECODE_CMD_FADD, 8, false, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdc, 1, 3, X86_DECODE_CMD_FMUL, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 1, 0, X86_DECODE_CMD_FMUL, 8, false, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdc, 4, 3, X86_DECODE_CMD_FSUB, 10, true, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 4, 0, X86_DECODE_CMD_FSUB, 8, false, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdc, 5, 3, X86_DECODE_CMD_FSUB, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 5, 0, X86_DECODE_CMD_FSUB, 8, true, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdc, 6, 3, X86_DECODE_CMD_FDIV, 10, true, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 6, 0, X86_DECODE_CMD_FDIV, 8, false, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdc, 7, 3, X86_DECODE_CMD_FDIV, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdc, 7, 0, X86_DECODE_CMD_FDIV, 8, 
true, false, - decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_floatp, NULL}, {0xdd, 0, 0, X86_DECODE_CMD_FLD, 8, false, false, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xdd, 1, 3, X86_DECODE_CMD_FXCH, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdd, 2, 3, X86_DECODE_CMD_FST, 10, false, false, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xdd, 2, 0, X86_DECODE_CMD_FST, 8, false, false, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xdd, 3, 3, X86_DECODE_CMD_FST, 10, false, true, - decode_x87_modrm_st0, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, NULL, NULL}, {0xdd, 3, 0, X86_DECODE_CMD_FST, 8, false, true, - decode_x87_modrm_floatp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_floatp, NULL, NULL}, {0xdd, 4, 3, X86_DECODE_CMD_FUCOM, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdd, 4, 0, X86_DECODE_CMD_FRSTOR, 8, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xdd, 5, 3, X86_DECODE_CMD_FUCOM, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdd, 7, 0, X86_DECODE_CMD_FNSTSW, 0, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xdd, 7, 3, X86_DECODE_CMD_FNSTSW, 0, false, false, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xde, 0, 3, X86_DECODE_CMD_FADD, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 0, 0, X86_DECODE_CMD_FADD, 2, false, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xde, 1, 3, X86_DECODE_CMD_FMUL, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 1, 0, X86_DECODE_CMD_FMUL, 2, false, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xde, 4, 3, X86_DECODE_CMD_FSUB, 10, true, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 4, 0, X86_DECODE_CMD_FSUB, 2, false, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xde, 5, 3, X86_DECODE_CMD_FSUB, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 5, 0, X86_DECODE_CMD_FSUB, 2, true, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xde, 6, 3, X86_DECODE_CMD_FDIV, 10, true, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 6, 0, X86_DECODE_CMD_FDIV, 2, false, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + 
decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xde, 7, 3, X86_DECODE_CMD_FDIV, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xde, 7, 0, X86_DECODE_CMD_FDIV, 2, true, false, - decode_x87_modrm_st0, decode_x87_modrm_intp, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_intp, NULL}, {0xdf, 0, 0, X86_DECODE_CMD_FLD, 2, false, false, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdf, 1, 3, X86_DECODE_CMD_FXCH, 10, false, false, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdf, 2, 3, X86_DECODE_CMD_FST, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdf, 2, 0, X86_DECODE_CMD_FST, 2, false, false, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdf, 3, 3, X86_DECODE_CMD_FST, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdf, 3, 0, X86_DECODE_CMD_FST, 2, false, true, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdf, 4, 3, X86_DECODE_CMD_FNSTSW, 2, false, true, - decode_x87_modrm_bytep, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_bytep, NULL, NULL}, {0xdf, 5, 3, X86_DECODE_CMD_FUCOMI, 10, false, true, - decode_x87_modrm_st0, decode_x87_modrm_st0, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_st0, decode_x87_modrm_st0, NULL}, {0xdf, 5, 0, X86_DECODE_CMD_FLD, 8, false, false, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, {0xdf, 7, 0, X86_DECODE_CMD_FST, 8, false, true, - decode_x87_modrm_intp, NULL, NULL, RFLAGS_MASK_NONE}, + decode_x87_modrm_intp, NULL, NULL}, }; void calc_modrm_operand16(CPUX86State *env, struct x86_decode *decode, @@ -1678,16 +1662,16 @@ void calc_modrm_operand16(CPUX86State *env, struct x86_decode *decode, } calc_addr: if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = (uint16_t)ptr; + op->addr = (uint16_t)ptr; } else { - op->ptr = decode_linear_addr(env, decode, (uint16_t)ptr, seg); + op->addr = decode_linear_addr(env, decode, (uint16_t)ptr, seg); } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, +void *get_reg_ref(CPUX86State *env, int reg, int rex_present, int is_extended, int size) { - target_ulong ptr = 0; + void *ptr = NULL; if (is_extended) { reg |= R_R8; @@ -1696,13 +1680,13 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, switch (size) { case 1: if (is_extended || reg < 4 || rex_present) { - ptr = (target_ulong)&RL(env, reg); + ptr = &RL(env, reg); } else { - ptr = (target_ulong)&RH(env, reg - 4); + ptr = &RH(env, reg - 4); } break; default: - ptr = (target_ulong)&RRX(env, reg); + ptr = &RRX(env, reg); break; } return ptr; @@ -1713,7 +1697,7 @@ target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, { target_ulong val = 0; memcpy(&val, - (void *)get_reg_ref(env, reg, rex_present, is_extended, size), + get_reg_ref(env, reg, rex_present, is_extended, size), size); return val; } @@ -1780,9 +1764,9 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, } if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = (uint32_t)ptr; + op->addr = (uint32_t)ptr; } else { - op->ptr = decode_linear_addr(env, decode, 
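/*
 * Usage sketch (not from the patch) for the reworked get_reg_ref(): it now
 * hands back a host pointer into CPUX86State instead of laundering one
 * through a target_ulong. For byte operands, register numbers 4..7 mean
 * AH..BH only when no REX prefix was seen:
 */
uint8_t *ah  = get_reg_ref(env, 4, /*rex_present*/ 0, /*is_extended*/ 0, 1);
uint8_t *spl = get_reg_ref(env, 4, /*rex_present*/ 1, /*is_extended*/ 0, 1);
*ah = 0x12;    /* stores into bits 8..15 of RAX */
*spl = 0x34;   /* stores into the low byte of RSP */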
(uint32_t)ptr, seg); + op->addr = decode_linear_addr(env, decode, (uint32_t)ptr, seg); } } @@ -1810,9 +1794,9 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } if (X86_DECODE_CMD_LEA == decode->cmd) { - op->ptr = ptr; + op->addr = ptr; } else { - op->ptr = decode_linear_addr(env, decode, ptr, seg); + op->addr = decode_linear_addr(env, decode, ptr, seg); } } @@ -1823,8 +1807,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, - decode->rex.b, decode->operand_size); + op->regptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } @@ -1893,16 +1877,6 @@ static void decode_prefix(CPUX86State *env, struct x86_decode *decode) } } -static struct x86_segment_descriptor get_cs_descriptor(CPUState *s) -{ - struct vmx_segment vmx_cs; - x86_segment_descriptor cs; - vmx_read_segment_descriptor(s, &vmx_cs, R_CS); - vmx_segment_to_x86_descriptor(s, &vmx_cs, &cs); - - return cs; -} - void set_addressing_size(CPUX86State *env, struct x86_decode *decode) { decode->addressing_size = -1; @@ -1914,7 +1888,8 @@ void set_addressing_size(CPUX86State *env, struct x86_decode *decode) } } else if (!x86_is_long_mode(env_cpu(env))) { /* protected */ - x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env)); + x86_segment_descriptor cs; + emul_ops->read_segment_descriptor(env_cpu(env), &cs, R_CS); /* check db */ if (cs.db) { if (decode->addr_size_override) { @@ -1950,7 +1925,8 @@ void set_operand_size(CPUX86State *env, struct x86_decode *decode) } } else if (!x86_is_long_mode(env_cpu(env))) { /* protected */ - x86_segment_descriptor cs = get_cs_descriptor(env_cpu(env)); + x86_segment_descriptor cs; + emul_ops->read_segment_descriptor(env_cpu(env), &cs, R_CS); /* check db */ if (cs.db) { if (decode->op_size_override) { @@ -2055,7 +2031,6 @@ static inline void decode_opcode_general(CPUX86State *env, if (inst_decoder->operand_size) { decode->operand_size = inst_decoder->operand_size; } - decode->flags_mask = inst_decoder->flags_mask; if (inst_decoder->is_modrm) { decode_modrm(env, decode); @@ -2107,9 +2082,10 @@ static void decode_opcodes(CPUX86State *env, struct x86_decode *decode) } } -uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) +static uint32_t decode_opcode(CPUX86State *env, struct x86_decode *decode) { memset(decode, 0, sizeof(*decode)); + decode_prefix(env, decode); set_addressing_size(env, decode); set_operand_size(env, decode); @@ -2119,6 +2095,18 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) return decode->len; } +uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) +{ + return decode_opcode(env, decode); +} + +uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode, + struct x86_insn_stream *stream) +{ + decode->stream = stream; + return decode_opcode(env, decode); +} + void init_decoder(void) { int i; diff --git a/target/i386/hvf/x86_decode.h b/target/i386/emulate/x86_decode.h index a2d7a2a..1cadf36 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/emulate/x86_decode.h @@ -15,8 +15,8 @@ * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
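/*
 * Usage sketch (not from the patch) for the new entry point: it lets a caller
 * decode from a plain byte buffer, assuming the instruction-fetch path
 * consults decode->stream whenever it is non-NULL.
 */
uint8_t bytes[] = { 0x0f, 0xa2 };                 /* cpuid */
x86_insn_stream stream = { .bytes = bytes, .len = sizeof(bytes) };
struct x86_decode decode;
uint32_t len = decode_instruction_stream(env, &decode, &stream);
/* expect len == 2 and decode.cmd == X86_DECODE_CMD_CPUID */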
*/ -#ifndef HVF_X86_DECODE_H -#define HVF_X86_DECODE_H +#ifndef X86_EMU_DECODE_H +#define X86_EMU_DECODE_H #include "cpu.h" #include "x86.h" @@ -266,9 +266,17 @@ typedef struct x86_decode_op { int reg; target_ulong val; - target_ulong ptr; + union { + target_ulong addr; + void *regptr; + }; } x86_decode_op; +typedef struct x86_insn_stream { + const uint8_t *bytes; + size_t len; +} x86_insn_stream; + typedef struct x86_decode { int len; uint8_t opcode[4]; @@ -295,16 +303,18 @@ typedef struct x86_decode { struct x86_modrm modrm; struct x86_decode_op op[4]; bool is_fpu; - uint32_t flags_mask; + x86_insn_stream *stream; } x86_decode; uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); +uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode, + struct x86_insn_stream *stream); -target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, - int is_extended, int size); +void *get_reg_ref(CPUX86State *env, int reg, int rex_present, + int is_extended, int size); target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, int is_extended, int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/hvf/x86_emu.c b/target/i386/emulate/x86_emu.c index ebba80a..4409f7b 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/emulate/x86_emu.c @@ -31,8 +31,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. ///////////////////////////////////////////////////////////////////////// #include "qemu/osdep.h" @@ -40,11 +40,7 @@ #include "x86_decode.h" #include "x86.h" #include "x86_emu.h" -#include "x86_mmu.h" #include "x86_flags.h" -#include "vmcs.h" -#include "vmx.h" -#include "hvf-i386.h" #define EXEC_2OP_FLAGS_CMD(env, decode, cmd, FLAGS_FUNC, save_res) \ { \ @@ -56,7 +52,7 @@ uint8_t v2 = (uint8_t)decode->op[1].val; \ uint8_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 1); \ + write_val_ext(env, &decode->op[0], diff, 1); \ } \ FLAGS_FUNC##8(env, v1, v2, diff); \ break; \ @@ -67,7 +63,7 @@ uint16_t v2 = (uint16_t)decode->op[1].val; \ uint16_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 2); \ + write_val_ext(env, &decode->op[0], diff, 2); \ } \ FLAGS_FUNC##16(env, v1, v2, diff); \ break; \ @@ -78,7 +74,7 @@ uint32_t v2 = (uint32_t)decode->op[1].val; \ uint32_t diff = v1 cmd v2; \ if (save_res) { \ - write_val_ext(env, decode->op[0].ptr, diff, 4); \ + write_val_ext(env, &decode->op[0], diff, 4); \ } \ FLAGS_FUNC##32(env, v1, v2, diff); \ break; \ @@ -125,7 +121,7 @@ void write_reg(CPUX86State *env, int reg, target_ulong val, int size) } } -target_ulong read_val_from_reg(target_ulong reg_ptr, int size) +target_ulong read_val_from_reg(void *reg_ptr, int size) { target_ulong val; @@ -148,7 +144,7 @@ target_ulong read_val_from_reg(target_ulong reg_ptr, int size) return val; } -void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) +void write_val_to_reg(void *reg_ptr, target_ulong val, int size) { switch (size) { case 1: @@ -168,36 +164,32 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) } } -static bool is_host_reg(CPUX86State *env, 
target_ulong ptr) +static void write_val_to_mem(CPUX86State *env, target_ulong ptr, target_ulong val, int size) { - return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs); + emul_ops->write_mem(env_cpu(env), &val, ptr, size); } -void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size) +void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size) { - if (is_host_reg(env, ptr)) { - write_val_to_reg(ptr, val, size); - return; + if (decode->type == X86_VAR_REG) { + write_val_to_reg(decode->regptr, val, size); + } else { + write_val_to_mem(env, decode->addr, val, size); } - vmx_write_mem(env_cpu(env), ptr, &val, size); } uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes) { - vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, ptr, bytes); - return env->hvf_mmio_buf; + emul_ops->read_mem(env_cpu(env), env->emu_mmio_buf, ptr, bytes); + return env->emu_mmio_buf; } -target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) +static target_ulong read_val_from_mem(CPUX86State *env, target_long ptr, int size) { target_ulong val; uint8_t *mmio_ptr; - if (is_host_reg(env, ptr)) { - return read_val_from_reg(ptr, size); - } - mmio_ptr = read_mmio(env, ptr, size); switch (size) { case 1: @@ -219,6 +211,15 @@ target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) return val; } +target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size) +{ + if (decode->type == X86_VAR_REG) { + return read_val_from_reg(decode->regptr, size); + } else { + return read_val_from_mem(env, decode->addr, size); + } +} + static void fetch_operands(CPUX86State *env, struct x86_decode *decode, int n, bool val_op0, bool val_op1, bool val_op2) { @@ -230,25 +231,25 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode, case X86_VAR_IMMEDIATE: break; case X86_VAR_REG: - VM_PANIC_ON(!decode->op[i].ptr); + VM_PANIC_ON(!decode->op[i].regptr); if (calc_val[i]) { - decode->op[i].val = read_val_from_reg(decode->op[i].ptr, + decode->op[i].val = read_val_from_reg(decode->op[i].regptr, decode->operand_size); } break; case X86_VAR_RM: calc_modrm_operand(env, decode, &decode->op[i]); if (calc_val[i]) { - decode->op[i].val = read_val_ext(env, decode->op[i].ptr, + decode->op[i].val = read_val_ext(env, &decode->op[i], decode->operand_size); } break; case X86_VAR_OFFSET: - decode->op[i].ptr = decode_linear_addr(env, decode, - decode->op[i].ptr, - R_DS); + decode->op[i].addr = decode_linear_addr(env, decode, + decode->op[i].addr, + R_DS); if (calc_val[i]) { - decode->op[i].val = read_val_ext(env, decode->op[i].ptr, + decode->op[i].val = read_val_ext(env, &decode->op[i], decode->operand_size); } break; @@ -261,7 +262,7 @@ static void fetch_operands(CPUX86State *env, struct x86_decode *decode, static void exec_mov(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, false, true, false); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, + write_val_ext(env, &decode->op[0], decode->op[1].val, decode->operand_size); env->eip += decode->len; @@ -316,7 +317,7 @@ static void exec_neg(CPUX86State *env, struct x86_decode *decode) fetch_operands(env, decode, 2, true, true, false); val = 0 - sign(decode->op[1].val, decode->operand_size); - write_val_ext(env, decode->op[1].ptr, val, decode->operand_size); + write_val_ext(env, &decode->op[1], val, decode->operand_size); if (4 == decode->operand_size) { SET_FLAGS_OSZAPC_SUB32(env, 0, 0 - val, val); @@ -367,7 +368,7 @@ static void 
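/*
 * Illustrative sketch (not from the patch): the old code told registers from
 * memory by checking whether a target_ulong "pointer" fell inside
 * env->regs[]; read_val_ext()/write_val_ext() above now dispatch on the
 * operand's type instead (X86_VAR_REG goes through op->regptr, everything
 * else through emul_ops at op->addr). For a 16-bit CMP, the invocation
 * EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false)
 * therefore boils down to:
 */
uint16_t v1 = (uint16_t)decode->op[0].val;   /* filled in by fetch_operands() */
uint16_t v2 = (uint16_t)decode->op[1].val;
uint16_t diff = v1 - v2;
/* save_res is false for CMP, so there is no
   write_val_ext(env, &decode->op[0], diff, 2) store-back */
SET_FLAGS_OSZAPC_SUB16(env, v1, v2, diff);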
exec_not(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 1, true, false, false); - write_val_ext(env, decode->op[0].ptr, ~decode->op[0].val, + write_val_ext(env, &decode->op[0], ~decode->op[0].val, decode->operand_size); env->eip += decode->len; } @@ -386,8 +387,8 @@ void exec_movzx(CPUX86State *env, struct x86_decode *decode) } decode->operand_size = src_op_size; calc_modrm_operand(env, decode, &decode->op[1]); - decode->op[1].val = read_val_ext(env, decode->op[1].ptr, src_op_size); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); + decode->op[1].val = read_val_ext(env, &decode->op[1], src_op_size); + write_val_ext(env, &decode->op[0], decode->op[1].val, op_size); env->eip += decode->len; } @@ -396,18 +397,18 @@ static void exec_out(CPUX86State *env, struct x86_decode *decode) { switch (decode->opcode[0]) { case 0xe6: - hvf_handle_io(env_cpu(env), decode->op[0].val, &AL(env), 1, 1, 1); + emul_ops->handle_io(env_cpu(env), decode->op[0].val, &AL(env), 1, 1, 1); break; case 0xe7: - hvf_handle_io(env_cpu(env), decode->op[0].val, &RAX(env), 1, - decode->operand_size, 1); + emul_ops->handle_io(env_cpu(env), decode->op[0].val, &RAX(env), 1, + decode->operand_size, 1); break; case 0xee: - hvf_handle_io(env_cpu(env), DX(env), &AL(env), 1, 1, 1); + emul_ops->handle_io(env_cpu(env), DX(env), &AL(env), 1, 1, 1); break; case 0xef: - hvf_handle_io(env_cpu(env), DX(env), &RAX(env), 1, - decode->operand_size, 1); + emul_ops->handle_io(env_cpu(env), DX(env), &RAX(env), 1, + decode->operand_size, 1); break; default: VM_PANIC("Bad out opcode\n"); @@ -421,10 +422,10 @@ static void exec_in(CPUX86State *env, struct x86_decode *decode) target_ulong val = 0; switch (decode->opcode[0]) { case 0xe4: - hvf_handle_io(env_cpu(env), decode->op[0].val, &AL(env), 0, 1, 1); + emul_ops->handle_io(env_cpu(env), decode->op[0].val, &AL(env), 0, 1, 1); break; case 0xe5: - hvf_handle_io(env_cpu(env), decode->op[0].val, &val, 0, + emul_ops->handle_io(env_cpu(env), decode->op[0].val, &val, 0, decode->operand_size, 1); if (decode->operand_size == 2) { AX(env) = val; @@ -433,10 +434,11 @@ static void exec_in(CPUX86State *env, struct x86_decode *decode) } break; case 0xec: - hvf_handle_io(env_cpu(env), DX(env), &AL(env), 0, 1, 1); + emul_ops->handle_io(env_cpu(env), DX(env), &AL(env), 0, 1, 1); break; case 0xed: - hvf_handle_io(env_cpu(env), DX(env), &val, 0, decode->operand_size, 1); + emul_ops->handle_io(env_cpu(env), DX(env), &val, 0, + decode->operand_size, 1); if (decode->operand_size == 2) { AX(env) = val; } else { @@ -472,10 +474,10 @@ static inline void string_rep(CPUX86State *env, struct x86_decode *decode, while (rcx--) { func(env, decode); write_reg(env, R_ECX, rcx, decode->addressing_size); - if ((PREFIX_REP == rep) && !get_ZF(env)) { + if ((PREFIX_REP == rep) && !env->cc_dst) { break; } - if ((PREFIX_REPN == rep) && get_ZF(env)) { + if ((PREFIX_REPN == rep) && env->cc_dst) { break; } } @@ -486,10 +488,10 @@ static void exec_ins_single(CPUX86State *env, struct x86_decode *decode) target_ulong addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); - hvf_handle_io(env_cpu(env), DX(env), env->hvf_mmio_buf, 0, - decode->operand_size, 1); - vmx_write_mem(env_cpu(env), addr, env->hvf_mmio_buf, - decode->operand_size); + emul_ops->handle_io(env_cpu(env), DX(env), env->emu_mmio_buf, 0, + decode->operand_size, 1); + emul_ops->write_mem(env_cpu(env), env->emu_mmio_buf, addr, + decode->operand_size); string_increment_reg(env, R_EDI, decode); } @@ -509,10 
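/*
 * Illustrative helper (not from the patch): with lazy flags, ZF is never
 * stored anywhere; it is implied by the last result. That is why the
 * REP/REPN loop conditions above can test env->cc_dst directly instead of
 * calling the now file-local get_ZF():
 */
static inline bool zf_set(CPUX86State *env)
{
    return env->cc_dst == 0;   /* cc_dst holds the sign-extended last result */
}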
+511,10 @@ static void exec_outs_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr = decode_linear_addr(env, decode, RSI(env), R_DS); - vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, addr, - decode->operand_size); - hvf_handle_io(env_cpu(env), DX(env), env->hvf_mmio_buf, 1, - decode->operand_size, 1); + emul_ops->read_mem(env_cpu(env), env->emu_mmio_buf, addr, + decode->operand_size); + emul_ops->handle_io(env_cpu(env), DX(env), env->emu_mmio_buf, 1, + decode->operand_size, 1); string_increment_reg(env, R_ESI, decode); } @@ -538,8 +540,8 @@ static void exec_movs_single(CPUX86State *env, struct x86_decode *decode) dst_addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); - val = read_val_ext(env, src_addr, decode->operand_size); - write_val_ext(env, dst_addr, val, decode->operand_size); + val = read_val_from_mem(env, src_addr, decode->operand_size); + write_val_to_mem(env, dst_addr, val, decode->operand_size); string_increment_reg(env, R_ESI, decode); string_increment_reg(env, R_EDI, decode); @@ -566,9 +568,9 @@ static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode) decode->addressing_size, R_ES); decode->op[0].type = X86_VAR_IMMEDIATE; - decode->op[0].val = read_val_ext(env, src_addr, decode->operand_size); + decode->op[0].val = read_val_from_mem(env, src_addr, decode->operand_size); decode->op[1].type = X86_VAR_IMMEDIATE; - decode->op[1].val = read_val_ext(env, dst_addr, decode->operand_size); + decode->op[1].val = read_val_from_mem(env, dst_addr, decode->operand_size); EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); @@ -595,7 +597,7 @@ static void exec_stos_single(CPUX86State *env, struct x86_decode *decode) addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); val = read_reg(env, R_EAX, decode->operand_size); - vmx_write_mem(env_cpu(env), addr, &val, decode->operand_size); + emul_ops->write_mem(env_cpu(env), &val, addr, decode->operand_size); string_increment_reg(env, R_EDI, decode); } @@ -619,7 +621,7 @@ static void exec_scas_single(CPUX86State *env, struct x86_decode *decode) addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); decode->op[1].type = X86_VAR_IMMEDIATE; - vmx_read_mem(env_cpu(env), &decode->op[1].val, addr, decode->operand_size); + emul_ops->read_mem(env_cpu(env), &decode->op[1].val, addr, decode->operand_size); EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); string_increment_reg(env, R_EDI, decode); @@ -644,7 +646,7 @@ static void exec_lods_single(CPUX86State *env, struct x86_decode *decode) target_ulong val = 0; addr = decode_linear_addr(env, decode, RSI(env), R_DS); - vmx_read_mem(env_cpu(env), &val, addr, decode->operand_size); + emul_ops->read_mem(env_cpu(env), &val, addr, decode->operand_size); write_reg(env, R_EAX, val, decode->operand_size); string_increment_reg(env, R_ESI, decode); @@ -671,13 +673,13 @@ void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_c static void exec_rdmsr(CPUX86State *env, struct x86_decode *decode) { - hvf_simulate_rdmsr(env); + emul_ops->simulate_rdmsr(env_cpu(env)); env->eip += decode->len; } static void exec_wrmsr(CPUX86State *env, struct x86_decode *decode) { - hvf_simulate_wrmsr(env); + emul_ops->simulate_wrmsr(env_cpu(env)); env->eip += decode->len; } @@ -700,15 +702,15 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) if (decode->op[0].type != X86_VAR_REG) { if (4 == decode->operand_size) { displacement = ((int32_t) 
(decode->op[1].val & 0xffffffe0)) / 32; - decode->op[0].ptr += 4 * displacement; + decode->op[0].addr += 4 * displacement; } else if (2 == decode->operand_size) { displacement = ((int16_t) (decode->op[1].val & 0xfff0)) / 16; - decode->op[0].ptr += 2 * displacement; + decode->op[0].addr += 2 * displacement; } else { VM_PANIC("bt 64bit\n"); } } - decode->op[0].val = read_val_ext(env, decode->op[0].ptr, + decode->op[0].val = read_val_ext(env, &decode->op[0], decode->operand_size); cf = (decode->op[0].val >> index) & 0x01; @@ -726,7 +728,7 @@ static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) decode->op[0].val &= ~(1u << index); break; } - write_val_ext(env, decode->op[0].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[0], decode->op[0].val, decode->operand_size); set_CF(env, cf); } @@ -778,7 +780,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) of = cf ^ (res >> 7); } - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); SET_FLAGS_OSZAPC_LOGIC8(env, 0, 0, res); SET_FLAGS_OxxxxC(env, of, cf); break; @@ -794,7 +796,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) of = cf ^ (res >> 15); /* of = cf ^ result15 */ } - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); SET_FLAGS_OSZAPC_LOGIC16(env, 0, 0, res); SET_FLAGS_OxxxxC(env, of, cf); break; @@ -803,7 +805,7 @@ void exec_shl(CPUX86State *env, struct x86_decode *decode) { uint32_t res = decode->op[0].val << count; - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); SET_FLAGS_OSZAPC_LOGIC32(env, 0, 0, res); cf = (decode->op[0].val >> (32 - count)) & 0x1; of = cf ^ (res >> 31); /* of = cf ^ result31 */ @@ -834,10 +836,10 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode) decode->operand_size = src_op_size; calc_modrm_operand(env, decode, &decode->op[1]); - decode->op[1].val = sign(read_val_ext(env, decode->op[1].ptr, src_op_size), + decode->op[1].val = sign(read_val_ext(env, &decode->op[1], src_op_size), src_op_size); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); + write_val_ext(env, &decode->op[0], decode->op[1].val, op_size); env->eip += decode->len; } @@ -865,7 +867,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode) count &= 0x7; /* use only bottom 3 bits */ res = ((uint8_t)decode->op[0].val >> count) | ((uint8_t)decode->op[0].val << (8 - count)); - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); bit6 = (res >> 6) & 1; bit7 = (res >> 7) & 1; /* set eflags: ROR count affects the following flags: C, O */ @@ -889,7 +891,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode) count &= 0x0f; /* use only 4 LSB's */ res = ((uint16_t)decode->op[0].val >> count) | ((uint16_t)decode->op[0].val << (16 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); bit14 = (res >> 14) & 1; bit15 = (res >> 15) & 1; @@ -907,7 +909,7 @@ void exec_ror(CPUX86State *env, struct x86_decode *decode) if (count) { res = ((uint32_t)decode->op[0].val >> count) | ((uint32_t)decode->op[0].val << (32 - count)); - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); bit31 = (res >> 31) & 1; bit30 = (res >> 30) & 1; @@ -944,7 +946,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint8_t)decode->op[0].val << count) | ((uint8_t)decode->op[0].val >> (8 - count)); - write_val_ext(env, 
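/*
 * Worked example (not from the patch) of do_bt()'s memory-operand addressing
 * for a 32-bit operand with bit offset 100:
 */
int32_t bitoff = 100;
int32_t displacement = ((int32_t)(bitoff & 0xffffffe0)) / 32;  /* = 3 dwords */
/* decode->op[0].addr += 4 * displacement;  -> the dword holding bit 100 */
int index = bitoff & 0x1f;                                     /* = bit 4   */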
decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); /* set eflags: * ROL count affects the following flags: C, O */ @@ -971,7 +973,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint16_t)decode->op[0].val << count) | ((uint16_t)decode->op[0].val >> (16 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); bit0 = (res & 0x1); bit15 = (res >> 15); /* of = cf ^ result15 */ @@ -989,7 +991,7 @@ void exec_rol(CPUX86State *env, struct x86_decode *decode) res = ((uint32_t)decode->op[0].val << count) | ((uint32_t)decode->op[0].val >> (32 - count)); - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); bit0 = (res & 0x1); bit31 = (res >> 31); /* of = cf ^ result31 */ @@ -1027,7 +1029,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_8 >> (9 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); cf = (op1_8 >> (8 - count)) & 0x01; of = cf ^ (res >> 7); /* of = cf ^ result7 */ @@ -1053,7 +1055,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_16 >> (17 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); cf = (op1_16 >> (16 - count)) & 0x1; of = cf ^ (res >> 15); /* of = cf ^ result15 */ @@ -1076,7 +1078,7 @@ void exec_rcl(CPUX86State *env, struct x86_decode *decode) (op1_32 >> (33 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); cf = (op1_32 >> (32 - count)) & 0x1; of = cf ^ (res >> 31); /* of = cf ^ result31 */ @@ -1108,7 +1110,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) res = (op1_8 >> count) | (get_CF(env) << (8 - count)) | (op1_8 << (9 - count)); - write_val_ext(env, decode->op[0].ptr, res, 1); + write_val_ext(env, &decode->op[0], res, 1); cf = (op1_8 >> (count - 1)) & 0x1; of = (((res << 1) ^ res) >> 7) & 0x1; /* of = result6 ^ result7 */ @@ -1127,7 +1129,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) res = (op1_16 >> count) | (get_CF(env) << (16 - count)) | (op1_16 << (17 - count)); - write_val_ext(env, decode->op[0].ptr, res, 2); + write_val_ext(env, &decode->op[0], res, 2); cf = (op1_16 >> (count - 1)) & 0x1; of = ((uint16_t)((res << 1) ^ res) >> 15) & 0x1; /* of = result15 ^ @@ -1151,7 +1153,7 @@ void exec_rcr(CPUX86State *env, struct x86_decode *decode) (op1_32 << (33 - count)); } - write_val_ext(env, decode->op[0].ptr, res, 4); + write_val_ext(env, &decode->op[0], res, 4); cf = (op1_32 >> (count - 1)) & 0x1; of = ((res << 1) ^ res) >> 31; /* of = result30 ^ result31 */ @@ -1166,9 +1168,9 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, true, true, false); - write_val_ext(env, decode->op[0].ptr, decode->op[1].val, + write_val_ext(env, &decode->op[0], decode->op[1].val, decode->operand_size); - write_val_ext(env, decode->op[1].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[1], decode->op[0].val, decode->operand_size); env->eip += decode->len; @@ -1177,7 +1179,7 @@ static void exec_xchg(CPUX86State *env, struct x86_decode *decode) static void exec_xadd(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); - write_val_ext(env, decode->op[1].ptr, decode->op[0].val, + write_val_ext(env, &decode->op[1], decode->op[0].val, decode->operand_size); env->eip += decode->len; @@ -1231,6 +1233,8 @@ static 
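/*
 * Worked example (not from the patch) of the 8-bit RCL path above: CF and
 * the operand rotate together as one 9-bit quantity. With op = 0x40, CF = 1
 * and count = 2, per the rotate expression in exec_rcl():
 */
uint8_t op1_8 = 0x40;
int count = 2;
bool old_cf = true;
uint8_t res = (op1_8 << count) | (old_cf << (count - 1)) |
              (op1_8 >> (9 - count));
bool new_cf = (op1_8 >> (8 - count)) & 0x01;
/* res == 0x02, new_cf == 1: the 9-bit value 1_0100_0000 rotated left twice */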
struct cmd_handler { static struct cmd_handler _cmd_handler[X86_DECODE_CMD_LAST]; +const struct x86_emul_ops *emul_ops; + static void init_cmd_handler(void) { int i; @@ -1242,7 +1246,8 @@ static void init_cmd_handler(void) bool exec_instruction(CPUX86State *env, struct x86_decode *ins) { if (!_cmd_handler[ins->cmd].handler) { - printf("Unimplemented handler (%llx) for %d (%x %x) \n", env->eip, + printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x)\n", + env->eip, ins->cmd, ins->opcode[0], ins->opcode_len > 1 ? ins->opcode[1] : 0); env->eip += ins->len; @@ -1253,7 +1258,8 @@ bool exec_instruction(CPUX86State *env, struct x86_decode *ins) return true; } -void init_emu(void) +void init_emu(const struct x86_emul_ops *o) { + emul_ops = o; init_cmd_handler(); } diff --git a/target/i386/hvf/x86_emu.h b/target/i386/emulate/x86_emu.h index bc0fc72..05686b1 100644 --- a/target/i386/hvf/x86_emu.h +++ b/target/i386/emulate/x86_emu.h @@ -23,17 +23,32 @@ #include "x86_decode.h" #include "cpu.h" -void init_emu(void); +struct x86_emul_ops { + void (*fetch_instruction)(CPUState *cpu, void *data, target_ulong addr, + int bytes); + void (*read_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); + void (*write_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); + void (*read_segment_descriptor)(CPUState *cpu, struct x86_segment_descriptor *desc, + enum X86Seg seg); + void (*handle_io)(CPUState *cpu, uint16_t port, void *data, int direction, + int size, int count); + void (*simulate_rdmsr)(CPUState *cs); + void (*simulate_wrmsr)(CPUState *cs); +}; + +extern const struct x86_emul_ops *emul_ops; + +void init_emu(const struct x86_emul_ops *ops); bool exec_instruction(CPUX86State *env, struct x86_decode *ins); void x86_emul_raise_exception(CPUX86State *env, int exception_index, int error_code); target_ulong read_reg(CPUX86State *env, int reg, int size); void write_reg(CPUX86State *env, int reg, target_ulong val, int size); -target_ulong read_val_from_reg(target_ulong reg_ptr, int size); -void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size); -void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size); +target_ulong read_val_from_reg(void *reg_ptr, int size); +void write_val_to_reg(void *reg_ptr, target_ulong val, int size); +void write_val_ext(CPUX86State *env, struct x86_decode_op *decode, target_ulong val, int size); uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes); -target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size); +target_ulong read_val_ext(CPUX86State *env, struct x86_decode_op *decode, int size); void exec_movzx(CPUX86State *env, struct x86_decode *decode); void exec_shl(CPUX86State *env, struct x86_decode *decode); diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c new file mode 100644 index 0000000..6592193 --- /dev/null +++ b/target/i386/emulate/x86_flags.c @@ -0,0 +1,273 @@ +///////////////////////////////////////////////////////////////////////// +// +// Copyright (C) 2001-2012 The Bochs Project +// Copyright (C) 2017 Google Inc. +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
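/*
 * Wiring sketch (not from the patch): how an accelerator plugs into the now
 * backend-neutral emulator. The callback names are hypothetical; hvf, for
 * instance, would install implementations backed by vmx_read_mem() and
 * friends, then call init_emu() once during setup.
 */
static const struct x86_emul_ops my_emul_ops = {
    .fetch_instruction       = my_fetch_instruction,
    .read_mem                = my_read_mem,
    .write_mem               = my_write_mem,
    .read_segment_descriptor = my_read_segment_descriptor,
    .handle_io               = my_handle_io,
    .simulate_rdmsr          = my_simulate_rdmsr,
    .simulate_wrmsr          = my_simulate_wrmsr,
};

init_emu(&my_emul_ops);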
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. +///////////////////////////////////////////////////////////////////////// +/* + * flags functions + */ + +#include "qemu/osdep.h" + +#include "panic.h" +#include "cpu.h" +#include "x86_flags.h" +#include "x86.h" + + +/* + * The algorithms here are similar to those in Bochs. After an ALU + * operation, CC_DST can be used to compute ZF, SF and PF, whereas + * CC_SRC is used to compute AF, CF and OF. In reality, SF and PF are the + * XOR of the value computed from CC_DST and the value found in bits 7 and 2 + * of CC_SRC; this way the same logic can be used to compute the flags + * both before and after an ALU operation. + * + * Compared to the TCG CC_OP codes, this avoids conditionals when converting + * to and from the RFLAGS representation. + */ + +#define LF_SIGN_BIT (TARGET_LONG_BITS - 1) + +#define LF_BIT_PD (2) /* lazy Parity Delta, same bit as PF */ +#define LF_BIT_AF (3) /* lazy Adjust flag */ +#define LF_BIT_SD (7) /* lazy Sign Flag Delta, same bit as SF */ +#define LF_BIT_CF (TARGET_LONG_BITS - 1) /* lazy Carry Flag */ +#define LF_BIT_PO (TARGET_LONG_BITS - 2) /* lazy Partial Overflow = CF ^ OF */ + +#define LF_MASK_PD ((target_ulong)0x01 << LF_BIT_PD) +#define LF_MASK_AF ((target_ulong)0x01 << LF_BIT_AF) +#define LF_MASK_SD ((target_ulong)0x01 << LF_BIT_SD) +#define LF_MASK_CF ((target_ulong)0x01 << LF_BIT_CF) +#define LF_MASK_PO ((target_ulong)0x01 << LF_BIT_PO) + +/* ******************* */ +/* OSZAPC */ +/* ******************* */ + +/* use carries to fill in AF, PO and CF, while ensuring PD and SD are clear. + * for full-word operations just clear PD and SD; for smaller operand + * sizes only keep AF in the low byte and shift the carries left to + * place PO and CF in the top two bits. + */ +#define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \ + env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \ + target_ulong temp = (lf_carries); \ + if ((size) == TARGET_LONG_BITS) { \ + temp = temp & ~(LF_MASK_PD | LF_MASK_SD); \ + } else { \ + temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \ + } \ + env->cc_src = temp; \ +} + +/* carries, result */ +#define SET_FLAGS_OSZAPC_8(carries, result) \ + SET_FLAGS_OSZAPC_SIZE(8, carries, result) +#define SET_FLAGS_OSZAPC_16(carries, result) \ + SET_FLAGS_OSZAPC_SIZE(16, carries, result) +#define SET_FLAGS_OSZAPC_32(carries, result) \ + SET_FLAGS_OSZAPC_SIZE(32, carries, result) + +/* ******************* */ +/* OSZAP */ +/* ******************* */ +/* same as setting OSZAPC, but preserve CF and flip PO if the old value of CF + * did not match the high bit of lf_carries. 
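/*
 * Worked example (not from the patch), on a 64-bit target: an 8-bit add of
 * 0xff + 0x01 = 0x00. ADD_COUT_VEC(0xff, 0x01, 0x00) is 0xff, i.e. every bit
 * position produced a carry, so:
 */
SET_FLAGS_OSZAPC_8(/*carries*/ 0xff, /*result*/ 0x00);
/* cc_dst == 0       -> ZF set; SF and PF also derive from the zero result
   cc_src bit 3  set -> AF (kept in place by the "& LF_MASK_AF" term)
   cc_src bit 63 set -> CF (carry out of bit 7, shifted to the top)
   cc_src bit 62 set -> PO = CF ^ OF = 1, hence OF = 0                   */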
*/ +#define SET_FLAGS_OSZAP_SIZE(size, lf_carries, lf_result) { \ + env->cc_dst = (target_ulong)(int##size##_t)(lf_result); \ + target_ulong temp = (lf_carries); \ + if ((size) == TARGET_LONG_BITS) { \ + temp = (temp & ~(LF_MASK_PD | LF_MASK_SD)); \ + } else { \ + temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \ + } \ + target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0; \ + env->cc_src = temp ^ (cf_changed * (LF_MASK_PO | LF_MASK_CF)); \ +} + +/* carries, result */ +#define SET_FLAGS_OSZAP_8(carries, result) \ + SET_FLAGS_OSZAP_SIZE(8, carries, result) +#define SET_FLAGS_OSZAP_16(carries, result) \ + SET_FLAGS_OSZAP_SIZE(16, carries, result) +#define SET_FLAGS_OSZAP_32(carries, result) \ + SET_FLAGS_OSZAP_SIZE(32, carries, result) + +void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf) +{ + env->cc_src &= ~(LF_MASK_PO | LF_MASK_CF); + env->cc_src |= (-(target_ulong)new_cf << LF_BIT_PO); + env->cc_src ^= ((target_ulong)new_of << LF_BIT_PO); +} + +void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, + uint32_t diff) +{ + SET_FLAGS_OSZAPC_32(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAPC_SUB16(CPUX86State *env, uint16_t v1, uint16_t v2, + uint16_t diff) +{ + SET_FLAGS_OSZAPC_16(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAPC_SUB8(CPUX86State *env, uint8_t v1, uint8_t v2, + uint8_t diff) +{ + SET_FLAGS_OSZAPC_8(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAPC_ADD32(CPUX86State *env, uint32_t v1, uint32_t v2, + uint32_t diff) +{ + SET_FLAGS_OSZAPC_32(ADD_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAPC_ADD16(CPUX86State *env, uint16_t v1, uint16_t v2, + uint16_t diff) +{ + SET_FLAGS_OSZAPC_16(ADD_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAPC_ADD8(CPUX86State *env, uint8_t v1, uint8_t v2, + uint8_t diff) +{ + SET_FLAGS_OSZAPC_8(ADD_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, + uint32_t diff) +{ + SET_FLAGS_OSZAP_32(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_SUB16(CPUX86State *env, uint16_t v1, uint16_t v2, + uint16_t diff) +{ + SET_FLAGS_OSZAP_16(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_SUB8(CPUX86State *env, uint8_t v1, uint8_t v2, + uint8_t diff) +{ + SET_FLAGS_OSZAP_8(SUB_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_ADD32(CPUX86State *env, uint32_t v1, uint32_t v2, + uint32_t diff) +{ + SET_FLAGS_OSZAP_32(ADD_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_ADD16(CPUX86State *env, uint16_t v1, uint16_t v2, + uint16_t diff) +{ + SET_FLAGS_OSZAP_16(ADD_COUT_VEC(v1, v2, diff), diff); +} + +void SET_FLAGS_OSZAP_ADD8(CPUX86State *env, uint8_t v1, uint8_t v2, + uint8_t diff) +{ + SET_FLAGS_OSZAP_8(ADD_COUT_VEC(v1, v2, diff), diff); +} + + +void SET_FLAGS_OSZAPC_LOGIC32(CPUX86State *env, uint32_t v1, uint32_t v2, + uint32_t diff) +{ + SET_FLAGS_OSZAPC_32(0, diff); +} + +void SET_FLAGS_OSZAPC_LOGIC16(CPUX86State *env, uint16_t v1, uint16_t v2, + uint16_t diff) +{ + SET_FLAGS_OSZAPC_16(0, diff); +} + +void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, + uint8_t diff) +{ + SET_FLAGS_OSZAPC_8(0, diff); +} + +static inline uint32_t get_PF(CPUX86State *env) +{ + return ((parity8(env->cc_dst) - 1) ^ env->cc_src) & CC_P; +} + +static inline uint32_t get_OF(CPUX86State *env) +{ + return ((env->cc_src >> (LF_BIT_CF - 11)) + CC_O / 2) & CC_O; +} + +bool get_CF(CPUX86State *env) +{ + return ((target_long)env->cc_src) < 0; +} + +void 
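/*
 * Illustrative sketch (not from the patch) of why get_CF() and get_OF()
 * below need only shifts: CF lives in the top bit of cc_src and
 * PO (= CF ^ OF) in the bit just beneath it, so:
 */
bool cf = (target_long)env->cc_src < 0;                 /* LF_BIT_CF      */
bool po = (env->cc_src >> (TARGET_LONG_BITS - 2)) & 1;  /* LF_BIT_PO      */
bool of = cf ^ po;            /* what get_OF() computes with its add trick */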
set_CF(CPUX86State *env, bool val) +{ + /* If CF changes, flip PO and CF */ + target_ulong temp = -(target_ulong)val; + target_ulong cf_changed = ((target_long)(env->cc_src ^ temp)) < 0; + env->cc_src ^= cf_changed * (LF_MASK_PO | LF_MASK_CF); +} + +static inline uint32_t get_ZF(CPUX86State *env) +{ + return env->cc_dst ? 0 : CC_Z; +} + +static inline uint32_t get_SF(CPUX86State *env) +{ + return ((env->cc_dst >> (LF_SIGN_BIT - LF_BIT_SD)) ^ + env->cc_src) & CC_S; +} + +void lflags_to_rflags(CPUX86State *env) +{ + env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O); + /* rotate left by one to move carry-out bits into CF and AF */ + env->eflags |= ( + (env->cc_src << 1) | + (env->cc_src >> (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + env->eflags |= get_SF(env); + env->eflags |= get_PF(env); + env->eflags |= get_ZF(env); + env->eflags |= get_OF(env); +} + +void rflags_to_lflags(CPUX86State *env) +{ + target_ulong cf_af, cf_xor_of; + + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; + + /* ... and therefore cc_src always uses opposite polarity. */ + env->cc_src = CC_P; + env->cc_src ^= env->eflags & (CC_S | CC_P); + + /* rotate right by one to move CF and AF into the carry-out positions */ + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); + + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; + env->cc_src |= -cf_xor_of & LF_MASK_PO; +} diff --git a/target/i386/hvf/x86_flags.h b/target/i386/emulate/x86_flags.h index 75c2a7f..a395c83 100644 --- a/target/i386/hvf/x86_flags.h +++ b/target/i386/emulate/x86_flags.h @@ -14,34 +14,24 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. 
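The lazy-flags scheme above is easier to see with a concrete example: cc_dst caches the sign-extended result (ZF, SF and PF fall out of it), while cc_src caches a carry-out vector with CF in the top bit and CF^OF ("partial overflow") beside it. The following standalone sketch, with names local to the example rather than taken from the patch, shows how an addition's carry-out vector yields CF, AF and OF for a 32-bit operation:

    /* Sketch: recovering CF, AF and OF from an addition's carry-out vector. */
    #include <stdint.h>
    #include <stdio.h>

    /* Bit i is the carry OUT of bit i for result = op1 + op2. */
    #define ADD_COUT_VEC(op1, op2, result) \
        (((op1) & (op2)) | (((op1) | (op2)) & ~(result)))

    int main(void)
    {
        uint32_t a = 0x7fffffff, b = 1, r = a + b;
        uint32_t c = ADD_COUT_VEC(a, b, r);

        int cf = (c >> 31) & 1;               /* carry out of bit 31 */
        int af = (c >> 3) & 1;                /* carry out of bit 3 */
        int of = ((c >> 31) ^ (c >> 30)) & 1; /* cout ^ cin of the sign bit */

        printf("CF=%d AF=%d OF=%d\n", cf, af, of); /* prints CF=0 AF=1 OF=1 */
        return 0;
    }

Storing the whole vector and extracting individual flags only on demand is what makes the representation cheap: most instructions compute the vector once and never materialize eflags.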
///////////////////////////////////////////////////////////////////////// /* * x86 eflags functions */ -#ifndef X86_FLAGS_H -#define X86_FLAGS_H +#ifndef X86_EMU_FLAGS_H +#define X86_EMU_FLAGS_H #include "cpu.h" void lflags_to_rflags(CPUX86State *env); void rflags_to_lflags(CPUX86State *env); -bool get_PF(CPUX86State *env); -void set_PF(CPUX86State *env, bool val); bool get_CF(CPUX86State *env); void set_CF(CPUX86State *env, bool val); -bool get_AF(CPUX86State *env); -void set_AF(CPUX86State *env, bool val); -bool get_ZF(CPUX86State *env); -void set_ZF(CPUX86State *env, bool val); -bool get_SF(CPUX86State *env); -void set_SF(CPUX86State *env, bool val); -bool get_OF(CPUX86State *env); -void set_OF(CPUX86State *env, bool val); -void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf); +void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf); void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, uint32_t diff); @@ -78,4 +68,4 @@ void SET_FLAGS_OSZAPC_LOGIC16(CPUX86State *env, uint16_t v1, uint16_t v2, void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, uint8_t diff); -#endif /* X86_FLAGS_H */ +#endif /* X86_EMU_FLAGS_H */ diff --git a/target/i386/helper.c b/target/i386/helper.c index c07b1b1..72b2e19 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -22,9 +22,11 @@ #include "cpu.h" #include "exec/cputlb.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "system/runstate.h" #ifndef CONFIG_USER_ONLY #include "system/hw_accel.h" +#include "system/memory.h" #include "monitor/monitor.h" #include "kvm/kvm_i386.h" #endif @@ -108,6 +110,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) /* x86 mmu */ /* XXX: add PGE support */ +#ifndef CONFIG_USER_ONLY void x86_cpu_set_a20(X86CPU *cpu, int a20_state) { CPUX86State *env = &cpu->env; @@ -127,6 +130,7 @@ void x86_cpu_set_a20(X86CPU *cpu, int a20_state) env->a20_mask = ~(1 << 20) | (a20_state << 20); } } +#endif void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) { @@ -524,7 +528,7 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, static inline target_ulong get_memio_eip(CPUX86State *env) { #ifdef CONFIG_TCG - uint64_t data[TARGET_INSN_START_WORDS]; + uint64_t data[INSN_START_WORDS]; CPUState *cs = env_cpu(env); if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) { @@ -617,6 +621,10 @@ void do_cpu_init(X86CPU *cpu) void do_cpu_sipi(X86CPU *cpu) { + CPUX86State *env = &cpu->env; + if (env->hflags & HF_SMM_MASK) { + return; + } apic_sipi(cpu->apic_state); } diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 3e4e85e..d5e2bb5 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; @@ -80,7 +80,6 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp) return true; } -#define CPUID_MODEL_ID_SZ 48 /** * cpu_x86_fill_model_id: * Get CPUID model ID string from host CPU. 
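The A20 handling in x86_cpu_set_a20() above, now compiled for system emulation only, computes a mask that either clears or preserves physical address bit 20, reproducing the 8086 1 MiB wrap-around. A minimal example-local sketch of the same arithmetic:

    /* Sketch: how a20_mask wraps addresses when the A20 gate is closed. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for (int a20_state = 0; a20_state <= 1; a20_state++) {
            uint32_t a20_mask = ~(1u << 20) | ((uint32_t)a20_state << 20);
            uint32_t addr = 0x100000; /* 1 MiB, start of the HMA */

            printf("A20=%d: 0x%06x -> 0x%06x\n", a20_state,
                   (unsigned)addr, (unsigned)(addr & a20_mask));
        }
        return 0; /* A20=0 wraps 0x100000 to 0x000000; A20=1 keeps it */
    }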
@@ -109,18 +108,22 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping) { uint32_t eax, ebx, ecx, edx; - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); + host_cpuid(0x0, 0, NULL, &ebx, &ecx, &edx); x86_cpu_vendor_words2str(vendor, ebx, edx, ecx); + if (!family && !model && !stepping) { + return; + } + host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); if (family) { - *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + *family = x86_cpu_family(eax); } if (model) { - *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + *model = x86_cpu_model(eax); } if (stepping) { - *stepping = eax & 0x0F; + *stepping = x86_cpu_stepping(eax); } } @@ -128,29 +131,27 @@ void host_cpu_instance_init(X86CPU *cpu) { X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); - if (xcc->model) { - uint32_t ebx = 0, ecx = 0, edx = 0; - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(vendor, ebx, edx, ecx); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); - } -} - -void host_cpu_max_instance_init(X86CPU *cpu) -{ char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; int family, model, stepping; - /* Use max host physical address bits if -cpu max option is applied */ - object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); - + /* + * setting vendor applies to both max/host and builtin_x86_defs CPU. + * FIXME: this probably should warn or should be skipped if vendors do + * not match, because family numbers are incompatible between Intel and AMD. + */ host_cpu_vendor_fms(vendor, &family, &model, &stepping); + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + + if (!xcc->max_features) { + return; + } + host_cpu_fill_model_id(model_id); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + /* Use max host physical address bits if -cpu max option is applied */ + object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); + object_property_set_int(OBJECT(cpu), "family", family, &error_abort); object_property_set_int(OBJECT(cpu), "model", model, &error_abort); object_property_set_int(OBJECT(cpu), "stepping", stepping, @@ -159,7 +160,16 @@ void host_cpu_max_instance_init(X86CPU *cpu) &error_abort); } -static void host_cpu_class_init(ObjectClass *oc, void *data) +bool is_host_cpu_intel(void) +{ + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpu_vendor_fms(vendor, NULL, NULL, NULL); + + return g_str_equal(vendor, CPUID_VENDOR_INTEL); +} + +static void host_cpu_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc91..ee65324 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,10 +10,11 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); -void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping); +bool is_host_cpu_intel(void); #endif /* HOST_CPU_H */ diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c index b5f4c80..94ee096 100644 --- a/target/i386/hvf/hvf-cpu.c +++ b/target/i386/hvf/hvf-cpu.c @@ -21,8 +21,6 @@ static void hvf_cpu_max_instance_init(X86CPU *cpu) { CPUX86State *env = &cpu->env; - host_cpu_max_instance_init(cpu); - env->cpuid_min_level = hvf_get_supported_cpuid(0x0, 0, R_EAX); 
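The host_cpu_vendor_fms() hunk above replaces open-coded family/model/stepping extraction with the x86_cpu_family(), x86_cpu_model() and x86_cpu_stepping() helpers. A sketch of the CPUID leaf 1 EAX decode such helpers wrap, using example-local names and the SDM field layout (stepping[3:0], model[7:4], family[11:8], extended model[19:16], extended family[27:20]); treat the exact handling of the extended fields here as illustrative:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned fms_family(uint32_t eax)
    {
        unsigned family = (eax >> 8) & 0xf;
        /* extended family only contributes when the base field saturates */
        return family == 0xf ? family + ((eax >> 20) & 0xff) : family;
    }

    static unsigned fms_model(uint32_t eax)
    {
        unsigned family = (eax >> 8) & 0xf;
        unsigned model = (eax >> 4) & 0xf;
        /* extended model is architecturally defined for family 6 and 15 */
        if (family == 0x6 || family == 0xf) {
            model |= ((eax >> 16) & 0xf) << 4;
        }
        return model;
    }

    static unsigned fms_stepping(uint32_t eax)
    {
        return eax & 0xf;
    }

    int main(void)
    {
        uint32_t eax = 0x000906ea; /* e.g. a Coffee Lake leaf-1 EAX */
        printf("family %u model %u stepping %u\n",
               fms_family(eax), fms_model(eax), fms_stepping(eax));
        return 0; /* prints: family 6 model 158 stepping 10 */
    }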
env->cpuid_min_xlevel = @@ -61,20 +59,21 @@ static void hvf_cpu_xsave_init(void) static void hvf_cpu_instance_init(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); host_cpu_instance_init(cpu); /* Special cases not set in the X86CPUDefinition structs: */ /* TODO: in-kernel irqchip for hvf */ - if (cpu->max_features) { + if (xcc->max_features) { hvf_cpu_max_instance_init(cpu); } hvf_cpu_xsave_init(); } -static void hvf_cpu_accel_class_init(ObjectClass *oc, void *data) +static void hvf_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 044ad23..8c42ae6 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -19,8 +19,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); void hvf_handle_io(CPUState *, uint16_t, void *, int, int, int); -void hvf_simulate_rdmsr(CPUX86State *env); -void hvf_simulate_wrmsr(CPUX86State *env); +void hvf_simulate_rdmsr(CPUState *cpu); +void hvf_simulate_wrmsr(CPUState *cpu); /* Host specific functions */ int hvf_inject_interrupt(CPUArchState *env, int vector); diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 9ba0e04..8445cad 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -59,12 +59,12 @@ #include "hvf-i386.h" #include "vmcs.h" #include "vmx.h" -#include "x86.h" +#include "emulate/x86.h" #include "x86_descr.h" -#include "x86_flags.h" +#include "emulate/x86_flags.h" #include "x86_mmu.h" -#include "x86_decode.h" -#include "x86_emu.h" +#include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" #include "x86_task.h" #include "x86hvf.h" @@ -76,6 +76,7 @@ #include "qemu/main-loop.h" #include "qemu/accel.h" #include "target/i386/cpu.h" +#include "exec/target_page.h" static Error *invtsc_mig_blocker; @@ -168,7 +169,7 @@ void hvf_arch_vcpu_destroy(CPUState *cpu) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - g_free(env->hvf_mmio_buf); + g_free(env->emu_mmio_buf); } static void init_tsc_freq(CPUX86State *env) @@ -229,6 +230,33 @@ hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range) return hv_vm_create(HV_VM_DEFAULT); } +static void hvf_read_segment_descriptor(CPUState *s, struct x86_segment_descriptor *desc, + X86Seg seg) +{ + struct vmx_segment vmx_segment; + vmx_read_segment_descriptor(s, &vmx_segment, seg); + vmx_segment_to_x86_descriptor(s, &vmx_segment, desc); +} + +static void hvf_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes) +{ + vmx_read_mem(cpu, data, gva, bytes); +} + +static void hvf_write_mem(CPUState *cpu, void *data, target_ulong gva, int bytes) +{ + vmx_write_mem(cpu, gva, data, bytes); +} + +static const struct x86_emul_ops hvf_x86_emul_ops = { + .read_mem = hvf_read_mem, + .write_mem = hvf_write_mem, + .read_segment_descriptor = hvf_read_segment_descriptor, + .handle_io = hvf_handle_io, + .simulate_rdmsr = hvf_simulate_rdmsr, + .simulate_wrmsr = hvf_simulate_wrmsr, +}; + int hvf_arch_init_vcpu(CPUState *cpu) { X86CPU *x86cpu = X86_CPU(cpu); @@ -237,13 +265,13 @@ int hvf_arch_init_vcpu(CPUState *cpu) int r; uint64_t reqCap; - init_emu(); + init_emu(&hvf_x86_emul_ops); init_decoder(); if (hvf_state->hvf_caps == NULL) { hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); } - env->hvf_mmio_buf = g_new(char, 4096); + env->emu_mmio_buf = g_new(char, 4096); if (x86cpu->vmware_cpuid_freq) { init_tsc_freq(env); @@ -481,10 +509,10 @@ void hvf_store_regs(CPUState *cs) 
macvm_set_rip(cs, env->eip); } -void hvf_simulate_rdmsr(CPUX86State *env) +void hvf_simulate_rdmsr(CPUState *cs) { - X86CPU *cpu = env_archcpu(env); - CPUState *cs = env_cpu(env); + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; uint32_t msr = ECX(env); uint64_t val = 0; @@ -586,10 +614,10 @@ void hvf_simulate_rdmsr(CPUX86State *env) RDX(env) = (uint32_t)(val >> 32); } -void hvf_simulate_wrmsr(CPUX86State *env) +void hvf_simulate_wrmsr(CPUState *cs) { - X86CPU *cpu = env_archcpu(env); - CPUState *cs = env_cpu(env); + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; uint32_t msr = ECX(env); uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env); @@ -705,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (hvf_inject_interrupts(cpu)) { @@ -745,9 +773,9 @@ int hvf_vcpu_exec(CPUState *cpu) switch (exit_reason) { case EXIT_REASON_HLT: { macvm_set_rip(cpu, rip + ins_len); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) - && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) && + && !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI) && !(idtvec_info & VMCS_IDT_VEC_VALID)) { cpu->halted = 1; ret = EXCP_HLT; @@ -875,9 +903,9 @@ int hvf_vcpu_exec(CPUState *cpu) { hvf_load_regs(cpu); if (exit_reason == EXIT_REASON_RDMSR) { - hvf_simulate_rdmsr(env); + hvf_simulate_rdmsr(cpu); } else { - hvf_simulate_wrmsr(env); + hvf_simulate_wrmsr(cpu); } env->eip += ins_len; hvf_store_regs(cpu); diff --git a/target/i386/hvf/meson.build b/target/i386/hvf/meson.build index 05c3c8c..519d190 100644 --- a/target/i386/hvf/meson.build +++ b/target/i386/hvf/meson.build @@ -2,10 +2,7 @@ i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( 'hvf.c', 'x86.c', 'x86_cpuid.c', - 'x86_decode.c', 'x86_descr.c', - 'x86_emu.c', - 'x86_flags.c', 'x86_mmu.c', 'x86_task.c', 'x86hvf.c', diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 80ce262..26d6029 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -29,11 +29,12 @@ #include <Hypervisor/hv_vmx.h> #include "vmcs.h" #include "cpu.h" -#include "x86.h" +#include "emulate/x86.h" #include "system/hvf.h" #include "system/hvf_int.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" static inline uint64_t rreg(hv_vcpuid_t vcpu, hv_x86_reg_t reg) { diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c index a0ede13..5c75ec9 100644 --- a/target/i386/hvf/x86.c +++ b/target/i386/hvf/x86.c @@ -19,8 +19,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "x86_decode.h" -#include "x86_emu.h" +#include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" #include "vmcs.h" #include "vmx.h" #include "x86_mmu.h" diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index ae836f6..0798a0c 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -24,7 +24,7 @@ #include "qemu/cpuid.h" #include "host/cpuinfo.h" #include "cpu.h" -#include "x86.h" +#include "emulate/x86.h" #include "vmx.h" #include "system/hvf.h" #include "hvf-i386.h" @@ -73,7 +73,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS; + CPUID_FXSR 
| CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_HT; ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE | diff --git a/target/i386/hvf/x86_descr.h b/target/i386/hvf/x86_descr.h index ce5de98..24af494 100644 --- a/target/i386/hvf/x86_descr.h +++ b/target/i386/hvf/x86_descr.h @@ -19,7 +19,7 @@ #ifndef HVF_X86_DESCR_H #define HVF_X86_DESCR_H -#include "x86.h" +#include "emulate/x86.h" typedef struct vmx_segment { uint16_t sel; diff --git a/target/i386/hvf/x86_flags.c b/target/i386/hvf/x86_flags.c deleted file mode 100644 index 03d6de5..0000000 --- a/target/i386/hvf/x86_flags.c +++ /dev/null @@ -1,313 +0,0 @@ -///////////////////////////////////////////////////////////////////////// -// -// Copyright (C) 2001-2012 The Bochs Project -// Copyright (C) 2017 Google Inc. -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA -///////////////////////////////////////////////////////////////////////// -/* - * flags functions - */ - -#include "qemu/osdep.h" - -#include "panic.h" -#include "cpu.h" -#include "x86_flags.h" -#include "x86.h" - - -/* this is basically bocsh code */ - -#define LF_SIGN_BIT 31 - -#define LF_BIT_SD (0) /* lazy Sign Flag Delta */ -#define LF_BIT_AF (3) /* lazy Adjust flag */ -#define LF_BIT_PDB (8) /* lazy Parity Delta Byte (8 bits) */ -#define LF_BIT_CF (31) /* lazy Carry Flag */ -#define LF_BIT_PO (30) /* lazy Partial Overflow = CF ^ OF */ - -#define LF_MASK_SD (0x01 << LF_BIT_SD) -#define LF_MASK_AF (0x01 << LF_BIT_AF) -#define LF_MASK_PDB (0xFF << LF_BIT_PDB) -#define LF_MASK_CF (0x01 << LF_BIT_CF) -#define LF_MASK_PO (0x01 << LF_BIT_PO) - -#define ADD_COUT_VEC(op1, op2, result) \ - (((op1) & (op2)) | (((op1) | (op2)) & (~(result)))) - -#define SUB_COUT_VEC(op1, op2, result) \ - (((~(op1)) & (op2)) | (((~(op1)) ^ (op2)) & (result))) - -#define GET_ADD_OVERFLOW(op1, op2, result, mask) \ - ((((op1) ^ (result)) & ((op2) ^ (result))) & (mask)) - -/* ******************* */ -/* OSZAPC */ -/* ******************* */ - -/* size, carries, result */ -#define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \ - target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \ - (((lf_carries) >> (size - 2)) << LF_BIT_PO); \ - env->hvf_lflags.result = (target_ulong)(int##size##_t)(lf_result); \ - if ((size) == 32) { \ - temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \ - } else if ((size) == 16) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \ - } else if ((size) == 8) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \ - } else { \ - VM_PANIC("unimplemented"); \ - } \ - env->hvf_lflags.auxbits = (target_ulong)(uint32_t)temp; \ -} - -/* carries, result */ -#define SET_FLAGS_OSZAPC_8(carries, result) \ - SET_FLAGS_OSZAPC_SIZE(8, carries, result) -#define 
SET_FLAGS_OSZAPC_16(carries, result) \ - SET_FLAGS_OSZAPC_SIZE(16, carries, result) -#define SET_FLAGS_OSZAPC_32(carries, result) \ - SET_FLAGS_OSZAPC_SIZE(32, carries, result) - -/* ******************* */ -/* OSZAP */ -/* ******************* */ -/* size, carries, result */ -#define SET_FLAGS_OSZAP_SIZE(size, lf_carries, lf_result) { \ - target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \ - (((lf_carries) >> (size - 2)) << LF_BIT_PO); \ - if ((size) == 32) { \ - temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \ - } else if ((size) == 16) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \ - } else if ((size) == 8) { \ - temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \ - } else { \ - VM_PANIC("unimplemented"); \ - } \ - env->hvf_lflags.result = (target_ulong)(int##size##_t)(lf_result); \ - target_ulong delta_c = (env->hvf_lflags.auxbits ^ temp) & LF_MASK_CF; \ - delta_c ^= (delta_c >> 1); \ - env->hvf_lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \ -} - -/* carries, result */ -#define SET_FLAGS_OSZAP_8(carries, result) \ - SET_FLAGS_OSZAP_SIZE(8, carries, result) -#define SET_FLAGS_OSZAP_16(carries, result) \ - SET_FLAGS_OSZAP_SIZE(16, carries, result) -#define SET_FLAGS_OSZAP_32(carries, result) \ - SET_FLAGS_OSZAP_SIZE(32, carries, result) - -void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf) -{ - uint32_t temp_po = new_of ^ new_cf; - env->hvf_lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF); - env->hvf_lflags.auxbits |= (temp_po << LF_BIT_PO) | (new_cf << LF_BIT_CF); -} - -void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, - uint32_t diff) -{ - SET_FLAGS_OSZAPC_32(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAPC_SUB16(CPUX86State *env, uint16_t v1, uint16_t v2, - uint16_t diff) -{ - SET_FLAGS_OSZAPC_16(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAPC_SUB8(CPUX86State *env, uint8_t v1, uint8_t v2, - uint8_t diff) -{ - SET_FLAGS_OSZAPC_8(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAPC_ADD32(CPUX86State *env, uint32_t v1, uint32_t v2, - uint32_t diff) -{ - SET_FLAGS_OSZAPC_32(ADD_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAPC_ADD16(CPUX86State *env, uint16_t v1, uint16_t v2, - uint16_t diff) -{ - SET_FLAGS_OSZAPC_16(ADD_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAPC_ADD8(CPUX86State *env, uint8_t v1, uint8_t v2, - uint8_t diff) -{ - SET_FLAGS_OSZAPC_8(ADD_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, - uint32_t diff) -{ - SET_FLAGS_OSZAP_32(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_SUB16(CPUX86State *env, uint16_t v1, uint16_t v2, - uint16_t diff) -{ - SET_FLAGS_OSZAP_16(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_SUB8(CPUX86State *env, uint8_t v1, uint8_t v2, - uint8_t diff) -{ - SET_FLAGS_OSZAP_8(SUB_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_ADD32(CPUX86State *env, uint32_t v1, uint32_t v2, - uint32_t diff) -{ - SET_FLAGS_OSZAP_32(ADD_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_ADD16(CPUX86State *env, uint16_t v1, uint16_t v2, - uint16_t diff) -{ - SET_FLAGS_OSZAP_16(ADD_COUT_VEC(v1, v2, diff), diff); -} - -void SET_FLAGS_OSZAP_ADD8(CPUX86State *env, uint8_t v1, uint8_t v2, - uint8_t diff) -{ - SET_FLAGS_OSZAP_8(ADD_COUT_VEC(v1, v2, diff), diff); -} - - -void SET_FLAGS_OSZAPC_LOGIC32(CPUX86State *env, uint32_t v1, uint32_t v2, - uint32_t diff) -{ - SET_FLAGS_OSZAPC_32(0, diff); -} - -void 
SET_FLAGS_OSZAPC_LOGIC16(CPUX86State *env, uint16_t v1, uint16_t v2, - uint16_t diff) -{ - SET_FLAGS_OSZAPC_16(0, diff); -} - -void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, - uint8_t diff) -{ - SET_FLAGS_OSZAPC_8(0, diff); -} - -bool get_PF(CPUX86State *env) -{ - uint32_t temp = (255 & env->hvf_lflags.result); - temp = temp ^ (255 & (env->hvf_lflags.auxbits >> LF_BIT_PDB)); - temp = (temp ^ (temp >> 4)) & 0x0F; - return (0x9669U >> temp) & 1; -} - -void set_PF(CPUX86State *env, bool val) -{ - uint32_t temp = (255 & env->hvf_lflags.result) ^ (!val); - env->hvf_lflags.auxbits &= ~(LF_MASK_PDB); - env->hvf_lflags.auxbits |= (temp << LF_BIT_PDB); -} - -bool get_OF(CPUX86State *env) -{ - return ((env->hvf_lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1; -} - -bool get_CF(CPUX86State *env) -{ - return (env->hvf_lflags.auxbits >> LF_BIT_CF) & 1; -} - -void set_OF(CPUX86State *env, bool val) -{ - bool old_cf = get_CF(env); - SET_FLAGS_OxxxxC(env, val, old_cf); -} - -void set_CF(CPUX86State *env, bool val) -{ - bool old_of = get_OF(env); - SET_FLAGS_OxxxxC(env, old_of, val); -} - -bool get_AF(CPUX86State *env) -{ - return (env->hvf_lflags.auxbits >> LF_BIT_AF) & 1; -} - -void set_AF(CPUX86State *env, bool val) -{ - env->hvf_lflags.auxbits &= ~(LF_MASK_AF); - env->hvf_lflags.auxbits |= val << LF_BIT_AF; -} - -bool get_ZF(CPUX86State *env) -{ - return !env->hvf_lflags.result; -} - -void set_ZF(CPUX86State *env, bool val) -{ - if (val) { - env->hvf_lflags.auxbits ^= - (((env->hvf_lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD); - /* merge the parity bits into the Parity Delta Byte */ - uint32_t temp_pdb = (255 & env->hvf_lflags.result); - env->hvf_lflags.auxbits ^= (temp_pdb << LF_BIT_PDB); - /* now zero the .result value */ - env->hvf_lflags.result = 0; - } else { - env->hvf_lflags.result |= (1 << 8); - } -} - -bool get_SF(CPUX86State *env) -{ - return ((env->hvf_lflags.result >> LF_SIGN_BIT) ^ - (env->hvf_lflags.auxbits >> LF_BIT_SD)) & 1; -} - -void set_SF(CPUX86State *env, bool val) -{ - bool temp_sf = get_SF(env); - env->hvf_lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD; -} - -void lflags_to_rflags(CPUX86State *env) -{ - env->eflags |= get_CF(env) ? CC_C : 0; - env->eflags |= get_PF(env) ? CC_P : 0; - env->eflags |= get_AF(env) ? CC_A : 0; - env->eflags |= get_ZF(env) ? CC_Z : 0; - env->eflags |= get_SF(env) ? CC_S : 0; - env->eflags |= get_OF(env) ? 
CC_O : 0; -} - -void rflags_to_lflags(CPUX86State *env) -{ - env->hvf_lflags.auxbits = env->hvf_lflags.result = 0; - set_OF(env, env->eflags & CC_O); - set_SF(env, env->eflags & CC_S); - set_ZF(env, env->eflags & CC_Z); - set_AF(env, env->eflags & CC_A); - set_PF(env, env->eflags & CC_P); - set_CF(env, env->eflags & CC_C); -} diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c index 579d0c3..afc5c17 100644 --- a/target/i386/hvf/x86_mmu.c +++ b/target/i386/hvf/x86_mmu.c @@ -19,7 +19,7 @@ #include "qemu/osdep.h" #include "panic.h" #include "cpu.h" -#include "x86.h" +#include "emulate/x86.h" #include "x86_mmu.h" #include "vmcs.h" #include "vmx.h" diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c index 1612179..bdf8b51 100644 --- a/target/i386/hvf/x86_task.c +++ b/target/i386/hvf/x86_task.c @@ -14,11 +14,11 @@ #include "hvf-i386.h" #include "vmcs.h" #include "vmx.h" -#include "x86.h" +#include "emulate/x86.h" #include "x86_descr.h" #include "x86_mmu.h" -#include "x86_decode.h" -#include "x86_emu.h" +#include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" #include "x86_task.h" #include "x86hvf.h" diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 531a340..a502437 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -24,7 +24,7 @@ #include "vmcs.h" #include "cpu.h" #include "x86_descr.h" -#include "x86_decode.h" +#include "emulate/x86_decode.h" #include "system/hw_accel.h" #include "hw/i386/apic_internal.h" @@ -395,9 +395,9 @@ bool hvf_inject_interrupts(CPUState *cs) }; } - if (cs->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) { - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_NMI); info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI; wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); } else { @@ -406,20 +406,19 @@ bool hvf_inject_interrupts(CPUState *cs) } if (!(env->hflags & HF_INHIBIT_IRQ_MASK) && - (cs->interrupt_request & CPU_INTERRUPT_HARD) && + cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) { int line = cpu_get_pic_interrupt(env); - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); if (line >= 0) { wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line | VMCS_INTR_VALID | VMCS_INTR_T_HWINTR); } } - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { vmx_set_int_window_exiting(cs); } - return (cs->interrupt_request - & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)); + return cpu_test_interrupt(cs, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR); } int hvf_process_events(CPUState *cs) @@ -427,31 +426,31 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->accel->dirty) { + if (!cs->vcpu_dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } - if (cs->interrupt_request & CPU_INTERRUPT_INIT) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_INIT)) { cpu_synchronize_state(cs); do_cpu_init(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_POLL) { - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); } - if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && 
(env->eflags & IF_MASK)) || - (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 0; } - if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_SIPI)) { cpu_synchronize_state(cs); do_cpu_sipi(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_TPR) { - cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_TPR); cpu_synchronize_state(cs); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index 70b89ca..f7a81bd 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" +#include "exec/target_page.h" #include "hyperv.h" #include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" @@ -80,7 +81,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) * necessary because memory hierarchy is being changed */ async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); - cpu_exit(CPU(cpu)); return EXCP_INTERRUPT; case KVM_EXIT_HYPERV_HCALL: { diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 6269fa8..9c25b55 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -13,6 +13,7 @@ #include "qapi/error.h" #include "system/system.h" #include "hw/boards.h" +#include "hw/i386/x86.h" #include "kvm_i386.h" #include "accel/accel-cpu-target.h" @@ -41,6 +42,7 @@ static void kvm_set_guest_phys_bits(CPUState *cs) static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; bool ret; @@ -63,7 +65,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * check/update ucode_rev, phys_bits, guest_phys_bits, mwait * cpu_common_realizefn() (via xcc->parent_realize) */ - if (cpu->max_features) { + if (xcc->max_features) { if (enable_cpu_pm) { if (kvm_has_waitpkg()) { env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG; @@ -72,7 +74,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, &cpu->mwait.ecx, &cpu->mwait.edx); - } + } } if (cpu->ucode_rev == 0) { cpu->ucode_rev = @@ -90,6 +92,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) kvm_set_guest_phys_bits(cs); } + /* + * When SMM is enabled, there are 2 address spaces. Otherwise only 1. + * + * Only initialize address space 0 here; the second one, for SMM, is + * initialized in register_smram_listener() after machine init is done. + */ + cs->num_ases = x86_machine_is_smm_enabled(X86_MACHINE(current_machine)) ?
2 : 1; + cpu_address_space_init(cs, X86ASIdx_MEM, "cpu-memory", cs->memory); + return true; } @@ -108,7 +119,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) CPUX86State *env = &cpu->env; KVMState *s = kvm_state; - host_cpu_max_instance_init(cpu); + object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort); @@ -216,14 +227,14 @@ static void kvm_cpu_instance_init(CPUState *cs) x86_cpu_apply_props(cpu, kvm_default_props); } - if (cpu->max_features) { + if (xcc->max_features) { kvm_cpu_max_instance_init(cpu); } kvm_cpu_xsave_init(); } -static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +static void kvm_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6c749d4..309f043 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -67,6 +68,7 @@ #include "hw/pci/msix.h" #include "migration/blocker.h" #include "exec/memattrs.h" +#include "exec/target_page.h" #include "trace.h" #include CONFIG_DEVICES @@ -191,6 +193,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) @@ -325,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } @@ -391,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { @@ -413,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -571,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } @@ -650,6 +653,23 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) must_be_one = (uint32_t)value; can_be_one = (uint32_t)(value >> 32); return can_be_one & ~must_be_one; + case MSR_IA32_ARCH_CAPABILITIES: + /* + * Special handling for fb-clear bit in ARCH_CAPABILITIES MSR. + * KVM will only report the bit if it is enabled in the host, + * but, for live migration capability purposes, we want to + * expose the bit to the guest even if it is disabled in the + * host, as long as the host itself is not vulnerable to + * the issue that the fb-clear bit is meant to mitigate. 
+ */ + if ((value & MSR_ARCH_CAP_MDS_NO) && + (value & MSR_ARCH_CAP_TAA_NO) && + (value & MSR_ARCH_CAP_SBDR_SSDP_NO) && + (value & MSR_ARCH_CAP_FBSDP_NO) && + (value & MSR_ARCH_CAP_PSDP_NO)) { + value |= MSR_ARCH_CAP_FB_CLEAR; + } + return value; default: return value; @@ -867,6 +887,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } @@ -1778,8 +1807,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { @@ -1822,9 +1849,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -1863,7 +1889,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } @@ -2051,6 +2077,15 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -2075,6 +2110,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; @@ -2233,7 +2276,7 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; - if (((env->cpuid_version >> 8)&0xF) >= 6 + if (x86_cpu_family(env->cpuid_version) >= 6 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { uint64_t mcg_cap, unsupported_caps; @@ -2674,6 +2717,7 @@ static MemoryRegion smram_as_mem; static void register_smram_listener(Notifier *n, void *unused) { + CPUState *cpu; MemoryRegion *smram = (MemoryRegion *) object_resolve_path("/machine/smram", NULL); @@ -2697,7 +2741,11 @@ static void register_smram_listener(Notifier *n, void *unused) address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); kvm_memory_listener_register(kvm_state, &smram_listener, - &smram_address_space, 1, "kvm-smram"); + &smram_address_space, X86ASIdx_SMM, "kvm-smram"); + + CPU_FOREACH(cpu) { + cpu_address_space_init(cpu, X86ASIdx_SMM, "cpu-smm", &smram_as_root); + } } static void *kvm_msr_energy_thread(void *data) @@ -3205,16 +3253,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. 
+ * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); @@ -3289,8 +3328,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) return ret; } - if (kvm_check_extension(s, KVM_CAP_X86_SMM) && - object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && + if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && x86_machine_is_smm_enabled(X86_MACHINE(ms))) { smram_machine_done.notify = register_smram_listener; qemu_add_machine_init_done_notifier(&smram_machine_done); @@ -3855,36 +3893,38 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } + + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. - */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } -static int kvm_put_msrs(X86CPU *cpu, int level) +static int kvm_put_msrs(X86CPU *cpu, KvmPutState level) { CPUX86State *env = &cpu->env; int i; @@ -5004,7 +5044,7 @@ static int kvm_get_apic(X86CPU *cpu) return 0; } -static int kvm_put_vcpu_events(X86CPU *cpu, int level) +static int kvm_put_vcpu_events(X86CPU *cpu, KvmPutState level) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; @@ -5043,7 +5083,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) */ events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI; events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT; - cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); + cpu_reset_interrupt(cs, CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); } else { /* Keep these in cs->interrupt_request. 
*/ events.smi.pending = 0; @@ -5247,7 +5287,7 @@ static int kvm_get_nested_state(X86CPU *cpu) return ret; } -int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cpu, KvmPutState level, Error **errp) { X86CPU *x86_cpu = X86_CPU(cpu); int ret; @@ -5430,10 +5470,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) int ret; /* Inject NMI */ - if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { bql_lock(); - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); bql_unlock(); DPRINTF("injected NMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_NMI); @@ -5442,9 +5482,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) strerror(-ret)); } } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { bql_lock(); - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); bql_unlock(); DPRINTF("injected SMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_SMI); @@ -5455,32 +5495,31 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_pic_in_kernel()) { - bql_lock(); - } /* Force the VCPU out of its inner loop to process any INIT requests * or (for userspace APIC, but it is cheap to combine the checks here) * pending TPR access reports. */ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } } if (!kvm_pic_in_kernel()) { /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && - (cpu->interrupt_request & CPU_INTERRUPT_HARD) && + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) { int irq; - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + bql_lock(); + + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { struct kvm_interrupt intr; @@ -5494,13 +5533,14 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) strerror(-ret)); } } + bql_unlock(); } /* If we have an interrupt but the guest is not ready to receive an * interrupt, request an interrupt window exit. This will * cause a return to userspace as soon as the guest is ready to * receive interrupts. */ - if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { run->request_interrupt_window = 1; } else { run->request_interrupt_window = 0; @@ -5508,8 +5548,6 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); - - bql_unlock(); } } @@ -5572,18 +5610,18 @@ int kvm_arch_process_async_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (cs->interrupt_request & CPU_INTERRUPT_MCE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_MCE)) { /* We must not raise CPU_INTERRUPT_MCE if it's not supported. 
*/ assert(env->mcg_cap); - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_MCE); kvm_cpu_synchronize_state(cs); if (env->exception_nr == EXCP08_DBLE) { /* this means triple fault */ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - cs->exit_request = 1; + qatomic_set(&cs->exit_request, true); return 0; } kvm_queue_exception(env, EXCP12_MCHK, 0, 0); @@ -5595,7 +5633,7 @@ int kvm_arch_process_async_events(CPUState *cs) } } - if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { kvm_cpu_synchronize_state(cs); do_cpu_init(cpu); @@ -5605,21 +5643,21 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } - if (cs->interrupt_request & CPU_INTERRUPT_POLL) { - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); } - if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 0; } - if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_SIPI)) { kvm_cpu_synchronize_state(cs); do_cpu_sipi(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_TPR) { - cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_TPR); kvm_cpu_synchronize_state(cs); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); @@ -5633,9 +5671,9 @@ static int kvm_handle_halt(X86CPU *cpu) CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; - if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && - !(cs->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 1; return EXCP_HLT; } @@ -5999,9 +6037,11 @@ static bool host_supports_vmx(void) * because private/shared page tracking is already provided through other * means, these 2 use-cases should be treated as being mutually-exclusive. */ -static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run) { + struct kvm_pre_fault_memory mem; uint64_t gpa, size, attributes; + int ret; if (!machine_require_guest_memfd(current_machine)) return -EINVAL; @@ -6012,13 +6052,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); - return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + if (ret || !kvm_pre_fault_memory_supported) { + return ret; + } + + /* + * Opportunistically pre-fault memory in. Failures are ignored so that any + * errors in faulting in the memory will get captured in KVM page fault + * path when the guest first accesses the page. 
+ */ + memset(&mem, 0, sizeof(mem)); + mem.gpa = gpa; + mem.size = size; + while (mem.size) { + if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) { + break; + } + } + + return 0; } -static int kvm_handle_hypercall(struct kvm_run *run) +static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run) { if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) - return kvm_handle_hc_map_gpa_range(run); + return kvm_handle_hc_map_gpa_range(cpu, run); return -EINVAL; } @@ -6118,7 +6177,35 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_hypercall(run); + ret = kvm_handle_hypercall(cpu, run); + break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; + case KVM_EXIT_TDX: + /* + * run->tdx is already set up for the case where userspace + * does not handle the TDVMCALL. + */ + switch (run->tdx.nr) { + case TDVMCALL_GET_QUOTE: + tdx_handle_get_quote(cpu, run); + break; + case TDVMCALL_GET_TD_VM_CALL_INFO: + tdx_handle_get_tdvmcall_info(cpu, run); + break; + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + tdx_handle_setup_event_notify_interrupt(cpu, run); + break; + } + ret = 0; break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8..5f83e88 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) @@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include <linux/kvm.h> + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); @@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996caf..2675bf8 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c new file mode 100644 index 0000000..dee8334 --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.c @@ -0,0 +1,302 @@ +/* + * QEMU TDX Quote Generation Support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li 
<xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" + +#include "tdx-quote-generator.h" + +#define QGS_MSG_LIB_MAJOR_VER 1 +#define QGS_MSG_LIB_MINOR_VER 1 + +typedef enum _qgs_msg_type_t { + GET_QUOTE_REQ = 0, + GET_QUOTE_RESP = 1, + GET_COLLATERAL_REQ = 2, + GET_COLLATERAL_RESP = 3, + GET_PLATFORM_INFO_REQ = 4, + GET_PLATFORM_INFO_RESP = 5, + QGS_MSG_TYPE_MAX +} qgs_msg_type_t; + +typedef struct _qgs_msg_header_t { + uint16_t major_version; + uint16_t minor_version; + uint32_t type; + uint32_t size; // size of the whole message, include this header, in byte + uint32_t error_code; // used in response only +} qgs_msg_header_t; + +typedef struct _qgs_msg_get_quote_req_t { + qgs_msg_header_t header; // header.type = GET_QUOTE_REQ + uint32_t report_size; // cannot be 0 + uint32_t id_list_size; // length of id_list, in byte, can be 0 +} qgs_msg_get_quote_req_t; + +typedef struct _qgs_msg_get_quote_resp_s { + qgs_msg_header_t header; // header.type = GET_QUOTE_RESP + uint32_t selected_id_size; // can be 0 in case only one id is sent in request + uint32_t quote_size; // length of quote_data, in byte + uint8_t id_quote[]; // selected id followed by quote +} qgs_msg_get_quote_resp_t; + +#define HEADER_SIZE 4 + +static uint32_t decode_header(const char *buf, size_t len) { + if (len < HEADER_SIZE) { + return 0; + } + uint32_t msg_size = 0; + for (uint32_t i = 0; i < HEADER_SIZE; ++i) { + msg_size = msg_size * 256 + (buf[i] & 0xFF); + } + return msg_size; +} + +static void encode_header(char *buf, size_t len, uint32_t size) { + assert(len >= HEADER_SIZE); + buf[0] = ((size >> 24) & 0xFF); + buf[1] = ((size >> 16) & 0xFF); + buf[2] = ((size >> 8) & 0xFF); + buf[3] = (size & 0xFF); +} + +static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) +{ + timer_del(&task->timer); + + if (task->watch) { + g_source_remove(task->watch); + } + qio_channel_close(QIO_CHANNEL(task->sioc), NULL); + object_unref(OBJECT(task->sioc)); + + task->completion(task); +} + +static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received, + task->payload_len - task->receive_buf_received, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return G_SOURCE_CONTINUE; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (ret == 0) { + error_report("End of file before reply received"); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + task->receive_buf_received += ret; + if (task->receive_buf_received >= HEADER_SIZE) { + uint32_t len = decode_header(task->receive_buf, + task->receive_buf_received); + if (len == 0 || + len > (task->payload_len - HEADER_SIZE)) { + error_report("Message len %u must be non-zero & less than %zu", + len, (task->payload_len - HEADER_SIZE)); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + /* Now we know the size, shrink to fit */ + task->payload_len = HEADER_SIZE + len; + task->receive_buf = g_renew(char, + task->receive_buf, + task->payload_len); + } + + if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) { + qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE); + if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER || + 
hdr->minor_version != QGS_MSG_LIB_MINOR_VER) { + error_report("Invalid QGS message header version %d.%d", + hdr->major_version, + hdr->minor_version); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->type != GET_QUOTE_RESP) { + error_report("Invalid QGS message type %d", + hdr->type); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->size > (task->payload_len - HEADER_SIZE)) { + error_report("QGS message size %d exceeds payload capacity %zu", + hdr->size, task->payload_len); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->error_code != 0) { + error_report("QGS message error code %d", + hdr->error_code); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) { + qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE); + if (msg->selected_id_size != 0) { + error_report("QGS message selected ID was %d not 0", + msg->selected_id_size); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) != + msg->quote_size) { + error_report("QGS quote size %d should be %zu", + msg->quote_size, + (task->payload_len - sizeof(qgs_msg_get_quote_resp_t))); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (task->receive_buf_received == task->payload_len) { + size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t); + memmove(task->receive_buf, + task->receive_buf + strip, + task->receive_buf_received - strip); + task->receive_buf_received -= strip; + task->status_code = TDX_VP_GET_QUOTE_SUCCESS; + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + tdx_generate_quote_cleanup(task); + return G_SOURCE_REMOVE; +} + +static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_write(ioc, task->send_data + task->send_data_sent, + task->send_data_size - task->send_data_sent, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); + goto end; + } + } + task->send_data_sent += ret; + + if (task->send_data_sent == task->send_data_size) { + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN, + tdx_get_quote_read, task, NULL); + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + return G_SOURCE_REMOVE; +} + +static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_task_propagate_error(qio_task, &err); + if (ret) { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE; + tdx_generate_quote_cleanup(task); + return; + } + + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT, + tdx_send_report, task, NULL); +} + +#define TRANSACTION_TIMEOUT 30000 + +static void getquote_expired(void *opaque) +{ + TdxGenerateQuoteTask *task = opaque; + + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); +} + +static void setup_get_quote_timer(TdxGenerateQuoteTask *task) +{ + int64_t time; + + timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task); + time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + timer_mod(&task->timer, time + TRANSACTION_TIMEOUT); +} + 
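Every message exchanged with the Quote Generation Service is framed by the 4-byte big-endian length prefix that encode_header() and decode_header() above produce and parse. A self-contained round-trip of that framing, with helper names local to this sketch:

    /* Sketch: the QGS 4-byte big-endian length prefix, round-tripped. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define HEADER_SIZE 4

    static void put_be32(char *buf, uint32_t size)
    {
        buf[0] = (size >> 24) & 0xFF;
        buf[1] = (size >> 16) & 0xFF;
        buf[2] = (size >> 8) & 0xFF;
        buf[3] = size & 0xFF;
    }

    static uint32_t get_be32(const char *buf, size_t len)
    {
        uint32_t size = 0;
        if (len < HEADER_SIZE) {
            return 0; /* not enough bytes buffered yet */
        }
        for (int i = 0; i < HEADER_SIZE; i++) {
            size = size * 256 + (buf[i] & 0xFF);
        }
        return size;
    }

    int main(void)
    {
        char hdr[HEADER_SIZE];

        put_be32(hdr, 0x12345678);
        assert(get_be32(hdr, sizeof(hdr)) == 0x12345678);
        return 0;
    }

decode_header() returning 0 for short buffers is what lets tdx_get_quote_read() keep accumulating bytes until a complete header has arrived.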
+void tdx_generate_quote(TdxGenerateQuoteTask *task,
+                        SocketAddress *qg_sock_addr)
+{
+    QIOChannelSocket *sioc;
+    qgs_msg_get_quote_req_t msg;
+
+    /* Prepare a QGS message prelude */
+    msg.header.major_version = QGS_MSG_LIB_MAJOR_VER;
+    msg.header.minor_version = QGS_MSG_LIB_MINOR_VER;
+    msg.header.type = GET_QUOTE_REQ;
+    msg.header.size = sizeof(msg) + task->send_data_size;
+    msg.header.error_code = 0;
+    msg.report_size = task->send_data_size;
+    msg.id_list_size = 0;
+
+    /* Make room to add the QGS message prelude */
+    task->send_data = g_renew(char,
+                              task->send_data,
+                              task->send_data_size + sizeof(msg) + HEADER_SIZE);
+    memmove(task->send_data + sizeof(msg) + HEADER_SIZE,
+            task->send_data,
+            task->send_data_size);
+    memcpy(task->send_data + HEADER_SIZE,
+           &msg,
+           sizeof(msg));
+    encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg));
+    task->send_data_size += sizeof(msg) + HEADER_SIZE;
+
+    sioc = qio_channel_socket_new();
+    task->sioc = sioc;
+
+    setup_get_quote_timer(task);
+
+    qio_channel_socket_connect_async(sioc, qg_sock_addr,
+                                     tdx_quote_generator_connected, task,
+                                     NULL, NULL);
+}
diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h
new file mode 100644
index 0000000..3bd9b8e
--- /dev/null
+++ b/target/i386/kvm/tdx-quote-generator.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H
+#define QEMU_I386_TDX_QUOTE_GENERATOR_H
+
+#include "qom/object_interfaces.h"
+#include "io/channel-socket.h"
+#include "exec/hwaddr.h"
+
+#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
+
+#define TDX_VP_GET_QUOTE_SUCCESS        0ULL
+#define TDX_VP_GET_QUOTE_IN_FLIGHT      (-1ULL)
+#define TDX_VP_GET_QUOTE_ERROR          0x8000000000000000ULL
+#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL
+
+/* Limit to avoid resource starvation. */
+#define TDX_GET_QUOTE_MAX_BUF_LEN       (128 * 1024)
+#define TDX_MAX_GET_QUOTE_REQUEST       16
+
+#define TDX_GET_QUOTE_HDR_SIZE          24
+
+/* Format of pages shared with guest. */
+struct tdx_get_quote_header {
+    /* Format version: must be 1 in little endian. */
+    uint64_t structure_version;
+
+    /*
+     * GetQuote status code in little endian:
+     * Guest must set error_code to 0 to avoid information leak.
+     * Qemu sets this before interrupting guest.
+     */
+    uint64_t error_code;
+
+    /*
+     * in-message size in little endian: The message will follow this header.
+     * The in-message will be sent to QGS.
+     */
+    uint32_t in_len;
+
+    /*
+     * out-message size in little endian:
+     * On request, out_len must be zero to avoid information leak.
+     * On return, message size from QGS. Qemu overwrites this field.
+     * The message follows this header. The in-message is overwritten.
+     */
+    uint32_t out_len;
+
+    /*
+     * Message buffer follows.
+     * Guest sets the message that will be sent to QGS. If out_len > in_len,
+     * the guest should zero the remaining buffer to avoid an information leak.
+     * Qemu overwrites this buffer with a message returned from QGS.
+ */ +}; + +typedef struct TdxGenerateQuoteTask { + hwaddr buf_gpa; + hwaddr payload_gpa; + uint64_t payload_len; + + char *send_data; + uint64_t send_data_size; + uint64_t send_data_sent; + + char *receive_buf; + uint64_t receive_buf_received; + + uint64_t status_code; + struct tdx_get_quote_header hdr; + + QIOChannelSocket *sioc; + guint watch; + QEMUTimer timer; + + void (*completion)(struct TdxGenerateQuoteTask *task); + void *opaque; +} TdxGenerateQuoteTask; + +void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr); + +#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */ diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000..1f0e108 --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000..dbf0fa2 --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,1548 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/base64.h" +#include "qemu/mmap-alloc.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qom/object_interfaces.h" +#include "crypto/hash.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/system.h" +#include "system/ramblock.h" +#include "system/address-spaces.h" + +#include <linux/kvm_para.h> + +#include "cpu.h" +#include "cpu-internal.h" +#include "host-cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/tdvf.h" +#include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" +#include "hw/pci/msi.h" +#include "kvm_i386.h" +#include "tdx.h" +#include "tdx-quote-generator.h" + +#include "standard-headers/asm-x86/kvm_para.h" + +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) + +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + +static TdxGuest *tdx_guest; + +static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; + +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + +enum 
tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) +{ + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; + + return 0; +} + +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = 
&tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, + &error_fatal); + } +} + +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; + + tdx_init_ram_entries(); + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + tdx_post_init_vcpus(); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uintptr_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? 
+                KVM_TDX_MEASURE_MEMORY_REGION : 0;
+
+        do {
+            error_free(local_err);
+            local_err = NULL;
+            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
+                               &region, &local_err);
+        } while (r == -EAGAIN || r == -EINTR);
+        if (r < 0) {
+            error_report_err(local_err);
+            exit(1);
+        }
+
+        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
+            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
+            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+            entry->mem_ptr = NULL;
+        }
+    }
+
+    /*
+     * The TDVF image has been copied into the private region above via
+     * KVM_MEMORY_MAPPING, so the original copy is no longer needed.
+     */
+    ram_block = tdx_guest->tdvf_mr->ram_block;
+    ram_block_discard_range(ram_block, 0, ram_block->max_length);
+
+    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
+    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
+}
+
+static Notifier tdx_machine_done_notify = {
+    .notify = tdx_finalize_vm,
+};
+
+/*
+ * Some CPUID bits change from fixed1 to configurable bits when the TDX module
+ * supports TDX_FEATURES0.VE_REDUCTION, e.g. MCA/MCE/MTRR/CORE_CAPABILITY.
+ *
+ * To make QEMU work with all versions of the TDX module, keep a bit here if
+ * it has ever been fixed1 in any version, even when it is no longer fixed1 in
+ * the latest one. Otherwise, with an older version of the TDX module, QEMU
+ * may treat the fixed1 bit as unsupported.
+ *
+ * For a newer TDX module, it does no harm to keep such bits in
+ * tdx_fixed1_bits even though they have become configurable, because
+ * tdx_fixed1_bits is only used to set up the supported bits.
+ */
+KvmCpuidInfo tdx_fixed1_bits = {
+    .cpuid.nent = 8,
+    .entries[0] = {
+        .function = 0x1,
+        .index = 0,
+        .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
+               CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
+               CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+               CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+               CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
+               CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
+        .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+               CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+               CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+               CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
+               CPUID_SSE | CPUID_SSE2,
+    },
+    .entries[1] = {
+        .function = 0x6,
+        .index = 0,
+        .eax = CPUID_6_EAX_ARAT,
+    },
+    .entries[2] = {
+        .function = 0x7,
+        .index = 0,
+        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+        .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
+               CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
+               CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
+               CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
+               CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
+        .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
+               CPUID_7_0_ECX_MOVDIR64B,
+        .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
+               CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
+               CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
+               CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+    },
+    .entries[3] = {
+        .function = 0x7,
+        .index = 2,
+        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+        .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
+               CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
+    },
+    .entries[4] = {
+        .function = 0xD,
+        .index = 0,
+        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+        .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
+    },
+    .entries[5] = {
+        .function = 0xD,
+        .index = 1,
+        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+        .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+               CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+    },
+    .entries[6] = {
+        .function = 0x80000001,
+        .index = 0,
+        .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+        /*
+         * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
+         * the CPU being in 64-bit mode. But here fixed1 is used to serve the
+         * purpose of supported bits for TDX. In this sense, SYSCALL is always
+         * supported.
+         */
+        .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+               CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+    },
+    .entries[7] = {
+        .function = 0x80000007,
+        .index = 0,
+        .edx = CPUID_APM_INVTSC,
+    },
+};
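+
+/*
+ * Sketch of how the fixed1 table above is consumed (see
+ * tdx_adjust_cpuid_features() below): the fixed1 bits are ORed into whatever
+ * feature set the user configured, so a fixed1 feature cannot be turned off
+ * from the command line:
+ *
+ *     e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+ *     if (e) {
+ *         value |= cpuid_entry_get_reg(e, reg);    // force fixed1 bits on
+ *     }
+ */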
+
+typedef struct TdxAttrsMap {
+    uint32_t attr_index;
+    uint32_t cpuid_leaf;
+    uint32_t cpuid_subleaf;
+    int cpuid_reg;
+    uint32_t feat_mask;
+} TdxAttrsMap;
+
+static TdxAttrsMap tdx_attrs_maps[] = {
+    {.attr_index = 27,
+     .cpuid_leaf = 7,
+     .cpuid_subleaf = 1,
+     .cpuid_reg = R_EAX,
+     .feat_mask = CPUID_7_1_EAX_LASS,},
+
+    {.attr_index = 30,
+     .cpuid_leaf = 7,
+     .cpuid_subleaf = 0,
+     .cpuid_reg = R_ECX,
+     .feat_mask = CPUID_7_0_ECX_PKS,},
+
+    {.attr_index = 31,
+     .cpuid_leaf = 7,
+     .cpuid_subleaf = 0,
+     .cpuid_reg = R_ECX,
+     .feat_mask = CPUID_7_0_ECX_KeyLocker,},
+};
+
+typedef struct TdxXFAMDep {
+    int xfam_bit;
+    FeatureMask feat_mask;
+} TdxXFAMDep;
+
+/*
+ * Note, only the CPUID bits whose virtualization type is "XFAM & Native" are
+ * defined here.
+ *
+ * For those whose virtualization type is "XFAM & Configured & Native", they
+ * are reported as configurable bits. They are not supported unless they are
+ * in the configurable-bits list from KVM, even if the corresponding XFAM bit
+ * is supported.
+ */
+TdxXFAMDep tdx_xfam_deps[] = {
+    { XSTATE_YMM_BIT,       { FEAT_1_ECX, CPUID_EXT_FMA }},
+    { XSTATE_YMM_BIT,       { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
+    { XSTATE_OPMASK_BIT,    { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
+    { XSTATE_OPMASK_BIT,    { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
+    { XSTATE_PT_BIT,        { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
+    { XSTATE_PKRU_BIT,      { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
+    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
+    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
+    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
+};
+
+static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
+                                                        uint32_t index)
+{
+    struct kvm_cpuid_entry2 *e;
+
+    e = cpuid_find_entry(tdx_supported_cpuid, function, index);
+    if (!e) {
+        if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
+            error_report("tdx_supported_cpuid requires more space than %d entries",
+                         KVM_MAX_CPUID_ENTRIES);
+            exit(1);
+        }
+        e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
+        e->function = function;
+        e->index = index;
+    }
+
+    return e;
+}
+
+static void tdx_add_supported_cpuid_by_fixed1_bits(void)
+{
+    struct kvm_cpuid_entry2 *e, *e1;
+    int i;
+
+    for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
+        e = &tdx_fixed1_bits.entries[i];
+
+        e1 = find_in_supported_entry(e->function, e->index);
+        e1->eax |= e->eax;
+        e1->ebx |= e->ebx;
+        e1->ecx |= e->ecx;
+        e1->edx |= e->edx;
+    }
+}
+
+static void tdx_add_supported_cpuid_by_attrs(void)
+{
+    struct kvm_cpuid_entry2 *e;
+    TdxAttrsMap *map;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
+        map = &tdx_attrs_maps[i];
+        if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
+            continue;
+        }
+
+        e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
+
+        switch (map->cpuid_reg) {
+        case R_EAX:
+            e->eax |= map->feat_mask;
+            break;
+        case R_EBX:
+            e->ebx |= map->feat_mask;
+            break;
+        case R_ECX:
+            e->ecx |= map->feat_mask;
+            break;
+        case R_EDX:
+            e->edx |= map->feat_mask;
+            break;
+        }
+    }
+}
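+
+/*
+ * Worked example for the attribute mapping above: TDX attribute bit 30 is
+ * PKS (TDX_TD_ATTRIBUTES_PKS == BIT_ULL(30)). When the platform reports that
+ * bit in tdx_caps->supported_attrs, the loop above effectively does:
+ *
+ *     e = find_in_supported_entry(7, 0);
+ *     e->ecx |= CPUID_7_0_ECX_PKS;
+ *
+ * i.e. CPUID.7.0:ECX.PKS becomes advertisable to the guest.
+ */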
+
+static void tdx_add_supported_cpuid_by_xfam(void)
+{
+    struct kvm_cpuid_entry2 *e;
+    int i;
+
+    const TdxXFAMDep *xfam_dep;
+    const FeatureWordInfo *f;
+    for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
+        xfam_dep = &tdx_xfam_deps[i];
+        if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
+            continue;
+        }
+
+        f = &feature_word_info[xfam_dep->feat_mask.index];
+        if (f->type != CPUID_FEATURE_WORD) {
+            continue;
+        }
+
+        e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
+        switch (f->cpuid.reg) {
+        case R_EAX:
+            e->eax |= xfam_dep->feat_mask.mask;
+            break;
+        case R_EBX:
+            e->ebx |= xfam_dep->feat_mask.mask;
+            break;
+        case R_ECX:
+            e->ecx |= xfam_dep->feat_mask.mask;
+            break;
+        case R_EDX:
+            e->edx |= xfam_dep->feat_mask.mask;
+            break;
+        }
+    }
+
+    e = find_in_supported_entry(0xd, 0);
+    e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
+    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
+
+    e = find_in_supported_entry(0xd, 1);
+    /*
+     * Mark XFD as always supported for TDX; it is cleared at the end in
+     * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware,
+     * because in that case the original data has it as 0.
+     */
+    e->eax |= CPUID_XSAVE_XFD;
+    e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
+    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
+}
+
+static void tdx_add_supported_kvm_features(void)
+{
+    struct kvm_cpuid_entry2 *e;
+
+    e = find_in_supported_entry(0x40000001, 0);
+    e->eax = TDX_SUPPORTED_KVM_FEATURES;
+}
+
+static void tdx_setup_supported_cpuid(void)
+{
+    if (tdx_supported_cpuid) {
+        return;
+    }
+
+    tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
+                    KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
+
+    memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
+           tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
+    tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
+
+    tdx_add_supported_cpuid_by_fixed1_bits();
+    tdx_add_supported_cpuid_by_attrs();
+    tdx_add_supported_cpuid_by_xfam();
+
+    tdx_add_supported_kvm_features();
+}
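+
+/*
+ * tdx_supported_cpuid is thus assembled in layers, each ORing bits into the
+ * table: the configurable bits reported by KVM (the memcpy above), the
+ * fixed1 bits, the bits implied by supported TD attributes, the bits implied
+ * by supported XFAM, and finally the PV feature leaf 0x40000001. Later,
+ * tdx_adjust_cpuid_features() uses it as a clamp on user-requested features,
+ * roughly (helper names here are illustrative; the real code open-codes the
+ * cpuid_find_entry() lookups):
+ *
+ *     value |= fixed1_bits(feature, index, reg);     // cannot be disabled
+ *     value &= supported_bits(feature, index, reg);  // cannot exceed support
+ */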
+
+static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    X86MachineState *x86ms = X86_MACHINE(ms);
+    TdxGuest *tdx = TDX_GUEST(cgs);
+    int r = 0;
+
+    kvm_mark_guest_state_protected();
+
+    if (x86ms->smm == ON_OFF_AUTO_AUTO) {
+        x86ms->smm = ON_OFF_AUTO_OFF;
+    } else if (x86ms->smm == ON_OFF_AUTO_ON) {
+        error_setg(errp, "TDX VM doesn't support SMM");
+        return -EINVAL;
+    }
+
+    if (x86ms->pic == ON_OFF_AUTO_AUTO) {
+        x86ms->pic = ON_OFF_AUTO_OFF;
+    } else if (x86ms->pic == ON_OFF_AUTO_ON) {
+        error_setg(errp, "TDX VM doesn't support PIC");
+        return -EINVAL;
+    }
+
+    if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
+        kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
+    } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
+        error_setg(errp, "TDX VM requires kernel_irqchip to be split");
+        return -EINVAL;
+    }
+
+    if (!tdx_caps) {
+        r = get_tdx_capabilities(errp);
+        if (r) {
+            return r;
+        }
+    }
+
+    tdx_setup_supported_cpuid();
+
+    /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
+    if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
+        return -EOPNOTSUPP;
+    }
+
+    /*
+     * Set kvm_readonly_mem_allowed to false, because TDX only supports
+     * readonly memory for shared memory, not for private memory. Besides,
+     * whether a memslot is private or shared is not determined by QEMU.
+     *
+     * Thus, just mark readonly memory as not supported, for simplicity.
+     */
+    kvm_readonly_mem_allowed = false;
+
+    qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
+
+    tdx_guest = tdx;
+    return 0;
+}
+
+static int tdx_kvm_type(X86ConfidentialGuest *cg)
+{
+    /* Do the object check */
+    TDX_GUEST(cg);
+
+    return KVM_X86_TDX_VM;
+}
+
+static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
+{
+    X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
+    X86CPU *x86cpu = X86_CPU(cpu);
+
+    if (xcc->model) {
+        error_report("Named cpu model is not supported for TDX yet!");
+        exit(1);
+    }
+
+    object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+
+    /* invtsc is fixed1 for TD guest */
+    object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
+
+    x86cpu->force_cpuid_0x1f = true;
+}
+
+static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
+                                          uint32_t feature, uint32_t index,
+                                          int reg, uint32_t value)
+{
+    struct kvm_cpuid_entry2 *e;
+
+    e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+    if (e) {
+        value |= cpuid_entry_get_reg(e, reg);
+    }
+
+    if (is_feature_word_cpuid(feature, index, reg)) {
+        e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
+        if (e) {
+            value &= cpuid_entry_get_reg(e, reg);
+        }
+    }
+
+    return value;
+}
+
+static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
+{
+    struct kvm_cpuid2 *fetch_cpuid;
+    int size = KVM_MAX_CPUID_ENTRIES;
+    Error *local_err = NULL;
+    int r;
+
+    do {
+        error_free(local_err);
+        local_err = NULL;
+
+        fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
+                                sizeof(struct kvm_cpuid_entry2) * size);
+        fetch_cpuid->nent = size;
+        r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
+        if (r == -E2BIG) {
+            /* Read the required size before freeing the buffer. */
+            size = fetch_cpuid->nent;
+            g_free(fetch_cpuid);
+        }
+    } while (r == -E2BIG);
+
+    if (r < 0) {
+        error_report_err(local_err);
+        *ret = r;
+        return NULL;
+    }
+
+    return fetch_cpuid;
+}
+
+static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
+{
+    uint64_t actual, requested, unavailable, forced_on;
+    g_autofree struct kvm_cpuid2 *fetch_cpuid;
+    const char *forced_on_prefix = NULL;
+    const char *unav_prefix = NULL;
+    struct kvm_cpuid_entry2 *entry;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+    FeatureWordInfo *wi;
+    FeatureWord w;
+    bool mismatch = false;
+    int r;
+
+    fetch_cpuid = tdx_fetch_cpuid(cs, &r);
+    if (!fetch_cpuid) {
+        return r;
+    }
+
+    if (cpu->check_cpuid || cpu->enforce_cpuid) {
+        unav_prefix = "TDX doesn't support requested feature";
+        forced_on_prefix = "TDX forcibly sets the feature";
+    }
+
+    for (w = 0; w < FEATURE_WORDS; w++) {
+        wi = &feature_word_info[w];
+        actual = 0;
+
+        switch (wi->type) {
+        case CPUID_FEATURE_WORD:
+            entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
+            if (!entry) {
+                /*
+                 * If KVM doesn't report it, it means the word is totally
+                 * configurable by QEMU.
+                 */
+                continue;
+            }
+
+            actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
+            break;
+        case MSR_FEATURE_WORD:
+            /*
+             * TODO:
+             * validate MSR features once KVM has an interface to report them.
+             */
+            continue;
+        }
+
+        /* Fixup for special cases */
+        switch (w) {
+        case FEAT_8000_0001_EDX:
+            /*
+             * Intel enumerates the SYSCALL bit as 1 only when the processor
+             * is in 64-bit mode, and before the vcpu runs it is not in
+             * 64-bit mode.
+             */
+            actual |= CPUID_EXT2_SYSCALL;
+            break;
+        default:
+            break;
+        }
+
+        requested = env->features[w];
+        unavailable = requested & ~actual;
+        mark_unavailable_features(cpu, w, unavailable, unav_prefix);
+        if (unavailable) {
+            mismatch = true;
+        }
+
+        forced_on = actual & ~requested;
+        mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
+        if (forced_on) {
+            mismatch = true;
+        }
+    }
+
+    if (cpu->enforce_cpuid && mismatch) {
+        return -EINVAL;
+    }
+
+    if (cpu->phys_bits != host_cpu_phys_bits()) {
+        error_report("TDX requires guest CPU physical bits (%u) "
+                     "to match host CPU physical bits (%u)",
+                     cpu->phys_bits, host_cpu_phys_bits());
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
+{
+    if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
+        error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
+                   "(KVM supported: 0x%"PRIx64")", tdx->attributes,
+                   (uint64_t)tdx_caps->supported_attrs);
+        return -1;
+    }
+
+    if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
+        error_setg(errp, "Requested TD attributes (0x%"PRIx64") include bits "
+                   "unsupported by QEMU (QEMU supported: 0x%"PRIx64")",
+                   tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
+{
+    CPUX86State *env = &x86cpu->env;
+
+    tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
+                             TDX_TD_ATTRIBUTES_PKS : 0;
+    tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
+
+    return tdx_validate_attributes(tdx_guest, errp);
+}
+
+static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
+{
+    CPUX86State *env = &x86cpu->env;
+    uint64_t xfam;
+
+    xfam = env->features[FEAT_XSAVE_XCR0_LO] |
+           env->features[FEAT_XSAVE_XCR0_HI] |
+           env->features[FEAT_XSAVE_XSS_LO] |
+           env->features[FEAT_XSAVE_XSS_HI];
+
+    if (xfam & ~tdx_caps->supported_xfam) {
+        error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64")",
+                   xfam, (uint64_t)tdx_caps->supported_xfam);
+        return -1;
+    }
+
+    tdx_guest->xfam = xfam;
+    return 0;
+}
+
+static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
+{
+    int i, dest_cnt = 0;
+    struct kvm_cpuid_entry2 *src, *dest, *conf;
+
+    for (i = 0; i < cpuids->nent; i++) {
+        src = cpuids->entries + i;
+        conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
+        if (!conf) {
+            continue;
+        }
+        dest = cpuids->entries + dest_cnt;
+
+        dest->function = src->function;
+        dest->index = src->index;
+        dest->flags = src->flags;
+        dest->eax = src->eax & conf->eax;
+        dest->ebx = src->ebx & conf->ebx;
+        dest->ecx = src->ecx & conf->ecx;
+        dest->edx = src->edx & conf->edx;
+
+        dest_cnt++;
+    }
+    cpuids->nent = dest_cnt;
+}
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+    X86CPU *x86cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86cpu->env;
+    g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
+    Error *local_err = NULL;
+    size_t data_len;
+    int retry = 10000;
+    int r = 0;
+
+    QEMU_LOCK_GUARD(&tdx_guest->lock);
+    if (tdx_guest->initialized) {
+        return r;
+    }
+
+    init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
+                    sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+    if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
+        error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
+        return -EOPNOTSUPP;
+    }
+
+    r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+                          0, TDX_APIC_BUS_CYCLES_NS);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "Unable to set core crystal clock "
+                         "frequency to 25MHz");
+        return r;
+    }
+
+    if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
+                         env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
+        error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency "
+                   "between [%d, %d] kHz", env->tsc_khz,
+                   TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
+        return -EINVAL;
+    }
+
+    if (env->tsc_khz % (25 * 1000)) {
+        error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be a multiple of 25MHz",
+                   env->tsc_khz);
+        return -EINVAL;
+    }
+
+    /* It's safe even if env->tsc_khz is 0; KVM uses the host's tsc_khz in that case */
+    r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
+                         env->tsc_khz);
+        return r;
+    }
+
+    if (tdx_guest->mrconfigid) {
+        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
+                              strlen(tdx_guest->mrconfigid), &data_len, errp);
+        if (!data) {
+            return -1;
+        }
+        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+            error_setg(errp, "TDX 'mrconfigid' sha384 digest was %zu bytes, "
+                       "expected %d bytes", data_len,
+                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
+            return -1;
+        }
+        memcpy(init_vm->mrconfigid, data, data_len);
+    }
+
+    if (tdx_guest->mrowner) {
+        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
+                              strlen(tdx_guest->mrowner), &data_len, errp);
+        if (!data) {
+            return -1;
+        }
+        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+            error_setg(errp, "TDX 'mrowner' sha384 digest was %zu bytes, "
+                       "expected %d bytes", data_len,
+                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
+            return -1;
+        }
+        memcpy(init_vm->mrowner, data, data_len);
+    }
+
+    if (tdx_guest->mrownerconfig) {
+        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
+                              strlen(tdx_guest->mrownerconfig), &data_len, errp);
+        if (!data) {
+            return -1;
+        }
+        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+            error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %zu bytes, "
+                       "expected %d bytes", data_len,
+                       QCRYPTO_HASH_DIGEST_LEN_SHA384);
+            return -1;
+        }
+        memcpy(init_vm->mrownerconfig, data, data_len);
+    }
+
+    r = setup_td_guest_attributes(x86cpu, errp);
+    if (r) {
+        return r;
+    }
+
+    r = setup_td_xfam(x86cpu, errp);
+    if (r) {
+        return r;
+    }
+
+    init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
+    tdx_filter_cpuid(&init_vm->cpuid);
+
+    init_vm->attributes = tdx_guest->attributes;
+    init_vm->xfam = tdx_guest->xfam;
+
+    /*
+     * KVM_TDX_INIT_VM gets -EAGAIN when the KVM-side SEAMCALL(TDH_MNG_CREATE)
+     * gets TDX_RND_NO_ENTROPY because random number generation (e.g., RDRAND
+     * or RDSEED) is busy.
+     *
+     * Retry in that case.
+     */
+    do {
+        error_free(local_err);
+        local_err = NULL;
+        r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
+    } while (r == -EAGAIN && --retry);
+
+    if (r < 0) {
+        if (!retry) {
+            error_append_hint(&local_err, "The hardware RNG (RDRAND/RDSEED) "
+                              "is being kept busy, possibly maliciously, so "
+                              "KVM_TDX_INIT_VM keeps failing for lack of "
+                              "entropy.\n");
+        }
+        error_propagate(errp, local_err);
+        return r;
+    }
+
+    tdx_guest->initialized = true;
+
+    return 0;
+}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+    return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
+}
+
+static void tdx_inject_interrupt(TdxGuest *tdx)
+{
+    int ret;
+    uint32_t apicid, vector;
+
+    qemu_mutex_lock(&tdx->lock);
+    vector = tdx->event_notify_vector;
+    apicid = tdx->event_notify_apicid;
+    qemu_mutex_unlock(&tdx->lock);
+    if (vector < 32 || vector > 255) {
+        return;
+    }
+
+    MSIMessage msg = {
+        .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) |
+                   (((uint64_t)apicid & 0xffffff00) << 32),
+        .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT),
+    };
+
+    ret = kvm_irqchip_send_msi(kvm_state, msg);
+    if (ret < 0) {
+        /* There is no better way to report this to the guest; log it. */
+        error_report("TDX: injecting interrupt %d failed, interrupt lost (%s).",
+                     vector, strerror(-ret));
+    }
+}
+
+static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
+{
+    TdxGuest *tdx = task->opaque;
+    int ret;
+
+    /* Maintain the number of in-flight requests. */
+    qemu_mutex_lock(&tdx->lock);
+    tdx->num--;
+    qemu_mutex_unlock(&tdx->lock);
+
+    if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) {
+        ret = address_space_write(&address_space_memory, task->payload_gpa,
+                                  MEMTXATTRS_UNSPECIFIED, task->receive_buf,
+                                  task->receive_buf_received);
+        if (ret != MEMTX_OK) {
+            error_report("TDX: get-quote: failed to write quote data.");
+        } else {
+            /* out_len is a 32-bit field, so convert with cpu_to_le32() */
+            task->hdr.out_len = cpu_to_le32(task->receive_buf_received);
+        }
+    }
+    task->hdr.error_code = cpu_to_le64(task->status_code);
+
+    /* Publish the response contents before marking this request completed. */
+    smp_wmb();
+    ret = address_space_write(&address_space_memory, task->buf_gpa,
+                              MEMTXATTRS_UNSPECIFIED, &task->hdr,
+                              TDX_GET_QUOTE_HDR_SIZE);
+    if (ret != MEMTX_OK) {
+        error_report("TDX: get-quote: failed to update GetQuote header.");
+    }
+
+    tdx_inject_interrupt(tdx);
+
+    g_free(task->send_data);
+    g_free(task->receive_buf);
+    g_free(task);
+    object_unref(tdx);
+}
+
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
+{
+    TdxGenerateQuoteTask *task;
+    struct tdx_get_quote_header hdr;
+    hwaddr buf_gpa = run->tdx.get_quote.gpa;
+    uint64_t buf_len = run->tdx.get_quote.size;
+
+    QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE);
+
+    run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND;
+
+    if (buf_len == 0) {
+        return;
+    }
+
+    if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
+        run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR;
+        return;
+    }
+
+    if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED,
+                           &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+        error_report("TDX: get-quote: failed to read GetQuote header.");
+        return;
+    }
+
+    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
+        return;
+    }
+
+    /* Safeguard check only, to reject an overly large buffer size.
*/ + if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || + le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { + return; + } + + if (!tdx_guest->qg_sock_addr) { + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: failed to update GetQuote header."); + return; + } + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + } + + qemu_mutex_lock(&tdx_guest->lock); + if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; + return; + } + tdx_guest->num++; + qemu_mutex_unlock(&tdx_guest->lock); + + task = g_new(TdxGenerateQuoteTask, 1); + task->buf_gpa = buf_gpa; + task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; + task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; + task->hdr = hdr; + task->completion = tdx_get_quote_completion; + + task->send_data_size = le32_to_cpu(hdr.in_len); + task->send_data = g_malloc(task->send_data_size); + task->send_data_sent = 0; + + if (address_space_read(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->send_data, + task->send_data_size) != MEMTX_OK) { + goto out_free; + } + + /* Mark the buffer in-flight. */ + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + goto out_free; + } + + task->receive_buf = g_malloc0(task->payload_len); + task->receive_buf_received = 0; + task->opaque = tdx_guest; + + object_ref(tdx_guest); + tdx_generate_quote(task, tdx_guest->qg_sock_addr); + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + +out_free: + g_free(task->send_data); + g_free(task); +} + +#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT) +#define SUPPORTED_TDVMCALLINFO_1_R12 (0) + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ + if (run->tdx.get_tdvmcall_info.leaf != 1) { + return; + } + + run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 & + SUPPORTED_TDVMCALLINFO_1_R11) | + tdx_caps->kernel_tdvmcallinfo_1_r11; + run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 & + SUPPORTED_TDVMCALLINFO_1_R12) | + tdx_caps->kernel_tdvmcallinfo_1_r12; + run->tdx.get_tdvmcall_info.r13 = 0; + run->tdx.get_tdvmcall_info.r14 = 0; + + run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS; +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t vector = run->tdx.setup_event_notify.vector; + + if (vector >= 32 && vector < 256) { + qemu_mutex_lock(&tdx_guest->lock); + tdx_guest->event_notify_vector = vector; + tdx_guest->event_notify_apicid = cpu->apic_id; + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS; + } else { + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND; + } +} + +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, bool has_gpa, + uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + panic_info->u.tdx.has_gpa = has_gpa; + + qemu_system_guest_panicked(panic_info); +} + +/* + * Only 8 
registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + uint64_t gpa = -1ull; + bool has_gpa = false; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + has_gpa = true; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa); + + return -1; +} + +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + +static void tdx_guest_get_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (!tdx->qg_sock_addr) { + error_setg(errp, "quote-generation-socket is not set"); + return; + } + visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); +} + +static void tdx_guest_set_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + SocketAddress *sock = NULL; + + if (!visit_type_SocketAddress(v, name, &sock, errp)) { + return; + } + + if (tdx->qg_sock_addr) { + qapi_free_SocketAddress(tdx->qg_sock_addr); + } + + tdx->qg_sock_addr = sock; +} + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { 
TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + qemu_mutex_init(&tdx->lock); + + cgs->require_guest_memfd = true; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); + + object_property_add(obj, "quote-generation-socket", "SocketAddress", + tdx_guest_get_qgs, + tdx_guest_set_qgs, + NULL, NULL); + + tdx->event_notify_vector = -1; + tdx->event_notify_apicid = -1; +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = tdx_kvm_init; + x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000..1c38faf --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + +#include "confidential-guest.h" +#include "cpu.h" +#include "hw/i386/tdvf.h" + +#include "tdx-quote-generator.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004 + +#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL +#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL +#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL +#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL +#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL + +#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1) + +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + QemuMutex lock; + + bool initialized; + uint64_t attributes; /* TD attributes */ + uint64_t xfam; + char *mrconfigid; /* base64 encoded sha384 digest */ + char *mrowner; /* base64 encoded sha384 digest */ + char *mrownerconfig; /* base64 encoded sha384 digest */ + + MemoryRegion *tdvf_mr; + TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; + + /* GetQuote */ + SocketAddress *qg_sock_addr; + int num; + + uint32_t event_notify_vector; + uint32_t event_notify_apicid; +} TdxGuest; + +#ifdef CONFIG_TDX 
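+/*
+ * Build-time dispatch sketch: with CONFIG_TDX set, callers get the real
+ * is_tdx_vm() from tdx.c; without it, the macro below folds to the constant
+ * 0, so a call site like (illustrative, not from this patch)
+ *
+ *     if (is_tdx_vm()) {
+ *         tdx_handle_get_quote(cpu, run);
+ *     }
+ *
+ * compiles away on non-TDX builds, and the remaining entry points resolve
+ * to the tdx-stub.c fallbacks.
+ */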
+bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run); + +#endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c index 31508d4..890322a 100644 --- a/target/i386/kvm/vmsr_energy.c +++ b/target/i386/kvm/vmsr_energy.c @@ -27,16 +27,6 @@ char *vmsr_compute_default_paths(void) return g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL); } -bool is_host_cpu_intel(void) -{ - int family, model, stepping; - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, &family, &model, &stepping); - - return g_str_equal(vendor, CPUID_VENDOR_INTEL); -} - int is_rapl_enabled(void) { const char *path = "/sys/class/powercap/intel-rapl/enabled"; @@ -67,13 +57,9 @@ QIOChannelSocket *vmsr_open_socket(const char *path) }; QIOChannelSocket *sioc = qio_channel_socket_new(); - Error *local_err = NULL; qio_channel_set_name(QIO_CHANNEL(sioc), "vmsr-helper"); - qio_channel_socket_connect_sync(sioc, - &saddr, - &local_err); - if (local_err) { + if (qio_channel_socket_connect_sync(sioc, &saddr, NULL) < 0) { /* Close socket. */ qio_channel_close(QIO_CHANNEL(sioc), NULL); object_unref(OBJECT(sioc)); @@ -285,7 +271,6 @@ void vmsr_read_thread_stat(pid_t pid, } fclose(file); - return; } /* Read QEMU stat task folder to retrieve all QEMU threads ID */ diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h index 16cc1f4..151bcbd 100644 --- a/target/i386/kvm/vmsr_energy.h +++ b/target/i386/kvm/vmsr_energy.h @@ -94,6 +94,5 @@ double vmsr_get_ratio(uint64_t e_delta, unsigned long long delta_ticks, unsigned int maxticks); void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms); -bool is_host_cpu_intel(void); int is_rapl_enabled(void); #endif /* VMSR_ENERGY_H */ diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index e81a245..52de019 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -13,13 +13,15 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/error-report.h" +#include "exec/target_page.h" #include "hw/xen/xen.h" #include "system/kvm_int.h" #include "system/kvm_xen.h" #include "kvm/kvm_i386.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "xen-emu.h" #include "trace.h" +#include "system/memory.h" #include "system/runstate.h" #include "hw/pci/msi.h" @@ -74,6 +76,7 @@ static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa, static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz, bool is_write) { + AddressSpace *as = cpu_addressspace(cs, MEMTXATTRS_UNSPECIFIED); uint8_t *buf = (uint8_t *)_buf; uint64_t gpa; size_t len; @@ -86,7 +89,7 @@ static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz, len = sz; } - cpu_physical_memory_rw(gpa, buf, len, is_write); + address_space_rw(as, gpa, MEMTXATTRS_UNSPECIFIED, buf, len, is_write); buf += len; sz -= len; diff --git a/target/i386/machine.c b/target/i386/machine.c index 70f632a..45b7cea 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,7 +7,7 @@ #include "hw/i386/x86.h" #include 
"kvm/kvm_i386.h" #include "hw/xen/xen.h" - +#include "exec/watchpoint.h" #include "system/kvm.h" #include "system/kvm_xen.h" #include "system/tcg.h" @@ -462,6 +462,24 @@ static const VMStateDescription vmstate_exception_info = { } }; +static bool cpu_errcode_needed(void *opaque) +{ + X86CPU *cpu = opaque; + + return cpu->env.has_error_code != 0; +} + +static const VMStateDescription vmstate_error_code = { + .name = "cpu/error_code", + .version_id = 1, + .minimum_version_id = 1, + .needed = cpu_errcode_needed, + .fields = (const VMStateField[]) { + VMSTATE_INT32(env.error_code, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + /* Poll control MSR enabled by default */ static bool poll_control_msr_needed(void *opaque) { @@ -1060,9 +1078,8 @@ static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); - return env->tsc_khz && x86mc->save_tsc_khz; + + return env->tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { @@ -1747,6 +1764,7 @@ const VMStateDescription vmstate_x86_cpu = { }, .subsections = (const VMStateDescription * const []) { &vmstate_exception_info, + &vmstate_error_code, &vmstate_async_pf_msr, &vmstate_async_pf_int_msr, &vmstate_pv_eoi_msr, diff --git a/target/i386/meson.build b/target/i386/meson.build index 2e9c472..89ba491 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -11,6 +11,9 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_MSHV', if_true: files('host-cpu.c')) i386_system_ss = ss.source_set() i386_system_ss.add(files( @@ -31,6 +34,8 @@ subdir('whpx') subdir('nvmm') subdir('hvf') subdir('tcg') +subdir('emulate') +subdir('mshv') target_arch += {'i386': i386_ss} target_system_arch += {'i386': i386_system_ss} diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 3ea92b0..3c9b6ca 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -29,7 +29,6 @@ #include "monitor/hmp.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" /* Perform linear address sign extension */ diff --git a/target/i386/mshv/meson.build b/target/i386/mshv/meson.build new file mode 100644 index 0000000..647e5da --- /dev/null +++ b/target/i386/mshv/meson.build @@ -0,0 +1,8 @@ +i386_mshv_ss = ss.source_set() + +i386_mshv_ss.add(files( + 'mshv-cpu.c', + 'x86.c', +)) + +i386_system_ss.add_all(when: 'CONFIG_MSHV', if_true: i386_mshv_ss) diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c new file mode 100644 index 0000000..1f7b9cb --- /dev/null +++ b/target/i386/mshv/mshv-cpu.c @@ -0,0 +1,1763 @@ +/* + * QEMU MSHV support + * + * Copyright Microsoft, Corp. 
2025
+ *
+ * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ *          Magnus Kulke <magnuskulke@microsoft.com>
+ *          Jinank Jain <jinankjain@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
+#include "qemu/typedefs.h"
+
+#include "system/mshv.h"
+#include "system/mshv_int.h"
+#include "system/address-spaces.h"
+#include "linux/mshv.h"
+#include "hw/hyperv/hvgdk.h"
+#include "hw/hyperv/hvgdk_mini.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "hw/i386/apic_internal.h"
+
+#include "cpu.h"
+#include "emulate/x86_decode.h"
+#include "emulate/x86_emu.h"
+#include "emulate/x86_flags.h"
+
+#include "trace-accel_mshv.h"
+#include "trace.h"
+
+#include <sys/ioctl.h>
+
+#define MAX_REGISTER_COUNT (MAX_CONST(ARRAY_SIZE(STANDARD_REGISTER_NAMES), \
+                            MAX_CONST(ARRAY_SIZE(SPECIAL_REGISTER_NAMES), \
+                                      ARRAY_SIZE(FPU_REGISTER_NAMES))))
+
+static enum hv_register_name STANDARD_REGISTER_NAMES[18] = {
+    HV_X64_REGISTER_RAX,
+    HV_X64_REGISTER_RBX,
+    HV_X64_REGISTER_RCX,
+    HV_X64_REGISTER_RDX,
+    HV_X64_REGISTER_RSI,
+    HV_X64_REGISTER_RDI,
+    HV_X64_REGISTER_RSP,
+    HV_X64_REGISTER_RBP,
+    HV_X64_REGISTER_R8,
+    HV_X64_REGISTER_R9,
+    HV_X64_REGISTER_R10,
+    HV_X64_REGISTER_R11,
+    HV_X64_REGISTER_R12,
+    HV_X64_REGISTER_R13,
+    HV_X64_REGISTER_R14,
+    HV_X64_REGISTER_R15,
+    HV_X64_REGISTER_RIP,
+    HV_X64_REGISTER_RFLAGS,
+};
+
+static enum hv_register_name SPECIAL_REGISTER_NAMES[17] = {
+    HV_X64_REGISTER_CS,
+    HV_X64_REGISTER_DS,
+    HV_X64_REGISTER_ES,
+    HV_X64_REGISTER_FS,
+    HV_X64_REGISTER_GS,
+    HV_X64_REGISTER_SS,
+    HV_X64_REGISTER_TR,
+    HV_X64_REGISTER_LDTR,
+    HV_X64_REGISTER_GDTR,
+    HV_X64_REGISTER_IDTR,
+    HV_X64_REGISTER_CR0,
+    HV_X64_REGISTER_CR2,
+    HV_X64_REGISTER_CR3,
+    HV_X64_REGISTER_CR4,
+    HV_X64_REGISTER_CR8,
+    HV_X64_REGISTER_EFER,
+    HV_X64_REGISTER_APIC_BASE,
+};
+
+static enum hv_register_name FPU_REGISTER_NAMES[26] = {
+    HV_X64_REGISTER_XMM0,
+    HV_X64_REGISTER_XMM1,
+    HV_X64_REGISTER_XMM2,
+    HV_X64_REGISTER_XMM3,
+    HV_X64_REGISTER_XMM4,
+    HV_X64_REGISTER_XMM5,
+    HV_X64_REGISTER_XMM6,
+    HV_X64_REGISTER_XMM7,
+    HV_X64_REGISTER_XMM8,
+    HV_X64_REGISTER_XMM9,
+    HV_X64_REGISTER_XMM10,
+    HV_X64_REGISTER_XMM11,
+    HV_X64_REGISTER_XMM12,
+    HV_X64_REGISTER_XMM13,
+    HV_X64_REGISTER_XMM14,
+    HV_X64_REGISTER_XMM15,
+    HV_X64_REGISTER_FP_MMX0,
+    HV_X64_REGISTER_FP_MMX1,
+    HV_X64_REGISTER_FP_MMX2,
+    HV_X64_REGISTER_FP_MMX3,
+    HV_X64_REGISTER_FP_MMX4,
+    HV_X64_REGISTER_FP_MMX5,
+    HV_X64_REGISTER_FP_MMX6,
+    HV_X64_REGISTER_FP_MMX7,
+    HV_X64_REGISTER_FP_CONTROL_STATUS,
+    HV_X64_REGISTER_XMM_CONTROL_STATUS,
+};
+
+static int translate_gva(const CPUState *cpu, uint64_t gva, uint64_t *gpa,
+                         uint64_t flags)
+{
+    int ret;
+    int cpu_fd = mshv_vcpufd(cpu);
+    int vp_index = cpu->cpu_index;
+
+    hv_input_translate_virtual_address in = { 0 };
+    hv_output_translate_virtual_address out = { 0 };
+    struct mshv_root_hvcall args = {0};
+    uint64_t gva_page = gva >> HV_HYP_PAGE_SHIFT;
+
+    in.vp_index = vp_index;
+    in.control_flags = flags;
+    in.gva_page = gva_page;
+
+    /* create the hvcall envelope */
+    args.code = HVCALL_TRANSLATE_VIRTUAL_ADDRESS;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t) &in;
+    args.out_sz = sizeof(out);
+    args.out_ptr = (uint64_t) &out;
+
+    /* perform the call */
+    ret = mshv_hvcall(cpu_fd, &args);
+    if (ret < 0) {
+        error_report("Failed to invoke gva->gpa translation");
+        return -errno;
+    }
+
+    if (out.translation_result.result_code != HV_TRANSLATE_GVA_SUCCESS) {
+        error_report("Failed to translate gva ("
TARGET_FMT_lx ") to gpa", gva); + return -1; + } + + *gpa = ((out.gpa_page << HV_HYP_PAGE_SHIFT) + | (gva & ~(uint64_t)HV_HYP_PAGE_MASK)); + + return 0; +} + +int mshv_set_generic_regs(const CPUState *cpu, const hv_register_assoc *assocs, + size_t n_regs) +{ + int cpu_fd = mshv_vcpufd(cpu); + int vp_index = cpu->cpu_index; + size_t in_sz, assocs_sz; + hv_input_set_vp_registers *in = cpu->accel->hvcall_args.input_page; + struct mshv_root_hvcall args = {0}; + int ret; + + /* find out the size of the struct w/ a flexible array at the tail */ + assocs_sz = n_regs * sizeof(hv_register_assoc); + in_sz = sizeof(hv_input_set_vp_registers) + assocs_sz; + + /* fill the input struct */ + memset(in, 0, sizeof(hv_input_set_vp_registers)); + in->vp_index = vp_index; + memcpy(in->elements, assocs, assocs_sz); + + /* create the hvcall envelope */ + args.code = HVCALL_SET_VP_REGISTERS; + args.in_sz = in_sz; + args.in_ptr = (uint64_t) in; + args.reps = (uint16_t) n_regs; + + /* perform the call */ + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("Failed to set registers"); + return -1; + } + + /* assert we set all registers */ + if (args.reps != n_regs) { + error_report("Failed to set registers: expected %zu elements" + ", got %u", n_regs, args.reps); + return -1; + } + + return 0; +} + +static int get_generic_regs(CPUState *cpu, hv_register_assoc *assocs, + size_t n_regs) +{ + int cpu_fd = mshv_vcpufd(cpu); + int vp_index = cpu->cpu_index; + hv_input_get_vp_registers *in = cpu->accel->hvcall_args.input_page; + hv_register_value *values = cpu->accel->hvcall_args.output_page; + size_t in_sz, names_sz, values_sz; + int i, ret; + struct mshv_root_hvcall args = {0}; + + /* find out the size of the struct w/ a flexible array at the tail */ + names_sz = n_regs * sizeof(hv_register_name); + in_sz = sizeof(hv_input_get_vp_registers) + names_sz; + + /* fill the input struct */ + memset(in, 0, sizeof(hv_input_get_vp_registers)); + in->vp_index = vp_index; + for (i = 0; i < n_regs; i++) { + in->names[i] = assocs[i].name; + } + + /* determine size of value output buffer */ + values_sz = n_regs * sizeof(union hv_register_value); + + /* create the hvcall envelope */ + args.code = HVCALL_GET_VP_REGISTERS; + args.in_sz = in_sz; + args.in_ptr = (uint64_t) in; + args.out_sz = values_sz; + args.out_ptr = (uint64_t) values; + args.reps = (uint16_t) n_regs; + + /* perform the call */ + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("Failed to retrieve registers"); + return -1; + } + + /* assert we got all registers */ + if (args.reps != n_regs) { + error_report("Failed to retrieve registers: expected %zu elements" + ", got %u", n_regs, args.reps); + return -1; + } + + /* copy values into assoc */ + for (i = 0; i < n_regs; i++) { + assocs[i].value = values[i]; + } + + return 0; +} + +static int set_standard_regs(const CPUState *cpu) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + hv_register_assoc assocs[ARRAY_SIZE(STANDARD_REGISTER_NAMES)]; + int ret; + size_t n_regs = ARRAY_SIZE(STANDARD_REGISTER_NAMES); + + /* set names */ + for (size_t i = 0; i < ARRAY_SIZE(STANDARD_REGISTER_NAMES); i++) { + assocs[i].name = STANDARD_REGISTER_NAMES[i]; + } + assocs[0].value.reg64 = env->regs[R_EAX]; + assocs[1].value.reg64 = env->regs[R_EBX]; + assocs[2].value.reg64 = env->regs[R_ECX]; + assocs[3].value.reg64 = env->regs[R_EDX]; + assocs[4].value.reg64 = env->regs[R_ESI]; + assocs[5].value.reg64 = env->regs[R_EDI]; + assocs[6].value.reg64 = env->regs[R_ESP]; + 
assocs[7].value.reg64 = env->regs[R_EBP]; + assocs[8].value.reg64 = env->regs[R_R8]; + assocs[9].value.reg64 = env->regs[R_R9]; + assocs[10].value.reg64 = env->regs[R_R10]; + assocs[11].value.reg64 = env->regs[R_R11]; + assocs[12].value.reg64 = env->regs[R_R12]; + assocs[13].value.reg64 = env->regs[R_R13]; + assocs[14].value.reg64 = env->regs[R_R14]; + assocs[15].value.reg64 = env->regs[R_R15]; + assocs[16].value.reg64 = env->eip; + lflags_to_rflags(env); + assocs[17].value.reg64 = env->eflags; + + ret = mshv_set_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to set standard registers"); + return -errno; + } + return 0; +} + +int mshv_store_regs(CPUState *cpu) +{ + int ret; + + ret = set_standard_regs(cpu); + if (ret < 0) { + error_report("Failed to store standard registers"); + return -1; + } + + return 0; +} + +static void populate_standard_regs(const hv_register_assoc *assocs, + CPUX86State *env) +{ + env->regs[R_EAX] = assocs[0].value.reg64; + env->regs[R_EBX] = assocs[1].value.reg64; + env->regs[R_ECX] = assocs[2].value.reg64; + env->regs[R_EDX] = assocs[3].value.reg64; + env->regs[R_ESI] = assocs[4].value.reg64; + env->regs[R_EDI] = assocs[5].value.reg64; + env->regs[R_ESP] = assocs[6].value.reg64; + env->regs[R_EBP] = assocs[7].value.reg64; + env->regs[R_R8] = assocs[8].value.reg64; + env->regs[R_R9] = assocs[9].value.reg64; + env->regs[R_R10] = assocs[10].value.reg64; + env->regs[R_R11] = assocs[11].value.reg64; + env->regs[R_R12] = assocs[12].value.reg64; + env->regs[R_R13] = assocs[13].value.reg64; + env->regs[R_R14] = assocs[14].value.reg64; + env->regs[R_R15] = assocs[15].value.reg64; + + env->eip = assocs[16].value.reg64; + env->eflags = assocs[17].value.reg64; + rflags_to_lflags(env); +} + +int mshv_get_standard_regs(CPUState *cpu) +{ + struct hv_register_assoc assocs[ARRAY_SIZE(STANDARD_REGISTER_NAMES)]; + int ret; + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + size_t n_regs = ARRAY_SIZE(STANDARD_REGISTER_NAMES); + + for (size_t i = 0; i < n_regs; i++) { + assocs[i].name = STANDARD_REGISTER_NAMES[i]; + } + ret = get_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to get standard registers"); + return -1; + } + + populate_standard_regs(assocs, env); + return 0; +} + +static inline void populate_segment_reg(const hv_x64_segment_register *hv_seg, + SegmentCache *seg) +{ + memset(seg, 0, sizeof(SegmentCache)); + + seg->base = hv_seg->base; + seg->limit = hv_seg->limit; + seg->selector = hv_seg->selector; + + seg->flags = (hv_seg->segment_type << DESC_TYPE_SHIFT) + | (hv_seg->present * DESC_P_MASK) + | (hv_seg->descriptor_privilege_level << DESC_DPL_SHIFT) + | (hv_seg->_default << DESC_B_SHIFT) + | (hv_seg->non_system_segment * DESC_S_MASK) + | (hv_seg->_long << DESC_L_SHIFT) + | (hv_seg->granularity * DESC_G_MASK) + | (hv_seg->available * DESC_AVL_MASK); + +} + +static inline void populate_table_reg(const hv_x64_table_register *hv_seg, + SegmentCache *tbl) +{ + memset(tbl, 0, sizeof(SegmentCache)); + + tbl->base = hv_seg->base; + tbl->limit = hv_seg->limit; +} + +static void populate_special_regs(const hv_register_assoc *assocs, + X86CPU *x86cpu) +{ + CPUX86State *env = &x86cpu->env; + + populate_segment_reg(&assocs[0].value.segment, &env->segs[R_CS]); + populate_segment_reg(&assocs[1].value.segment, &env->segs[R_DS]); + populate_segment_reg(&assocs[2].value.segment, &env->segs[R_ES]); + populate_segment_reg(&assocs[3].value.segment, &env->segs[R_FS]); + populate_segment_reg(&assocs[4].value.segment, 
&env->segs[R_GS]);
+    populate_segment_reg(&assocs[5].value.segment, &env->segs[R_SS]);
+
+    populate_segment_reg(&assocs[6].value.segment, &env->tr);
+    populate_segment_reg(&assocs[7].value.segment, &env->ldt);
+
+    populate_table_reg(&assocs[8].value.table, &env->gdt);
+    populate_table_reg(&assocs[9].value.table, &env->idt);
+
+    env->cr[0] = assocs[10].value.reg64;
+    env->cr[2] = assocs[11].value.reg64;
+    env->cr[3] = assocs[12].value.reg64;
+    env->cr[4] = assocs[13].value.reg64;
+
+    cpu_set_apic_tpr(x86cpu->apic_state, assocs[14].value.reg64);
+    env->efer = assocs[15].value.reg64;
+    cpu_set_apic_base(x86cpu->apic_state, assocs[16].value.reg64);
+}
+
+
+int mshv_get_special_regs(CPUState *cpu)
+{
+    struct hv_register_assoc assocs[ARRAY_SIZE(SPECIAL_REGISTER_NAMES)];
+    int ret;
+    X86CPU *x86cpu = X86_CPU(cpu);
+    size_t n_regs = ARRAY_SIZE(SPECIAL_REGISTER_NAMES);
+
+    for (size_t i = 0; i < n_regs; i++) {
+        assocs[i].name = SPECIAL_REGISTER_NAMES[i];
+    }
+    ret = get_generic_regs(cpu, assocs, n_regs);
+    if (ret < 0) {
+        error_report("failed to get special registers");
+        return -errno;
+    }
+
+    populate_special_regs(assocs, x86cpu);
+    return 0;
+}
+
+int mshv_load_regs(CPUState *cpu)
+{
+    int ret;
+
+    ret = mshv_get_standard_regs(cpu);
+    if (ret < 0) {
+        error_report("Failed to load standard registers");
+        return -1;
+    }
+
+    ret = mshv_get_special_regs(cpu);
+    if (ret < 0) {
+        error_report("Failed to load special registers");
+        return -1;
+    }
+
+    return 0;
+}
+
+static void add_cpuid_entry(GList **cpuid_entries,
+                            uint32_t function, uint32_t index,
+                            uint32_t eax, uint32_t ebx,
+                            uint32_t ecx, uint32_t edx)
+{
+    struct hv_cpuid_entry *entry;
+
+    entry = g_malloc0(sizeof(struct hv_cpuid_entry));
+    entry->function = function;
+    entry->index = index;
+    entry->eax = eax;
+    entry->ebx = ebx;
+    entry->ecx = ecx;
+    entry->edx = edx;
+
+    *cpuid_entries = g_list_append(*cpuid_entries, entry);
+}
+
+static void collect_cpuid_entries(const CPUState *cpu, GList **cpuid_entries)
+{
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+    uint32_t eax, ebx, ecx, edx;
+    uint32_t leaf, subleaf;
+    size_t max_leaf = 0x1F;
+    size_t max_subleaf = 0x20;
+
+    uint32_t leaves_with_subleaves[] = {0x4, 0x7, 0xD, 0xF, 0x10};
+    int n_subleaf_leaves = ARRAY_SIZE(leaves_with_subleaves);
+
+    /* Regular leaves without subleaves */
+    for (leaf = 0; leaf <= max_leaf; leaf++) {
+        bool has_subleaves = false;
+        for (int i = 0; i < n_subleaf_leaves; i++) {
+            if (leaf == leaves_with_subleaves[i]) {
+                has_subleaves = true;
+                break;
+            }
+        }
+
+        if (!has_subleaves) {
+            cpu_x86_cpuid(env, leaf, 0, &eax, &ebx, &ecx, &edx);
+            if (eax == 0 && ebx == 0 && ecx == 0 && edx == 0) {
+                /* all zeroes indicates no more leaves */
+                continue;
+            }
+
+            add_cpuid_entry(cpuid_entries, leaf, 0, eax, ebx, ecx, edx);
+            continue;
+        }
+
+        subleaf = 0;
+        while (subleaf < max_subleaf) {
+            cpu_x86_cpuid(env, leaf, subleaf, &eax, &ebx, &ecx, &edx);
+
+            if (eax == 0 && ebx == 0 && ecx == 0 && edx == 0) {
+                /* all zeroes indicates no more subleaves */
+                break;
+            }
+            add_cpuid_entry(cpuid_entries, leaf, subleaf, eax, ebx, ecx, edx);
+            subleaf++;
+        }
+    }
+}
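
The enumeration loop treats an all-zero CPUID result as the end of a leaf or subleaf range. That stopping rule in isolation, as a hypothetical probe built on the same cpu_x86_cpuid call used above:

/* Hypothetical helper: a (sub)leaf is exhausted when CPUID returns all zeroes. */
static bool example_leaf_exhausted(CPUX86State *env,
                                   uint32_t leaf, uint32_t subleaf)
{
    uint32_t eax, ebx, ecx, edx;

    cpu_x86_cpuid(env, leaf, subleaf, &eax, &ebx, &ecx, &edx);
    return (eax | ebx | ecx | edx) == 0;
}

+
+static int register_intercept_result_cpuid_entry(const CPUState *cpu,
+                                                 uint8_t subleaf_specific,
+                                                 uint8_t always_override,
+                                                 struct hv_cpuid_entry *entry)
+{
+    int ret;
+    int vp_index = cpu->cpu_index;
+    int cpu_fd = mshv_vcpufd(cpu);
+
+    struct hv_register_x64_cpuid_result_parameters cpuid_params = {
+        .input.eax = entry->function,
+        .input.ecx = entry->index,
+        .input.subleaf_specific = subleaf_specific,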
+        .input.always_override = always_override,
+        .input.padding = 0,
+        /*
+         * The masks specify which bits are to be overwritten. The current
+         * CpuidEntry structure cannot carry masks in addition to the actual
+         * register values, so the masks are set to the exact values of the
+         * register bits that shall be registered for an overwrite. To view
+         * the resulting values the hypervisor would return, the
+         * HvCallGetVpCpuidValues hypercall can be used.
+         */
+        .result.eax = entry->eax,
+        .result.eax_mask = entry->eax,
+        .result.ebx = entry->ebx,
+        .result.ebx_mask = entry->ebx,
+        .result.ecx = entry->ecx,
+        .result.ecx_mask = entry->ecx,
+        .result.edx = entry->edx,
+        .result.edx_mask = entry->edx,
+    };
+    union hv_register_intercept_result_parameters parameters = {
+        .cpuid = cpuid_params,
+    };
+
+    hv_input_register_intercept_result in = {0};
+    in.vp_index = vp_index;
+    in.intercept_type = HV_INTERCEPT_TYPE_X64_CPUID;
+    in.parameters = parameters;
+
+    struct mshv_root_hvcall args = {0};
+    args.code = HVCALL_REGISTER_INTERCEPT_RESULT;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t) &in;
+
+    ret = mshv_hvcall(cpu_fd, &args);
+    if (ret < 0) {
+        error_report("failed to register intercept result for cpuid");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int register_intercept_result_cpuid(const CPUState *cpu,
+                                           struct hv_cpuid *cpuid)
+{
+    int ret = 0, entry_ret;
+    struct hv_cpuid_entry *entry;
+    uint8_t subleaf_specific, always_override;
+
+    for (size_t i = 0; i < cpuid->nent; i++) {
+        entry = &cpuid->entries[i];
+
+        /* set defaults */
+        subleaf_specific = 0;
+        always_override = 1;
+
+        /* Intel */
+        /* 0xb - Extended Topology Enumeration Leaf */
+        /* 0x1f - V2 Extended Topology Enumeration Leaf */
+        /* AMD */
+        /* 0x8000_001e - Processor Topology Information */
+        /* 0x8000_0026 - Extended CPU Topology */
+        if (entry->function == 0xb
+            || entry->function == 0x1f
+            || entry->function == 0x8000001e
+            || entry->function == 0x80000026) {
+            subleaf_specific = 1;
+            always_override = 1;
+        } else if (entry->function == 0x00000001
+            || entry->function == 0x80000000
+            || entry->function == 0x80000001
+            || entry->function == 0x80000008) {
+            subleaf_specific = 0;
+            always_override = 1;
+        }
+
+        entry_ret = register_intercept_result_cpuid_entry(cpu, subleaf_specific,
+                                                          always_override,
+                                                          entry);
+        if ((entry_ret < 0) && (ret == 0)) {
+            ret = entry_ret;
+        }
+    }
+
+    return ret;
+}
+
+static int set_cpuid2(const CPUState *cpu)
+{
+    int ret;
+    size_t n_entries, cpuid_size;
+    struct hv_cpuid *cpuid;
+    struct hv_cpuid_entry *entry;
+    GList *entries = NULL;
+
+    collect_cpuid_entries(cpu, &entries);
+    n_entries = g_list_length(entries);
+
+    cpuid_size = sizeof(struct hv_cpuid)
+        + n_entries * sizeof(struct hv_cpuid_entry);
+
+    cpuid = g_malloc0(cpuid_size);
+    cpuid->nent = n_entries;
+    cpuid->padding = 0;
+
+    for (size_t i = 0; i < n_entries; i++) {
+        entry = g_list_nth_data(entries, i);
+        cpuid->entries[i] = *entry;
+        g_free(entry);
+    }
+    g_list_free(entries);
+
+    ret = register_intercept_result_cpuid(cpu, cpuid);
+    g_free(cpuid);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 0;
+}
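
The mask scheme in register_intercept_result_cpuid_entry above can be read as a bitwise merge. A sketch of the assumed hypervisor-side combination (the helper and the exact semantics are an assumption inferred from the comment, not taken from this file):

/*
 * Hypothetical sketch of the assumed overwrite semantics: bits set in the
 * mask come from the registered result, all other bits keep the value the
 * hypervisor would have returned anyway.
 */
static inline uint32_t example_apply_cpuid_override(uint32_t hv_default,
                                                    uint32_t result,
                                                    uint32_t mask)
{
    return (hv_default & ~mask) | (result & mask);
}

Since each mask is set to the same value as its result register, the merge reduces to OR-ing the registered bits into the hypervisor default.

+
+static inline void populate_hv_segment_reg(SegmentCache *seg,
+                                           hv_x64_segment_register *hv_reg)
+{
+    uint32_t flags = seg->flags;
+
+    hv_reg->base = seg->base;
+    hv_reg->limit = seg->limit;
+    hv_reg->selector = seg->selector;
+    hv_reg->segment_type = (flags >> DESC_TYPE_SHIFT) & 0xF;
+    hv_reg->non_system_segment = (flags & DESC_S_MASK) != 0;
+    hv_reg->descriptor_privilege_level = (flags >> DESC_DPL_SHIFT) & 0x3;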
+    hv_reg->present = (flags & DESC_P_MASK) != 0;
+    hv_reg->reserved = 0;
+    hv_reg->available = (flags & DESC_AVL_MASK) != 0;
+    hv_reg->_long = (flags >> DESC_L_SHIFT) & 0x1;
+    hv_reg->_default = (flags >> DESC_B_SHIFT) & 0x1;
+    hv_reg->granularity = (flags & DESC_G_MASK) != 0;
+}
+
+static inline void populate_hv_table_reg(const struct SegmentCache *seg,
+                                         hv_x64_table_register *hv_reg)
+{
+    memset(hv_reg, 0, sizeof(*hv_reg));
+
+    hv_reg->base = seg->base;
+    hv_reg->limit = seg->limit;
+}
+
+static int set_special_regs(const CPUState *cpu)
+{
+    X86CPU *x86cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86cpu->env;
+    struct hv_register_assoc assocs[ARRAY_SIZE(SPECIAL_REGISTER_NAMES)];
+    size_t n_regs = ARRAY_SIZE(SPECIAL_REGISTER_NAMES);
+    int ret;
+
+    /* set names */
+    for (size_t i = 0; i < n_regs; i++) {
+        assocs[i].name = SPECIAL_REGISTER_NAMES[i];
+    }
+    populate_hv_segment_reg(&env->segs[R_CS], &assocs[0].value.segment);
+    populate_hv_segment_reg(&env->segs[R_DS], &assocs[1].value.segment);
+    populate_hv_segment_reg(&env->segs[R_ES], &assocs[2].value.segment);
+    populate_hv_segment_reg(&env->segs[R_FS], &assocs[3].value.segment);
+    populate_hv_segment_reg(&env->segs[R_GS], &assocs[4].value.segment);
+    populate_hv_segment_reg(&env->segs[R_SS], &assocs[5].value.segment);
+    populate_hv_segment_reg(&env->tr, &assocs[6].value.segment);
+    populate_hv_segment_reg(&env->ldt, &assocs[7].value.segment);
+
+    populate_hv_table_reg(&env->gdt, &assocs[8].value.table);
+    populate_hv_table_reg(&env->idt, &assocs[9].value.table);
+
+    assocs[10].value.reg64 = env->cr[0];
+    assocs[11].value.reg64 = env->cr[2];
+    assocs[12].value.reg64 = env->cr[3];
+    assocs[13].value.reg64 = env->cr[4];
+    assocs[14].value.reg64 = cpu_get_apic_tpr(x86cpu->apic_state);
+    assocs[15].value.reg64 = env->efer;
+    assocs[16].value.reg64 = cpu_get_apic_base(x86cpu->apic_state);
+
+    ret = mshv_set_generic_regs(cpu, assocs, n_regs);
+    if (ret < 0) {
+        error_report("failed to set special registers");
+        return -1;
+    }
+
+    return 0;
+}
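
This set path must keep its positional indices in lock-step with populate_special_regs on the get path. A compile-time guard of this shape could at least pin the table size both paths assume (hypothetical, not present in the file; QEMU_BUILD_BUG_ON is already used further down):

/* Hypothetical guard: both get and set paths assume 17 special registers. */
QEMU_BUILD_BUG_ON(ARRAY_SIZE(SPECIAL_REGISTER_NAMES) != 17);

+
+static int set_fpu(const CPUState *cpu, const struct MshvFPU *regs)
+{
+    struct hv_register_assoc assocs[ARRAY_SIZE(FPU_REGISTER_NAMES)];
+    union hv_register_value *value;
+    size_t fp_i;
+    union hv_x64_fp_control_status_register *ctrl_status;
+    union hv_x64_xmm_control_status_register *xmm_ctrl_status;
+    int ret;
+    size_t n_regs = ARRAY_SIZE(FPU_REGISTER_NAMES);
+
+    /* first 16 registers are xmm0-xmm15 */
+    for (size_t i = 0; i < 16; i++) {
+        assocs[i].name = FPU_REGISTER_NAMES[i];
+        value = &assocs[i].value;
+        memcpy(&value->reg128, &regs->xmm[i], 16);
+    }
+
+    /* next 8 registers are fp_mmx0-fp_mmx7 */
+    for (size_t i = 16; i < 24; i++) {
+        assocs[i].name = FPU_REGISTER_NAMES[i];
+        fp_i = (i - 16);
+        value = &assocs[i].value;
+        memcpy(&value->reg128, &regs->fpr[fp_i], 16);
+    }
+
+    /* last two registers are fp_control_status and xmm_control_status */
+    assocs[24].name = FPU_REGISTER_NAMES[24];
+    value = &assocs[24].value;
+    ctrl_status = &value->fp_control_status;
+    ctrl_status->fp_control = regs->fcw;
+    ctrl_status->fp_status = regs->fsw;
+    ctrl_status->fp_tag = regs->ftwx;
+    ctrl_status->reserved = 0;
+    ctrl_status->last_fp_op = regs->last_opcode;
+    ctrl_status->last_fp_rip = regs->last_ip;
+
+    assocs[25].name = FPU_REGISTER_NAMES[25];
+    value = &assocs[25].value;
+    xmm_ctrl_status = &value->xmm_control_status;
+    xmm_ctrl_status->xmm_status_control = regs->mxcsr;
+    xmm_ctrl_status->xmm_status_control_mask = 0;
+    xmm_ctrl_status->last_fp_rdp = regs->last_dp;
+
+    ret =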
mshv_set_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to set fpu registers"); + return -1; + } + + return 0; +} + +static int set_xc_reg(const CPUState *cpu, uint64_t xcr0) +{ + int ret; + struct hv_register_assoc assoc = { + .name = HV_X64_REGISTER_XFEM, + .value.reg64 = xcr0, + }; + + ret = mshv_set_generic_regs(cpu, &assoc, 1); + if (ret < 0) { + error_report("failed to set xcr0"); + return -errno; + } + return 0; +} + +static int set_cpu_state(const CPUState *cpu, const MshvFPU *fpu_regs, + uint64_t xcr0) +{ + int ret; + + ret = set_standard_regs(cpu); + if (ret < 0) { + return ret; + } + ret = set_special_regs(cpu); + if (ret < 0) { + return ret; + } + ret = set_fpu(cpu, fpu_regs); + if (ret < 0) { + return ret; + } + ret = set_xc_reg(cpu, xcr0); + if (ret < 0) { + return ret; + } + return 0; +} + +static int get_vp_state(int cpu_fd, struct mshv_get_set_vp_state *state) +{ + int ret; + + ret = ioctl(cpu_fd, MSHV_GET_VP_STATE, state); + if (ret < 0) { + error_report("failed to get partition state: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int get_lapic(int cpu_fd, + struct hv_local_interrupt_controller_state *state) +{ + int ret; + size_t size = 4096; + /* buffer aligned to 4k, as *state requires that */ + void *buffer = qemu_memalign(size, size); + struct mshv_get_set_vp_state mshv_state = { 0 }; + + mshv_state.buf_ptr = (uint64_t) buffer; + mshv_state.buf_sz = size; + mshv_state.type = MSHV_VP_STATE_LAPIC; + + ret = get_vp_state(cpu_fd, &mshv_state); + if (ret == 0) { + memcpy(state, buffer, sizeof(*state)); + } + qemu_vfree(buffer); + if (ret < 0) { + error_report("failed to get lapic"); + return -1; + } + + return 0; +} + +static uint32_t set_apic_delivery_mode(uint32_t reg, uint32_t mode) +{ + return ((reg) & ~0x700) | ((mode) << 8); +} + +static int set_vp_state(int cpu_fd, const struct mshv_get_set_vp_state *state) +{ + int ret; + + ret = ioctl(cpu_fd, MSHV_SET_VP_STATE, state); + if (ret < 0) { + error_report("failed to set partition state: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int set_lapic(int cpu_fd, + const struct hv_local_interrupt_controller_state *state) +{ + int ret; + size_t size = 4096; + /* buffer aligned to 4k, as *state requires that */ + void *buffer = qemu_memalign(size, size); + struct mshv_get_set_vp_state mshv_state = { 0 }; + + if (!state) { + error_report("lapic state is NULL"); + return -1; + } + memcpy(buffer, state, sizeof(*state)); + + mshv_state.buf_ptr = (uint64_t) buffer; + mshv_state.buf_sz = size; + mshv_state.type = MSHV_VP_STATE_LAPIC; + + ret = set_vp_state(cpu_fd, &mshv_state); + qemu_vfree(buffer); + if (ret < 0) { + error_report("failed to set lapic: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int set_lint(int cpu_fd) +{ + int ret; + uint32_t *lvt_lint0, *lvt_lint1; + + struct hv_local_interrupt_controller_state lapic_state = { 0 }; + ret = get_lapic(cpu_fd, &lapic_state); + if (ret < 0) { + return ret; + } + + lvt_lint0 = &lapic_state.apic_lvt_lint0; + *lvt_lint0 = set_apic_delivery_mode(*lvt_lint0, APIC_DM_EXTINT); + + lvt_lint1 = &lapic_state.apic_lvt_lint1; + *lvt_lint1 = set_apic_delivery_mode(*lvt_lint1, APIC_DM_NMI); + + /* TODO: should we skip setting lapic if the values are the same? 
*/
+
+    return set_lapic(cpu_fd, &lapic_state);
+}
+
+static int setup_msrs(const CPUState *cpu)
+{
+    int ret;
+    uint64_t default_type = MSR_MTRR_ENABLE | MSR_MTRR_MEM_TYPE_WB;
+
+    /* boot msr entries */
+    MshvMsrEntry msrs[9] = {
+        { .index = IA32_MSR_SYSENTER_CS, .data = 0x0, },
+        { .index = IA32_MSR_SYSENTER_ESP, .data = 0x0, },
+        { .index = IA32_MSR_SYSENTER_EIP, .data = 0x0, },
+        { .index = IA32_MSR_STAR, .data = 0x0, },
+        { .index = IA32_MSR_CSTAR, .data = 0x0, },
+        { .index = IA32_MSR_LSTAR, .data = 0x0, },
+        { .index = IA32_MSR_KERNEL_GS_BASE, .data = 0x0, },
+        { .index = IA32_MSR_SFMASK, .data = 0x0, },
+        { .index = IA32_MSR_MTRR_DEF_TYPE, .data = default_type, },
+    };
+
+    ret = mshv_configure_msr(cpu, msrs, 9);
+    if (ret < 0) {
+        error_report("failed to setup msrs");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * TODO: populate topology info:
+ *
+ * X86CPU *x86cpu = X86_CPU(cpu);
+ * CPUX86State *env = &x86cpu->env;
+ * X86CPUTopoInfo *topo_info = &env->topo_info;
+ */
+int mshv_configure_vcpu(const CPUState *cpu, const struct MshvFPU *fpu,
+                        uint64_t xcr0)
+{
+    int ret;
+    int cpu_fd = mshv_vcpufd(cpu);
+
+    ret = set_cpuid2(cpu);
+    if (ret < 0) {
+        error_report("failed to set cpuid");
+        return -1;
+    }
+
+    ret = setup_msrs(cpu);
+    if (ret < 0) {
+        error_report("failed to setup msrs");
+        return -1;
+    }
+
+    ret = set_cpu_state(cpu, fpu, xcr0);
+    if (ret < 0) {
+        error_report("failed to set cpu state");
+        return -1;
+    }
+
+    ret = set_lint(cpu_fd);
+    if (ret < 0) {
+        error_report("failed to set lapic lint");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int put_regs(const CPUState *cpu)
+{
+    X86CPU *x86cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86cpu->env;
+    MshvFPU fpu = {0};
+    int ret;
+
+    memset(&fpu, 0, sizeof(fpu));
+
+    ret = mshv_configure_vcpu(cpu, &fpu, env->xcr0);
+    if (ret < 0) {
+        error_report("failed to configure vcpu");
+        return ret;
+    }
+
+    return 0;
+}
+
+struct MsrPair {
+    uint32_t index;
+    uint64_t value;
+};
+
+static int put_msrs(const CPUState *cpu)
+{
+    int ret = 0;
+    X86CPU *x86cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86cpu->env;
+    MshvMsrEntries *msrs = g_malloc0(sizeof(MshvMsrEntries));
+
+    struct MsrPair pairs[] = {
+        { MSR_IA32_SYSENTER_CS, env->sysenter_cs },
+        { MSR_IA32_SYSENTER_ESP, env->sysenter_esp },
+        { MSR_IA32_SYSENTER_EIP, env->sysenter_eip },
+        { MSR_EFER, env->efer },
+        { MSR_PAT, env->pat },
+        { MSR_STAR, env->star },
+        { MSR_CSTAR, env->cstar },
+        { MSR_LSTAR, env->lstar },
+        { MSR_KERNELGSBASE, env->kernelgsbase },
+        { MSR_FMASK, env->fmask },
+        { MSR_MTRRdefType, env->mtrr_deftype },
+        { MSR_VM_HSAVE_PA, env->vm_hsave },
+        { MSR_SMI_COUNT, env->msr_smi_count },
+        { MSR_IA32_PKRS, env->pkrs },
+        { MSR_IA32_BNDCFGS, env->msr_bndcfgs },
+        { MSR_IA32_XSS, env->xss },
+        { MSR_IA32_UMWAIT_CONTROL, env->umwait },
+        { MSR_IA32_TSX_CTRL, env->tsx_ctrl },
+        { MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr },
+        { MSR_TSC_AUX, env->tsc_aux },
+        { MSR_TSC_ADJUST, env->tsc_adjust },
+        { MSR_IA32_SMBASE, env->smbase },
+        { MSR_IA32_SPEC_CTRL, env->spec_ctrl },
+        { MSR_VIRT_SSBD, env->virt_ssbd },
+    };
+
+    if (ARRAY_SIZE(pairs) > MSHV_MSR_ENTRIES_COUNT) {
+        error_report("MSR entries exceed maximum size");
+        g_free(msrs);
+        return -1;
+    }
+
+    for (size_t i = 0; i < ARRAY_SIZE(pairs); i++) {
+        MshvMsrEntry *entry = &msrs->entries[i];
+        entry->index = pairs[i].index;
+        entry->reserved = 0;
+        entry->data = pairs[i].value;
+        msrs->nmsrs++;
+    }
+
+    ret = mshv_configure_msr(cpu, &msrs->entries[0], msrs->nmsrs);
+    g_free(msrs);
+    return ret;
+}
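
put_msrs batches everything through mshv_configure_msr; for a single register the same API collapses to a one-entry array. A hypothetical helper along these lines:

/* Hypothetical illustration: programming a single MSR via the batched API. */
static int example_set_tsc_aux(const CPUState *cpu, uint64_t value)
{
    MshvMsrEntry entry = {
        .index = MSR_TSC_AUX,
        .reserved = 0,
        .data = value,
    };

    return mshv_configure_msr(cpu, &entry, 1);
}

+
+int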
mshv_arch_put_registers(const CPUState *cpu) +{ + int ret; + + ret = put_regs(cpu); + if (ret < 0) { + error_report("Failed to put registers"); + return -1; + } + + ret = put_msrs(cpu); + if (ret < 0) { + error_report("Failed to put msrs"); + return -1; + } + + return 0; +} + +void mshv_arch_amend_proc_features( + union hv_partition_synthetic_processor_features *features) +{ + features->access_guest_idle_reg = 1; +} + +static int set_memory_info(const struct hyperv_message *msg, + struct hv_x64_memory_intercept_message *info) +{ + if (msg->header.message_type != HVMSG_GPA_INTERCEPT + && msg->header.message_type != HVMSG_UNMAPPED_GPA + && msg->header.message_type != HVMSG_UNACCEPTED_GPA) { + error_report("invalid message type"); + return -1; + } + memcpy(info, msg->payload, sizeof(*info)); + + return 0; +} + +static int emulate_instruction(CPUState *cpu, + const uint8_t *insn_bytes, size_t insn_len, + uint64_t gva, uint64_t gpa) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct x86_decode decode = { 0 }; + int ret; + x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len }; + + ret = mshv_load_regs(cpu); + if (ret < 0) { + error_report("failed to load registers"); + return -1; + } + + decode_instruction_stream(env, &decode, &stream); + exec_instruction(env, &decode); + + ret = mshv_store_regs(cpu); + if (ret < 0) { + error_report("failed to store registers"); + return -1; + } + + return 0; +} + +static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + size_t insn_len; + uint8_t access_type; + uint8_t *instruction_bytes; + int ret; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + insn_len = info.instruction_byte_count; + access_type = info.header.intercept_access_type; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) { + error_report("invalid intercept access type: execute"); + return -1; + } + + if (insn_len > 16) { + error_report("invalid mmio instruction length: %zu", insn_len); + return -1; + } + + trace_mshv_handle_mmio(info.guest_virtual_address, + info.guest_physical_address, + info.instruction_byte_count, access_type); + + instruction_bytes = info.instruction_bytes; + + ret = emulate_instruction(cpu, instruction_bytes, insn_len, + info.guest_virtual_address, + info.guest_physical_address); + if (ret < 0) { + error_report("failed to emulate mmio"); + return -1; + } + + *exit_reason = MshvVmExitIgnore; + + return 0; +} + +static int handle_unmapped_mem(int vm_fd, CPUState *cpu, + const struct hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + uint64_t gpa; + int ret; + enum MshvRemapResult remap_result; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + + gpa = info.guest_physical_address; + + /* attempt to remap the region, in case of overlapping userspace mappings */ + remap_result = mshv_remap_overlap_region(vm_fd, gpa); + *exit_reason = MshvVmExitIgnore; + + switch (remap_result) { + case MshvRemapNoMapping: + /* if we didn't find a mapping, it is probably mmio */ + return handle_mmio(cpu, msg, exit_reason); + case MshvRemapOk: + break; + case MshvRemapNoOverlap: + /* This should not happen, but we are forgiving it */ + warn_report("found no overlap for unmapped region"); + *exit_reason = MshvVmExitSpecial; 
+        break;
+    }
+
+    return 0;
+}
+
+static int set_ioport_info(const struct hyperv_message *msg,
+                           hv_x64_io_port_intercept_message *info)
+{
+    if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) {
+        error_report("Invalid message type");
+        return -1;
+    }
+    memcpy(info, msg->payload, sizeof(*info));
+
+    return 0;
+}
+
+static int set_x64_registers(const CPUState *cpu, const uint32_t *names,
+                             const uint64_t *values)
+{
+    hv_register_assoc assocs[2];
+    int ret;
+
+    for (size_t i = 0; i < ARRAY_SIZE(assocs); i++) {
+        assocs[i].name = names[i];
+        assocs[i].value.reg64 = values[i];
+    }
+
+    ret = mshv_set_generic_regs(cpu, assocs, ARRAY_SIZE(assocs));
+    if (ret < 0) {
+        error_report("failed to set x64 registers");
+        return -1;
+    }
+
+    return 0;
+}
+
+static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
+{
+    MemTxAttrs memattr = {0};
+    memattr.secure = is_secure_mode;
+    return memattr;
+}
+
+static void pio_read(uint64_t port, uint8_t *data, uintptr_t size,
+                     bool is_secure_mode)
+{
+    int ret = 0;
+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
+                           false);
+    if (ret != MEMTX_OK) {
+        error_report("Failed to read from port %" PRIx64 ": %d", port, ret);
+        abort();
+    }
+}
+
+static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size,
+                     bool is_secure_mode)
+{
+    int ret = 0;
+    MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
+    ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
+                           true);
+    return ret;
+}
+
+static int handle_pio_non_str(const CPUState *cpu,
+                              hv_x64_io_port_intercept_message *info)
+{
+    size_t len = info->access_info.access_size;
+    uint8_t access_type = info->header.intercept_access_type;
+    int ret;
+    uint32_t val, eax;
+    const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8);
+    size_t insn_len;
+    uint64_t rip, rax;
+    uint32_t reg_names[2];
+    uint64_t reg_values[2];
+    uint16_t port = info->port_number;
+
+    if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
+        union {
+            uint32_t u32;
+            uint8_t bytes[4];
+        } conv;
+
+        /* convert the first 4 bytes of rax to bytes */
+        conv.u32 = (uint32_t)info->rax;
+        /* secure mode is set to false */
+        ret = pio_write(port, conv.bytes, len, false);
+        if (ret < 0) {
+            error_report("Failed to write to io port");
+            return -1;
+        }
+    } else {
+        uint8_t data[4] = { 0 };
+        /* secure mode is set to false */
+        pio_read(info->port_number, data, len, false);
+
+        /* Preserve high bits in EAX, but clear out high bits in RAX */
+        val = *(uint32_t *)data;
+        eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask);
+        info->rax = (uint64_t)eax;
+    }
+
+    insn_len = info->header.instruction_length;
+
+    /* Advance RIP and update RAX */
+    rip = info->header.rip + insn_len;
+    rax = info->rax;
+
+    reg_names[0] = HV_X64_REGISTER_RIP;
+    reg_values[0] = rip;
+    reg_names[1] = HV_X64_REGISTER_RAX;
+    reg_values[1] = rax;
+
+    ret = set_x64_registers(cpu, reg_names, reg_values);
+    if (ret < 0) {
+        error_report("Failed to set x64 registers");
+        return -1;
+    }
+
+    cpu->accel->dirty = false;
+
+    return 0;
+}
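
The merge above preserves the untouched upper EAX bytes on a narrow port read, while the 64-bit store clears the top half of RAX, matching x86 semantics for 32-bit sub-register writes. A worked check with assumed values (hypothetical function, plain C with <assert.h> and <stdint.h>):

#include <assert.h>
#include <stdint.h>

/* Hypothetical worked example of the read-merge above, for len == 1. */
static void example_eax_merge(void)
{
    const unsigned len = 1;
    const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8);   /* 0xff */
    const uint64_t rax = 0x1122334455667788ull;                /* guest RAX */
    const uint32_t val = 0xAB;                                 /* port byte */
    uint32_t eax = (((uint32_t)rax) & ~eax_mask) | (val & eax_mask);

    assert(eax == 0x556677ABu);             /* only the low byte is replaced */
    assert((uint64_t)eax == 0x00000000556677ABull); /* RAX top half cleared */
}

+
+static int fetch_guest_state(CPUState *cpu)
+{
+    int ret;
+
+    ret = mshv_get_standard_regs(cpu);
+    if (ret < 0) {
+        error_report("Failed to get standard registers");
+        return -1;
+    }
+
+    ret = mshv_get_special_regs(cpu);
+    if (ret < 0) {
+        error_report("Failed to get special registers");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int read_memory(const CPUState *cpu, uint64_t initial_gva,
+                       uint64_t initial_gpa, uint64_t gva, uint8_t *data,
+                       size_t len)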
+{
+    int ret;
+    uint64_t gpa, flags;
+
+    if (gva == initial_gva) {
+        gpa = initial_gpa;
+    } else {
+        flags = HV_TRANSLATE_GVA_VALIDATE_READ;
+        ret = translate_gva(cpu, gva, &gpa, flags);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    ret = mshv_guest_mem_read(gpa, data, len, false, false);
+    if (ret < 0) {
+        error_report("failed to read guest mem");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_memory(const CPUState *cpu, uint64_t initial_gva,
+                        uint64_t initial_gpa, uint64_t gva, const uint8_t *data,
+                        size_t len)
+{
+    int ret;
+    uint64_t gpa, flags;
+
+    if (gva == initial_gva) {
+        gpa = initial_gpa;
+    } else {
+        flags = HV_TRANSLATE_GVA_VALIDATE_WRITE;
+        ret = translate_gva(cpu, gva, &gpa, flags);
+        if (ret < 0) {
+            error_report("failed to translate gva to gpa");
+            return -1;
+        }
+    }
+    ret = mshv_guest_mem_write(gpa, data, len, false);
+    if (ret != MEMTX_OK) {
+        error_report("failed to write to mmio");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int handle_pio_str_write(CPUState *cpu,
+                                hv_x64_io_port_intercept_message *info,
+                                size_t repeat, uint16_t port,
+                                bool direction_flag)
+{
+    int ret;
+    uint64_t src;
+    uint8_t data[4] = { 0 };
+    size_t len = info->access_info.access_size;
+
+    src = linear_addr(cpu, info->rsi, R_DS);
+
+    for (size_t i = 0; i < repeat; i++) {
+        ret = read_memory(cpu, 0, 0, src, data, len);
+        if (ret < 0) {
+            error_report("Failed to read memory");
+            return -1;
+        }
+        ret = pio_write(port, data, len, false);
+        if (ret < 0) {
+            error_report("Failed to write to io port");
+            return -1;
+        }
+        src += direction_flag ? -len : len;
+        info->rsi += direction_flag ? -len : len;
+    }
+
+    return 0;
+}
+
+static int handle_pio_str_read(CPUState *cpu,
+                               hv_x64_io_port_intercept_message *info,
+                               size_t repeat, uint16_t port,
+                               bool direction_flag)
+{
+    int ret;
+    uint64_t dst;
+    size_t len = info->access_info.access_size;
+    uint8_t data[4] = { 0 };
+
+    dst = linear_addr(cpu, info->rdi, R_ES);
+
+    for (size_t i = 0; i < repeat; i++) {
+        pio_read(port, data, len, false);
+
+        ret = write_memory(cpu, 0, 0, dst, data, len);
+        if (ret < 0) {
+            error_report("Failed to write memory");
+            return -1;
+        }
+        dst += direction_flag ? -len : len;
+        info->rdi += direction_flag ? -len : len;
+    }
+
+    return 0;
+}
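
Both string handlers step their guest pointer by plus or minus access_size under the direction flag, mirroring x86 REP INSB/OUTSB semantics. The stepping rule in isolation (hypothetical helper, plain C with <stdint.h>, <stddef.h>, <stdbool.h>):

/* Hypothetical: next element address for a rep string op of width len. */
static uint64_t example_string_step(uint64_t addr, size_t len, bool df)
{
    /* DF clear: ascend through memory; DF set: descend (x86 semantics) */
    return df ? addr - len : addr + len;
}

+
+static int handle_pio_str(CPUState *cpu, hv_x64_io_port_intercept_message *info)
+{
+    uint8_t access_type = info->header.intercept_access_type;
+    uint16_t port = info->port_number;
+    bool repop = info->access_info.rep_prefix == 1;
+    size_t repeat = repop ?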
info->rcx : 1; + size_t insn_len = info->header.instruction_length; + bool direction_flag; + uint32_t reg_names[3]; + uint64_t reg_values[3]; + int ret; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + ret = fetch_guest_state(cpu); + if (ret < 0) { + error_report("Failed to fetch guest state"); + return -1; + } + + direction_flag = (env->eflags & DESC_E_MASK) != 0; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) { + ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag); + if (ret < 0) { + error_report("Failed to handle pio str write"); + return -1; + } + reg_names[0] = HV_X64_REGISTER_RSI; + reg_values[0] = info->rsi; + } else { + ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag); + reg_names[0] = HV_X64_REGISTER_RDI; + reg_values[0] = info->rdi; + } + + reg_names[1] = HV_X64_REGISTER_RIP; + reg_values[1] = info->header.rip + insn_len; + reg_names[2] = HV_X64_REGISTER_RAX; + reg_values[2] = info->rax; + + ret = set_x64_registers(cpu, reg_names, reg_values); + if (ret < 0) { + error_report("Failed to set x64 registers"); + return -1; + } + + cpu->accel->dirty = false; + + return 0; +} + +static int handle_pio(CPUState *cpu, const struct hyperv_message *msg) +{ + struct hv_x64_io_port_intercept_message info = { 0 }; + int ret; + + ret = set_ioport_info(msg, &info); + if (ret < 0) { + error_report("Failed to convert message to ioport info"); + return -1; + } + + if (info.access_info.string_op) { + return handle_pio_str(cpu, &info); + } + + return handle_pio_non_str(cpu, &info); +} + +int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit) +{ + int ret; + enum MshvVmExit exit_reason; + int cpu_fd = mshv_vcpufd(cpu); + + ret = ioctl(cpu_fd, MSHV_RUN_VP, msg); + if (ret < 0) { + return MshvVmExitShutdown; + } + + switch (msg->header.message_type) { + case HVMSG_UNRECOVERABLE_EXCEPTION: + return MshvVmExitShutdown; + case HVMSG_UNMAPPED_GPA: + ret = handle_unmapped_mem(vm_fd, cpu, msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle unmapped memory"); + return -1; + } + return exit_reason; + case HVMSG_GPA_INTERCEPT: + ret = handle_mmio(cpu, msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle mmio"); + return -1; + } + return exit_reason; + case HVMSG_X64_IO_PORT_INTERCEPT: + ret = handle_pio(cpu, msg); + if (ret < 0) { + return MshvVmExitSpecial; + } + return MshvVmExitIgnore; + default: + break; + } + + *exit = MshvVmExitIgnore; + return 0; +} + +void mshv_remove_vcpu(int vm_fd, int cpu_fd) +{ + close(cpu_fd); +} + + +int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd) +{ + int ret; + struct mshv_create_vp vp_arg = { + .vp_index = vp_index, + }; + ret = ioctl(vm_fd, MSHV_CREATE_VP, &vp_arg); + if (ret < 0) { + error_report("failed to create mshv vcpu: %s", strerror(errno)); + return -1; + } + + *cpu_fd = ret; + + return 0; +} + +static int guest_mem_read_with_gva(const CPUState *cpu, uint64_t gva, + uint8_t *data, uintptr_t size, + bool fetch_instruction) +{ + int ret; + uint64_t gpa, flags; + + flags = HV_TRANSLATE_GVA_VALIDATE_READ; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + + ret = mshv_guest_mem_read(gpa, data, size, false, fetch_instruction); + if (ret < 0) { + error_report("failed to read from guest memory"); + return -1; + } + + return 0; +} + +static int guest_mem_write_with_gva(const CPUState *cpu, uint64_t gva, + const uint8_t *data, uintptr_t size) +{ + int ret; 
+ uint64_t gpa, flags; + + flags = HV_TRANSLATE_GVA_VALIDATE_WRITE; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + ret = mshv_guest_mem_write(gpa, data, size, false); + if (ret < 0) { + error_report("failed to write to guest memory"); + return -1; + } + return 0; +} + +static void write_mem(CPUState *cpu, void *data, target_ulong addr, int bytes) +{ + if (guest_mem_write_with_gva(cpu, addr, data, bytes) < 0) { + error_report("failed to write memory"); + abort(); + } +} + +static void fetch_instruction(CPUState *cpu, void *data, + target_ulong addr, int bytes) +{ + if (guest_mem_read_with_gva(cpu, addr, data, bytes, true) < 0) { + error_report("failed to fetch instruction"); + abort(); + } +} + +static void read_mem(CPUState *cpu, void *data, target_ulong addr, int bytes) +{ + if (guest_mem_read_with_gva(cpu, addr, data, bytes, false) < 0) { + error_report("failed to read memory"); + abort(); + } +} + +static void read_segment_descriptor(CPUState *cpu, + struct x86_segment_descriptor *desc, + enum X86Seg seg_idx) +{ + bool ret; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + SegmentCache *seg = &env->segs[seg_idx]; + x86_segment_selector sel = { .sel = seg->selector & 0xFFFF }; + + ret = x86_read_segment_descriptor(cpu, desc, sel); + if (ret == false) { + error_report("failed to read segment descriptor"); + abort(); + } +} + +static const struct x86_emul_ops mshv_x86_emul_ops = { + .fetch_instruction = fetch_instruction, + .read_mem = read_mem, + .write_mem = write_mem, + .read_segment_descriptor = read_segment_descriptor, +}; + +void mshv_init_mmio_emu(void) +{ + init_decoder(); + init_emu(&mshv_x86_emul_ops); +} + +void mshv_arch_init_vcpu(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + AccelCPUState *state = cpu->accel; + size_t page = HV_HYP_PAGE_SIZE; + void *mem = qemu_memalign(page, 2 * page); + + /* sanity check, to make sure we don't overflow the page */ + QEMU_BUILD_BUG_ON((MAX_REGISTER_COUNT + * sizeof(hv_register_assoc) + + sizeof(hv_input_get_vp_registers) + > HV_HYP_PAGE_SIZE)); + + state->hvcall_args.base = mem; + state->hvcall_args.input_page = mem; + state->hvcall_args.output_page = (uint8_t *)mem + page; + + env->emu_mmio_buf = g_new(char, 4096); +} + +void mshv_arch_destroy_vcpu(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + AccelCPUState *state = cpu->accel; + + g_free(state->hvcall_args.base); + state->hvcall_args = (MshvHvCallArgs){0}; + g_clear_pointer(&env->emu_mmio_buf, g_free); +} + +/* + * Default Microsoft Hypervisor behavior for unimplemented MSR is to send a + * fault to the guest if it tries to access it. It is possible to override + * this behavior with a more suitable option i.e., ignore writes from the guest + * and return zero in attempt to read unimplemented. 
+ */ +static int set_unimplemented_msr_action(int vm_fd) +{ + struct hv_input_set_partition_property in = {0}; + struct mshv_root_hvcall args = {0}; + + in.property_code = HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION; + in.property_value = HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO; + + args.code = HVCALL_SET_PARTITION_PROPERTY; + args.in_sz = sizeof(in); + args.in_ptr = (uint64_t)∈ + + trace_mshv_hvcall_args("unimplemented_msr_action", args.code, args.in_sz); + + int ret = mshv_hvcall(vm_fd, &args); + if (ret < 0) { + error_report("Failed to set unimplemented MSR action"); + return -1; + } + return 0; +} + +int mshv_arch_post_init_vm(int vm_fd) +{ + int ret; + + ret = set_unimplemented_msr_action(vm_fd); + if (ret < 0) { + error_report("Failed to set unimplemented MSR action"); + } + + return ret; +} diff --git a/target/i386/mshv/x86.c b/target/i386/mshv/x86.c new file mode 100644 index 0000000..d574b3b --- /dev/null +++ b/target/i386/mshv/x86.c @@ -0,0 +1,297 @@ +/* + * QEMU MSHV support + * + * Copyright Microsoft, Corp. 2025 + * + * Authors: Magnus Kulke <magnuskulke@microsoft.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" + +#include "cpu.h" +#include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" +#include "qemu/typedefs.h" +#include "qemu/error-report.h" +#include "system/mshv.h" + +/* RW or Exec segment */ +static const uint8_t RWRX_SEGMENT_TYPE = 0x2; +static const uint8_t CODE_SEGMENT_TYPE = 0x8; +static const uint8_t EXPAND_DOWN_SEGMENT_TYPE = 0x4; + +typedef enum CpuMode { + REAL_MODE, + PROTECTED_MODE, + LONG_MODE, +} CpuMode; + +static CpuMode cpu_mode(CPUState *cpu) +{ + enum CpuMode m = REAL_MODE; + + if (x86_is_protected(cpu)) { + m = PROTECTED_MODE; + + if (x86_is_long_mode(cpu)) { + m = LONG_MODE; + } + } + + return m; +} + +static bool segment_type_ro(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + return (type_ & (~RWRX_SEGMENT_TYPE)) == 0; +} + +static bool segment_type_code(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + return (type_ & CODE_SEGMENT_TYPE) != 0; +} + +static bool segment_expands_down(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + + if (segment_type_code(seg)) { + return false; + } + + return (type_ & EXPAND_DOWN_SEGMENT_TYPE) != 0; +} + +static uint32_t segment_limit(const SegmentCache *seg) +{ + uint32_t limit = seg->limit; + uint32_t granularity = (seg->flags & DESC_G_MASK) != 0; + + if (granularity != 0) { + limit = (limit << 12) | 0xFFF; + } + + return limit; +} + +static uint8_t segment_db(const SegmentCache *seg) +{ + return (seg->flags >> DESC_B_SHIFT) & 1; +} + +static uint32_t segment_max_limit(const SegmentCache *seg) +{ + if (segment_db(seg) != 0) { + return 0xFFFFFFFF; + } + return 0xFFFF; +} + +static int linearize(CPUState *cpu, + target_ulong logical_addr, target_ulong *linear_addr, + X86Seg seg_idx) +{ + enum CpuMode mode; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + SegmentCache *seg = &env->segs[seg_idx]; + target_ulong base = seg->base; + target_ulong logical_addr_32b; + uint32_t limit; + /* TODO: the emulator will not pass us "write" indicator yet */ + bool write = false; + + mode = cpu_mode(cpu); + + switch (mode) { + case LONG_MODE: + if (__builtin_add_overflow(logical_addr, base, linear_addr)) { + error_report("Address overflow"); + return -1; + } + break; + case PROTECTED_MODE: + case REAL_MODE: + if (segment_type_ro(seg) && 
write) { + error_report("Cannot write to read-only segment"); + return -1; + } + + logical_addr_32b = logical_addr & 0xFFFFFFFF; + limit = segment_limit(seg); + + if (segment_expands_down(seg)) { + if (logical_addr_32b >= limit) { + error_report("Address exceeds limit (expands down)"); + return -1; + } + + limit = segment_max_limit(seg); + } + + if (logical_addr_32b > limit) { + error_report("Address exceeds limit %u", limit); + return -1; + } + *linear_addr = logical_addr_32b + base; + break; + default: + error_report("Unknown cpu mode: %d", mode); + return -1; + } + + return 0; +} + +bool x86_read_segment_descriptor(CPUState *cpu, + struct x86_segment_descriptor *desc, + x86_segment_selector sel) +{ + target_ulong base; + uint32_t limit; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + target_ulong gva; + + memset(desc, 0, sizeof(*desc)); + + /* valid gdt descriptors start from index 1 */ + if (!sel.index && GDT_SEL == sel.ti) { + return false; + } + + if (GDT_SEL == sel.ti) { + base = env->gdt.base; + limit = env->gdt.limit; + } else { + base = env->ldt.base; + limit = env->ldt.limit; + } + + if (sel.index * 8 >= limit) { + return false; + } + + gva = base + sel.index * 8; + emul_ops->read_mem(cpu, desc, gva, sizeof(*desc)); + + return true; +} + +bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc, + int gate) +{ + target_ulong base; + uint32_t limit; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + target_ulong gva; + + base = env->idt.base; + limit = env->idt.limit; + + memset(idt_desc, 0, sizeof(*idt_desc)); + if (gate * 8 >= limit) { + perror("call gate exceeds idt limit"); + return false; + } + + gva = base + gate * 8; + emul_ops->read_mem(cpu, idt_desc, gva, sizeof(*idt_desc)); + + return true; +} + +bool x86_is_protected(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr0 = env->cr[0]; + + return cr0 & CR0_PE_MASK; +} + +bool x86_is_real(CPUState *cpu) +{ + return !x86_is_protected(cpu); +} + +bool x86_is_v8086(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + return x86_is_protected(cpu) && (env->eflags & VM_MASK); +} + +bool x86_is_long_mode(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t efer = env->efer; + uint64_t lme_lma = (MSR_EFER_LME | MSR_EFER_LMA); + + return ((efer & lme_lma) == lme_lma); +} + +bool x86_is_long64_mode(CPUState *cpu) +{ + error_report("unimplemented: is_long64_mode()"); + abort(); +} + +bool x86_is_paging_mode(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr0 = env->cr[0]; + + return cr0 & CR0_PG_MASK; +} + +bool x86_is_pae_enabled(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr4 = env->cr[4]; + + return cr4 & CR4_PAE_MASK; +} + +target_ulong linear_addr(CPUState *cpu, target_ulong addr, X86Seg seg) +{ + int ret; + target_ulong linear_addr; + + ret = linearize(cpu, addr, &linear_addr, seg); + if (ret < 0) { + error_report("failed to linearize address"); + abort(); + } + + return linear_addr; +} + +target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size, + X86Seg seg) +{ + switch (size) { + case 2: + addr = (uint16_t)addr; + break; + case 4: + addr = (uint32_t)addr; + break; + default: + break; + } + return linear_addr(cpu, addr, seg); +} + +target_ulong linear_rip(CPUState *cpu, target_ulong rip) +{ + return linear_addr(cpu, 
rip, R_CS); +} diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 4e4e63d..dd5d542 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" #include "system/kvm_int.h" #include "qemu/main-loop.h" -#include "system/accel-ops.h" +#include "accel/accel-cpu-ops.h" #include "system/cpus.h" #include "qemu/guest-random.h" @@ -42,16 +42,14 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { + qemu_process_cpu_events(cpu); + if (cpu_can_run(cpu)) { r = nvmm_vcpu_exec(cpu); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); } } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait_bql(cpu->halt_cond); - } - qemu_wait_io_event_common(cpu); } while (!cpu->unplug || cpu_can_run(cpu)); nvmm_destroy_vcpu(cpu); @@ -77,16 +75,17 @@ static void nvmm_start_vcpu_thread(CPUState *cpu) */ static void nvmm_kick_vcpu_thread(CPUState *cpu) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); cpus_kick_thread(cpu); } -static void nvmm_accel_ops_class_init(ObjectClass *oc, void *data) +static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = nvmm_start_vcpu_thread; ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 04e5f7e..2e442ba 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -9,16 +9,20 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/address-spaces.h" -#include "exec/ioport.h" +#include "system/address-spaces.h" +#include "system/ioport.h" #include "qemu/accel.h" +#include "accel/accel-ops.h" #include "system/nvmm.h" #include "system/cpus.h" +#include "system/memory.h" #include "system/runstate.h" #include "qemu/main-loop.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/queue.h" +#include "accel/accel-cpu-target.h" +#include "host-cpu.h" #include "migration/blocker.h" #include "strings.h" @@ -30,7 +34,6 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; - bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -47,7 +50,7 @@ struct qemu_machine { /* -------------------------------------------------------------------------- */ -static bool nvmm_allowed; +bool nvmm_allowed; static struct qemu_machine qemu_mach; static struct nvmm_machine * @@ -411,22 +414,22 @@ nvmm_vcpu_pre_run(CPUState *cpu) * Force the VCPU out of its inner loop to process any INIT requests * or commit pending TPR access. 
*/ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } - if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { if (nvmm_can_take_nmi(cpu)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); event->type = NVMM_VCPU_EVENT_INTR; event->vector = 2; has_event = true; } } - if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { if (nvmm_can_take_int(cpu)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); event->type = NVMM_VCPU_EVENT_INTR; event->vector = cpu_get_pic_interrupt(env); has_event = true; @@ -434,8 +437,8 @@ nvmm_vcpu_pre_run(CPUState *cpu) } /* Don't want SMIs. */ - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); } if (sync_tpr) { @@ -508,16 +511,18 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static void nvmm_mem_callback(struct nvmm_mem *mem) { - cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); + /* TODO: Get CPUState via mem->vcpu? */ + address_space_rw(&address_space_memory, mem->gpa, MEMTXATTRS_UNSPECIFIED, + mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -649,9 +654,9 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, bql_lock(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (cpu_env(cpu)->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->exception_index = EXCP_HLT; cpu->halted = true; ret = 1; @@ -689,26 +694,26 @@ nvmm_vcpu_loop(CPUState *cpu) * Some asynchronous events must be handled outside of the inner * VCPU loop. They are handled here. 
*/ - if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT)) { nvmm_cpu_synchronize_state(cpu); do_cpu_init(x86_cpu); /* set int/nmi windows back to the reset state */ } - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); apic_poll_irq(x86_cpu->apic_state); } - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->halted = false; } - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SIPI)) { nvmm_cpu_synchronize_state(cpu); do_cpu_sipi(x86_cpu); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_TPR); nvmm_cpu_synchronize_state(cpu); apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, env->tpr_access_type); @@ -727,9 +732,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. */ do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (qcpu->stop) { @@ -741,7 +746,8 @@ nvmm_vcpu_loop(CPUState *cpu) nvmm_vcpu_pre_run(cpu); - if (qatomic_read(&cpu->exit_request)) { + /* Corresponding store-release is in cpu_exit. */ + if (qatomic_load_acquire(&cpu->exit_request)) { #if NVMM_USER_VERSION >= 2 nvmm_vcpu_stop(vcpu); #else @@ -749,8 +755,6 @@ nvmm_vcpu_loop(CPUState *cpu) #endif } - /* Read exit_request before the kernel reads the immediate exit flag */ - smp_rmb(); ret = nvmm_vcpu_run(mach, vcpu); if (ret == -1) { error_report("NVMM: Failed to exec a virtual processor," @@ -816,8 +820,6 @@ nvmm_vcpu_loop(CPUState *cpu) cpu_exec_end(cpu); bql_lock(); - qatomic_set(&cpu->exit_request, false); - return ret < 0; } @@ -827,32 +829,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -982,7 +984,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - qcpu->dirty = true; + qcpu->vcpu_dirty = true; cpu->accel = qcpu; return 0; @@ -1153,7 +1155,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = { /* -------------------------------------------------------------------------- */ static int -nvmm_accel_init(MachineState *ms) +nvmm_accel_init(AccelState *as, MachineState *ms) { int ret, err; @@ -1193,14 +1195,8 @@ nvmm_accel_init(MachineState *ms) return 0; } -int -nvmm_enabled(void) -{ - return nvmm_allowed; -} - static void -nvmm_accel_class_init(ObjectClass *oc, void *data) +nvmm_accel_class_init(ObjectClass *oc, 
const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "NVMM"; @@ -1214,10 +1210,33 @@ static const TypeInfo nvmm_accel_type = { .class_init = nvmm_accel_class_init, }; +static void nvmm_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = nvmm_cpu_instance_init; +} + +static const TypeInfo nvmm_cpu_accel_type = { + .name = ACCEL_CPU_NAME("nvmm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = nvmm_cpu_accel_class_init, + .abstract = true, +}; + static void nvmm_type_init(void) { type_register_static(&nvmm_accel_type); + type_register_static(&nvmm_cpu_accel_type); } type_init(nvmm_type_init); diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index f0aa189..a2e4d48 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, @@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), @@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status); @@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); for (i = 1; i < 2 << SHIFT; i++) { @@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); 
signed char prev_rounding_mode; int i; @@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c index d5bf886..7c5c02a 100644 --- a/target/i386/sev-system-stub.c +++ b/target/i386/sev-system-stub.c @@ -14,34 +14,9 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/error.h" #include "sev.h" -SevInfo *qmp_query_sev(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevCapability *qmp_query_sev_capabilities(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, - bool has_gpa, uint64_t gpa, Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); -} - int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); @@ -56,13 +31,6 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) g_assert_not_reached(); } -SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, - Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); diff --git a/target/i386/sev.c b/target/i386/sev.c index 0e1dbb6..1057b8a 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -26,6 +26,7 @@ #include "qemu/uuid.h" #include "qemu/error-report.h" #include "crypto/hash.h" +#include "exec/target_page.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" #include "sev.h" @@ -36,11 +37,13 @@ #include "qom/object.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "confidential-guest.h" #include "hw/i386/pc.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "hw/i386/e820_memory_layout.h" #include "qemu/queue.h" +#include "qemu/cutils.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -49,6 +52,15 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) /* hard code sha256 digest size */ #define HASH_SIZE 32 +/* Hard coded GPA that KVM uses for the VMSA */ +#define KVM_VMSA_GPA 0xFFFFFFFFF000 + +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */ +#define FLAGS_VMSA_TO_SEGCACHE(flags) \ + ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8)) +#define FLAGS_SEGCACHE_TO_VMSA(flags) \ + ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12)) + typedef struct QEMU_PACKED SevHashTableEntry { QemuUUID guid; uint16_t len; @@ -88,6 +100,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor { uint32_t size; } SevHashTableDescriptor; +typedef struct SevLaunchVmsa { + QTAILQ_ENTRY(SevLaunchVmsa) next; + + uint16_t cpu_index; + uint64_t gpa; + struct 
sev_es_save_area vmsa; +} SevLaunchVmsa; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -98,6 +118,8 @@ struct SevCommonState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + uint64_t sev_features; + uint64_t supported_sev_features; /* runtime state */ uint8_t api_major; @@ -106,9 +128,7 @@ struct SevCommonState { int sev_fd; SevState state; - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; + QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa; }; struct SevCommonStateClass { @@ -121,7 +141,8 @@ struct SevCommonStateClass { Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); - int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -211,14 +232,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF @@ -370,6 +383,288 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; +static void sev_apply_cpu_context(CPUState *cpu) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + X86CPU *x86; + CPUX86State *env; + struct SevLaunchVmsa *launch_vmsa; + + /* See if an initial VMSA has been provided for this CPU */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu->cpu_index == launch_vmsa->cpu_index) { + x86 = X86_CPU(cpu); + env = &x86->env; + + /* + * Ideally we would provide the VMSA directly to kvm which would + * ensure that the resulting initial VMSA measurement which is + * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from + * exactly what we provide here. Currently this is not possible so + * we need to copy the parts of the VMSA structure that we currently + * support into the CPU state. 
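+             *
+             * Any VMSA field that cannot be copied into CPUState this way
+             * must be zero in the supplied VMSA; check_vmsa_supported()
+             * below rejects anything else so that the launch measurement
+             * remains predictable.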
+             */
+            cpu_load_efer(env, launch_vmsa->vmsa.efer);
+            cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4);
+            cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0);
+            cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3);
+            env->xcr0 = launch_vmsa->vmsa.xcr0;
+            env->pat = launch_vmsa->vmsa.g_pat;
+
+            cpu_x86_load_seg_cache(
+                env, R_CS, launch_vmsa->vmsa.cs.selector,
+                launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_DS, launch_vmsa->vmsa.ds.selector,
+                launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_ES, launch_vmsa->vmsa.es.selector,
+                launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_FS, launch_vmsa->vmsa.fs.selector,
+                launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_GS, launch_vmsa->vmsa.gs.selector,
+                launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_SS, launch_vmsa->vmsa.ss.selector,
+                launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib));
+
+            env->gdt.base = launch_vmsa->vmsa.gdtr.base;
+            env->gdt.limit = launch_vmsa->vmsa.gdtr.limit;
+            env->gdt.flags =
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib);
+            env->idt.base = launch_vmsa->vmsa.idtr.base;
+            env->idt.limit = launch_vmsa->vmsa.idtr.limit;
+            env->idt.flags =
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib);
+
+            cpu_x86_load_seg_cache(
+                env, R_LDTR, launch_vmsa->vmsa.ldtr.selector,
+                launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_TR, launch_vmsa->vmsa.tr.selector,
+                launch_vmsa->vmsa.tr.base, launch_vmsa->vmsa.tr.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib));
+
+            env->dr[6] = launch_vmsa->vmsa.dr6;
+            env->dr[7] = launch_vmsa->vmsa.dr7;
+
+            env->regs[R_EAX] = launch_vmsa->vmsa.rax;
+            env->regs[R_ECX] = launch_vmsa->vmsa.rcx;
+            env->regs[R_EDX] = launch_vmsa->vmsa.rdx;
+            env->regs[R_EBX] = launch_vmsa->vmsa.rbx;
+            env->regs[R_ESP] = launch_vmsa->vmsa.rsp;
+            env->regs[R_EBP] = launch_vmsa->vmsa.rbp;
+            env->regs[R_ESI] = launch_vmsa->vmsa.rsi;
+            env->regs[R_EDI] = launch_vmsa->vmsa.rdi;
+#ifdef TARGET_X86_64
+            env->regs[R_R8] = launch_vmsa->vmsa.r8;
+            env->regs[R_R9] = launch_vmsa->vmsa.r9;
+            env->regs[R_R10] = launch_vmsa->vmsa.r10;
+            env->regs[R_R11] = launch_vmsa->vmsa.r11;
+            env->regs[R_R12] = launch_vmsa->vmsa.r12;
+            env->regs[R_R13] = launch_vmsa->vmsa.r13;
+            env->regs[R_R14] = launch_vmsa->vmsa.r14;
+            env->regs[R_R15] = launch_vmsa->vmsa.r15;
+#endif
+            env->eip = launch_vmsa->vmsa.rip;
+            env->eflags = launch_vmsa->vmsa.rflags;
+
+            cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw);
+            env->mxcsr = launch_vmsa->vmsa.mxcsr;
+
+            break;
+        }
+    }
+}
+
+static int check_sev_features(SevCommonState *sev_common, uint64_t sev_features,
+                              Error **errp)
+{
+    /*
+     * Ensure SEV_FEATURES is configured for the correct SEV hardware and that
+     * the requested features are supported. If SEV-SNP is enabled then
+     * that feature must be enabled, otherwise it must be cleared.
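+     * Any other requested bit must fall within the mask that KVM reports
+     * through KVM_X86_SEV_VMSA_FEATURES (see sev_init_supported_features()
+     * below).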
+ */ + if (sev_snp_enabled() && !(sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is enabled but is not enabled in VMSA sev_features", + __func__); + return -1; + } else if (!sev_snp_enabled() && + (sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is not enabled but is enabled in VMSA sev_features", + __func__); + return -1; + } + if (sev_features & ~sev_common->supported_sev_features) { + error_setg(errp, + "%s: VMSA contains unsupported sev_features: %lX, " + "supported features: %lX", + __func__, sev_features, sev_common->supported_sev_features); + return -1; + } + return 0; +} + +static int check_vmsa_supported(SevCommonState *sev_common, hwaddr gpa, + const struct sev_es_save_area *vmsa, + Error **errp) +{ + struct sev_es_save_area vmsa_check; + + /* + * KVM always populates the VMSA at a fixed GPA which cannot be modified + * from userspace. Specifying a different GPA will not prevent the guest + * from starting but will cause the launch measurement to be different + * from expected. Therefore check that the provided GPA matches the KVM + * hardcoded value. + */ + if (gpa != KVM_VMSA_GPA) { + error_setg(errp, + "%s: The VMSA GPA must be %lX but is specified as %lX", + __func__, KVM_VMSA_GPA, gpa); + return -1; + } + + /* + * Clear all supported fields so we can then check the entire structure + * is zero. + */ + memcpy(&vmsa_check, vmsa, sizeof(struct sev_es_save_area)); + memset(&vmsa_check.es, 0, sizeof(vmsa_check.es)); + memset(&vmsa_check.cs, 0, sizeof(vmsa_check.cs)); + memset(&vmsa_check.ss, 0, sizeof(vmsa_check.ss)); + memset(&vmsa_check.ds, 0, sizeof(vmsa_check.ds)); + memset(&vmsa_check.fs, 0, sizeof(vmsa_check.fs)); + memset(&vmsa_check.gs, 0, sizeof(vmsa_check.gs)); + memset(&vmsa_check.gdtr, 0, sizeof(vmsa_check.gdtr)); + memset(&vmsa_check.idtr, 0, sizeof(vmsa_check.idtr)); + memset(&vmsa_check.ldtr, 0, sizeof(vmsa_check.ldtr)); + memset(&vmsa_check.tr, 0, sizeof(vmsa_check.tr)); + vmsa_check.efer = 0; + vmsa_check.cr0 = 0; + vmsa_check.cr3 = 0; + vmsa_check.cr4 = 0; + vmsa_check.xcr0 = 0; + vmsa_check.dr6 = 0; + vmsa_check.dr7 = 0; + vmsa_check.rax = 0; + vmsa_check.rcx = 0; + vmsa_check.rdx = 0; + vmsa_check.rbx = 0; + vmsa_check.rsp = 0; + vmsa_check.rbp = 0; + vmsa_check.rsi = 0; + vmsa_check.rdi = 0; + vmsa_check.r8 = 0; + vmsa_check.r9 = 0; + vmsa_check.r10 = 0; + vmsa_check.r11 = 0; + vmsa_check.r12 = 0; + vmsa_check.r13 = 0; + vmsa_check.r14 = 0; + vmsa_check.r15 = 0; + vmsa_check.rip = 0; + vmsa_check.rflags = 0; + + vmsa_check.g_pat = 0; + vmsa_check.xcr0 = 0; + + vmsa_check.x87_fcw = 0; + vmsa_check.mxcsr = 0; + + if (check_sev_features(sev_common, vmsa_check.sev_features, errp) < 0) { + return -1; + } + vmsa_check.sev_features = 0; + + if (!buffer_is_zero(&vmsa_check, sizeof(vmsa_check))) { + error_setg(errp, + "%s: The VMSA contains fields that are not " + "synchronized with KVM. Continuing would result in " + "either unpredictable guest behavior, or a " + "mismatched launch measurement.", + __func__); + return -1; + } + return 0; +} + +static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx, + uint32_t ctx_len, hwaddr gpa, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevLaunchVmsa *launch_vmsa; + CPUState *cpu; + bool exists = false; + + /* + * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The + * context buffer will contain a sev_es_save_area from the Linux kernel + * which is defined by "Table B-4. 
VMSA Layout, State Save Area for SEV-ES" + * in the AMD64 APM, Volume 2. + */ + + if (!sev_es_enabled()) { + error_setg(errp, "SEV: unable to set CPU context: Not supported"); + return -1; + } + + if (ctx_len < sizeof(struct sev_es_save_area)) { + error_setg(errp, "SEV: unable to set CPU context: " + "Invalid context provided"); + return -1; + } + + cpu = qemu_get_cpu(cpu_index); + if (!cpu) { + error_setg(errp, "SEV: unable to set CPU context for out of bounds " + "CPU index %d", cpu_index); + return -1; + } + + /* + * If the context of this VP has already been set then replace it with the + * new context. + */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu_index == launch_vmsa->cpu_index) { + launch_vmsa->gpa = gpa; + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + exists = true; + break; + } + } + + if (!exists) { + /* New VP context */ + launch_vmsa = g_new0(SevLaunchVmsa, 1); + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + launch_vmsa->cpu_index = cpu_index; + launch_vmsa->gpa = gpa; + QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next); + } + + /* Synchronise the VMSA with the current CPU state */ + sev_apply_cpu_context(cpu); + + return 0; +} + bool sev_enabled(void) { @@ -946,7 +1241,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -977,9 +1272,8 @@ sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t return value; } -static int -sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *addr, size_t len) +static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len, Error **errp) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -994,8 +1288,8 @@ sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { - error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", - __func__, ret, fw_error, fw_error_to_str(fw_error)); + error_setg(errp, "%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", __func__, + ret, fw_error, fw_error_to_str(fw_error)); } return ret; @@ -1005,6 +1299,16 @@ static int sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; + CPUState *cpu; + + /* + * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA. + * Synchronise the CPU state to any provided launch VMSA structures. 
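+     * This has to happen before the ioctl below is issued, because it is
+     * the synchronized register state that KVM measures into each VMSA.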
+ */ + CPU_FOREACH(cpu) { + sev_apply_cpu_context(cpu); + } + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); @@ -1123,8 +1427,8 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } -static int -snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) +static int snp_launch_update_data(uint64_t gpa, void *hva, size_t len, + int type, Error **errp) { SevLaunchUpdateData *data; @@ -1139,23 +1443,21 @@ snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) return 0; } -static int -sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *ptr, size_t len) +static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp) { - int ret = snp_launch_update_data(gpa, ptr, len, - KVM_SEV_SNP_PAGE_TYPE_NORMAL); - return ret; + return snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL, errp); } static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, - const KvmCpuidInfo *kvm_cpuid_info) + const KvmCpuidInfo *kvm_cpuid_info, Error **errp) { size_t i; if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { - error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + error_setg(errp, "SEV-SNP: CPUID entry count (%d) exceeds max (%d)", kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); return -1; } @@ -1197,8 +1499,8 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, return 0; } -static int -snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) +static int snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, + size_t cpuid_len, Error **errp) { KvmCpuidInfo kvm_cpuid_info = {0}; SnpCpuidInfo snp_cpuid_info; @@ -1215,26 +1517,25 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) } while (ret == -E2BIG); if (ret) { - error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", - strerror(-ret)); - return 1; + error_setg(errp, "SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return -1; } - ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); - if (ret) { - error_report("SEV-SNP: failed to generate CPUID table information"); - return 1; + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info, errp); + if (ret < 0) { + return -1; } memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); return snp_launch_update_data(cpuid_addr, hva, cpuid_len, - KVM_SEV_SNP_PAGE_TYPE_CPUID); + KVM_SEV_SNP_PAGE_TYPE_CPUID, errp); } -static int -snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, - void *hva, uint32_t len) +static int snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, + uint32_t addr, void *hva, + uint32_t len, Error **errp) { int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; if (sev_snp->parent_obj.kernel_hashes) { @@ -1246,7 +1547,7 @@ snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, sizeof(*sev_snp->kernel_hashes_data)); type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; } - return snp_launch_update_data(addr, hva, len, type); + return snp_launch_update_data(addr, hva, len, type, errp); } static int @@ -1284,12 +1585,14 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, } if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { - ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + ret = snp_launch_update_cpuid(desc->base, hva, desc->len, + &error_fatal); } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ret = 
snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, - desc->len); + desc->len, &error_fatal); } else { - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + ret = snp_launch_update_data(desc->base, hva, desc->len, type, + &error_fatal); } if (ret) { @@ -1311,18 +1614,26 @@ sev_snp_launch_finish(SevCommonState *sev_common) struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; /* - * To boot the SNP guest, the hypervisor is required to populate the CPUID - * and Secrets page before finalizing the launch flow. The location of - * the secrets and CPUID page is available through the OVMF metadata GUID. + * Populate all the metadata pages if not using an IGVM file. In the case + * where an IGVM file is provided it will be used to configure the metadata + * pages directly. */ - metadata = pc_system_get_ovmf_sev_metadata_ptr(); - if (metadata == NULL) { - error_report("%s: Failed to locate SEV metadata header", __func__); - exit(1); - } + if (!X86_MACHINE(qdev_get_machine())->igvm) { + /* + * To boot the SNP guest, the hypervisor is required to populate the + * CPUID and Secrets page before finalizing the launch flow. The + * location of the secrets and CPUID page is available through the + * OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } - /* Populate all the metadata pages */ - snp_populate_metadata_pages(sev_snp, metadata); + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + } QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); @@ -1432,6 +1743,39 @@ static int sev_snp_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_SNP_VM; } +static int sev_init_supported_features(ConfidentialGuestSupport *cgs, + SevCommonState *sev_common, Error **errp) +{ + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + /* + * Older kernels do not support query or setting of sev_features. In this + * case the set of supported features must be zero to match the settings + * in the kernel. 
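+     * Newer kernels report the supported mask through the
+     * KVM_X86_SEV_VMSA_FEATURES device attribute, which is queried below.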
+ */ + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == + KVM_X86_DEFAULT_VM) { + sev_common->supported_sev_features = 0; + return 0; + } + + /* Query KVM for the supported set of sev_features */ + struct kvm_device_attr attr = { + .group = KVM_X86_GRP_SEV, + .attr = KVM_X86_SEV_VMSA_FEATURES, + .addr = (unsigned long)&sev_common->supported_sev_features, + }; + if (kvm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr) < 0) { + error_setg(errp, "%s: failed to query supported sev_features", + __func__); + return -1; + } + if (sev_snp_enabled()) { + sev_common->supported_sev_features |= SVM_SEV_FEAT_SNP_ACTIVE; + } + return 0; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -1512,6 +1856,10 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + if (sev_init_supported_features(cgs, sev_common, errp) < 0) { + return -1; + } + trace_kvm_sev_init(); switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { case KVM_X86_DEFAULT_VM: @@ -1523,6 +1871,40 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) case KVM_X86_SEV_ES_VM: case KVM_X86_SNP_VM: { struct kvm_sev_init args = { 0 }; + MachineState *machine = MACHINE(qdev_get_machine()); + X86MachineState *x86machine = X86_MACHINE(qdev_get_machine()); + + /* + * If configuration is provided via an IGVM file then the IGVM file + * might contain configuration of the initial vcpu context. For SEV + * the vcpu context includes the sev_features which should be applied + * to the vcpu. + * + * KVM does not synchronize sev_features from CPU state. Instead it + * requires sev_features to be provided as part of this initialization + * call which is subsequently automatically applied to the VMSA of + * each vcpu. + * + * The IGVM file is normally processed after initialization. Therefore + * we need to pre-process it here to extract sev_features in order to + * provide it to KVM_SEV_INIT2. Each cgs_* function that is called by + * the IGVM processor detects this pre-process by observing the state + * as SEV_STATE_UNINIT. + */ + if (x86machine->igvm) { + if (IGVM_CFG_GET_CLASS(x86machine->igvm) + ->process(x86machine->igvm, machine->cgs, true, errp) == + -1) { + return -1; + } + /* + * KVM maintains a bitmask of allowed sev_features. This does not + * include SVM_SEV_FEAT_SNP_ACTIVE which is set accordingly by KVM + * itself. Therefore we need to clear this flag. 
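+             * (KVM sets SVM_SEV_FEAT_SNP_ACTIVE itself for SNP guests, so
+             * dropping it here does not change the features that are
+             * ultimately applied to each vcpu's VMSA.)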
+ */ + args.vmsa_features = sev_common->sev_features & + ~SVM_SEV_FEAT_SNP_ACTIVE; + } ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); break; @@ -1622,9 +2004,8 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { int ret; - ret = klass->launch_update_data(sev_common, gpa, ptr, len); + ret = klass->launch_update_data(sev_common, gpa, ptr, len, errp); if (ret < 0) { - error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; } } @@ -1789,40 +2170,109 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size, return sev_es_parse_reset_block(info, addr); } -void sev_es_set_reset_vector(CPUState *cpu) + +static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg) { - X86CPU *x86; - CPUX86State *env; - ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - SevCommonState *sev_common = SEV_COMMON( - object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); + vmsa_seg->selector = cpu_seg->selector; + vmsa_seg->base = cpu_seg->base; + vmsa_seg->limit = cpu_seg->limit; + vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags); +} - /* Only update if we have valid reset information */ - if (!sev_common || !sev_common->reset_data_valid) { - return; - } +static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa) +{ + const X86CPU *x86 = X86_CPU(cpu); + const CPUX86State *env = &x86->env; - /* Do not update the BSP reset state */ - if (cpu->cpu_index == 0) { - return; + /* + * Initialize the SEV-ES save area from the current state of + * the CPU. The entire state does not need to be copied, only the state + * that is copied back to the CPUState in sev_apply_cpu_context. + */ + memset(vmsa, 0, sizeof(struct sev_es_save_area)); + vmsa->efer = env->efer; + vmsa->cr0 = env->cr[0]; + vmsa->cr3 = env->cr[3]; + vmsa->cr4 = env->cr[4]; + vmsa->xcr0 = env->xcr0; + vmsa->g_pat = env->pat; + + seg_to_vmsa(&env->segs[R_CS], &vmsa->cs); + seg_to_vmsa(&env->segs[R_DS], &vmsa->ds); + seg_to_vmsa(&env->segs[R_ES], &vmsa->es); + seg_to_vmsa(&env->segs[R_FS], &vmsa->fs); + seg_to_vmsa(&env->segs[R_GS], &vmsa->gs); + seg_to_vmsa(&env->segs[R_SS], &vmsa->ss); + + seg_to_vmsa(&env->gdt, &vmsa->gdtr); + seg_to_vmsa(&env->idt, &vmsa->idtr); + seg_to_vmsa(&env->ldt, &vmsa->ldtr); + seg_to_vmsa(&env->tr, &vmsa->tr); + + vmsa->dr6 = env->dr[6]; + vmsa->dr7 = env->dr[7]; + + vmsa->rax = env->regs[R_EAX]; + vmsa->rcx = env->regs[R_ECX]; + vmsa->rdx = env->regs[R_EDX]; + vmsa->rbx = env->regs[R_EBX]; + vmsa->rsp = env->regs[R_ESP]; + vmsa->rbp = env->regs[R_EBP]; + vmsa->rsi = env->regs[R_ESI]; + vmsa->rdi = env->regs[R_EDI]; + +#ifdef TARGET_X86_64 + vmsa->r8 = env->regs[R_R8]; + vmsa->r9 = env->regs[R_R9]; + vmsa->r10 = env->regs[R_R10]; + vmsa->r11 = env->regs[R_R11]; + vmsa->r12 = env->regs[R_R12]; + vmsa->r13 = env->regs[R_R13]; + vmsa->r14 = env->regs[R_R14]; + vmsa->r15 = env->regs[R_R15]; +#endif + + vmsa->rip = env->eip; + vmsa->rflags = env->eflags; +} + +static void sev_es_set_ap_context(uint32_t reset_addr) +{ + CPUState *cpu; + struct sev_es_save_area vmsa; + SegmentCache cs; + + cs.selector = 0xf000; + cs.base = reset_addr & 0xffff0000; + cs.limit = 0xffff; + cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | + DESC_A_MASK; + + CPU_FOREACH(cpu) { + if (cpu->cpu_index == 0) { + /* Do not update the BSP reset state */ + continue; + } + initialize_vmsa(cpu, &vmsa); + seg_to_vmsa(&cs, &vmsa.cs); + vmsa.rip = reset_addr & 0x0000ffff; + 
sev_set_cpu_context(cpu->cpu_index, &vmsa, + sizeof(struct sev_es_save_area), + 0, &error_fatal); } +} - x86 = X86_CPU(cpu); - env = &x86->env; - - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, - DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | - DESC_R_MASK | DESC_A_MASK); - - env->eip = sev_common->reset_ip; +void sev_es_set_reset_vector(CPUState *cpu) +{ + if (sev_enabled()) { + sev_apply_cpu_context(cpu); + } } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) { - CPUState *cpu; uint32_t addr; int ret; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1835,14 +2285,12 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) return ret; } + /* + * The reset vector is saved into a CPU context for each AP but not for + * the BSP. This is applied during guest startup or when the CPU is reset. + */ if (addr) { - sev_common->reset_cs = addr & 0xffff0000; - sev_common->reset_ip = addr & 0x0000ffff; - sev_common->reset_data_valid = true; - - CPU_FOREACH(cpu) { - sev_es_set_reset_vector(cpu); - } + sev_es_set_ap_context(addr); } return 0; @@ -2044,8 +2492,239 @@ static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) SEV_COMMON(obj)->kernel_hashes = value; } +static bool cgs_check_support(ConfidentialGuestPlatformType platform, + uint16_t platform_version, uint8_t highest_vtl, + uint64_t shared_gpa_boundary) +{ + return (((platform == CGS_PLATFORM_SEV_SNP) && sev_snp_enabled()) || + ((platform == CGS_PLATFORM_SEV_ES) && sev_es_enabled()) || + ((platform == CGS_PLATFORM_SEV) && sev_enabled())); +} + +static int cgs_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len, + ConfidentialGuestPageType memory_type, + uint16_t cpu_index, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + if ((cpu_index == 0) && (memory_type == CGS_PAGE_TYPE_VMSA)) { + const struct sev_es_save_area *sa = + (const struct sev_es_save_area *)ptr; + if (len < sizeof(*sa)) { + error_setg(errp, "%s: invalid VMSA length encountered", + __func__); + return -1; + } + if (check_sev_features(sev_common, sa->sev_features, errp) < 0) { + return -1; + } + sev_common->sev_features = sa->sev_features; + } + return 0; + } + + if (!sev_enabled()) { + error_setg(errp, "%s: attempt to configure guest memory, but SEV " + "is not enabled", __func__); + return -1; + } + + switch (memory_type) { + case CGS_PAGE_TYPE_NORMAL: + case CGS_PAGE_TYPE_ZERO: + return klass->launch_update_data(sev_common, gpa, ptr, len, errp); + + case CGS_PAGE_TYPE_VMSA: + if (!sev_es_enabled()) { + error_setg(errp, + "%s: attempt to configure initial VMSA, but SEV-ES " + "is not supported", + __func__); + return -1; + } + if (check_vmsa_supported(sev_common, gpa, + (const struct sev_es_save_area *)ptr, + errp) < 0) { + return -1; + } + return sev_set_cpu_context(cpu_index, ptr, len, gpa, errp); + + case CGS_PAGE_TYPE_UNMEASURED: + if (sev_snp_enabled()) { + return snp_launch_update_data( + gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_UNMEASURED, errp); + } + /* No action required if not SEV-SNP */ + return 0; + + case CGS_PAGE_TYPE_SECRETS: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure secrets page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return 
snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_SECRETS, errp); + + case CGS_PAGE_TYPE_REQUIRED_MEMORY: + if (kvm_convert_memory(gpa, len, true) < 0) { + error_setg( + errp, + "%s: failed to configure required memory. gpa: %lX, type: %d", + __func__, gpa, memory_type); + return -1; + } + return 0; + + case CGS_PAGE_TYPE_CPUID: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure CPUID page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return snp_launch_update_cpuid(gpa, ptr, len, errp); + } + error_setg(errp, "%s: failed to update guest. gpa: %lX, type: %d", __func__, + gpa, memory_type); + return -1; +} + +static int cgs_get_mem_map_entry(int index, + ConfidentialGuestMemoryMapEntry *entry, + Error **errp) +{ + struct e820_entry *table; + int num_entries; + + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 1; + } + + num_entries = e820_get_table(&table); + if ((index < 0) || (index >= num_entries)) { + return 1; + } + entry->gpa = table[index].address; + entry->size = table[index].length; + switch (table[index].type) { + case E820_RAM: + entry->type = CGS_MEM_RAM; + break; + case E820_RESERVED: + entry->type = CGS_MEM_RESERVED; + break; + case E820_ACPI: + entry->type = CGS_MEM_ACPI; + break; + case E820_NVS: + entry->type = CGS_MEM_NVS; + break; + case E820_UNUSABLE: + entry->type = CGS_MEM_UNUSABLE; + break; + } + return 0; +} + +static int cgs_set_guest_policy(ConfidentialGuestPolicyType policy_type, + uint64_t policy, void *policy_data1, + uint32_t policy_data1_size, void *policy_data2, + uint32_t policy_data2_size, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 0; + } + + if (policy_type != GUEST_POLICY_SEV) { + error_setg(errp, "%s: Invalid guest policy type provided for SEV: %d", + __func__, policy_type); + return -1; + } + /* + * SEV-SNP handles policy differently. The policy flags are defined in + * kvm_start_conf.policy and an ID block and ID auth can be provided. + */ + if (sev_snp_enabled()) { + SevSnpGuestState *sev_snp_guest = + SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs); + struct kvm_sev_snp_launch_finish *finish = + &sev_snp_guest->kvm_finish_conf; + + /* + * The policy consists of flags in 'policy' and optionally an ID block + * and ID auth in policy_data1 and policy_data2 respectively. The ID + * block and auth are optional so clear any previous ID block and auth + * and set them if provided, but always set the policy flags. 
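+         * Both blocks have fixed sizes in the SNP firmware ABI, which is
+         * why the length checks below are exact comparisons rather than
+         * minimum sizes.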
+         */
+        g_free(sev_snp_guest->id_block);
+        g_free((guchar *)finish->id_block_uaddr);
+        g_free(sev_snp_guest->id_auth);
+        g_free((guchar *)finish->id_auth_uaddr);
+        sev_snp_guest->id_block = NULL;
+        finish->id_block_uaddr = 0;
+        sev_snp_guest->id_auth = NULL;
+        finish->id_auth_uaddr = 0;
+
+        if (policy_data1_size > 0) {
+            struct sev_snp_id_authentication *id_auth =
+                (struct sev_snp_id_authentication *)policy_data2;
+
+            if (policy_data1_size != KVM_SEV_SNP_ID_BLOCK_SIZE) {
+                error_setg(errp, "%s: Invalid SEV-SNP ID block: incorrect size",
+                           __func__);
+                return -1;
+            }
+            if (policy_data2_size != KVM_SEV_SNP_ID_AUTH_SIZE) {
+                error_setg(errp,
+                           "%s: Invalid SEV-SNP ID auth block: incorrect size",
+                           __func__);
+                return -1;
+            }
+            assert(policy_data1 != NULL);
+            assert(policy_data2 != NULL);
+
+            finish->id_block_uaddr =
+                (__u64)g_memdup2(policy_data1, KVM_SEV_SNP_ID_BLOCK_SIZE);
+            finish->id_auth_uaddr =
+                (__u64)g_memdup2(policy_data2, KVM_SEV_SNP_ID_AUTH_SIZE);
+
+            /*
+             * Check if an author key has been provided and use that to flag
+             * whether the author key is enabled. The first byte of the author
+             * key must be non-zero to indicate the key type, which will
+             * currently always be 2.
+             */
+            sev_snp_guest->kvm_finish_conf.auth_key_en =
+                id_auth->author_key[0] ? 1 : 0;
+            finish->id_block_en = 1;
+        }
+        sev_snp_guest->kvm_start_conf.policy = policy;
+    } else {
+        SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+        /* Only the policy flags are supported for SEV and SEV-ES */
+        if ((policy_data1_size > 0) || (policy_data2_size > 0) || !sev_guest) {
+            error_setg(errp, "%s: An ID block/ID auth block has been provided "
+                             "but SEV-SNP is not enabled", __func__);
+            return -1;
+        }
+        sev_guest->policy = policy;
+    }
+    return 0;
+}
+
 static void
-sev_common_class_init(ObjectClass *oc, void *data)
+sev_common_class_init(ObjectClass *oc, const void *data)
 {
     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
@@ -2067,6 +2746,8 @@
 static void
 sev_common_instance_init(Object *obj)
 {
     SevCommonState *sev_common = SEV_COMMON(obj);
+    ConfidentialGuestSupportClass *cgs =
+        CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(obj);
 
     sev_common->kvm_type = -1;
@@ -2077,6 +2758,12 @@ sev_common_instance_init(Object *obj)
     object_property_add_uint32_ptr(obj, "reduced-phys-bits",
                                    &sev_common->reduced_phys_bits,
                                    OBJ_PROP_FLAG_READWRITE);
+    cgs->check_support = cgs_check_support;
+    cgs->set_guest_state = cgs_set_guest_state;
+    cgs->get_mem_map_entry = cgs_get_mem_map_entry;
+    cgs->set_guest_policy = cgs_set_guest_policy;
+
+    QTAILQ_INIT(&sev_common->launch_vmsa);
 }
 
 /* sev guest info common to sev/sev-es/sev-snp */
@@ -2088,7 +2775,7 @@ static const TypeInfo sev_common_info = {
     .class_size = sizeof(SevCommonStateClass),
     .class_init = sev_common_class_init,
     .abstract = true,
-    .interfaces = (InterfaceInfo[]) {
+    .interfaces = (const InterfaceInfo[]) {
         { TYPE_USER_CREATABLE },
         { }
     }
@@ -2140,7 +2827,7 @@ static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
 }
 
 static void
-sev_guest_class_init(ObjectClass *oc, void *data)
+sev_guest_class_init(ObjectClass *oc, const void *data)
 {
     SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
@@ -2394,7 +3081,7 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
 }
 
 static void
-sev_snp_guest_class_init(ObjectClass *oc, void *data)
+sev_snp_guest_class_init(ObjectClass *oc, const void *data)
 {
     SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); @@ -2404,7 +3091,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", diff --git a/target/i386/sev.h b/target/i386/sev.h index 373669e..9db1a80 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -44,6 +44,8 @@ bool sev_snp_enabled(void); #define SEV_SNP_POLICY_SMT 0x10000 #define SEV_SNP_POLICY_DBG 0x80000 +#define SVM_SEV_FEAT_SNP_ACTIVE 1 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; @@ -55,6 +57,128 @@ typedef struct SevKernelLoaderContext { size_t cmdline_size; } SevKernelLoaderContext; +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct QEMU_PACKED sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + uint64_t vmpl0_ssp; + uint64_t vmpl1_ssp; + uint64_t vmpl2_ssp; + uint64_t vmpl3_ssp; + uint64_t u_cet; + uint8_t reserved_0xc8[2]; + uint8_t vmpl; + uint8_t cpl; + uint8_t reserved_0xcc[4]; + uint64_t efer; + uint8_t reserved_0xd8[104]; + uint64_t xss; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + uint64_t rflags; + uint64_t rip; + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr0_addr_mask; + uint64_t dr1_addr_mask; + uint64_t dr2_addr_mask; + uint64_t dr3_addr_mask; + uint8_t reserved_0x1c0[24]; + uint64_t rsp; + uint64_t s_cet; + uint64_t ssp; + uint64_t isst_addr; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved_0x248[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t last_excp_from; + uint64_t last_excp_to; + uint8_t reserved_0x298[80]; + uint32_t pkru; + uint32_t tsc_aux; + uint8_t reserved_0x2f0[24]; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t reserved_0x320; /* rsp already available at 0x01d8 */ + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint8_t reserved_0x380[16]; + uint64_t guest_exit_info_1; + uint64_t guest_exit_info_2; + uint64_t guest_exit_int_info; + uint64_t guest_nrip; + uint64_t sev_features; + uint64_t vintr_ctrl; + uint64_t guest_exit_code; + uint64_t virtual_tom; + uint64_t tlb_id; + uint64_t pcpu_id; + uint64_t event_inj; + uint64_t xcr0; + uint8_t reserved_0x3f0[16]; + + /* Floating point area */ + uint64_t x87_dp; + uint32_t mxcsr; + uint16_t x87_ftw; + uint16_t x87_fsw; + uint16_t x87_fcw; + uint16_t x87_fop; + uint16_t x87_ds; + uint16_t x87_cs; + uint64_t x87_rip; + uint8_t fpreg_x87[80]; + uint8_t fpreg_xmm[256]; + uint8_t fpreg_ymm[256]; +}; + +struct QEMU_PACKED sev_snp_id_authentication { + uint32_t id_key_alg; + uint32_t auth_key_algo; + uint8_t reserved[56]; + uint8_t id_block_sig[512]; + uint8_t id_key[1028]; + uint8_t reserved2[60]; + uint8_t id_key_sig[512]; + 
uint8_t author_key[1028]; + uint8_t reserved3[892]; +}; + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c index e68b73a..97e3f0e 100644 --- a/target/i386/tcg/access.c +++ b/target/i386/tcg/access.c @@ -3,8 +3,9 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" #include "access.h" diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index 9aff16b..d8fd976 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -44,18 +44,32 @@ /* dynamic flags computation */ -static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries) { - uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2 = dst - src1; + uint32_t af_cf, pf, zf, sf, of; - cf = dst < src1; + /* PF, ZF, SF computed from result. */ pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + + /* + * AF, CF, OF computed from carry out vector. To compute AF and CF, rotate it + * left by one so cout(DATA_BITS - 1) is in bit 0 and cout(3) in bit 4. + * + * To compute OF, place the highest two carry bits into OF and the bit + * immediately to the right of it; then, adding CC_O / 2 XORs them. + */ + af_cf = ((carries << 1) | (carries >> (DATA_BITS - 1))) & (CC_A | CC_C); + of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O; + return pf + zf + sf + af_cf + of; +} + +static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +{ + DATA_TYPE src2 = dst - src1; + DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -66,25 +80,9 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src3) { - uint32_t cf, pf, af, zf, sf, of; - -#ifdef WIDER_TYPE - WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3; - DATA_TYPE src2 = dst - src13; - - cf = dst < src13; -#else DATA_TYPE src2 = dst - src1 - src3; - - cf = (src3 ? 
dst <= src1 : dst < src1); -#endif - - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & 0x10; - zf = (dst == 0) << 6; - sf = lshift(dst, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, @@ -101,16 +99,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { - uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src1 = dst + src2; - - cf = src1 < src2; - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; - zf = (dst == 0) * CC_Z; - sf = lshift(dst, 8 - DATA_BITS) & CC_S; - of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) @@ -123,25 +114,9 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src3) { - uint32_t cf, pf, af, zf, sf, of; - -#ifdef WIDER_TYPE - WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3; - DATA_TYPE src1 = dst + src23; - - cf = src1 < src23; -#else DATA_TYPE src1 = dst + src2 + src3; - - cf = (src3 ? src1 <= src2 : src1 < src2); -#endif - - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & 0x10; - zf = (dst == 0) << 6; - sf = lshift(dst, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, @@ -175,13 +150,10 @@ static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2; cf = src1; - src1 = dst - 1; - src2 = 1; pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; + af = (dst ^ (dst - 1)) & CC_A; /* bits 0..3 are all clear */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK) * CC_O; @@ -191,13 +163,10 @@ static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2; cf = src1; - src1 = dst + 1; - src2 = 1; pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; + af = (dst ^ (dst + 1)) & CC_A; /* bits 0..3 are all set */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK - 1) * CC_O; @@ -292,6 +261,5 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) #undef DATA_BITS #undef SIGN_MASK #undef DATA_TYPE -#undef DATA_MASK #undef SUFFIX #undef WIDER_TYPE diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index cda32ee..a50f57d 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -878,10 +878,10 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 
cpuid(SSSE3) mmx avx2_256 p_00_66), - [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), + [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66), [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), - [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66), [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), /* Listed incorrectly as type 4 */ @@ -1541,7 +1541,7 @@ static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p), [0x0c] = X86_OP_ENTRYr(JMP_m, E,f64, zextT0), [0x0d] = X86_OP_ENTRYr(JMPF_m, M,p), - [0x0e] = X86_OP_ENTRYr(PUSH, E,f64), + [0x0e] = X86_OP_ENTRYr(PUSH, E,d64), }; int w = (*b & 1); @@ -2542,7 +2542,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) s->has_modrm = false; s->prefix = 0; - next_byte: + next_byte:; +#ifdef TARGET_X86_64 + /* clear any REX prefix followed by other prefixes. */ + int rex; + rex = -1; + next_byte_rex: +#endif b = x86_ldub_code(env, s); /* Collect prefixes. */ @@ -2585,13 +2591,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu) #ifdef TARGET_X86_64 case 0x40 ... 0x4f: if (CODE64(s)) { - /* REX prefix */ - s->prefix |= PREFIX_REX; - s->vex_w = (b >> 3) & 1; - s->rex_r = (b & 0x4) << 1; - s->rex_x = (b & 0x2) << 2; - s->rex_b = (b & 0x1) << 3; - goto next_byte; + /* + * REX prefix; ignored unless it is the last prefix, so + * for now just stash it + */ + rex = b; + goto next_byte_rex; } break; #endif @@ -2618,10 +2623,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ - | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { + | PREFIX_LOCK | PREFIX_DATA)) { goto illegal_op; } #ifdef TARGET_X86_64 + if (rex != -1) { + goto illegal_op; + } s->rex_r = (~vex2 >> 4) & 8; #endif if (b == 0xc5) { @@ -2661,6 +2669,16 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* Post-process prefixes. */ if (CODE64(s)) { +#ifdef TARGET_X86_64 + if (rex != -1) { + s->prefix |= PREFIX_REX; + s->vex_w = (rex >> 3) & 1; + s->rex_r = (rex & 0x4) << 1; + s->rex_x = (rex & 0x2) << 2; + s->rex_b = (rex & 0x1) << 3; + } +#endif + /* * In 64-bit mode, the default data size is 32-bit. Select 64-bit * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence @@ -2704,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.e.check & X86_CHECK_i64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) { goto illegal_op; } } else { if (decode.e.check & X86_CHECK_o64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) { goto illegal_op; } } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664..1a7fab93 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. 
- */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv break; case X86_OP_SEG: /* Note that gen_movl_seg takes care of interrupt shadow and TF. */ - gen_movl_seg(s, op->n, s->T0); + gen_movl_seg(s, op->n, v, op->n == R_SS); break; case X86_OP_INT: if (op->has_ea) { @@ -1170,11 +1160,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) assume_cc_op(s, CC_OP_EFLAGS); } +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode); static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_ADC by transforming as follows: + * CC_ADC: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_ADD: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_ADD(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, c_in, s->T1); @@ -1693,22 +1700,22 @@ static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode) switch (jcc_op) { case JCC_O: /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */ + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); - tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); - tcg_gen_and_tl(s->tmp0, s->tmp0, newv); - tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + tcg_gen_xor_tl(cmp_lhs, s->cc_srcT, cmpv); + tcg_gen_and_tl(cmp_lhs, cmp_lhs, newv); + tcg_gen_sextract_tl(cmp_lhs, cmp_lhs, 0, 8 << ot); break; case JCC_P: - tcg_gen_ext8u_tl(s->tmp0, s->T0); - tcg_gen_ctpop_tl(s->tmp0, s->tmp0); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(1); + tcg_gen_ext8u_tl(cmp_lhs, s->T0); + tcg_gen_ctpop_tl(cmp_lhs, cmp_lhs); break; case JCC_S: - tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); + tcg_gen_sextract_tl(cmp_lhs, s->T0, 0, 8 << ot); break; default: @@ -1796,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode) static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode) { #ifdef TARGET_X86_64 - MemOp mop = MO_TE | MO_128 | MO_ALIGN; + MemOp mop = MO_LE | MO_128 | MO_ALIGN; TCGv_i64 t0, t1; TCGv_i128 cmp, val; @@ -1853,13 +1860,13 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode) /* Only require atomic with LOCK; non-parallel handled in generator. */ if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ); } else { tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, - s->mem_index, MO_TEUQ); + s->mem_index, MO_LEUQ); } - /* Set tmp0 to match the required value of Z. */ + /* Compute the required value of Z. 
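+      * (computed as a 64-bit setcond on the compared values, then
+      * truncated into the target-long Z below)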
*/ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); Z = tcg_temp_new(); tcg_gen_trunc_i64_tl(Z, cmp); @@ -1899,9 +1906,10 @@ static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode) static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_crc32(s->T0, tmp, s->T1, tcg_constant_i32(8 << ot)); } static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode) @@ -2359,8 +2367,10 @@ static void gen_LAR(DisasContext *s, X86DecodedInsn *decode) static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_ldmxcsr(tcg_env, tmp); } static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) @@ -2372,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) gen_op_ld_v(s, MO_16, s->T1, s->A0); /* load the segment here to handle exceptions properly */ - gen_movl_seg(s, seg, s->T1); + gen_movl_seg(s, seg, s->T1, false); } static void gen_LDS(DisasContext *s, X86DecodedInsn *decode) @@ -2573,11 +2583,13 @@ static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode) static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode) { typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; + TCGv_i32 tmp = tcg_temp_new_i32(); + ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; fn = s->prefix & PREFIX_DATA ? pd : ps; - fn(s->tmp2_i32, tcg_env, OP_PTR2); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + fn(tmp, tcg_env, OP_PTR2); + tcg_gen_extu_i32_tl(s->T0, tmp); } static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode) @@ -2674,13 +2686,17 @@ static void gen_MULX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); - tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); - break; + { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, s->T0); + tcg_gen_trunc_tl_i32(t1, s->T1); + tcg_gen_mulu2_i32(t0, t1, t0, t1); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], t0); + tcg_gen_extu_i32_tl(s->T0, t1); + break; + } case MO_64: #endif @@ -2997,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. 
* TCG does a similar expansion of deposit to shl+extract2; by @@ -3724,10 +3740,14 @@ static void gen_RORX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - break; + { + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + tcg_gen_rotri_i32(tmp, tmp, b); + tcg_gen_extu_i32_tl(s->T0, tmp); + break; + } case MO_64: #endif @@ -3830,22 +3850,64 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SUB(DisasContext *s, X86DecodedInsn *decode); static void gen_SBB(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_SBB by transforming as follows: + * CC_SBB: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_SUB: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_SUB(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); + + /* + * Here the change is as follows: + * CC_SBB: src1 = T0, src2 = T0, src3 = c_in + * CC_SUB: src1 = 0, src2 = c_in (no src3) + * + * The difference also does not matter: + * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases + * therefore AF comes straight from dst (in fact it is c_in) + * - for OF, src1 and src2 have the same sign in both cases, meaning there + * can be no overflow + */ + if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) { + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_neg_tl(s->T0, c_in); + } else { + /* + * Do not negate c_in because it will often be dead and only the + * instruction generated by negsetcond will survive. + */ + gen_neg_setcc(s, JCC_B << 1, s->T0); + } + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_src = c_in; + decode->cc_dst = s->T0; + decode->cc_op = CC_OP_SUBB + ot; + return; + } + if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, s->T1, c_in); tcg_gen_neg_tl(s->T0, s->T0); tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - /* - * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by - * negsetcond, and CC_OP_SUBB as the cc_op. 
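The new fast path in gen_SBB leans on SBB r, r computing r - r - CF = -CF, i.e. either 0 or all-ones, which is exactly what gen_neg_setcc of the carry condition produces. Minimal check (demo code):

#include <assert.h>
#include <stdint.h>

static uint32_t sbb_same_reg(uint32_t x, unsigned cf)
{
    return x - x - cf;                      /* value SBB r, r leaves in r */
}

int main(void)
{
    assert(sbb_same_reg(0x12345678u, 0) == 0);
    assert(sbb_same_reg(0x12345678u, 1) == 0xFFFFFFFFu);   /* -CF */
    return 0;
}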
- */ tcg_gen_sub_tl(s->T0, s->T0, s->T1); tcg_gen_sub_tl(s->T0, s->T0, c_in); } @@ -3956,8 +4018,7 @@ static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, false, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, false, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot); } else { @@ -4009,8 +4070,7 @@ static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, true, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, true, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); } else { @@ -4277,7 +4337,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode) } return; } - in = s->tmp2_i32; + in = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(in, s->T1); #else in = s->T1; @@ -4307,7 +4367,7 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode, return; } - out = s->tmp2_i32; + out = tcg_temp_new_i32(); #else out = s->T0; #endif @@ -4359,7 +4419,7 @@ static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode) gen_pextr(s, decode, MO_32); } -static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) +static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode, TCGv_i32 tmp) { int val = decode->immediate; int dest_word = (val >> 4) & 3; @@ -4376,7 +4436,7 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) } if (new_mask != (val & 15)) { - tcg_gen_st_i32(s->tmp2_i32, tcg_env, + tcg_gen_st_i32(tmp, tcg_env, vector_elem_offset(&decode->op[0], MO_32, dest_word)); } @@ -4395,15 +4455,19 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode) { int val = decode->immediate; - tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, tcg_env, vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3)); - gen_vinsertps(s, decode); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_vinsertps(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode) @@ -4524,25 +4588,29 @@ static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode) static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void 
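gen_vinsertps and its _r/_m wrappers split the INSERTPS immediate three ways; the field layout, checked standalone (demo code):

#include <assert.h>

static void insertps_imm_decode(int val, int *src_word, int *dest_word,
                                int *zmask)
{
    *src_word  = (val >> 6) & 3;   /* source dword, register form only */
    *dest_word = (val >> 4) & 3;   /* destination dword */
    *zmask     = val & 15;         /* dwords to clear afterwards */
}

int main(void)
{
    int s, d, z;

    insertps_imm_decode(0x9c, &s, &d, &z);   /* binary 10 01 1100 */
    assert(s == 2 && d == 1 && z == 0xc);
    return 0;
}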
gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_qemu_st_i32(tmp, s->A0, s->mem_index, MO_LEUL); } static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode) diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index de71e68..6fb8036 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/log.h" #include "system/runstate.h" #include "exec/helper-proto.h" diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index c1184ca..b3b2382 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "tcg-cpu.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" @@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env) set_float_default_nan_pattern(0b11000000, &env->mmx_status); set_float_default_nan_pattern(0b11000000, &env->sse_status); /* - * TODO: x86 does flush-to-zero detection after rounding (the SDM + * x86 does flush-to-zero detection after rounding (the SDM * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush * when we detect underflow, which x86 does after rounding). */ - set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status); } -static inline uint8_t save_exception_flags(CPUX86State *env) +static inline int save_exception_flags(CPUX86State *env) { - uint8_t old_flags = get_float_exception_flags(&env->fp_status); + int old_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); return old_flags; } -static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +static void merge_exception_flags(CPUX86State *env, int old_flags) { - uint8_t new_flags = get_float_exception_flags(&env->fp_status); + int new_flags = get_float_exception_flags(&env->fp_status); float_raise(old_flags, &env->fp_status); fpu_set_exception(env, ((new_flags & float_flag_invalid ? FPUS_IE : 0) | @@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) (new_flags & float_flag_overflow ? FPUS_OE : 0) | (new_flags & float_flag_underflow ? FPUS_UE : 0) | (new_flags & float_flag_inexact ? FPUS_PE : 0) | - (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); + (new_flags & float_flag_input_denormal_used ? 
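The flush-to-zero fix above is observable on real hardware: a product whose exact value lies just below FLT_MIN but rounds back up to it must not be flushed, because x86 detects tininess after rounding. A sketch, assuming an x86 host with SSE and the default round-to-nearest-even mode (demo code):

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <xmmintrin.h>

int main(void)
{
    _mm_setcsr(_mm_getcsr() | 0x8000);           /* set MXCSR.FTZ */

    volatile float a = FLT_MIN;
    volatile float b = nextafterf(1.0f, 0.0f);   /* 1 - 2^-24 */
    volatile float r = a * b;                    /* exact result < FLT_MIN */

    /* Rounds back up to FLT_MIN, so no underflow and no flush; with
     * before-rounding detection r would read as +0 instead. */
    printf("r = %a, FLT_MIN = %a\n", r, (double)FLT_MIN);
    return 0;
}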
FPUS_DE : 0))); } static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); floatx80 ret = floatx80_div(a, b, &env->fp_status); merge_exception_flags(env, old_flags); return ret; @@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) void helper_fldl_FT0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) void helper_fldl_ST0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env) uint64_t helper_fstl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env) int32_t helper_fist_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env) int32_t helper_fistl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env) int64_t helper_fistll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64(ST0, &env->fp_status); @@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env) int32_t helper_fistt_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env) int32_t helper_fisttl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env) int64_t helper_fisttll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); @@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { 
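The uint8_t-to-int conversions running through this file matter because the softfloat exception flags no longer fit in a byte; float_flag_input_denormal_used is one of the newer high bits. Illustration with made-up bit positions (hypothetical values, the real ones live in the softfloat headers):

#include <assert.h>

enum {
    demo_flag_inexact             = 1 << 5,
    demo_flag_input_denormal_used = 1 << 9,   /* would not fit in uint8_t */
};

int main(void)
{
    int flags = demo_flag_inexact | demo_flag_input_denormal_used;
    unsigned char truncated = (unsigned char)flags;

    assert((truncated & demo_flag_input_denormal_used) == 0);   /* lost */
    assert((flags & demo_flag_input_denormal_used) != 0);       /* kept */
    return 0;
}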
- uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); @@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env) void helper_fucom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); @@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) void helper_fucomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) void helper_fadd_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); merge_exception_flags(env, old_flags); } @@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); merge_exception_flags(env, old_flags); } @@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; @@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = { void helper_f2xm1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t sig = extractFloatx80Frac(ST0); int32_t 
exp = extractFloatx80Exp(ST0); bool sign = extractFloatx80Sign(ST0); @@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = { void helper_fpatan(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env) void helper_fxtract(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; @@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env) static void helper_fprem_common(CPUX86State *env, bool mod) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t quotient; CPU_LDoubleU temp0, temp1; int exp0, exp1, expdiff; @@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, void helper_fyl2xp1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env) void helper_fyl2x(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env) void helper_fsqrt(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; @@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_invalid_encoding(ST1, &env->fp_status) || floatx80_invalid_encoding(ST0, &env->fp_status)) { float_raise(float_flag_invalid, &env->fp_status); @@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env) } else { int n; FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; - uint8_t save_flags = get_float_exception_flags(&env->fp_status); + int save_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); set_float_exception_flags(save_flags, &env->fp_status); @@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env) /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) | (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | (mxcsr & FPUS_OE ? float_flag_overflow : 0) | (mxcsr & FPUS_UE ? 
float_flag_underflow : 0) | @@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env) void update_mxcsr_from_sse_status(CPUX86State *env) { - uint8_t flags = get_float_exception_flags(&env->sse_status); - /* - * The MXCSR denormal flag has opposite semantics to - * float_flag_input_denormal_flushed (the softfloat code sets that flag - * only when flushing input denormals to zero, but SSE sets it - * only when not flushing them to zero), so is not converted - * here. - */ + int flags = get_float_exception_flags(&env->sse_status); env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_input_denormal_used ? FPUS_DE : 0) | (flags & float_flag_divbyzero ? FPUS_ZE : 0) | (flags & float_flag_overflow ? FPUS_OE : 0) | (flags & float_flag_underflow ? FPUS_UE : 0) | diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 54d8453..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -20,7 +20,6 @@ #ifndef I386_HELPER_TCG_H #define I386_HELPER_TCG_H -#include "exec/exec-all.h" #include "qemu/host-utils.h" /* Maximum instruction code size */ @@ -98,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -116,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 1a02e9d..46741d9 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "qapi/error.h" diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c index 3ef84e9..9e7c2d8 100644 --- a/target/i386/tcg/mem_helper.c +++ b/target/i386/tcg/mem_helper.c @@ -20,8 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "qemu/int128.h" #include "qemu/atomic128.h" #include "tcg/tcg.h" diff --git a/target/i386/tcg/mpx_helper.c b/target/i386/tcg/mpx_helper.c index 22423eed..fa8abcc 100644 --- a/target/i386/tcg/mpx_helper.c +++ b/target/i386/tcg/mpx_helper.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "exec/target_page.h" #include "helper-tcg.h" diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 7196211..f49fe85 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -22,12 +22,13 @@ #include "cpu.h" #include "qemu/log.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/log.h" #include "helper-tcg.h" #include "seg_helper.h" #include "access.h" +#include "tcg-cpu.h" #ifdef TARGET_X86_64 #define SET_ESP(val, sp_mask) \ @@ -128,6 +129,22 @@ int get_pg_mode(CPUX86State *env) return pg_mode; } +static int 
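With the DE conversion added in both directions, the mapping between the low MXCSR status bits and the softfloat flags is now symmetric. For reference (bit values per the Intel SDM; demo declarations only):

enum {
    MXCSR_IE = 1 << 0,   /* invalid operation <-> float_flag_invalid */
    MXCSR_DE = 1 << 1,   /* denormal          <-> float_flag_input_denormal_used */
    MXCSR_ZE = 1 << 2,   /* divide by zero    <-> float_flag_divbyzero */
    MXCSR_OE = 1 << 3,   /* overflow          <-> float_flag_overflow */
    MXCSR_UE = 1 << 4,   /* underflow         <-> float_flag_underflow */
    MXCSR_PE = 1 << 5,   /* precision         <-> float_flag_inexact */
};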
x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; + int mmu_index_base = + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (pl < 3 && (env->eflags & AC_MASK) + ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX); + + return mmu_index_base + mmu_index_32; +} + +int cpu_mmu_index_kernel(CPUX86State *env) +{ + return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK); +} + /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, uint32_t *e2_ptr, int selector, @@ -309,10 +326,10 @@ static void tss_set_busy(CPUX86State *env, int tss_selector, bool value, #define SWITCH_TSS_IRET 1 #define SWITCH_TSS_CALL 2 -/* return 0 if switching to a 16-bit selector */ -static int switch_tss_ra(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip, uintptr_t retaddr) +static void switch_tss_ra(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + uint32_t error_code, uintptr_t retaddr) { int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, i; target_ulong tss_base; @@ -439,7 +456,7 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, new_segs[i] = access_ldw(&new, tss_base + (0x48 + i * 4)); } new_ldt = access_ldw(&new, tss_base + 0x60); - new_trap = access_ldl(&new, tss_base + 0x64); + new_trap = access_ldw(&new, tss_base + 0x64) & 1; } else { /* 16 bit */ new_cr3 = 0; @@ -456,10 +473,6 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, new_segs[R_GS] = 0; new_trap = 0; } - /* XXX: avoid a compiler warning, see - http://support.amd.com/us/Processor_TechDocs/24593.pdf - chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */ - (void)new_trap; /* clear busy bit (it is restartable) */ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { @@ -582,14 +595,43 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK); } #endif - return type >> 3; + + if (has_error_code) { + int cpl = env->hflags & HF_CPL_MASK; + StackAccess sa; + + /* push the error code */ + sa.env = env; + sa.ra = retaddr; + sa.mmu_index = x86_mmu_index_pl(env, cpl); + sa.sp = env->regs[R_ESP]; + if (env->segs[R_SS].flags & DESC_B_MASK) { + sa.sp_mask = 0xffffffff; + } else { + sa.sp_mask = 0xffff; + } + sa.ss_base = env->segs[R_SS].base; + if (type & 8) { + pushl(&sa, error_code); + } else { + pushw(&sa, error_code); + } + SET_ESP(sa.sp, sa.sp_mask); + } + + if (new_trap) { + env->dr[6] |= DR6_BT; + raise_exception_ra(env, EXCP01_DB, retaddr); + } } -static int switch_tss(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip) +static void switch_tss(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + int error_code) { - return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0); + switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, + has_error_code, error_code, 0); } static inline unsigned int get_sp_mask(unsigned int e2) @@ -702,25 +744,8 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, if (!(e2 & DESC_P_MASK)) { raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); } - shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); - if (has_error_code) { - /* push the error code on the destination stack */ - cpl = env->hflags & HF_CPL_MASK; - sa.mmu_index = 
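The type & 8 test above separates 32-bit TSS descriptor types (9 available, 11 busy) from 16-bit ones (1 and 3), which is what decides between pushl and pushw for the error code. Sketch (demo code):

#include <assert.h>

static int error_code_push_bytes(int tss_type)
{
    return (tss_type & 8) ? 4 : 2;   /* pushl vs. pushw */
}

int main(void)
{
    assert(error_code_push_bytes(9) == 4);    /* 32-bit available TSS */
    assert(error_code_push_bytes(3) == 2);    /* 16-bit busy TSS */
    return 0;
}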
x86_mmu_index_pl(env, cpl); - if (env->segs[R_SS].flags & DESC_B_MASK) { - sa.sp_mask = 0xffffffff; - } else { - sa.sp_mask = 0xffff; - } - sa.sp = env->regs[R_ESP]; - sa.ss_base = env->segs[R_SS].base; - if (shift) { - pushl(&sa, error_code); - } else { - pushw(&sa, error_code); - } - SET_ESP(sa.sp, sa.sp_mask); - } + switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip, + has_error_code, error_code); return; } @@ -1516,7 +1541,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, + false, 0, GETPC()); break; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -1728,7 +1754,8 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, + false, 0, GETPC()); return; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -2239,7 +2266,8 @@ void helper_iret_protected(CPUX86State *env, int shift, int next_eip) if (type != 3) { raise_exception_err_ra(env, EXCP0A_TSS, tss_selector & 0xfffc, GETPC()); } - switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, GETPC()); + switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, + false, 0, GETPC()); } else { helper_ret_protected(env, shift, 1, 0, GETPC()); } diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h index ebf1035..ea98e1a 100644 --- a/target/i386/tcg/seg_helper.h +++ b/target/i386/tcg/seg_helper.h @@ -20,6 +20,8 @@ #ifndef SEG_HELPER_H #define SEG_HELPER_H +#include "cpu.h" + //#define DEBUG_PCALL #ifdef DEBUG_PCALL @@ -31,12 +33,12 @@ # define LOG_PCALL_STATE(cpu) do { } while (0) #endif +int cpu_mmu_index_kernel(CPUX86State *env); + /* * TODO: Convert callers to compute cpu_mmu_index_kernel once * and use *_mmuidx_ra directly. 
*/ -#define cpu_ldub_kernel_ra(e, p, r) \ - cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_lduw_kernel_ra(e, p, r) \ cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_ldl_kernel_ra(e, p, r) \ @@ -44,8 +46,6 @@ #define cpu_ldq_kernel_ra(e, p, r) \ cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) -#define cpu_stb_kernel_ra(e, p, v, r) \ - cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stw_kernel_ra(e, p, v, r) \ cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stl_kernel_ra(e, p, v, r) \ @@ -53,12 +53,10 @@ #define cpu_stq_kernel_ra(e, p, v, r) \ cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) -#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0) #define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0) #define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0) #define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0) -#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0) #define cpu_stw_kernel(e, p, v) cpu_stw_kernel_ra(e, p, v, 0) #define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0) #define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0) diff --git a/target/i386/tcg/system/bpt_helper.c b/target/i386/tcg/system/bpt_helper.c index be232c1..aebb5ca 100644 --- a/target/i386/tcg/system/bpt_helper.c +++ b/target/i386/tcg/system/bpt_helper.c @@ -19,8 +19,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "exec/watchpoint.h" #include "tcg/helper-tcg.h" diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c index 6876329..f622b5d 100644 --- a/target/i386/tcg/system/excp_helper.c +++ b/target/i386/tcg/system/excp_helper.c @@ -19,9 +19,12 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/cputlb.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "tcg/helper-tcg.h" typedef struct TranslateParams { @@ -589,7 +592,8 @@ static bool get_physical_address(CPUX86State *env, vaddr addr, if (sext != 0 && sext != -1) { *err = (TranslateFault){ .exception_index = EXCP0D_GPF, - .cr2 = addr, + /* non-canonical #GP doesn't change CR2 */ + .cr2 = env->cr[2], }; return false; } diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c index ce18c75..9c3f5cc 100644 --- a/target/i386/tcg/system/misc_helper.c +++ b/target/i386/tcg/system/misc_helper.c @@ -21,8 +21,9 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/address-spaces.h" +#include "accel/tcg/cpu-ldst.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "exec/cputlb.h" #include "tcg/helper-tcg.h" #include "hw/i386/apic.h" diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c index b07cc9f..8c7856b 100644 --- a/target/i386/tcg/system/seg_helper.c +++ b/target/i386/tcg/system/seg_helper.c @@ -23,7 +23,7 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" #include "../seg_helper.h" @@ -133,7 +133,7 @@ bool x86_cpu_exec_halt(CPUState *cpu) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { bql_lock(); 
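What the remaining TODO suggests, sketched: since the *_mmuidx_ra accessors take the index explicitly, a caller can hoist cpu_mmu_index_kernel out of a run of accesses instead of recomputing it per load (hypothetical caller shape, QEMU-context fragment):

static void load_descriptor_pair(CPUX86State *env, target_ulong ptr,
                                 uint32_t *e1, uint32_t *e2, uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index_kernel(env);   /* computed once */

    *e1 = cpu_ldl_mmuidx_ra(env, ptr, mmu_idx, ra);
    *e2 = cpu_ldl_mmuidx_ra(env, ptr + 4, mmu_idx, ra);
}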
apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); @@ -178,31 +178,31 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) */ switch (interrupt_request) { case CPU_INTERRUPT_POLL: - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); break; case CPU_INTERRUPT_SIPI: + cpu_reset_interrupt(cs, CPU_INTERRUPT_SIPI); do_cpu_sipi(cpu); break; case CPU_INTERRUPT_SMI: cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_SMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_SMI); do_smm_enter(cpu); break; case CPU_INTERRUPT_NMI: cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_NMI); env->hflags2 |= HF2_NMI_MASK; do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); break; case CPU_INTERRUPT_MCE: - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_MCE); do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); break; case CPU_INTERRUPT_HARD: cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); - cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | - CPU_INTERRUPT_VIRQ); + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ); intno = cpu_get_pic_interrupt(env); qemu_log_mask(CPU_LOG_INT, "Servicing hardware INT=0x%02x\n", intno); @@ -215,7 +215,7 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) qemu_log_mask(CPU_LOG_INT, "Servicing virtual hardware INT=0x%02x\n", intno); do_interrupt_x86_hardirq(env, intno, 1); - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); env->int_ctl &= ~V_IRQ_MASK; break; } diff --git a/target/i386/tcg/system/smm_helper.c b/target/i386/tcg/system/smm_helper.c index 251eb78..fb028a8 100644 --- a/target/i386/tcg/system/smm_helper.c +++ b/target/i386/tcg/system/smm_helper.c @@ -168,7 +168,7 @@ void do_smm_enter(X86CPU *cpu) env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK)); cpu_x86_update_cr4(env, 0); - env->dr[7] = 0x00000400; + helper_set_dr(env, 7, 0x00000400); cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, @@ -233,8 +233,8 @@ void helper_rsm(CPUX86State *env) env->eip = x86_ldq_phys(cs, sm_state + 0x7f78); cpu_load_eflags(env, x86_ldl_phys(cs, sm_state + 0x7f70), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7f68); - env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7f60); + helper_set_dr(env, 6, x86_ldl_phys(cs, sm_state + 0x7f68)); + helper_set_dr(env, 7, x86_ldl_phys(cs, sm_state + 0x7f60)); cpu_x86_update_cr4(env, x86_ldl_phys(cs, sm_state + 0x7f48)); cpu_x86_update_cr3(env, x86_ldq_phys(cs, sm_state + 0x7f50)); @@ -268,8 +268,8 @@ void helper_rsm(CPUX86State *env) env->regs[R_EDX] = x86_ldl_phys(cs, sm_state + 0x7fd8); env->regs[R_ECX] = x86_ldl_phys(cs, sm_state + 0x7fd4); env->regs[R_EAX] = x86_ldl_phys(cs, sm_state + 0x7fd0); - env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7fcc); - env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7fc8); + helper_set_dr(env, 6, x86_ldl_phys(cs, sm_state + 0x7fcc)); + helper_set_dr(env, 7, x86_ldl_phys(cs, sm_state + 0x7fc8)); env->tr.selector = x86_ldl_phys(cs, sm_state + 0x7fc4) & 0xffff; env->tr.base = x86_ldl_phys(cs, sm_state + 0x7f64); diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c index f9982b7..505788b 100644 --- a/target/i386/tcg/system/svm_helper.c +++ 
b/target/i386/tcg/system/svm_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" /* Secure Virtual Machine helpers */ @@ -49,7 +49,7 @@ static void svm_save_seg(CPUX86State *env, int mmu_idx, hwaddr addr, static inline void svm_canonicalization(CPUX86State *env, target_ulong *seg_base) { uint16_t shift_amt = 64 - cpu_x86_virtual_addr_width(env); - *seg_base = ((((long) *seg_base) << shift_amt) >> shift_amt); + *seg_base = (((int64_t) *seg_base) << shift_amt) >> shift_amt; } static void svm_load_seg(CPUX86State *env, int mmu_idx, hwaddr addr, @@ -403,7 +403,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->hflags2 |= HF2_GIF_MASK; if (ctl_has_irq(env)) { - cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + cpu_set_interrupt(cs, CPU_INTERRUPT_VIRQ); } if (virtual_gif_set(env)) { @@ -824,7 +824,7 @@ void do_vmexit(CPUX86State *env) env->intercept_exceptions = 0; /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); env->int_ctl = 0; /* Clears the TSC_OFFSET inside the processor. */ diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c index 13a3507..7255862 100644 --- a/target/i386/tcg/system/tcg-cpu.c +++ b/target/i386/tcg/system/tcg-cpu.c @@ -23,7 +23,8 @@ #include "system/system.h" #include "qemu/units.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "tcg/tcg-cpu.h" @@ -73,8 +74,8 @@ bool tcg_cpu_realizefn(CPUState *cs, Error **errp) memory_region_set_enabled(cpu->cpu_as_mem, true); cs->num_ases = 2; - cpu_address_space_init(cs, 0, "cpu-memory", cs->memory); - cpu_address_space_init(cs, 1, "cpu-smm", cpu->cpu_as_root); + cpu_address_space_init(cs, X86ASIdx_MEM, "cpu-memory", cs->memory); + cpu_address_space_init(cs, X86ASIdx_SMM, "cpu-smm", cpu->cpu_as_root); /* ... SMRAM with higher priority, linked from /machine/smram. */ cpu->machine_done.notify = tcg_cpu_machine_done; diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index b8aff82..6f5dc06 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -23,7 +23,8 @@ #include "qemu/accel.h" #include "accel/accel-cpu-target.h" #include "exec/translation-block.h" - +#include "exec/target_page.h" +#include "accel/tcg/cpu-ops.h" #include "tcg-cpu.h" /* Frob eflags into and out of the CPU temporary format. */ @@ -47,6 +48,25 @@ static void x86_cpu_exec_exit(CPUState *cs) env->eflags = cpu_compute_eflags(env); } +static TCGTBCPUState x86_get_tb_cpu_state(CPUState *cs) +{ + CPUX86State *env = cpu_env(cs); + uint32_t flags, cs_base; + vaddr pc; + + flags = env->hflags | + (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); + if (env->hflags & HF_CS64_MASK) { + cs_base = 0; + pc = env->eip; + } else { + cs_base = env->segs[R_CS].base; + pc = (uint32_t)(cs_base + env->eip); + } + + return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base }; +} + static void x86_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -94,6 +114,23 @@ static void x86_restore_state_to_opc(CPUState *cs, } } +int x86_mmu_index_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; + int mmu_index_base = + pl == 3 ? MMU_USER64_IDX : + !(env->hflags & HF_SMAP_MASK) ? 
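The svm_canonicalization change also fixes LLP64 hosts, where long is 32 bits wide. The double-shift sign extension itself can be checked standalone (demo code; the left shift is done on the unsigned value so that only the well-defined arithmetic right shift involves a signed type):

#include <assert.h>
#include <stdint.h>

static uint64_t canonicalize(uint64_t addr, unsigned bits)
{
    unsigned shift = 64 - bits;
    return (uint64_t)((int64_t)(addr << shift) >> shift);
}

int main(void)
{
    /* With 48 implemented bits, bit 47 replicates into the top bits. */
    assert(canonicalize(0x0000800000000000ull, 48) == 0xFFFF800000000000ull);
    assert(canonicalize(0x00007FFFFFFFFFFFull, 48) == 0x00007FFFFFFFFFFFull);
    return 0;
}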
MMU_KNOSMAP64_IDX : + (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; +} + +static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPUX86State *env = cpu_env(cs); + return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK); +} + #ifndef CONFIG_USER_ONLY static bool x86_debug_check_breakpoint(CPUState *cs) { @@ -103,15 +140,36 @@ static bool x86_debug_check_breakpoint(CPUState *cs) /* RF disables all architectural breakpoints. */ return !(env->eflags & RF_MASK); } -#endif -#include "accel/tcg/cpu-ops.h" +static void x86_cpu_exec_reset(CPUState *cs) +{ + CPUArchState *env = cpu_env(cs); + + cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); + do_cpu_init(env_archcpu(env)); + cs->exception_index = EXCP_HALTED; +} -static const TCGCPUOps x86_tcg_ops = { +static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result; +} +#endif + +const TCGCPUOps x86_tcg_ops = { + .mttcg_supported = true, + .precise_smc = true, + /* + * The x86 has a strong memory model with some store-after-load re-ordering + */ + .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD, .initialize = tcg_x86_init, .translate_code = x86_translate_code, + .get_tb_cpu_state = x86_get_tb_cpu_state, .synchronize_from_tb = x86_cpu_synchronize_from_tb, .restore_state_to_opc = x86_restore_state_to_opc, + .mmu_index = x86_cpu_mmu_index, .cpu_exec_enter = x86_cpu_exec_enter, .cpu_exec_exit = x86_cpu_exec_exit, #ifdef CONFIG_USER_ONLY @@ -120,9 +178,11 @@ static const TCGCPUOps x86_tcg_ops = { .record_sigbus = x86_cpu_record_sigbus, #else .tlb_fill = x86_cpu_tlb_fill, + .pointer_wrap = x86_pointer_wrap, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_halt = x86_cpu_exec_halt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, + .cpu_exec_reset = x86_cpu_exec_reset, .do_unaligned_access = x86_cpu_do_unaligned_access, .debug_excp_handler = breakpoint_handler, .debug_check_breakpoint = x86_debug_check_breakpoint, @@ -130,17 +190,6 @@ static const TCGCPUOps x86_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) -{ - /* for x86, all cpus use the same set of operations */ - cc->tcg_ops = &x86_tcg_ops; -} - -static void x86_tcg_cpu_class_init(CPUClass *cc) -{ - cc->init_accel_cpu = x86_tcg_cpu_init_ops; -} - static void x86_tcg_cpu_xsave_init(void) { #define XO(bit, field) \ @@ -181,7 +230,7 @@ static void x86_tcg_cpu_instance_init(CPUState *cs) x86_tcg_cpu_xsave_init(); } -static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); @@ -189,7 +238,6 @@ static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) acc->cpu_target_realize = tcg_cpu_realizefn; #endif /* CONFIG_USER_ONLY */ - acc->cpu_class_init = x86_tcg_cpu_class_init; acc->cpu_instance_init = x86_tcg_cpu_instance_init; } static const TypeInfo x86_tcg_cpu_accel_type_info = { diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h index 53a8494..85bcd61 100644 --- a/target/i386/tcg/tcg-cpu.h +++ b/target/i386/tcg/tcg-cpu.h @@ -19,6 +19,8 @@ #ifndef TCG_CPU_H #define TCG_CPU_H +#include "cpu.h" + #define XSAVE_FCW_FSW_OFFSET 0x000 #define XSAVE_FTW_FOP_OFFSET 0x004 #define XSAVE_CWD_RIP_OFFSET 0x008 @@ -76,6 +78,10 @@ QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFF 
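x86_mmu_index_pl packs three inputs into the index: privilege level, SMAP/AC state, and 32- vs 64-bit mode as a +1 offset. The selection logic, sketched with placeholder index values (the real MMU_*_IDX constants are defined in target/i386/cpu.h and differ from these):

#include <assert.h>

enum {                         /* placeholder values only */
    MMU_KSMAP64_IDX   = 0,     /* kernel, SMAP enforced */
    MMU_KNOSMAP64_IDX = 2,     /* kernel, SMAP bypassed */
    MMU_USER64_IDX    = 4,     /* user */
};

static int mmu_index_pl_sketch(unsigned pl, int smap, int ac, int cs64)
{
    int mmu_index_32 = cs64 ? 0 : 1;   /* 32-bit variants sit at +1 */
    int mmu_index_base =
        pl == 3 ? MMU_USER64_IDX :
        !smap   ? MMU_KNOSMAP64_IDX :
        ac      ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;

    return mmu_index_base + mmu_index_32;
}

int main(void)
{
    assert(mmu_index_pl_sketch(3, 1, 0, 1) == MMU_USER64_IDX);
    assert(mmu_index_pl_sketch(0, 1, 0, 1) == MMU_KSMAP64_IDX);
    return 0;
}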
QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != XSAVE_HI16_ZMM_OFFSET); QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET); +extern const TCGCPUOps x86_tcg_ops; + bool tcg_cpu_realizefn(CPUState *cs, Error **errp); +int x86_mmu_index_pl(CPUX86State *env, unsigned pl); + #endif /* TCG_CPU_H */ diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a8935f4..0cb87d0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -20,11 +20,12 @@ #include "qemu/host-utils.h" #include "cpu.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/translation-block.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "exec/translator.h" +#include "exec/target_page.h" #include "fpu/softfloat.h" #include "exec/helper-proto.h" @@ -134,10 +135,7 @@ typedef struct DisasContext { TCGv T1; /* TCG local register indexes (only used inside old micro ops) */ - TCGv tmp0; - TCGv tmp4; TCGv_i32 tmp2_i32; - TCGv_i32 tmp3_i32; TCGv_i64 tmp1_i64; sigjmp_buf jmpbuf; @@ -1183,6 +1181,26 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) return cc; } +static void gen_neg_setcc(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_addi_tl(reg, cc.reg, -1); + } else { + tcg_gen_neg_tl(reg, cc.reg); + } + return; + } + + if (cc.use_reg2) { + tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + static void gen_setcc(DisasContext *s, int b, TCGv reg) { CCPrepare cc = gen_prepare_cc(s, b, reg); @@ -1300,30 +1318,35 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift) { + TCGv_i32 port = tcg_temp_new_i32(); + gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. */ tcg_gen_movi_tl(s->T0, 0); gen_op_st_v(s, ot, s->T0, s->A0); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); - gen_helper_in_func(ot, s->T0, s->tmp2_i32); + tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]); + tcg_gen_andi_i32(port, port, 0xffff); + gen_helper_in_func(ot, s->T0, port); gen_op_st_v(s, ot, s->T0, s->A0); gen_op_add_reg(s, s->aflag, R_EDI, dshift); - gen_bpt_io(s, s->tmp2_i32, ot); + gen_bpt_io(s, port, ot); } static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift) { + TCGv_i32 port = tcg_temp_new_i32(); + TCGv_i32 value = tcg_temp_new_i32(); + gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); - gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); + tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]); + tcg_gen_andi_i32(port, port, 0xffff); + tcg_gen_trunc_tl_i32(value, s->T0); + gen_helper_out_func(ot, port, value); gen_op_add_reg(s, s->aflag, R_ESI, dshift); - gen_bpt_io(s, s->tmp2_i32, ot); + gen_bpt_io(s, port, ot); } #define REP_MAX 65535 @@ -1560,10 +1583,13 @@ static bool check_cpl0(DisasContext *s) } /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, +static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot, bool is_right, TCGv count) { target_ulong mask = (ot == MO_64 ? 
63 : 31); + TCGv cc_src = tcg_temp_new(); + TCGv tmp = tcg_temp_new(); + TCGv hishift; switch (ot) { case MO_16: @@ -1571,9 +1597,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A portion by constructing it as a 32-bit value. */ if (is_right) { - tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16); + tcg_gen_deposit_tl(tmp, s->T0, s->T1, 16, 16); tcg_gen_mov_tl(s->T1, s->T0); - tcg_gen_mov_tl(s->T0, s->tmp0); + tcg_gen_mov_tl(s->T0, tmp); } else { tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16); } @@ -1584,47 +1610,52 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, case MO_32: #ifdef TARGET_X86_64 /* Concatenate the two 32-bit values and use a 64-bit shift. */ - tcg_gen_subi_tl(s->tmp0, count, 1); + tcg_gen_subi_tl(tmp, count, 1); if (is_right) { tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1); - tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_i64(cc_src, s->T0, tmp); tcg_gen_shr_i64(s->T0, s->T0, count); } else { tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0); - tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_i64(cc_src, s->T0, tmp); tcg_gen_shl_i64(s->T0, s->T0, count); - tcg_gen_shri_i64(s->tmp0, s->tmp0, 32); + tcg_gen_shri_i64(cc_src, cc_src, 32); tcg_gen_shri_i64(s->T0, s->T0, 32); } break; #endif default: - tcg_gen_subi_tl(s->tmp0, count, 1); + hishift = tcg_temp_new(); + tcg_gen_subi_tl(tmp, count, 1); if (is_right) { - tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_tl(cc_src, s->T0, tmp); - tcg_gen_subfi_tl(s->tmp4, mask + 1, count); + /* mask + 1 - count = mask - tmp = mask ^ tmp */ + tcg_gen_xori_tl(hishift, tmp, mask); tcg_gen_shr_tl(s->T0, s->T0, count); - tcg_gen_shl_tl(s->T1, s->T1, s->tmp4); + tcg_gen_shl_tl(s->T1, s->T1, hishift); } else { - tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_tl(cc_src, s->T0, tmp); + + /* mask + 1 - count = mask - tmp = mask ^ tmp */ + tcg_gen_xori_tl(hishift, tmp, mask); + tcg_gen_shl_tl(s->T0, s->T0, count); + tcg_gen_shr_tl(s->T1, s->T1, hishift); + if (ot == MO_16) { /* Only needed if count > 16, for Intel behaviour. */ - tcg_gen_subfi_tl(s->tmp4, 33, count); - tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4); - tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4); + tcg_gen_shri_tl(tmp, s->T1, 1); + tcg_gen_or_tl(cc_src, cc_src, tmp); } - - tcg_gen_subfi_tl(s->tmp4, mask + 1, count); - tcg_gen_shl_tl(s->T0, s->T0, count); - tcg_gen_shr_tl(s->T1, s->T1, s->tmp4); } - tcg_gen_movi_tl(s->tmp4, 0); - tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4, - s->tmp4, s->T1); + tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, + count, tcg_constant_tl(0), + tcg_constant_tl(0), s->T1); tcg_gen_or_tl(s->T0, s->T0, s->T1); break; } + + return cc_src; } #define X86_MAX_INSN_LENGTH 15 @@ -1843,14 +1874,16 @@ static void gen_bndck(DisasContext *s, X86DecodedInsn *decode, TCGCond cond, TCGv_i64 bndv) { TCGv ea = gen_lea_modrm_1(s, decode->mem, false); + TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i64 t64 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(s->tmp1_i64, ea); + tcg_gen_extu_tl_i64(t64, ea); if (!CODE64(s)) { - tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64); + tcg_gen_ext32u_i64(t64, t64); } - tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv); - tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64); - gen_helper_bndck(tcg_env, s->tmp2_i32); + tcg_gen_setcond_i64(cond, t64, t64, bndv); + tcg_gen_extrl_i64_i32(t32, t64); + gen_helper_bndck(tcg_env, t32); } /* generate modrm load of memory or register. 
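The mask + 1 - count = mask ^ tmp comment in gen_shiftd_rm_T1 holds because mask is all-ones (31 or 63), so subtracting tmp = count - 1 from it never borrows. Exhaustive check (demo code):

#include <assert.h>

int main(void)
{
    static const unsigned masks[] = { 31, 63 };   /* MO_32 and MO_64 */

    for (int m = 0; m < 2; m++) {
        unsigned mask = masks[m];
        for (unsigned count = 1; count <= mask; count++) {
            unsigned tmp = count - 1;             /* 0 <= tmp <= mask */
            assert(mask + 1 - count == (mask ^ tmp));
        }
    }
    return 0;
}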
*/ @@ -1992,25 +2025,39 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg) /* move SRC to seg_reg and compute if the CPU state may change. Never call this function with seg_reg == R_CS */ -static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src) +static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq) { if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, src); - gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32); - /* abort translation because the addseg value may change or - because ss32 may change. For R_SS, translation must always - stop as a special handling must be done to disable hardware - interrupts for the next instruction */ - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } else if (CODE32(s) && seg_reg < R_FS) { + TCGv_i32 sel = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(sel, src); + gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); + + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. + */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { gen_op_movl_seg_real(s, seg_reg, src); - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } + } + + /* + * For MOV or POP to SS (but not LSS) translation must always + * stop as a special handling must be done to disable hardware + * interrupts for the next instruction. + * + * This is the last instruction, so it's okay to overwrite + * HF_TF_MASK; the next TB will start with the flag set. + * + * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which + * might have been set above. + */ + if (inhibit_irq) { + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; + s->flags &= ~HF_TF_MASK; } } @@ -2148,14 +2195,17 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) level &= 31; if (level != 0) { int i; + if (level > 1) { + TCGv fp = tcg_temp_new(); - /* Copy level-1 pointers from the previous frame. */ - for (i = 1; i < level; ++i) { - gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); - gen_op_ld_v(s, d_ot, s->tmp0, s->A0); + /* Copy level-1 pointers from the previous frame. */ + for (i = 1; i < level; ++i) { + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); + gen_op_ld_v(s, d_ot, fp, s->A0); - gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); - gen_op_st_v(s, d_ot, s->tmp0, s->A0); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); + gen_op_st_v(s, d_ot, fp, s->A0); + } } /* Push the current FrameTemp as the last level. */ @@ -2258,7 +2308,7 @@ gen_eob(DisasContext *s, int mode) if (mode == DISAS_EOB_RECHECK_TF) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) { + } else if (s->flags & HF_TF_MASK) { gen_helper_single_step(tcg_env); } else if (mode == DISAS_JUMP && /* give irqs a chance to happen */ @@ -2378,10 +2428,11 @@ static void gen_ldy_env_A0(DisasContext *s, int offset, bool align) int mem_index = s->mem_index; TCGv_i128 t0 = tcg_temp_new_i128(); TCGv_i128 t1 = tcg_temp_new_i128(); + TCGv a0_hi = tcg_temp_new(); tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? 
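gen_movl_seg now takes the interrupt-inhibit decision as a parameter instead of keying on seg_reg, so LSS can load SS without inhibiting. The decision matrix, sketched (demo code; the R_* constants stand in for QEMU's segment indices):

#include <assert.h>
#include <stdbool.h>

enum { R_ES, R_CS, R_SS, R_DS, R_FS, R_GS };

/* MOV Sreg / POP Sreg inhibit interrupts only for SS; LSS does not. */
static bool inhibit_irq_for(int seg_reg, bool is_lxx)
{
    return seg_reg == R_SS && !is_lxx;
}

int main(void)
{
    assert(inhibit_irq_for(R_SS, false));    /* MOV SS, r/m */
    assert(!inhibit_irq_for(R_SS, true));    /* LSS */
    assert(!inhibit_irq_for(R_DS, false));   /* MOV DS, r/m */
    return 0;
}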
MO_ALIGN_32 : 0)); - tcg_gen_addi_tl(s->tmp0, s->A0, 16); - tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop); + tcg_gen_addi_tl(a0_hi, s->A0, 16); + tcg_gen_qemu_ld_i128(t1, a0_hi, mem_index, mop); tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); @@ -2392,12 +2443,13 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR; int mem_index = s->mem_index; TCGv_i128 t = tcg_temp_new_i128(); + TCGv a0_hi = tcg_temp_new(); tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0)); - tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_addi_tl(a0_hi, s->A0, 16); tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); - tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop); + tcg_gen_qemu_st_i128(t, a0_hi, mem_index, mop); } #include "emit.c.inc" @@ -3601,7 +3653,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode) return; illegal_op: gen_illegal_opcode(s); - return; } #include "decode-new.c.inc" @@ -3744,11 +3795,8 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->T1 = tcg_temp_new(); dc->A0 = tcg_temp_new(); - dc->tmp0 = tcg_temp_new(); dc->tmp1_i64 = tcg_temp_new_i64(); dc->tmp2_i32 = tcg_temp_new_i32(); - dc->tmp3_i32 = tcg_temp_new_i32(); - dc->tmp4 = tcg_temp_new(); dc->cc_srcT = tcg_temp_new(); } diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c index b3bdb78..98fab4cb 100644 --- a/target/i386/tcg/user/excp_helper.c +++ b/target/i386/tcg/user/excp_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/helper-tcg.h" void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c index c45f2ac..263f599 100644 --- a/target/i386/tcg/user/seg_helper.c +++ b/target/i386/tcg/user/seg_helper.c @@ -21,8 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" #include "tcg/seg_helper.h" diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index 81fdd06..f758861 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "system/kvm_int.h" #include "qemu/main-loop.h" -#include "system/accel-ops.h" +#include "accel/accel-cpu-ops.h" #include "system/cpus.h" #include "qemu/guest-random.h" @@ -42,16 +42,14 @@ static void *whpx_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { + qemu_process_cpu_events(cpu); + if (cpu_can_run(cpu)) { r = whpx_vcpu_exec(cpu); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); } } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait_bql(cpu->halt_cond); - } - qemu_wait_io_event_common(cpu); } while (!cpu->unplug || cpu_can_run(cpu)); whpx_destroy_vcpu(cpu); @@ -83,19 +81,19 @@ static bool whpx_vcpu_thread_is_idle(CPUState *cpu) return !whpx_apic_in_platform(); } -static void whpx_accel_ops_class_init(ObjectClass *oc, void *data) +static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = whpx_start_vcpu_thread; ops->kick_vcpu_thread = whpx_kick_vcpu_thread; 
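gen_ldy_env_A0 and gen_sty_env_A0 above issue the 256-bit access as two 128-bit halves at A0 and A0 + 16, with only the low half carrying the optional 32-byte alignment check. The access shape, sketched in plain C (demo code):

#include <stdint.h>
#include <string.h>

typedef struct { uint8_t bytes[16]; } DemoInt128;

/* Two 16-byte halves; only the first would carry MO_ALIGN_32. */
static void ldy_demo(DemoInt128 dst[2], const uint8_t *a0)
{
    memcpy(&dst[0], a0, 16);
    memcpy(&dst[1], a0 + 16, 16);
}

int main(void)
{
    uint8_t buf[32] = { [0] = 1, [16] = 2 };
    DemoInt128 ymm[2];

    ldy_demo(ymm, buf);
    return (ymm[0].bytes[0] == 1 && ymm[1].bytes[0] == 2) ? 0 : 1;
}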
ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; ops->synchronize_post_init = whpx_cpu_synchronize_post_init; ops->synchronize_state = whpx_cpu_synchronize_state; ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm; - ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume; } static const TypeInfo whpx_accel_ops_type = { diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h index e6cf155..54cfc25 100644 --- a/target/i386/whpx/whpx-accel-ops.h +++ b/target/i386/whpx/whpx-accel-ops.h @@ -21,7 +21,6 @@ void whpx_cpu_synchronize_state(CPUState *cpu); void whpx_cpu_synchronize_post_reset(CPUState *cpu); void whpx_cpu_synchronize_post_init(CPUState *cpu); void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); -void whpx_cpu_synchronize_pre_resume(bool step_pending); /* state subset only touched by the VCPU itself during runtime */ #define WHPX_SET_RUNTIME_STATE 1 diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index 41fb8c5..2567618 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -10,10 +10,11 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/address-spaces.h" -#include "exec/ioport.h" +#include "system/address-spaces.h" +#include "system/ioport.h" #include "gdbstub/helpers.h" #include "qemu/accel.h" +#include "accel/accel-ops.h" #include "system/whpx.h" #include "system/cpus.h" #include "system/runstate.h" @@ -26,6 +27,8 @@ #include "qapi/qapi-types-common.h" #include "qapi/qapi-visit-common.h" #include "migration/blocker.h" +#include "host-cpu.h" +#include "accel/accel-cpu-target.h" #include <winerror.h> #include "whpx-internal.h" @@ -237,13 +240,12 @@ struct AccelCPUState { uint64_t tpr; uint64_t apic_base; bool interruption_pending; - bool dirty; /* Must be the last field as it may have a tail */ WHV_RUN_VP_EXIT_CONTEXT exit_ctx; }; -static bool whpx_allowed; +bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; static uint32_t max_vcpu_index; @@ -549,8 +551,6 @@ static void whpx_set_registers(CPUState *cpu, int level) error_report("WHPX: Failed to set virtual processor context, hr=%08lx", hr); } - - return; } static int whpx_get_tsc(CPUState *cpu) @@ -771,8 +771,6 @@ static void whpx_get_registers(CPUState *cpu) } x86_update_hflags(env); - - return; } static HRESULT CALLBACK whpx_emu_ioport_callback( @@ -790,8 +788,11 @@ static HRESULT CALLBACK whpx_emu_mmio_callback( void *ctx, WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) { - cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, - ma->Direction); + CPUState *cs = (CPUState *)ctx; + AddressSpace *as = cpu_addressspace(cs, MEMTXATTRS_UNSPECIFIED); + + address_space_rw(as, ma->GpaAddress, MEMTXATTRS_UNSPECIFIED, + ma->Data, ma->AccessSize, ma->Direction); return S_OK; } @@ -840,7 +841,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback( * The emulator just successfully wrote the register state. We clear the * dirty state so we avoid the double write on resume of the VP. */ - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; return hr; } @@ -1395,7 +1396,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu) /* Returns the address of the next instruction that is about to be executed. 
*/ static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { /* The CPU registers have been modified by other parts of QEMU. */ return cpu_env(cpu)->eip; } else if (exit_context_valid) { @@ -1438,9 +1439,9 @@ static int whpx_handle_halt(CPUState *cpu) int ret = 0; bql_lock(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (cpu_env(cpu)->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->exception_index = EXCP_HLT; cpu->halted = true; ret = 1; @@ -1471,16 +1472,16 @@ static void whpx_vcpu_pre_run(CPUState *cpu) /* Inject NMI */ if (!vcpu->interruption_pending && - cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); vcpu->interruptable = false; new_int.InterruptionType = WHvX64PendingNmi; new_int.InterruptionPending = 1; new_int.InterruptionVector = 2; } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); } } @@ -1488,13 +1489,13 @@ static void whpx_vcpu_pre_run(CPUState *cpu) * Force the VCPU out of its inner loop to process any INIT requests or * commit pending TPR access. */ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } } @@ -1503,8 +1504,8 @@ static void whpx_vcpu_pre_run(CPUState *cpu) if (!vcpu->interruption_pending && vcpu->interruptable && (env->eflags & IF_MASK)) { assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { new_int.InterruptionType = WHvX64PendingInterrupt; @@ -1521,8 +1522,8 @@ static void whpx_vcpu_pre_run(CPUState *cpu) reg_count += 1; } } else if (vcpu->ready_for_pic_interrupt && - (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { reg_names[reg_count] = WHvRegisterPendingEvent; @@ -1541,14 +1542,14 @@ static void whpx_vcpu_pre_run(CPUState *cpu) if (tpr != vcpu->tpr) { vcpu->tpr = tpr; reg_values[reg_count].Reg64 = tpr; - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); reg_names[reg_count] = WHvX64RegisterCr8; reg_count += 1; } /* Update the state of the interrupt delivery notification */ if (!vcpu->window_registered && - cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { 
reg_values[reg_count].DeliverabilityNotifications = (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { .InterruptNotification = 1 @@ -1570,8 +1571,6 @@ static void whpx_vcpu_pre_run(CPUState *cpu) " hr=%08lx", hr); } } - - return; } static void whpx_vcpu_post_run(CPUState *cpu) @@ -1595,8 +1594,6 @@ static void whpx_vcpu_post_run(CPUState *cpu) vcpu->interruptable = !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow; - - return; } static void whpx_vcpu_process_async_events(CPUState *cpu) @@ -1605,37 +1602,35 @@ static void whpx_vcpu_process_async_events(CPUState *cpu) CPUX86State *env = &x86_cpu->env; AccelCPUState *vcpu = cpu->accel; - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { whpx_cpu_synchronize_state(cpu); do_cpu_init(x86_cpu); vcpu->interruptable = true; } - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); apic_poll_irq(x86_cpu->apic_state); } - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->halted = false; } - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SIPI)) { whpx_cpu_synchronize_state(cpu); do_cpu_sipi(x86_cpu); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_TPR); whpx_cpu_synchronize_state(cpu); apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, env->tpr_access_type); } - - return; } static int whpx_vcpu_run(CPUState *cpu) @@ -1714,15 +1709,16 @@ static int whpx_vcpu_run(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (exclusive_step_mode == WHPX_STEP_NONE) { whpx_vcpu_pre_run(cpu); - if (qatomic_read(&cpu->exit_request)) { + /* Corresponding store-release is in cpu_exit. 
*/ + if (qatomic_load_acquire(&cpu->exit_request)) { whpx_vcpu_kick(cpu); } } @@ -2057,16 +2053,14 @@ static int whpx_vcpu_run(CPUState *cpu) whpx_last_vcpu_stopping(cpu); } - qatomic_set(&cpu->exit_request, false); - return ret < 0; } static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { whpx_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } @@ -2074,20 +2068,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } /* @@ -2096,7 +2090,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, void whpx_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2116,7 +2110,7 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); } -void whpx_cpu_synchronize_pre_resume(bool step_pending) +static void whpx_pre_resume_vm(AccelState *as, bool step_pending) { whpx_global.step_pending = step_pending; } @@ -2236,7 +2230,7 @@ int whpx_init_vcpu(CPUState *cpu) } vcpu->interruptable = true; - vcpu->dirty = true; + cpu->vcpu_dirty = true; cpu->accel = vcpu; max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); @@ -2280,7 +2274,6 @@ void whpx_destroy_vcpu(CPUState *cpu) whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); g_free(cpu->accel); - return; } void whpx_vcpu_kick(CPUState *cpu) @@ -2512,11 +2505,33 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, } } +static void whpx_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void whpx_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = whpx_cpu_instance_init; +} + +static const TypeInfo whpx_cpu_accel_type = { + .name = ACCEL_CPU_NAME("whpx"), + + .parent = TYPE_ACCEL_CPU, + .class_init = whpx_cpu_accel_class_init, + .abstract = true, +}; + /* * Partition support */ -static int whpx_accel_init(MachineState *ms) +static int whpx_accel_init(AccelState *as, MachineState *ms) { struct whpx_state *whpx; int ret; @@ -2700,20 +2715,16 @@ error: return ret; } -int whpx_enabled(void) -{ - return whpx_allowed; -} - bool whpx_apic_in_platform(void) { return whpx_global.apic_in_platform; } -static void whpx_accel_class_init(ObjectClass *oc, void *data) +static void whpx_accel_class_init(ObjectClass *oc, const void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "WHPX"; ac->init_machine = whpx_accel_init; + ac->pre_resume_vm = whpx_pre_resume_vm; ac->allowed = &whpx_allowed; object_class_property_add(oc, "kernel-irqchip", "on|off|split", @@ -2742,6 +2753,7 @@ static const TypeInfo whpx_accel_type = { static void whpx_type_init(void) { type_register_static(&whpx_accel_type); + type_register_static(&whpx_cpu_accel_type); } 
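Taken together, the WHPX hunks above replace the accelerator-private cpu->accel->dirty flag with the common cpu->vcpu_dirty and converge on one lazy register-synchronization protocol: fetch hypervisor state only on first access, mark QEMU's copy dirty, and push it back exactly once before the next guest entry. A minimal compilable sketch of that protocol, with hypothetical hv_get_registers()/hv_set_registers() stubs standing in for the real whpx_get_registers()/whpx_set_registers():

/* Sketch only; hv_*_registers() are hypothetical stand-ins, not QEMU APIs. */
#include <stdbool.h>

typedef struct VCpuSketch {
    bool vcpu_dirty;         /* QEMU's copy was modified since the last push */
    unsigned long regs[16];  /* stand-in for the architectural register file */
} VCpuSketch;

static void hv_get_registers(VCpuSketch *cpu) { (void)cpu; /* e.g. WHvGetVirtualProcessorRegisters() */ }
static void hv_set_registers(VCpuSketch *cpu) { (void)cpu; /* e.g. WHvSetVirtualProcessorRegisters() */ }

/* Lazy read: fetch from the hypervisor only when QEMU's copy is stale,
 * then mark it dirty so callers may modify it without extra bookkeeping. */
static void sketch_synchronize_state(VCpuSketch *cpu)
{
    if (!cpu->vcpu_dirty) {
        hv_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

/* Before re-entering the guest: push a modified copy back exactly once. */
static void sketch_pre_run(VCpuSketch *cpu)
{
    if (cpu->vcpu_dirty) {
        hv_set_registers(cpu);
        cpu->vcpu_dirty = false;
    }
}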
bool init_whp_dispatch(void) diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c index 630a961..e1ef6d4 100644 --- a/target/i386/whpx/whpx-apic.c +++ b/target/i386/whpx/whpx-apic.c @@ -252,7 +252,7 @@ static void whpx_apic_realize(DeviceState *dev, Error **errp) msi_nonbroken = true; } -static void whpx_apic_class_init(ObjectClass *klass, void *data) +static void whpx_apic_class_init(ObjectClass *klass, const void *data) { APICCommonClass *k = APIC_COMMON_CLASS(klass); diff --git a/target/loongarch/README b/target/loongarch/README index 0b9dc0d..1ffd342 100644 --- a/target/loongarch/README +++ b/target/loongarch/README @@ -11,7 +11,7 @@ - System emulation - You can reference docs/system/loongarch/loongson3.rst to get the information about system emulation of LoongArch. + You can reference docs/system/loongarch/virt.rst to get the information about system emulation of LoongArch. - Linux-user emulation diff --git a/target/loongarch/cpu-csr.h b/target/loongarch/cpu-csr.h index 0834e91..9097fdd 100644 --- a/target/loongarch/cpu-csr.h +++ b/target/loongarch/cpu-csr.h @@ -34,11 +34,13 @@ FIELD(CSR_MISC, ALCL, 12, 4) FIELD(CSR_MISC, DWPL, 16, 3) #define LOONGARCH_CSR_ECFG 0x4 /* Exception config */ -FIELD(CSR_ECFG, LIE, 0, 13) +FIELD(CSR_ECFG, LIE, 0, 15) /* bit 15 is msg interrupt enabled */ +FIELD(CSR_ECFG, MSGINT, 14, 1) FIELD(CSR_ECFG, VS, 16, 3) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ -FIELD(CSR_ESTAT, IS, 0, 13) +FIELD(CSR_ESTAT, IS, 0, 15) /* bit 15 is msg interrupt enabled */ +FIELD(CSR_ESTAT, MSGINT, 14, 1) FIELD(CSR_ESTAT, ECODE, 16, 6) FIELD(CSR_ESTAT, ESUBCODE, 22, 9) @@ -106,6 +108,7 @@ FIELD(CSR_PWCH, DIR4_WIDTH, 18, 6) #define LOONGARCH_CSR_STLBPS 0x1e /* Stlb page size */ FIELD(CSR_STLBPS, PS, 0, 5) +FIELD(CSR_STLBPS, RESERVE, 5, 27) #define LOONGARCH_CSR_RVACFG 0x1f /* Reduced virtual address config */ FIELD(CSR_RVACFG, RBITS, 0, 4) @@ -186,6 +189,9 @@ FIELD(CSR_MERRCTL, ISMERR, 0, 1) #define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */ +#define LOONGARCH_CSR_MSGIS(N) (0xa0 + N) +#define LOONGARCH_CSR_MSGIR 0xa4 + /* Direct map windows CSRs*/ #define LOONGARCH_CSR_DMW(N) (0x180 + N) FIELD(CSR_DMW, PLV0, 0, 1) diff --git a/target/loongarch/cpu-mmu.h b/target/loongarch/cpu-mmu.h new file mode 100644 index 0000000..0068d22 --- /dev/null +++ b/target/loongarch/cpu-mmu.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch CPU parameters for QEMU. 
+ * + * Copyright (c) 2025 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_CPU_MMU_H +#define LOONGARCH_CPU_MMU_H + +typedef enum TLBRet { + TLBRET_MATCH, + TLBRET_BADADDR, + TLBRET_NOMATCH, + TLBRET_INVALID, + TLBRET_DIRTY, + TLBRET_RI, + TLBRET_XI, + TLBRET_PE, +} TLBRet; + +typedef struct MMUContext { + vaddr addr; + uint64_t pte; + hwaddr physical; + int ps; /* page size shift */ + int prot; +} MMUContext; + +bool check_ps(CPULoongArchState *ent, uint8_t ps); +TLBRet loongarch_check_pte(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx); +TLBRet get_physical_address(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug); +void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, + uint64_t *dir_width, target_ulong level); +hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); + +#endif /* LOONGARCH_CPU_MMU_H */ diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h index 5243794..58cc45a 100644 --- a/target/loongarch/cpu-param.h +++ b/target/loongarch/cpu-param.h @@ -13,6 +13,6 @@ #define TARGET_PAGE_BITS 12 -#define TCG_GUEST_DEFAULT_MO (0) +#define TARGET_INSN_START_EXTRA_WORDS 0 #endif diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index ea1665e..86490e0 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -15,9 +15,9 @@ #include "system/kvm.h" #include "kvm/kvm_loongarch.h" #include "hw/qdev-properties.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "cpu.h" +#include "cpu-mmu.h" #include "internals.h" #include "fpu/softfloat-helpers.h" #include "csr.h" @@ -28,10 +28,7 @@ #ifdef CONFIG_KVM #include <linux/kvm.h> #endif -#ifdef CONFIG_TCG -#include "exec/cpu_ldst.h" -#include "tcg/tcg.h" -#endif +#include "tcg/tcg_loongarch.h" const char * const regnames[32] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -47,62 +44,6 @@ const char * const fregnames[32] = { "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", }; -struct TypeExcp { - int32_t exccode; - const char * const name; -}; - -static const struct TypeExcp excp_names[] = { - {EXCCODE_INT, "Interrupt"}, - {EXCCODE_PIL, "Page invalid exception for load"}, - {EXCCODE_PIS, "Page invalid exception for store"}, - {EXCCODE_PIF, "Page invalid exception for fetch"}, - {EXCCODE_PME, "Page modified exception"}, - {EXCCODE_PNR, "Page Not Readable exception"}, - {EXCCODE_PNX, "Page Not Executable exception"}, - {EXCCODE_PPI, "Page Privilege error"}, - {EXCCODE_ADEF, "Address error for instruction fetch"}, - {EXCCODE_ADEM, "Address error for Memory access"}, - {EXCCODE_SYS, "Syscall"}, - {EXCCODE_BRK, "Break"}, - {EXCCODE_INE, "Instruction Non-Existent"}, - {EXCCODE_IPE, "Instruction privilege error"}, - {EXCCODE_FPD, "Floating Point Disabled"}, - {EXCCODE_FPE, "Floating Point Exception"}, - {EXCCODE_DBP, "Debug breakpoint"}, - {EXCCODE_BCE, "Bound Check Exception"}, - {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, - {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, - {EXCP_HLT, "EXCP_HLT"}, -}; - -const char *loongarch_exception_name(int32_t exception) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(excp_names); i++) { - if (excp_names[i].exccode == exception) { - return excp_names[i].name; - } - } - return "Unknown"; -} - -void G_NORETURN do_raise_exception(CPULoongArchState *env, - uint32_t exception, - uintptr_t pc) -{ - CPUState *cs = env_cpu(env); - - qemu_log_mask(CPU_LOG_INT, "%s: exception: %d 
(%s)\n", - __func__, - exception, - loongarch_exception_name(exception)); - cs->exception_index = exception; - - cpu_loop_exit_restore(cs, pc); -} - static void loongarch_cpu_set_pc(CPUState *cs, vaddr value) { set_pc(cpu_env(cs), value); @@ -138,18 +79,8 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) } } -static inline bool cpu_loongarch_hw_interrupts_enabled(CPULoongArchState *env) -{ - bool ret = 0; - - ret = (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE) && - !(FIELD_EX64(env->CSR_DBG, CSR_DBG, DST))); - - return ret; -} - /* Check if there is pending and not masked out interrupt */ -static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) +bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) { uint32_t pending; uint32_t status; @@ -161,244 +92,156 @@ static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) } #endif -#ifdef CONFIG_TCG #ifndef CONFIG_USER_ONLY -static void loongarch_cpu_do_interrupt(CPUState *cs) +bool loongarch_cpu_has_work(CPUState *cs) { - CPULoongArchState *env = cpu_env(cs); - bool update_badinstr = 1; - int cause = -1; - bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); - uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); - - if (cs->exception_index != EXCCODE_INT) { - qemu_log_mask(CPU_LOG_INT, - "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", - __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, - cs->exception_index, - loongarch_exception_name(cs->exception_index)); - } - - switch (cs->exception_index) { - case EXCCODE_DBP: - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DCL, 1); - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, ECODE, 0xC); - goto set_DERA; - set_DERA: - env->CSR_DERA = env->pc; - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DST, 1); - set_pc(env, env->CSR_EENTRY + 0x480); - break; - case EXCCODE_INT: - if (FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DEI, 1); - goto set_DERA; - } - QEMU_FALLTHROUGH; - case EXCCODE_PIF: - case EXCCODE_ADEF: - cause = cs->exception_index; - update_badinstr = 0; - break; - case EXCCODE_SYS: - case EXCCODE_BRK: - case EXCCODE_INE: - case EXCCODE_IPE: - case EXCCODE_FPD: - case EXCCODE_FPE: - case EXCCODE_SXD: - case EXCCODE_ASXD: - env->CSR_BADV = env->pc; - QEMU_FALLTHROUGH; - case EXCCODE_BCE: - case EXCCODE_ADEM: - case EXCCODE_PIL: - case EXCCODE_PIS: - case EXCCODE_PME: - case EXCCODE_PNR: - case EXCCODE_PNX: - case EXCCODE_PPI: - cause = cs->exception_index; - break; - default: - qemu_log("Error: exception(%d) has not been supported\n", - cs->exception_index); - abort(); - } - - if (update_badinstr) { - env->CSR_BADI = cpu_ldl_code(env, env->pc); - } + bool has_work = false; - /* Save PLV and IE */ - if (tlbfill) { - env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV, - FIELD_EX64(env->CSR_CRMD, - CSR_CRMD, PLV)); - env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); - /* set the DA mode */ - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 1); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 0); - env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, - PC, (env->pc >> 2)); - } else { - env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE, - EXCODE_MCODE(cause)); - env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ESUBCODE, - EXCODE_SUBCODE(cause)); - env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, 
CSR_PRMD, PPLV, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV)); - env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PIE, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); - env->CSR_ERA = env->pc; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && + cpu_loongarch_hw_interrupts_pending(cpu_env(cs))) { + has_work = true; } - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, 0); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, 0); + return has_work; +} +#endif /* !CONFIG_USER_ONLY */ - if (vec_size) { - vec_size = (1 << vec_size) * 4; - } +static void loongarch_la464_init_csr(Object *obj) +{ +#ifndef CONFIG_USER_ONLY + static bool initialized; + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + CPULoongArchState *env = &cpu->env; + int i, num; - if (cs->exception_index == EXCCODE_INT) { - /* Interrupt */ - uint32_t vector = 0; - uint32_t pending = FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS); - pending &= FIELD_EX64(env->CSR_ECFG, CSR_ECFG, LIE); - - /* Find the highest-priority interrupt. */ - vector = 31 - clz32(pending); - set_pc(env, env->CSR_EENTRY + \ - (EXCCODE_EXTERNAL_INT + vector) * vec_size); - qemu_log_mask(CPU_LOG_INT, - "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " cause %d\n" " A " TARGET_FMT_lx " D " - TARGET_FMT_lx " vector = %d ExC " TARGET_FMT_lx "ExS" - TARGET_FMT_lx "\n", - __func__, env->pc, env->CSR_ERA, - cause, env->CSR_BADV, env->CSR_DERA, vector, - env->CSR_ECFG, env->CSR_ESTAT); - } else { - if (tlbfill) { - set_pc(env, env->CSR_TLBRENTRY); - } else { - set_pc(env, env->CSR_EENTRY + EXCODE_MCODE(cause) * vec_size); + if (!initialized) { + initialized = true; + num = FIELD_EX64(env->CSR_PRCFG1, CSR_PRCFG1, SAVE_NUM); + for (i = num; i < 16; i++) { + set_csr_flag(LOONGARCH_CSR_SAVE(i), CSRFL_UNUSED); } - qemu_log_mask(CPU_LOG_INT, - "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " cause %d%s\n, ESTAT " TARGET_FMT_lx - " EXCFG " TARGET_FMT_lx " BADVA " TARGET_FMT_lx - "BADI " TARGET_FMT_lx " SYS_NUM " TARGET_FMT_lu - " cpu %d asid " TARGET_FMT_lx "\n", __func__, env->pc, - tlbfill ? env->CSR_TLBRERA : env->CSR_ERA, - cause, tlbfill ? "(refill)" : "", env->CSR_ESTAT, - env->CSR_ECFG, - tlbfill ? 
env->CSR_TLBRBADV : env->CSR_BADV, - env->CSR_BADI, env->gpr[11], cs->cpu_index, - env->CSR_ASID); + set_csr_flag(LOONGARCH_CSR_IMPCTL1, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_IMPCTL2, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRCTL, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRINFO1, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRINFO2, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRENTRY, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRERA, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRSAVE, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_CTAG, CSRFL_UNUSED); } - cs->exception_index = -1; +#endif } -static void loongarch_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr) +static bool loongarch_get_lsx(Object *obj, Error **errp) { - CPULoongArchState *env = cpu_env(cs); - - if (access_type == MMU_INST_FETCH) { - do_raise_exception(env, EXCCODE_ADEF, retaddr); - } else { - do_raise_exception(env, EXCCODE_ADEM, retaddr); - } + return LOONGARCH_CPU(obj)->lsx != ON_OFF_AUTO_OFF; } -static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +static void loongarch_set_lsx(Object *obj, bool value, Error **errp) { - if (interrupt_request & CPU_INTERRUPT_HARD) { - CPULoongArchState *env = cpu_env(cs); - - if (cpu_loongarch_hw_interrupts_enabled(env) && - cpu_loongarch_hw_interrupts_pending(env)) { - /* Raise it */ - cs->exception_index = EXCCODE_INT; - loongarch_cpu_do_interrupt(cs); - return true; + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + uint32_t val; + + cpu->lsx = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + if (cpu->lsx == ON_OFF_AUTO_OFF) { + cpu->lasx = ON_OFF_AUTO_OFF; + if (cpu->lasx == ON_OFF_AUTO_ON) { + error_setg(errp, "Failed to disable LSX since LASX is enabled"); + return; } } - return false; -} -#endif -static void loongarch_cpu_synchronize_from_tb(CPUState *cs, - const TranslationBlock *tb) -{ - tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); - set_pc(cpu_env(cs), tb->pc); + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; + } + + /* LSX feature detection in TCG mode */ + val = cpu->env.cpucfg[2]; + if (cpu->lsx == ON_OFF_AUTO_ON) { + if (FIELD_EX32(val, CPUCFG2, LSX) == 0) { + error_setg(errp, "Failed to enable LSX in TCG mode"); + return; + } + } else { + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, 0); + val = cpu->env.cpucfg[2]; + } + + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LSX, value); } -static void loongarch_restore_state_to_opc(CPUState *cs, - const TranslationBlock *tb, - const uint64_t *data) +static bool loongarch_get_lasx(Object *obj, Error **errp) { - set_pc(cpu_env(cs), data[0]); + return LOONGARCH_CPU(obj)->lasx != ON_OFF_AUTO_OFF; } -#endif /* CONFIG_TCG */ -#ifndef CONFIG_USER_ONLY -static bool loongarch_cpu_has_work(CPUState *cs) +static void loongarch_set_lasx(Object *obj, bool value, Error **errp) { - bool has_work = false; + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + uint32_t val; - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && - cpu_loongarch_hw_interrupts_pending(cpu_env(cs))) { - has_work = true; + cpu->lasx = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + if ((cpu->lsx == ON_OFF_AUTO_OFF) && (cpu->lasx == ON_OFF_AUTO_ON)) { + error_setg(errp, "Failed to enable LASX since LSX is disabled"); + return; } - return has_work; + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; + } + + /* LASX feature detection in TCG mode */ + val = cpu->env.cpucfg[2]; + if (cpu->lasx == ON_OFF_AUTO_ON) { + if (FIELD_EX32(val, CPUCFG2, LASX) == 0) { + error_setg(errp, "Failed to enable LASX in TCG mode"); + return; + } + } + + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, value); } -#endif /* !CONFIG_USER_ONLY */ -static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch) +static bool loongarch_get_msgint(Object *obj, Error **errp) { - CPULoongArchState *env = cpu_env(cs); + return LOONGARCH_CPU(obj)->msgint != ON_OFF_AUTO_OFF; +} + +static void loongarch_set_msgint(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + cpu->msgint = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) { - return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; } - return MMU_DA_IDX; + + cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, value); } -static void loongarch_la464_init_csr(Object *obj) +static void loongarch_cpu_post_init(Object *obj) { -#ifndef CONFIG_USER_ONLY - static bool initialized; LoongArchCPU *cpu = LOONGARCH_CPU(obj); - CPULoongArchState *env = &cpu->env; - int i, num; - if (!initialized) { - initialized = true; - num = FIELD_EX64(env->CSR_PRCFG1, CSR_PRCFG1, SAVE_NUM); - for (i = num; i < 16; i++) { - set_csr_flag(LOONGARCH_CSR_SAVE(i), CSRFL_UNUSED); - } - set_csr_flag(LOONGARCH_CSR_IMPCTL1, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_IMPCTL2, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRCTL, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRINFO1, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRINFO2, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRENTRY, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRERA, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRSAVE, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_CTAG, CSRFL_UNUSED); + cpu->lbt = ON_OFF_AUTO_OFF; + cpu->pmu = ON_OFF_AUTO_OFF; + cpu->lsx = ON_OFF_AUTO_AUTO; + cpu->lasx = ON_OFF_AUTO_AUTO; + object_property_add_bool(obj, "lsx", loongarch_get_lsx, + loongarch_set_lsx); + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + object_property_add_bool(obj, "msgint", loongarch_get_msgint, + loongarch_set_msgint); + /* lbt is enabled only in kvm mode, not supported in tcg mode */ + if (kvm_enabled()) { + kvm_loongarch_cpu_post_init(cpu); } -#endif } static void loongarch_la464_initfn(Object *obj) @@ -431,7 +274,7 @@ static void loongarch_la464_initfn(Object *obj) data = FIELD_DP32(data, CPUCFG1, EP, 1); data = FIELD_DP32(data, CPUCFG1, RPLV, 1); data = FIELD_DP32(data, CPUCFG1, HP, 1); - data = FIELD_DP32(data, CPUCFG1, IOCSR_BRD, 1); + data = FIELD_DP32(data, CPUCFG1, CRC, 1); env->cpucfg[1] = data; data = 0; @@ -502,6 +345,7 @@ static void loongarch_la464_initfn(Object *obj) env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_WAYS, 7); env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_SETS, 8); + cpu->msgint = ON_OFF_AUTO_OFF; loongarch_la464_init_csr(obj); loongarch_cpu_post_init(obj); } @@ -530,14 +374,21 @@ static void loongarch_la132_initfn(Object *obj) data = FIELD_DP32(data, CPUCFG1, EP, 
0); data = FIELD_DP32(data, CPUCFG1, RPLV, 0); data = FIELD_DP32(data, CPUCFG1, HP, 1); - data = FIELD_DP32(data, CPUCFG1, IOCSR_BRD, 1); + data = FIELD_DP32(data, CPUCFG1, CRC, 1); env->cpucfg[1] = data; + cpu->msgint = ON_OFF_AUTO_OFF; } static void loongarch_max_initfn(Object *obj) { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); /* '-cpu max' for TCG: we use cpu la464. */ loongarch_la464_initfn(obj); + + if (tcg_enabled()) { + cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, 1); + cpu->msgint = ON_OFF_AUTO_AUTO; + } } static void loongarch_cpu_reset_hold(Object *obj, ResetType type) @@ -662,96 +513,6 @@ static void loongarch_cpu_unrealizefn(DeviceState *dev) lacc->parent_unrealize(dev); } -static bool loongarch_get_lsx(Object *obj, Error **errp) -{ - return LOONGARCH_CPU(obj)->lsx != ON_OFF_AUTO_OFF; -} - -static void loongarch_set_lsx(Object *obj, bool value, Error **errp) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - uint32_t val; - - cpu->lsx = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if (cpu->lsx == ON_OFF_AUTO_OFF) { - cpu->lasx = ON_OFF_AUTO_OFF; - if (cpu->lasx == ON_OFF_AUTO_ON) { - error_setg(errp, "Failed to disable LSX since LASX is enabled"); - return; - } - } - - if (kvm_enabled()) { - /* kvm feature detection in function kvm_arch_init_vcpu */ - return; - } - - /* LSX feature detection in TCG mode */ - val = cpu->env.cpucfg[2]; - if (cpu->lsx == ON_OFF_AUTO_ON) { - if (FIELD_EX32(val, CPUCFG2, LSX) == 0) { - error_setg(errp, "Failed to enable LSX in TCG mode"); - return; - } - } else { - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, 0); - val = cpu->env.cpucfg[2]; - } - - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LSX, value); -} - -static bool loongarch_get_lasx(Object *obj, Error **errp) -{ - return LOONGARCH_CPU(obj)->lasx != ON_OFF_AUTO_OFF; -} - -static void loongarch_set_lasx(Object *obj, bool value, Error **errp) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - uint32_t val; - - cpu->lasx = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if ((cpu->lsx == ON_OFF_AUTO_OFF) && (cpu->lasx == ON_OFF_AUTO_ON)) { - error_setg(errp, "Failed to enable LASX since lSX is disabled"); - return; - } - - if (kvm_enabled()) { - /* kvm feature detection in function kvm_arch_init_vcpu */ - return; - } - - /* LASX feature detection in TCG mode */ - val = cpu->env.cpucfg[2]; - if (cpu->lasx == ON_OFF_AUTO_ON) { - if (FIELD_EX32(val, CPUCFG2, LASX) == 0) { - error_setg(errp, "Failed to enable LASX in TCG mode"); - return; - } - } - - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, value); -} - -void loongarch_cpu_post_init(Object *obj) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - - cpu->lbt = ON_OFF_AUTO_OFF; - cpu->pmu = ON_OFF_AUTO_OFF; - cpu->lsx = ON_OFF_AUTO_AUTO; - cpu->lasx = ON_OFF_AUTO_AUTO; - object_property_add_bool(obj, "lsx", loongarch_get_lsx, - loongarch_set_lsx); - object_property_add_bool(obj, "lasx", loongarch_get_lasx, - loongarch_set_lasx); - /* lbt is enabled only in kvm mode, not supported in tcg mode */ - if (kvm_enabled()) { - kvm_loongarch_cpu_post_init(cpu); - } -} - static void loongarch_cpu_init(Object *obj) { #ifndef CONFIG_USER_ONLY @@ -860,25 +621,6 @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -#ifdef CONFIG_TCG -#include "accel/tcg/cpu-ops.h" - -static const TCGCPUOps loongarch_tcg_ops = { - .initialize = loongarch_translate_init, - .translate_code = loongarch_translate_code, - .synchronize_from_tb = loongarch_cpu_synchronize_from_tb, - .restore_state_to_opc = loongarch_restore_state_to_opc, - -#ifndef CONFIG_USER_ONLY - .tlb_fill = loongarch_cpu_tlb_fill, - .cpu_exec_interrupt = loongarch_cpu_exec_interrupt, - .cpu_exec_halt = loongarch_cpu_has_work, - .do_interrupt = loongarch_cpu_do_interrupt, - .do_transaction_failed = loongarch_cpu_do_transaction_failed, -#endif -}; -#endif /* CONFIG_TCG */ - #ifndef CONFIG_USER_ONLY #include "hw/core/sysemu-cpu-ops.h" @@ -903,7 +645,7 @@ static const Property loongarch_cpu_properties[] = { DEFINE_PROP_INT32("node-id", LoongArchCPU, node_id, CPU_UNSET_NUMA_NODE_ID), }; -static void loongarch_cpu_class_init(ObjectClass *c, void *data) +static void loongarch_cpu_class_init(ObjectClass *c, const void *data) { LoongArchCPUClass *lacc = LOONGARCH_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); @@ -919,7 +661,6 @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data) &lacc->parent_phases); cc->class_by_name = loongarch_cpu_class_by_name; - cc->mmu_index = loongarch_cpu_mmu_index; cc->dump_state = loongarch_cpu_dump_state; cc->set_pc = loongarch_cpu_set_pc; cc->get_pc = loongarch_cpu_get_pc; @@ -944,7 +685,7 @@ static const gchar *loongarch32_gdb_arch_name(CPUState *cs) return "loongarch32"; } -static void loongarch32_cpu_class_init(ObjectClass *c, void *data) +static void loongarch32_cpu_class_init(ObjectClass *c, const void *data) { CPUClass *cc = CPU_CLASS(c); @@ -957,7 +698,7 @@ static const gchar *loongarch64_gdb_arch_name(CPUState *cs) return "loongarch64"; } -static void loongarch64_cpu_class_init(ObjectClass *c, void *data) +static void loongarch64_cpu_class_init(ObjectClass *c, const void *data) { CPUClass *cc = CPU_CLASS(c); diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index 254e4fb..b8e3b46 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -9,37 +9,18 @@ #define LOONGARCH_CPU_H #include "qemu/int128.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "fpu/softfloat-types.h" #include "hw/registerfields.h" 
#include "qemu/timer.h" #ifndef CONFIG_USER_ONLY -#include "exec/memory.h" +#include "system/memory.h" #endif #include "cpu-csr.h" #include "cpu-qom.h" -#define IOCSRF_TEMP 0 -#define IOCSRF_NODECNT 1 -#define IOCSRF_MSI 2 -#define IOCSRF_EXTIOI 3 -#define IOCSRF_CSRIPI 4 -#define IOCSRF_FREQCSR 5 -#define IOCSRF_FREQSCALE 6 -#define IOCSRF_DVFSV1 7 -#define IOCSRF_GMOD 9 -#define IOCSRF_VM 11 - -#define VERSION_REG 0x0 -#define FEATURE_REG 0x8 -#define VENDOR_REG 0x10 -#define CPUNAME_REG 0x20 -#define MISC_FUNC_REG 0x420 -#define IOCSRM_EXTIOI_EN 48 -#define IOCSRM_EXTIOI_INT_ENCODE 49 - -#define IOCSR_MEM_SIZE 0x428 - #define FCSR0_M1 0x1f /* FCSR1 mask, Enables */ #define FCSR0_M2 0x1f1f0000 /* FCSR2 mask, Cause and Flags */ #define FCSR0_M3 0x300 /* FCSR3 mask, Round Mode */ @@ -129,7 +110,7 @@ FIELD(CPUCFG1, RI, 21, 1) FIELD(CPUCFG1, EP, 22, 1) FIELD(CPUCFG1, RPLV, 23, 1) FIELD(CPUCFG1, HP, 24, 1) -FIELD(CPUCFG1, IOCSR_BRD, 25, 1) +FIELD(CPUCFG1, CRC, 25, 1) FIELD(CPUCFG1, MSG_INT, 26, 1) /* cpucfg[1].arch */ @@ -236,9 +217,10 @@ FIELD(CSR_CRMD, WE, 9, 1) extern const char * const regnames[32]; extern const char * const fregnames[32]; -#define N_IRQS 13 +#define N_IRQS 15 #define IRQ_TIMER 11 #define IRQ_IPI 12 +#define INT_DMSI 14 #define LOONGARCH_STLB 2048 /* 2048 STLB */ #define LOONGARCH_MTLB 64 /* 64 MTLB */ @@ -252,6 +234,13 @@ FIELD(TLB_MISC, ASID, 1, 10) FIELD(TLB_MISC, VPPN, 13, 35) FIELD(TLB_MISC, PS, 48, 6) +/*Msg interrupt registers */ +#define N_MSGIS 4 +FIELD(CSR_MSGIS, IS, 0, 63) +FIELD(CSR_MSGIR, INTNUM, 0, 8) +FIELD(CSR_MSGIR, ACTIVE, 31, 1) +FIELD(CSR_MSGIE, PT, 0, 8) + #define LSX_LEN (128) #define LASX_LEN (256) @@ -369,6 +358,10 @@ typedef struct CPUArchState { uint64_t CSR_DBG; uint64_t CSR_DERA; uint64_t CSR_DSAVE; + /* Msg interrupt registers */ + uint64_t CSR_MSGIS[N_MSGIS]; + uint64_t CSR_MSGIR; + uint64_t CSR_MSGIE; struct { uint64_t guest_addr; } stealtime; @@ -385,11 +378,7 @@ typedef struct CPUArchState { #endif AddressSpace *address_space_iocsr; - bool load_elf; - uint64_t elf_address; uint32_t mp_state; - - struct loongarch_boot_info *boot_info; #endif } CPULoongArchState; @@ -415,6 +404,7 @@ struct ArchCPU { OnOffAuto pmu; OnOffAuto lsx; OnOffAuto lasx; + OnOffAuto msgint; OnOffAuto kvm_pv_ipi; OnOffAuto kvm_steal_time; int32_t socket_id; /* socket-id of this CPU */ @@ -490,30 +480,6 @@ static inline void set_pc(CPULoongArchState *env, uint64_t value) #define HW_FLAGS_VA32 0x20 #define HW_FLAGS_EUEN_ASXE 0x40 -static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *cs_base = 0; - *flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); - *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; - *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; - *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE; - *flags |= is_va32(env) * HW_FLAGS_VA32; -} - -#include "exec/cpu-all.h" - #define CPU_RESOLVING_TYPE TYPE_LOONGARCH_CPU -void loongarch_cpu_post_init(Object *obj); - -#ifdef CONFIG_KVM -void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu); -#else -static inline void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu) -{ -} -#endif - #endif /* LOONGARCH_CPU_H */ diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c index 930466c..4a9db3e 100644 --- a/target/loongarch/cpu_helper.c +++ b/target/loongarch/cpu_helper.c @@ -7,28 +7,52 @@ */ #include "qemu/osdep.h" +#include "system/tcg.h" #include 
"cpu.h" +#include "accel/tcg/cpu-mmu-index.h" +#include "exec/target_page.h" #include "internals.h" #include "cpu-csr.h" +#include "cpu-mmu.h" +#include "tcg/tcg_loongarch.h" -#ifdef CONFIG_TCG -static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - int access_type, int index, int mmu_idx) +void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, + uint64_t *dir_width, target_ulong level) +{ + switch (level) { + case 1: + *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); + *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); + break; + case 2: + *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); + *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); + break; + case 3: + *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); + *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); + break; + case 4: + *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); + *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); + break; + default: + /* level may be zero for ldpte */ + *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); + *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); + break; + } +} + +TLBRet loongarch_check_pte(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx) { - LoongArchTLB *tlb = &env->tlb[index]; uint64_t plv = mmu_idx; uint64_t tlb_entry, tlb_ppn; - uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; + uint8_t tlb_ps, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - n = (address >> tlb_ps) & 0x1;/* Odd or even */ - - tlb_entry = n ? tlb->tlb_entry1 : tlb->tlb_entry0; + tlb_entry = context->pte; + tlb_ps = context->ps; tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); @@ -45,7 +69,7 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, } /* Remove sw bit between bit12 -- bit PS*/ - tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); + tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) - 1)); /* Check access rights */ if (!tlb_v) { @@ -69,87 +93,30 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, return TLBRET_DIRTY; } - *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | - (address & MAKE_64BIT_MASK(0, tlb_ps)); - *prot = PAGE_READ; + context->physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | + (context->addr & MAKE_64BIT_MASK(0, tlb_ps)); + context->prot = PAGE_READ; if (tlb_d) { - *prot |= PAGE_WRITE; + context->prot |= PAGE_WRITE; } if (!tlb_nx) { - *prot |= PAGE_EXEC; + context->prot |= PAGE_EXEC; } return TLBRET_MATCH; } -/* - * One tlb entry holds an adjacent odd/even pair, the vpn is the - * content of the virtual page number divided by 2. So the - * compare vpn is bit[47:15] for 16KiB page. while the vppn - * field in tlb entry contains bit[47:13], so need adjust. 
- * virt_vpn = vaddr[47:13] - */ -bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, - int *index) -{ - LoongArchTLB *tlb; - uint16_t csr_asid, tlb_asid, stlb_idx; - uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; - int i, compare_shift; - uint64_t vpn, tlb_vppn; - - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); - stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ - compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - - /* Search STLB */ - for (i = 0; i < 8; ++i) { - tlb = &env->tlb[i * 256 + stlb_idx]; - tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - if (tlb_e) { - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - - if ((tlb_g == 1 || tlb_asid == csr_asid) && - (vpn == (tlb_vppn >> compare_shift))) { - *index = i * 256 + stlb_idx; - return true; - } - } - } - - /* Search MTLB */ - for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { - tlb = &env->tlb[i]; - tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - if (tlb_e) { - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - if ((tlb_g == 1 || tlb_asid == csr_asid) && - (vpn == (tlb_vppn >> compare_shift))) { - *index = i; - return true; - } - } - } - return false; -} - -static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address) +static TLBRet loongarch_page_table_walker(CPULoongArchState *env, + MMUContext *context, + int access_type, int mmu_idx) { CPUState *cs = env_cpu(env); target_ulong index, phys; uint64_t dir_base, dir_width; uint64_t base; int level; + vaddr address; + address = context->addr; if ((address >> 63) & 0x1) { base = env->CSR_PGDH; } else { @@ -191,65 +158,38 @@ static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical, base = ldq_phys(cs->as, phys); } - /* TODO: check plv and other bits? 
*/ - - /* base is pte, in normal pte format */ - if (!FIELD_EX64(base, TLBENTRY, V)) { - return TLBRET_NOMATCH; - } - - if (!FIELD_EX64(base, TLBENTRY, D)) { - *prot = PAGE_READ; - } else { - *prot = PAGE_READ | PAGE_WRITE; - } - - /* get TARGET_PAGE_SIZE aligned physical address */ - base += (address & TARGET_PHYS_MASK) & ((1 << dir_base) - 1); - /* mask RPLV, NX, NR bits */ - base = FIELD_DP64(base, TLBENTRY_64, RPLV, 0); - base = FIELD_DP64(base, TLBENTRY_64, NX, 0); - base = FIELD_DP64(base, TLBENTRY_64, NR, 0); - /* mask other attribute bits */ - *physical = base & TARGET_PAGE_MASK; - - return 0; + context->ps = dir_base; + context->pte = base; + return loongarch_check_pte(env, context, access_type, mmu_idx); } -static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, - int is_debug) +static TLBRet loongarch_map_address(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug) { - int index, match; + TLBRet ret; + + if (tcg_enabled()) { + ret = loongarch_get_addr_from_tlb(env, context, access_type, mmu_idx); + if (ret != TLBRET_NOMATCH) { + return ret; + } + } - match = loongarch_tlb_search(env, address, &index); - if (match) { - return loongarch_map_tlb_entry(env, physical, prot, - address, access_type, index, mmu_idx); - } else if (is_debug) { + if (is_debug) { /* * For debugger memory access, we want to do the map when there is a * legal mapping, even if the mapping is not yet in TLB. return 0 if * there is a valid map, else non-zero. */ - return loongarch_page_table_walker(env, physical, prot, address); + return loongarch_page_table_walker(env, context, access_type, mmu_idx); } return TLBRET_NOMATCH; } -#else -static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, - int is_debug) -{ - return TLBRET_NOMATCH; -} -#endif -static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, - target_ulong dmw) +static hwaddr dmw_va2pa(CPULoongArchState *env, vaddr va, target_ulong dmw) { if (is_la64(env)) { return va & TARGET_VIRT_MASK; @@ -260,9 +200,9 @@ static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, } } -int get_physical_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, int is_debug) +TLBRet get_physical_address(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug) { int user_mode = mmu_idx == MMU_USER_IDX; int kernel_mode = mmu_idx == MMU_KERNEL_IDX; @@ -270,11 +210,13 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical, int64_t addr_high; uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); + vaddr address; /* Check PG and DA */ + address = context->addr; if (da & !pg) { - *physical = address & TARGET_PHYS_MASK; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + context->physical = address & TARGET_PHYS_MASK; + context->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return TLBRET_MATCH; } @@ -292,32 +234,31 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical, base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); } if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { - *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + context->physical = dmw_va2pa(env, address, 
env->CSR_DMW[i]); + context->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return TLBRET_MATCH; } } /* Check valid extension */ - addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); - if (!(addr_high == 0 || addr_high == -1)) { + addr_high = (int64_t)address >> (TARGET_VIRT_ADDR_SPACE_BITS - 1); + if (!(addr_high == 0 || addr_high == -1ULL)) { return TLBRET_BADADDR; } /* Mapped address */ - return loongarch_map_address(env, physical, prot, address, - access_type, mmu_idx, is_debug); + return loongarch_map_address(env, context, access_type, mmu_idx, is_debug); } hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { CPULoongArchState *env = cpu_env(cs); - hwaddr phys_addr; - int prot; + MMUContext context; - if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, - cpu_mmu_index(cs, false), 1) != 0) { + context.addr = addr; + if (get_physical_address(env, &context, MMU_DATA_LOAD, + cpu_mmu_index(cs, false), 1) != TLBRET_MATCH) { return -1; } - return phys_addr; + return context.physical; } diff --git a/target/loongarch/csr.c b/target/loongarch/csr.c index 7ea0a30..f973780 100644 --- a/target/loongarch/csr.c +++ b/target/loongarch/csr.c @@ -97,6 +97,11 @@ static CSRInfo csr_info[] = { CSR_OFF(DBG), CSR_OFF(DERA), CSR_OFF(DSAVE), + CSR_OFF_ARRAY(MSGIS, 0), + CSR_OFF_ARRAY(MSGIS, 1), + CSR_OFF_ARRAY(MSGIS, 2), + CSR_OFF_ARRAY(MSGIS, 3), + CSR_OFF(MSGIR), }; CSRInfo *get_csr(unsigned int csr_num) diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index 1d5cb01..99981ab 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -1,722 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Copyright (c) 2021 Loongson Technology Corporation Limited + * Copyright (c) 2025 Loongson Technology Corporation Limited */ -DEF_HELPER_2(raise_exception, noreturn, env, i32) - -DEF_HELPER_FLAGS_1(bitrev_w, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(bitrev_d, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl) - -DEF_HELPER_FLAGS_3(asrtle_d, TCG_CALL_NO_WG, void, env, tl, tl) -DEF_HELPER_FLAGS_3(asrtgt_d, TCG_CALL_NO_WG, void, env, tl, tl) - -DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl) -DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl) -DEF_HELPER_FLAGS_2(cpucfg, TCG_CALL_NO_RWG_SE, tl, env, tl) - -/* Floating-point helper */ -DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fsub_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fsub_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmul_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmul_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fdiv_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fdiv_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmax_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmax_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmin_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmin_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmaxa_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmaxa_d, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmina_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fmina_d, TCG_CALL_NO_WG, i64, env, i64, i64) - -DEF_HELPER_FLAGS_5(fmuladd_s, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i32) -DEF_HELPER_FLAGS_5(fmuladd_d, TCG_CALL_NO_WG, i64, 
env, i64, i64, i64, i32) - -DEF_HELPER_FLAGS_3(fscaleb_s, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(fscaleb_d, TCG_CALL_NO_WG, i64, env, i64, i64) - -DEF_HELPER_FLAGS_2(flogb_s, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(flogb_d, TCG_CALL_NO_WG, i64, env, i64) - -DEF_HELPER_FLAGS_2(fsqrt_s, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(fsqrt_d, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(frsqrt_s, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(frsqrt_d, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(frecip_s, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(frecip_d, TCG_CALL_NO_WG, i64, env, i64) - -DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, i64, env, i64) -DEF_HELPER_FLAGS_2(fclass_d, TCG_CALL_NO_RWG_SE, i64, env, i64) - -/* fcmp.cXXX.s */ -DEF_HELPER_4(fcmp_c_s, i64, env, i64, i64, i32) -/* fcmp.sXXX.s */ -DEF_HELPER_4(fcmp_s_s, i64, env, i64, i64, i32) -/* fcmp.cXXX.d */ -DEF_HELPER_4(fcmp_c_d, i64, env, i64, i64, i32) -/* fcmp.sXXX.d */ -DEF_HELPER_4(fcmp_s_d, i64, env, i64, i64, i32) - -DEF_HELPER_2(fcvt_d_s, i64, env, i64) -DEF_HELPER_2(fcvt_s_d, i64, env, i64) -DEF_HELPER_2(ffint_d_w, i64, env, i64) -DEF_HELPER_2(ffint_d_l, i64, env, i64) -DEF_HELPER_2(ffint_s_w, i64, env, i64) -DEF_HELPER_2(ffint_s_l, i64, env, i64) -DEF_HELPER_2(ftintrm_l_s, i64, env, i64) -DEF_HELPER_2(ftintrm_l_d, i64, env, i64) -DEF_HELPER_2(ftintrm_w_s, i64, env, i64) -DEF_HELPER_2(ftintrm_w_d, i64, env, i64) -DEF_HELPER_2(ftintrp_l_s, i64, env, i64) -DEF_HELPER_2(ftintrp_l_d, i64, env, i64) -DEF_HELPER_2(ftintrp_w_s, i64, env, i64) -DEF_HELPER_2(ftintrp_w_d, i64, env, i64) -DEF_HELPER_2(ftintrz_l_s, i64, env, i64) -DEF_HELPER_2(ftintrz_l_d, i64, env, i64) -DEF_HELPER_2(ftintrz_w_s, i64, env, i64) -DEF_HELPER_2(ftintrz_w_d, i64, env, i64) -DEF_HELPER_2(ftintrne_l_s, i64, env, i64) -DEF_HELPER_2(ftintrne_l_d, i64, env, i64) -DEF_HELPER_2(ftintrne_w_s, i64, env, i64) -DEF_HELPER_2(ftintrne_w_d, i64, env, i64) -DEF_HELPER_2(ftint_l_s, i64, env, i64) -DEF_HELPER_2(ftint_l_d, i64, env, i64) -DEF_HELPER_2(ftint_w_s, i64, env, i64) -DEF_HELPER_2(ftint_w_d, i64, env, i64) -DEF_HELPER_2(frint_s, i64, env, i64) -DEF_HELPER_2(frint_d, i64, env, i64) - -DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env) - -DEF_HELPER_1(rdtime_d, i64, env) - -#ifndef CONFIG_USER_ONLY -/* CSRs helper */ -DEF_HELPER_1(csrrd_pgd, i64, env) -DEF_HELPER_1(csrrd_cpuid, i64, env) -DEF_HELPER_1(csrrd_tval, i64, env) -DEF_HELPER_2(csrwr_stlbps, i64, env, tl) -DEF_HELPER_2(csrwr_estat, i64, env, tl) -DEF_HELPER_2(csrwr_asid, i64, env, tl) -DEF_HELPER_2(csrwr_tcfg, i64, env, tl) -DEF_HELPER_2(csrwr_ticlr, i64, env, tl) -DEF_HELPER_2(csrwr_pwcl, i64, env, tl) -DEF_HELPER_2(iocsrrd_b, i64, env, tl) -DEF_HELPER_2(iocsrrd_h, i64, env, tl) -DEF_HELPER_2(iocsrrd_w, i64, env, tl) -DEF_HELPER_2(iocsrrd_d, i64, env, tl) -DEF_HELPER_3(iocsrwr_b, void, env, tl, tl) -DEF_HELPER_3(iocsrwr_h, void, env, tl, tl) -DEF_HELPER_3(iocsrwr_w, void, env, tl, tl) -DEF_HELPER_3(iocsrwr_d, void, env, tl, tl) - -/* TLB helper */ -DEF_HELPER_1(tlbwr, void, env) -DEF_HELPER_1(tlbfill, void, env) -DEF_HELPER_1(tlbsrch, void, env) -DEF_HELPER_1(tlbrd, void, env) -DEF_HELPER_1(tlbclr, void, env) -DEF_HELPER_1(tlbflush, void, env) -DEF_HELPER_1(invtlb_all, void, env) -DEF_HELPER_2(invtlb_all_g, void, env, i32) -DEF_HELPER_2(invtlb_all_asid, void, env, tl) -DEF_HELPER_3(invtlb_page_asid, void, env, tl, tl) -DEF_HELPER_3(invtlb_page_asid_or_g, void, env, tl, tl) - -DEF_HELPER_4(lddir, tl, env, tl, tl, i32) 
-DEF_HELPER_4(ldpte, void, env, tl, tl, i32) -DEF_HELPER_1(ertn, void, env) -DEF_HELPER_1(idle, void, env) -#endif - -/* LoongArch LSX */ -DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsubwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsubwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_h_bu, TCG_CALL_NO_RWG, 
void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsubwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwev_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vabsd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmini_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vmaxi_b, TCG_CALL_NO_RWG, void, 
ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vmuh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmuh_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmulwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmulwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmulwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmadd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmsub_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmsub_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - 
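The vmaddwev_*/vmaddwod_* helpers above implement LSX's even/odd widening multiply-accumulate. A minimal stand-alone sketch of the per-lane semantics (plain C reference under the usual 128-bit/16-lane assumption, not QEMU's actual implementation):

#include <stdint.h>

/* vmaddwev.h.b: widen the even-indexed signed byte lanes, multiply,
 * and accumulate into halfword lanes; vmaddwod.h.b uses the odd lanes. */
static void vmaddwev_h_b_ref(int16_t vd[8], const int8_t vj[16],
                             const int8_t vk[16])
{
    for (int i = 0; i < 8; i++) {
        vd[i] += (int16_t)vj[2 * i] * (int16_t)vk[2 * i];
    }
}

static void vmaddwod_h_b_ref(int16_t vd[8], const int8_t vj[16],
                             const int8_t vk[16])
{
    for (int i = 0; i < 8; i++) {
        vd[i] += (int16_t)vj[2 * i + 1] * (int16_t)vk[2 * i + 1];
    }
}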
-DEF_HELPER_FLAGS_4(vmaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vmaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_d_h, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - 
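Among the shift helpers above, the vsrlr*/vsrlri* group is "shift right with rounding": half an output ULP is added before the shift. A scalar sketch of one 8-bit lane, as a reference only (the imm == 0 special case avoids an undefined shift in the rounding term):

#include <stdint.h>

/* Rounded logical shift right of one byte lane, as in vsrlri.b.
 * Widen before adding the rounding bias so the carry is not lost. */
static uint8_t vsrlri_b_lane(uint8_t x, unsigned imm) /* imm in [0, 7] */
{
    if (imm == 0) {
        return x;
    }
    return (uint8_t)(((unsigned)x + (1u << (imm - 1))) >> imm);
}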
-DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, 
i64, i32) -DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vpcnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitclr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitclr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitclri_b, TCG_CALL_NO_RWG, 
void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitclri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitclri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitclri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vbitset_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitset_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitset_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitset_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitseti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitseti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitseti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitseti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vbitrev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitrev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitrev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitrev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) 
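The vftintr{ne,z,p,m}_* helpers listed here differ only in the rounding mode forced for the conversion; the usual softfloat pattern is to swap the mode in fp_status around the conversion and restore it afterwards. A sketch of that pattern for a single float32 lane, assuming QEMU's fpu/softfloat.h API (the function name is illustrative, not one of the helpers above):

/* Assumes "cpu.h" and "fpu/softfloat.h"; sketch of the round-toward-zero
 * variant of the pattern shared by the vftintr*_w_s helpers. */
static int32_t ftintrz_w_s_one(CPULoongArchState *env, float32 f)
{
    FloatRoundMode old = get_float_rounding_mode(&env->fp_status);
    int32_t r;

    /* force round-toward-zero, convert, then restore the guest's mode */
    set_float_rounding_mode(float_round_to_zero, &env->fp_status);
    r = float32_to_int32(f, &env->fp_status);
    set_float_rounding_mode(old, &env->fp_status);
    return r;
}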
-DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) -DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) - -DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vseqi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vseqi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vslei_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslei_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vslti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32) -DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32) -DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32) -DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32) - -DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32) -DEF_HELPER_4(vsetanyeqz_h, void, env, i32, i32, i32) -DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32) -DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32) -DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32) -DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32) -DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32) -DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32) - 
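The vsetanyeqz_*/vsetallnez_* helpers just above reduce a whole vector to a single condition-flag bit (some lane zero vs. all lanes non-zero), which is what makes them useful in string-scanning loops. A reference sketch of the byte variant, with an illustrative 16-lane width:

#include <stdbool.h>
#include <stdint.h>

/* vsetanyeqz.b sets FCC[cd] when some lane is zero;
 * vsetallnez.b is its complement. */
static bool any_lane_is_zero(const uint8_t vj[16])
{
    for (int i = 0; i < 16; i++) {
        if (vj[i] == 0) {
            return true;
        }
    }
    return false;
}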
-DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) - -DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) -DEF_HELPER_FLAGS_4(vextrins_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +#include "tcg/helper.h" diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h index 9fdc305..8793bd9 100644 --- a/target/loongarch/internals.h +++ b/target/loongarch/internals.h @@ -24,27 +24,12 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, uint32_t exception, 
uintptr_t pc); -const char *loongarch_exception_name(int32_t exception); - #ifdef CONFIG_TCG int ieee_ex_to_loongarch(int xcpt); void restore_fp_status(CPULoongArchState *env); #endif #ifndef CONFIG_USER_ONLY -enum { - TLBRET_MATCH = 0, - TLBRET_BADADDR = 1, - TLBRET_NOMATCH = 2, - TLBRET_INVALID = 3, - TLBRET_DIRTY = 4, - TLBRET_RI = 5, - TLBRET_XI = 6, - TLBRET_PE = 7, -}; - -bool check_ps(CPULoongArchState *ent, uint8_t ps); - extern const VMStateDescription vmstate_loongarch_cpu; void loongarch_cpu_set_irq(void *opaque, int irq, int level); @@ -54,20 +39,8 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, uint64_t value); -bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, - int *index); -int get_physical_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, int is_debug); -void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, - uint64_t *dir_width, target_ulong level); -hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); - -#ifdef CONFIG_TCG -bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); -#endif +bool loongarch_cpu_has_work(CPUState *cs); +bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env); #endif /* !CONFIG_USER_ONLY */ uint64_t read_fcc(CPULoongArchState *env); diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index f0e3cfe..4e4f4e7 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -18,7 +18,7 @@ #include "system/kvm_int.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "hw/boards.h" #include "hw/irq.h" #include "hw/loongarch/virt.h" @@ -325,7 +325,7 @@ static int kvm_loongarch_get_csr(CPUState *cs) return ret; } -static int kvm_loongarch_put_csr(CPUState *cs, int level) +static int kvm_loongarch_put_csr(CPUState *cs, KvmPutState level) { int ret = 0; CPULoongArchState *env = cpu_env(cs); @@ -763,7 +763,7 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { int ret; static int once; @@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp) env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI); } } + return 0; +} +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ return 0; } @@ -1236,6 +1240,22 @@ void kvm_arch_init_irq_routing(KVMState *s) { } +void kvm_loongarch_init_irq_routing(void) +{ + int i; + + kvm_async_interrupts_allowed = true; + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); + if (kvm_has_gsi_routing()) { + for (i = 0; i < KVM_IRQCHIP_NUM_PINS; ++i) { + kvm_irqchip_add_irq_route(kvm_state, i, 0, i); + } + + kvm_gsi_routing_allowed = true; + kvm_irqchip_commit_routes(kvm_state); + } +} + int kvm_arch_get_default_type(MachineState *ms) { return 0; @@ -1249,7 +1269,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) int kvm_arch_irqchip_create(KVMState *s) { - return 0; + if (kvm_kernel_irqchip_split()) { + error_report("kernel_irqchip=split is not supported on LoongArch"); + exit(1); + } + + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } void 
kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h index 1051a34..5147567 100644 --- a/target/loongarch/kvm/kvm_loongarch.h +++ b/target/loongarch/kvm/kvm_loongarch.h @@ -5,11 +5,11 @@ * Copyright (c) 2023 Loongson Technology Corporation Limited */ -#include "cpu.h" - #ifndef QEMU_KVM_LOONGARCH_H #define QEMU_KVM_LOONGARCH_H +void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu); +void kvm_loongarch_init_irq_routing(void); int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); void kvm_arch_reset_vcpu(CPUState *cs); diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c index 6f732d8..1d8cd32 100644 --- a/target/loongarch/loongarch-qmp-cmds.c +++ b/target/loongarch/loongarch-qmp-cmds.c @@ -7,8 +7,9 @@ */ #include "qemu/osdep.h" +#include "qemu/target-info.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" @@ -32,7 +33,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_LOONGARCH_CPU, false); + list = object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, loongarch_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c index 4e70f5c..73190fb 100644 --- a/target/loongarch/machine.c +++ b/target/loongarch/machine.c @@ -45,6 +45,26 @@ static const VMStateDescription vmstate_fpu = { }, }; +static bool msgint_needed(void *opaque) +{ + LoongArchCPU *cpu = opaque; + + return FIELD_EX64(cpu->env.cpucfg[1], CPUCFG1, MSG_INT); +} + +static const VMStateDescription vmstate_msgint = { + .name = "cpu/msgint", + .version_id = 1, + .minimum_version_id = 1, + .needed = msgint_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.CSR_MSGIS, LoongArchCPU, N_MSGIS), + VMSTATE_UINT64(env.CSR_MSGIR, LoongArchCPU), + VMSTATE_UINT64(env.CSR_MSGIE, LoongArchCPU), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_lsxh_reg = { .name = "lsxh_reg", .version_id = 1, @@ -168,8 +188,8 @@ static const VMStateDescription vmstate_tlb = { /* LoongArch CPU state */ const VMStateDescription vmstate_loongarch_cpu = { .name = "cpu", - .version_id = 3, - .minimum_version_id = 3, + .version_id = 4, + .minimum_version_id = 4, .fields = (const VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), VMSTATE_UINTTL(env.pc, LoongArchCPU), @@ -245,6 +265,7 @@ const VMStateDescription vmstate_loongarch_cpu = { &vmstate_tlb, #endif &vmstate_lbt, + &vmstate_msgint, NULL } }; diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c index 6a7a65c..5ebe15f 100644 --- a/target/loongarch/tcg/csr_helper.c +++ b/target/loongarch/tcg/csr_helper.c @@ -13,9 +13,10 @@ #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "hw/irq.h" #include "cpu-csr.h" +#include "cpu-mmu.h" target_ulong helper_csrwr_stlbps(CPULoongArchState *env, target_ulong val) { @@ -25,11 +26,16 @@ target_ulong helper_csrwr_stlbps(CPULoongArchState *env, target_ulong val) * The real hardware only supports the min tlb_ps is 12 * tlb_ps=0 may cause undefined-behavior. 
*/ - uint8_t tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + uint8_t tlb_ps = FIELD_EX64(val, CSR_STLBPS, PS); if (!check_ps(env, tlb_ps)) { qemu_log_mask(LOG_GUEST_ERROR, "Attempted set ps %d\n", tlb_ps); + } else { + /* Only update the PS field; reserved bits stay zero */ + val = FIELD_DP64(val, CSR_STLBPS, RESERVE, 0); + env->CSR_STLBPS = val; } + return old_v; } @@ -68,6 +74,27 @@ target_ulong helper_csrrd_tval(CPULoongArchState *env) return cpu_loongarch_get_constant_timer_ticks(cpu); } +target_ulong helper_csrrd_msgir(CPULoongArchState *env) +{ + int irq, new; + + irq = find_first_bit((unsigned long *)env->CSR_MSGIS, 256); + if (irq < 256) { + clear_bit(irq, (unsigned long *)env->CSR_MSGIS); + new = find_first_bit((unsigned long *)env->CSR_MSGIS, 256); + if (new < 256) { + return irq; + } + + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, MSGINT, 0); + } else { + /* set bit 31 to indicate that no valid irq is pending */ + irq = BIT(31); + } + + return irq; +} + target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val) { int64_t old_v = env->CSR_ESTAT; @@ -131,8 +158,8 @@ target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val) } if (!check_ps(env, ptbase)) { qemu_log_mask(LOG_GUEST_ERROR, - "Attrmpted set ptbase 2^%d\n", ptbase); + "Attempted set ptbase 2^%d\n", ptbase); } - env->CSR_PWCL =val; + env->CSR_PWCL = val; return old_v; } diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c index a83acf6..fc9c64c 100644 --- a/target/loongarch/tcg/fpu_helper.c +++ b/target/loongarch/tcg/fpu_helper.c @@ -8,8 +8,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "fpu/softfloat.h" #include "internals.h" diff --git a/target/loongarch/tcg/helper.h b/target/loongarch/tcg/helper.h new file mode 100644 index 0000000..db57dbf --- /dev/null +++ b/target/loongarch/tcg/helper.h @@ -0,0 +1,723 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +DEF_HELPER_2(raise_exception, noreturn, env, i32) + +DEF_HELPER_FLAGS_1(bitrev_w, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(bitrev_d, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl) + +DEF_HELPER_FLAGS_3(asrtle_d, TCG_CALL_NO_WG, void, env, tl, tl) +DEF_HELPER_FLAGS_3(asrtgt_d, TCG_CALL_NO_WG, void, env, tl, tl) + +DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl) +DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl) +DEF_HELPER_FLAGS_2(cpucfg, TCG_CALL_NO_RWG_SE, tl, env, tl) + +/* Floating-point helper */ +DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fsub_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fsub_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmul_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmul_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fdiv_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fdiv_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmax_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmax_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmin_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmin_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmaxa_s, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmaxa_d, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmina_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fmina_d, TCG_CALL_NO_WG, i64, env, i64, i64) + +DEF_HELPER_FLAGS_5(fmuladd_s, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i32) +DEF_HELPER_FLAGS_5(fmuladd_d, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i32) + +DEF_HELPER_FLAGS_3(fscaleb_s, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(fscaleb_d, TCG_CALL_NO_WG, i64, env, i64, i64) + +DEF_HELPER_FLAGS_2(flogb_s, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(flogb_d, TCG_CALL_NO_WG, i64, env, i64) + +DEF_HELPER_FLAGS_2(fsqrt_s, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(fsqrt_d, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(frsqrt_s, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(frsqrt_d, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(frecip_s, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(frecip_d, TCG_CALL_NO_WG, i64, env, i64) + +DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, i64, env, i64) +DEF_HELPER_FLAGS_2(fclass_d, TCG_CALL_NO_RWG_SE, i64, env, i64) + +/* fcmp.cXXX.s */ +DEF_HELPER_4(fcmp_c_s, i64, env, i64, i64, i32) +/* fcmp.sXXX.s */ +DEF_HELPER_4(fcmp_s_s, i64, env, i64, i64, i32) +/* fcmp.cXXX.d */ +DEF_HELPER_4(fcmp_c_d, i64, env, i64, i64, i32) +/* fcmp.sXXX.d */ +DEF_HELPER_4(fcmp_s_d, i64, env, i64, i64, i32) + +DEF_HELPER_2(fcvt_d_s, i64, env, i64) +DEF_HELPER_2(fcvt_s_d, i64, env, i64) +DEF_HELPER_2(ffint_d_w, i64, env, i64) +DEF_HELPER_2(ffint_d_l, i64, env, i64) +DEF_HELPER_2(ffint_s_w, i64, env, i64) +DEF_HELPER_2(ffint_s_l, i64, env, i64) +DEF_HELPER_2(ftintrm_l_s, i64, env, i64) +DEF_HELPER_2(ftintrm_l_d, i64, env, i64) +DEF_HELPER_2(ftintrm_w_s, i64, env, i64) +DEF_HELPER_2(ftintrm_w_d, i64, env, i64) +DEF_HELPER_2(ftintrp_l_s, i64, env, i64) +DEF_HELPER_2(ftintrp_l_d, i64, env, i64) +DEF_HELPER_2(ftintrp_w_s, i64, env, i64) +DEF_HELPER_2(ftintrp_w_d, i64, env, i64) +DEF_HELPER_2(ftintrz_l_s, i64, env, i64) +DEF_HELPER_2(ftintrz_l_d, i64, env, i64) +DEF_HELPER_2(ftintrz_w_s, i64, env, i64) +DEF_HELPER_2(ftintrz_w_d, i64, env, i64) +DEF_HELPER_2(ftintrne_l_s, i64, env, i64) +DEF_HELPER_2(ftintrne_l_d, i64, env, i64) +DEF_HELPER_2(ftintrne_w_s, i64, env, i64) +DEF_HELPER_2(ftintrne_w_d, i64, env, i64) +DEF_HELPER_2(ftint_l_s, i64, env, i64) +DEF_HELPER_2(ftint_l_d, i64, env, i64) +DEF_HELPER_2(ftint_w_s, i64, env, i64) +DEF_HELPER_2(ftint_w_d, i64, env, i64) +DEF_HELPER_2(frint_s, i64, env, i64) +DEF_HELPER_2(frint_d, i64, env, i64) + +DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env) + +DEF_HELPER_1(rdtime_d, i64, env) + +#ifndef CONFIG_USER_ONLY +/* CSRs helper */ +DEF_HELPER_1(csrrd_pgd, i64, env) +DEF_HELPER_1(csrrd_cpuid, i64, env) +DEF_HELPER_1(csrrd_tval, i64, env) +DEF_HELPER_1(csrrd_msgir, i64, env) +DEF_HELPER_2(csrwr_stlbps, i64, env, tl) +DEF_HELPER_2(csrwr_estat, i64, env, tl) +DEF_HELPER_2(csrwr_asid, i64, env, tl) +DEF_HELPER_2(csrwr_tcfg, i64, env, tl) +DEF_HELPER_2(csrwr_ticlr, i64, env, tl) +DEF_HELPER_2(csrwr_pwcl, i64, env, tl) +DEF_HELPER_2(iocsrrd_b, i64, env, tl) +DEF_HELPER_2(iocsrrd_h, i64, env, tl) +DEF_HELPER_2(iocsrrd_w, i64, env, tl) +DEF_HELPER_2(iocsrrd_d, i64, env, tl) +DEF_HELPER_3(iocsrwr_b, void, env, tl, tl) +DEF_HELPER_3(iocsrwr_h, void, env, tl, tl) +DEF_HELPER_3(iocsrwr_w, void, env, tl, tl) +DEF_HELPER_3(iocsrwr_d, void, env, tl, tl) + +/* TLB helper */ +DEF_HELPER_1(tlbwr, void, env) +DEF_HELPER_1(tlbfill, void, env) +DEF_HELPER_1(tlbsrch, void, env) +DEF_HELPER_1(tlbrd, void, 
env) +DEF_HELPER_1(tlbclr, void, env) +DEF_HELPER_1(tlbflush, void, env) +DEF_HELPER_1(invtlb_all, void, env) +DEF_HELPER_2(invtlb_all_g, void, env, i32) +DEF_HELPER_2(invtlb_all_asid, void, env, tl) +DEF_HELPER_3(invtlb_page_asid, void, env, tl, tl) +DEF_HELPER_3(invtlb_page_asid_or_g, void, env, tl, tl) + +DEF_HELPER_4(lddir, tl, env, tl, tl, i32) +DEF_HELPER_4(ldpte, void, env, tl, tl, i32) +DEF_HELPER_1(ertn, void, env) +DEF_HELPER_1(idle, void, env) +#endif + +/* LoongArch LSX */ +DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsubwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + 
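Each DEF_HELPER_FLAGS_* line both declares the helper function and describes its call signature to TCG. For the vector helpers being added to the new tcg/helper.h, a declaration such as vaddwev_h_b corresponds roughly to the prototype below (conventional expansion: ptr becomes void *, i32 becomes uint32_t; the trailing desc argument conventionally packs the vector operation size, and TCG_CALL_NO_RWG tells the optimizer the call neither reads nor writes TCG globals):

/* Approximate result of
 * DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) */
void helper_vaddwev_h_b(void *vd, void *vj, void *vk, uint32_t desc);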
+DEF_HELPER_FLAGS_4(vsubwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwev_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsubwod_q_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwev_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vaddwod_q_du_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vavg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavg_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vavgr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vavgr_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vabsd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vabsd_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vadda_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vadda_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vadda_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vadda_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmini_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_d, TCG_CALL_NO_RWG, void, ptr, 
ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmini_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vmaxi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vmaxi_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vmuh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmuh_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmulwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmulwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmulwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmulwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmadd_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmsub_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmsub_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_w_h, 
TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmaddwev_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_h_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_w_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_d_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vmaddwev_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwev_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vexth_qu_du, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_d_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
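Each DEF_HELPER_FLAGS_n line above declares the C prototype of an out-of-line helper and records its TCG call flags; TCG_CALL_NO_RWG tells the optimizer that the helper neither reads nor writes TCG globals, so cached register values stay valid across the call. The sketch below is a rough, stand-alone model of the per-element arithmetic behind one of the rounding-shift helpers just declared (vsrlri_b semantics: logical right shift with round-half-up). It is not QEMU's actual helper code, which loops over a VReg under an oprsz descriptor; a plain 16-byte array stands in for a 128-bit LSX register here.

#include <stdint.h>
#include <stdio.h>

#define LSX_BYTES 16    /* one 128-bit LSX register modeled as a byte array */

static void vsrlri_b_model(uint8_t *vd, const uint8_t *vj, unsigned imm)
{
    for (int i = 0; i < LSX_BYTES; i++) {
        if (imm == 0) {
            vd[i] = vj[i];                  /* shift by 0: pass through */
        } else {
            /* Widen so the rounding addend cannot overflow the lane. */
            uint16_t t = (uint16_t)vj[i] + (1u << (imm - 1));
            vd[i] = (uint8_t)(t >> imm);
        }
    }
}

int main(void)
{
    uint8_t vj[LSX_BYTES] = { 0x07, 0x08, 0xff, 0x80 };
    uint8_t vd[LSX_BYTES];

    vsrlri_b_model(vd, vj, 3);
    /* Expected: 7/8 rounds to 1, 8/8 is 1, 255/8 rounds to 32, 128/8 is 16 */
    printf("%u %u %u %u\n", vd[0], vd[1], vd[2], vd[3]);
    return 0;
}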
+DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) 
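The vssrlni/vssrani declarations above are saturating shift-right, narrow, and insert operations: each source element is shifted right by the immediate, narrowed to half its width with saturation, and the narrowed results from the two source operands are packed into the destination. Below is a minimal per-lane sketch, assuming vssrani_b_h semantics (arithmetic shift of a signed 16-bit lane, then clamp to the signed 8-bit range); the packing step performed by the real helper is omitted.

#include <stdint.h>
#include <stdio.h>

/* One lane of a vssrani.b.h-style operation: shift, then saturate. */
static int8_t ssrani_b_h_lane(int16_t src, unsigned imm)
{
    int32_t t = src >> imm;     /* arithmetic shift; imm is in [0, 15] */

    if (t > INT8_MAX) {
        return INT8_MAX;        /* clamp to 127 */
    }
    if (t < INT8_MIN) {
        return INT8_MIN;        /* clamp to -128 */
    }
    return (int8_t)t;
}

int main(void)
{
    /* 0x4000 >> 4 = 1024, saturates to 127; the negated input to -128. */
    printf("%d %d %d\n",
           ssrani_b_h_lane(0x4000, 4),
           ssrani_b_h_lane(-0x4000, 4),
           ssrani_b_h_lane(0x0123, 4));    /* 291 >> 4 = 18, in range */
    return 0;
}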
+DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(vpcnt_d, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitclr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitclr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitclri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitclri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitclri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitclri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vbitset_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitset_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitset_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitset_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitseti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitseti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitseti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitseti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vbitrev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitrev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitrev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitrev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vbitrevi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vftintrne_w_d, 
TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vseqi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vseqi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vslei_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslei_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vslti_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32) + +DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32) +DEF_HELPER_4(vsetanyeqz_h, void, 
env, i32, i32, i32) +DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32) +DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32) +DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32) +DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32) +DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32) +DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32) + +DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vextrins_d, 
TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc index 3d70d75..77eeedb 100644 --- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc @@ -74,38 +74,38 @@ TRANS(sc_w, ALL, gen_sc, MO_TESL) TRANS(ll_d, 64, gen_ll, MO_TEUQ) TRANS(sc_d, 64, gen_sc, MO_TEUQ) TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS64(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS64(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS64(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS64(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) +TRANS64(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS64(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) -TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS64(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS64(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) +TRANS64(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS64(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS64(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS64(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS64(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) +TRANS64(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS64(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, 
MO_TESL) -TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS64(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS64(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) +TRANS64(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_branch.c.inc b/target/loongarch/tcg/insn_trans/trans_branch.c.inc index 221e515..f94c1f3 100644 --- a/target/loongarch/tcg/insn_trans/trans_branch.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_branch.c.inc @@ -80,5 +80,5 @@ TRANS(bltu, ALL, gen_rr_bc, TCG_COND_LTU) TRANS(bgeu, ALL, gen_rr_bc, TCG_COND_GEU) TRANS(beqz, ALL, gen_rz_bc, TCG_COND_EQ) TRANS(bnez, ALL, gen_rz_bc, TCG_COND_NE) -TRANS(bceqz, 64, gen_cz_bc, TCG_COND_EQ) -TRANS(bcnez, 64, gen_cz_bc, TCG_COND_NE) +TRANS(bceqz, FP, gen_cz_bc, TCG_COND_EQ) +TRANS(bcnez, FP, gen_cz_bc, TCG_COND_NE) diff --git a/target/loongarch/tcg/insn_trans/trans_extra.c.inc b/target/loongarch/tcg/insn_trans/trans_extra.c.inc index cfa361f..298a80c 100644 --- a/target/loongarch/tcg/insn_trans/trans_extra.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_extra.c.inc @@ -69,6 +69,10 @@ static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a) static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a) { + if (!avail_64(ctx)) { + return false; + } + return gen_rdtime(ctx, a, 0, 0); } @@ -97,11 +101,11 @@ static bool gen_crc(DisasContext *ctx, arg_rrr *a, return true; } -TRANS(crc_w_b_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(1)) -TRANS(crc_w_h_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(2)) -TRANS(crc_w_w_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(4)) -TRANS(crc_w_d_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) -TRANS(crcc_w_b_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(1)) -TRANS(crcc_w_h_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(2)) -TRANS(crcc_w_w_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(4)) -TRANS(crcc_w_d_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) +TRANS(crc_w_b_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(1)) +TRANS(crc_w_h_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(2)) +TRANS(crc_w_w_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(4)) +TRANS64(crc_w_d_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) +TRANS(crcc_w_b_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(1)) +TRANS(crcc_w_h_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(2)) +TRANS(crcc_w_w_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(4)) +TRANS64(crcc_w_d_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc index f4a0dea..ff6cf34 100644 --- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc @@ -183,16 +183,16 @@ TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s) TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d) TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s) TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d) -TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s) -TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) +TRANS64(fscaleb_s, FP_SP, gen_fff, 
gen_helper_fscaleb_s) +TRANS64(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s) TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d) TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s) TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d) TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s) TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d) -TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) -TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) +TRANS64(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) +TRANS64(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s) TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d) TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0) diff --git a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc index 3babf69..6a2c030 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc @@ -4,10 +4,15 @@ */ /* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ -static uint32_t get_fcmp_flags(int cond) +static uint32_t get_fcmp_flags(DisasContext *ctx, int cond) { uint32_t flags = 0; + /* Check cond: valid values are [0-8], 10 and 12 */ + if ((cond > 8) && (cond != 10) && (cond != 12)) { + return -1; + } + if (cond & 0x1) { flags |= FCMP_LT; } @@ -26,9 +31,14 @@ static uint32_t get_fcmp_flags(int cond) static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_SP(ctx)) { return false; } @@ -39,8 +49,6 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); @@ -50,9 +58,14 @@ static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_DP(ctx)) { return false; } @@ -63,8 +76,6 @@ static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) { src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? 
gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); diff --git a/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc index 833c059..ca1d76a 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc @@ -29,5 +29,5 @@ TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w) TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l) TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w) TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l) -TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s) -TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d) +TRANS64(frint_s, FP_SP, gen_ff, gen_helper_frint_s) +TRANS64(frint_d, FP_DP, gen_ff, gen_helper_frint_d) diff --git a/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc index 13452bc..79da471 100644 --- a/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc @@ -148,11 +148,11 @@ TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL) TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ) TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL) TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ) -TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) -TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) -TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL) -TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) -TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) -TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) -TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) -TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) +TRANS64(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) +TRANS64(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) +TRANS64(fldle_s, FP_SP, gen_fload_le, MO_TEUL) +TRANS64(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) +TRANS64(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) +TRANS64(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) +TRANS64(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) +TRANS64(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc index ecbfe23..a407ab5 100644 --- a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc @@ -83,6 +83,7 @@ void loongarch_csr_translate_init(void) SET_CSR_FUNC(TCFG, NULL, gen_helper_csrwr_tcfg); SET_CSR_FUNC(TVAL, gen_helper_csrrd_tval, NULL); SET_CSR_FUNC(TICLR, NULL, gen_helper_csrwr_ticlr); + SET_CSR_FUNC(MSGIR, gen_helper_csrrd_msgir, NULL); } #undef SET_CSR_FUNC @@ -233,11 +234,11 @@ static bool gen_iocsrwr(DisasContext *ctx, arg_rr *a, TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b) TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h) TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w) -TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) +TRANS64(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b) TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h) TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w) -TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) +TRANS64(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) static void check_mmu_idx(DisasContext *ctx) { diff --git a/target/loongarch/tcg/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc index 3773077..136c4c8 100644 --- 
a/target/loongarch/tcg/insn_trans/trans_shift.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_shift.c.inc @@ -78,7 +78,7 @@ TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w) TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d) TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d) TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d) -TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) +TRANS(rotr_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d) TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl) TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl) @@ -86,5 +86,5 @@ TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl) TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl) TRANS(srai_w, ALL, gen_rri_c, EXT_NONE, EXT_NONE, gen_sari_w) TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl) -TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) +TRANS(rotri_w, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl) diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc index dff9277..38bccf2 100644 --- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -3465,7 +3465,7 @@ TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) { int mode; - uint64_t data, t; + uint64_t data = 0, t; /* * imm bit [11:8] is mode, mode value is 0-12. @@ -3570,22 +3570,31 @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) } break; default: - generate_exception(ctx, EXCCODE_INE); g_assert_not_reached(); } return data; } +static bool check_valid_vldi_mode(arg_vldi *a) +{ + return extract32(a->imm, 8, 4) <= 12; +} + static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) { int sel, vece; uint64_t value; - if (!check_vec(ctx, oprsz)) { + sel = (a->imm >> 12) & 0x1; + + if (sel && !check_valid_vldi_mode(a)) { + generate_exception(ctx, EXCCODE_INE); return true; } - sel = (a->imm >> 12) & 0x1; + if (!check_vec(ctx, oprsz)) { + return true; + } if (sel) { value = vldi_get_value(ctx, a->imm); @@ -4655,19 +4664,23 @@ TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? 
gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; @@ -4675,19 +4688,23 @@ static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; diff --git a/target/loongarch/tcg/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c index b6916f5..c155f48 100644 --- a/target/loongarch/tcg/iocsr_helper.c +++ b/target/loongarch/tcg/iocsr_helper.c @@ -9,8 +9,7 @@ #include "cpu.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #define GET_MEMTXATTRS(cas) \ ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index}) diff --git a/target/loongarch/tcg/meson.build b/target/loongarch/tcg/meson.build index bdf34f9..b7adfe4 100644 --- a/target/loongarch/tcg/meson.build +++ b/target/loongarch/tcg/meson.build @@ -7,6 +7,7 @@ loongarch_ss.add([zlib, gen]) loongarch_ss.add(files( 'fpu_helper.c', 'op_helper.c', + 'tcg_cpu.c', 'translate.c', 'vec_helper.c', )) diff --git a/target/loongarch/tcg/op_helper.c b/target/loongarch/tcg/op_helper.c index b17208e..16ac0d4 100644 --- a/target/loongarch/tcg/op_helper.c +++ b/target/loongarch/tcg/op_helper.c @@ -10,8 +10,7 @@ #include "cpu.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "internals.h" #include "qemu/crc32c.h" #include <zlib.h> /* for crc32 */ diff --git a/target/loongarch/tcg/tcg_cpu.c b/target/loongarch/tcg/tcg_cpu.c new file mode 100644 index 0000000..82b54e6 --- /dev/null +++ b/target/loongarch/tcg/tcg_cpu.c @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch CPU parameters for QEMU. 
+ * + * Copyright (c) 2025 Loongson Technology Corporation Limited + */ +#include "qemu/osdep.h" +#include "qemu/accel.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "accel/accel-cpu-target.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/cpu-ops.h" +#include "exec/translation-block.h" +#include "exec/target_page.h" +#include "tcg_loongarch.h" +#include "internals.h" + +struct TypeExcp { + int32_t exccode; + const char * const name; +}; + +static const struct TypeExcp excp_names[] = { + {EXCCODE_INT, "Interrupt"}, + {EXCCODE_PIL, "Page invalid exception for load"}, + {EXCCODE_PIS, "Page invalid exception for store"}, + {EXCCODE_PIF, "Page invalid exception for fetch"}, + {EXCCODE_PME, "Page modified exception"}, + {EXCCODE_PNR, "Page Not Readable exception"}, + {EXCCODE_PNX, "Page Not Executable exception"}, + {EXCCODE_PPI, "Page Privilege error"}, + {EXCCODE_ADEF, "Address error for instruction fetch"}, + {EXCCODE_ADEM, "Address error for Memory access"}, + {EXCCODE_SYS, "Syscall"}, + {EXCCODE_BRK, "Break"}, + {EXCCODE_INE, "Instruction Non-Existent"}, + {EXCCODE_IPE, "Instruction privilege error"}, + {EXCCODE_FPD, "Floating Point Disabled"}, + {EXCCODE_FPE, "Floating Point Exception"}, + {EXCCODE_DBP, "Debug breakpoint"}, + {EXCCODE_BCE, "Bound Check Exception"}, + {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, + {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, + {EXCP_HLT, "EXCP_HLT"}, +}; + +static const char *loongarch_exception_name(int32_t exception) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(excp_names); i++) { + if (excp_names[i].exccode == exception) { + return excp_names[i].name; + } + } + return "Unknown"; +} + +void G_NORETURN do_raise_exception(CPULoongArchState *env, + uint32_t exception, + uintptr_t pc) +{ + CPUState *cs = env_cpu(env); + + qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", + __func__, + exception, + loongarch_exception_name(exception)); + cs->exception_index = exception; + + cpu_loop_exit_restore(cs, pc); +} + +#ifndef CONFIG_USER_ONLY +static void loongarch_cpu_do_interrupt(CPUState *cs) +{ + CPULoongArchState *env = cpu_env(cs); + bool update_badinstr = 1; + int cause = -1; + bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); + uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); + + if (cs->exception_index != EXCCODE_INT) { + qemu_log_mask(CPU_LOG_INT, + "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", + __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, + cs->exception_index, + loongarch_exception_name(cs->exception_index)); + } + + switch (cs->exception_index) { + case EXCCODE_DBP: + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DCL, 1); + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, ECODE, 0xC); + goto set_DERA; + set_DERA: + env->CSR_DERA = env->pc; + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DST, 1); + set_pc(env, env->CSR_EENTRY + 0x480); + break; + case EXCCODE_INT: + if (FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DEI, 1); + goto set_DERA; + } + QEMU_FALLTHROUGH; + case EXCCODE_PIF: + case EXCCODE_ADEF: + cause = cs->exception_index; + update_badinstr = 0; + break; + case EXCCODE_SYS: + case EXCCODE_BRK: + case EXCCODE_INE: + case EXCCODE_IPE: + case EXCCODE_FPD: + case EXCCODE_FPE: + case EXCCODE_SXD: + case EXCCODE_ASXD: + env->CSR_BADV = env->pc; + QEMU_FALLTHROUGH; + case EXCCODE_BCE: + case EXCCODE_ADEM: + case EXCCODE_PIL: + case 
EXCCODE_PIS: + case EXCCODE_PME: + case EXCCODE_PNR: + case EXCCODE_PNX: + case EXCCODE_PPI: + cause = cs->exception_index; + break; + default: + qemu_log("Error: exception(%d) has not been supported\n", + cs->exception_index); + abort(); + } + + if (update_badinstr) { + env->CSR_BADI = cpu_ldl_code(env, env->pc); + } + + /* Save PLV and IE */ + if (tlbfill) { + env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV, + FIELD_EX64(env->CSR_CRMD, + CSR_CRMD, PLV)); + env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); + /* set the DA mode */ + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 1); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, + PC, (env->pc >> 2)); + } else { + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE, + EXCODE_MCODE(cause)); + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ESUBCODE, + EXCODE_SUBCODE(cause)); + env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PPLV, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV)); + env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PIE, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); + env->CSR_ERA = env->pc; + } + + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, 0); + + if (vec_size) { + vec_size = (1 << vec_size) * 4; + } + + if (cs->exception_index == EXCCODE_INT) { + /* Interrupt */ + uint32_t vector = 0; + uint32_t pending = FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS); + pending &= FIELD_EX64(env->CSR_ECFG, CSR_ECFG, LIE); + + /* Find the highest-priority interrupt. */ + vector = 31 - clz32(pending); + set_pc(env, env->CSR_EENTRY + \ + (EXCCODE_EXTERNAL_INT + vector) * vec_size); + qemu_log_mask(CPU_LOG_INT, + "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " cause %d\n" " A " TARGET_FMT_lx " D " + TARGET_FMT_lx " vector = %d ExC " TARGET_FMT_lx "ExS" + TARGET_FMT_lx "\n", + __func__, env->pc, env->CSR_ERA, + cause, env->CSR_BADV, env->CSR_DERA, vector, + env->CSR_ECFG, env->CSR_ESTAT); + } else { + if (tlbfill) { + set_pc(env, env->CSR_TLBRENTRY); + } else { + set_pc(env, env->CSR_EENTRY + EXCODE_MCODE(cause) * vec_size); + } + qemu_log_mask(CPU_LOG_INT, + "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " cause %d%s\n, ESTAT " TARGET_FMT_lx + " EXCFG " TARGET_FMT_lx " BADVA " TARGET_FMT_lx + "BADI " TARGET_FMT_lx " SYS_NUM " TARGET_FMT_lu + " cpu %d asid " TARGET_FMT_lx "\n", __func__, env->pc, + tlbfill ? env->CSR_TLBRERA : env->CSR_ERA, + cause, tlbfill ? "(refill)" : "", env->CSR_ESTAT, + env->CSR_ECFG, + tlbfill ? 
env->CSR_TLBRBADV : env->CSR_BADV, + env->CSR_BADI, env->gpr[11], cs->cpu_index, + env->CSR_ASID); + } + cs->exception_index = -1; +} + +static void loongarch_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, + vaddr addr, unsigned size, + MMUAccessType access_type, + int mmu_idx, MemTxAttrs attrs, + MemTxResult response, + uintptr_t retaddr) +{ + CPULoongArchState *env = cpu_env(cs); + + if (access_type == MMU_INST_FETCH) { + do_raise_exception(env, EXCCODE_ADEF, retaddr); + } else { + do_raise_exception(env, EXCCODE_ADEM, retaddr); + } +} + +static inline bool cpu_loongarch_hw_interrupts_enabled(CPULoongArchState *env) +{ + bool ret = 0; + + ret = (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE) && + !(FIELD_EX64(env->CSR_DBG, CSR_DBG, DST))); + + return ret; +} + +static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + if (interrupt_request & CPU_INTERRUPT_HARD) { + CPULoongArchState *env = cpu_env(cs); + + if (cpu_loongarch_hw_interrupts_enabled(env) && + cpu_loongarch_hw_interrupts_pending(env)) { + /* Raise it */ + cs->exception_index = EXCCODE_INT; + loongarch_cpu_do_interrupt(cs); + return true; + } + } + return false; +} + +static vaddr loongarch_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return is_va32(cpu_env(cs)) ? (uint32_t)result : result; +} +#endif + +static TCGTBCPUState loongarch_get_tb_cpu_state(CPUState *cs) +{ + CPULoongArchState *env = cpu_env(cs); + uint32_t flags; + + flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE; + flags |= is_va32(env) * HW_FLAGS_VA32; + + return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; +} + +static void loongarch_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); + set_pc(cpu_env(cs), tb->pc); +} + +static void loongarch_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data) +{ + set_pc(cpu_env(cs), data[0]); +} + +static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPULoongArchState *env = cpu_env(cs); + + if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) { + return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); + } + return MMU_DA_IDX; +} + +const TCGCPUOps loongarch_tcg_ops = { + .guest_default_memory_order = 0, + .mttcg_supported = true, + + .initialize = loongarch_translate_init, + .translate_code = loongarch_translate_code, + .get_tb_cpu_state = loongarch_get_tb_cpu_state, + .synchronize_from_tb = loongarch_cpu_synchronize_from_tb, + .restore_state_to_opc = loongarch_restore_state_to_opc, + .mmu_index = loongarch_cpu_mmu_index, + +#ifndef CONFIG_USER_ONLY + .tlb_fill = loongarch_cpu_tlb_fill, + .pointer_wrap = loongarch_pointer_wrap, + .cpu_exec_interrupt = loongarch_cpu_exec_interrupt, + .cpu_exec_halt = loongarch_cpu_has_work, + .cpu_exec_reset = cpu_reset, + .do_interrupt = loongarch_cpu_do_interrupt, + .do_transaction_failed = loongarch_cpu_do_transaction_failed, +#endif +}; diff --git a/target/loongarch/tcg/tcg_loongarch.h b/target/loongarch/tcg/tcg_loongarch.h index da2539e..7fb627f 100644 --- a/target/loongarch/tcg/tcg_loongarch.h +++ b/target/loongarch/tcg/tcg_loongarch.h @@ -6,7 +6,18 @@ */ #ifndef TARGET_LOONGARCH_TCG_LOONGARCH_H #define TARGET_LOONGARCH_TCG_LOONGARCH_H +#include "cpu.h" +#include "cpu-mmu.h" +extern 
const TCGCPUOps loongarch_tcg_ops; void loongarch_csr_translate_init(void); +bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + +TLBRet loongarch_get_addr_from_tlb(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx); + #endif /* TARGET_LOONGARCH_TCG_LOONGARCH_H */ diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c index 70d1b5c..8cfce48 100644 --- a/target/loongarch/tcg/tlb_helper.c +++ b/target/loongarch/tcg/tlb_helper.c @@ -10,14 +10,28 @@ #include "qemu/guest-random.h" #include "cpu.h" +#include "cpu-mmu.h" #include "internals.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" -#include "exec/cpu_ldst.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/log.h" #include "cpu-csr.h" +#include "tcg/tcg_loongarch.h" + +typedef bool (*tlb_match)(bool global, int asid, int tlb_asid); + +static bool tlb_match_any(bool global, int asid, int tlb_asid) +{ + return global || tlb_asid == asid; +} + +static bool tlb_match_asid(bool global, int asid, int tlb_asid) +{ + return !global && tlb_asid == asid; +} bool check_ps(CPULoongArchState *env, uint8_t tlb_ps) { @@ -27,36 +41,8 @@ bool check_ps(CPULoongArchState *env, uint8_t tlb_ps) return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2); } -void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, - uint64_t *dir_width, target_ulong level) -{ - switch (level) { - case 1: - *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); - *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); - break; - case 2: - *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); - *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); - break; - case 3: - *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); - *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); - break; - case 4: - *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); - *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); - break; - default: - /* level may be zero for ldpte */ - *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); - *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); - break; - } -} - -static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, - MMUAccessType access_type, int tlb_error) +static void raise_mmu_exception(CPULoongArchState *env, vaddr address, + MMUAccessType access_type, TLBRet tlb_error) { CPUState *cs = env_cpu(env); @@ -127,8 +113,7 @@ static void invalidate_tlb_entry(CPULoongArchState *env, int index) target_ulong addr, mask, pagesize; uint8_t tlb_ps; LoongArchTLB *tlb = &env->tlb[index]; - - int mmu_idx = cpu_mmu_index(env_cpu(env), false); + int idxmap = BIT(MMU_KERNEL_IDX) | BIT(MMU_USER_IDX); uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V); uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V); uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); @@ -137,24 +122,22 @@ static void invalidate_tlb_entry(CPULoongArchState *env, int index) if (!tlb_e) { return; } - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } + + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); pagesize = MAKE_64BIT_MASK(tlb_ps, 1); mask = MAKE_64BIT_MASK(0, tlb_ps + 1); + 
addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; + addr = sextract64(addr, 0, TARGET_VIRT_ADDR_SPACE_BITS); if (tlb_v0) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */ tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); + idxmap, TARGET_LONG_BITS); } if (tlb_v1) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */ - tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); + tlb_flush_range_by_mmuidx(env_cpu(env), addr + pagesize, pagesize, + idxmap, TARGET_LONG_BITS); } } @@ -173,9 +156,8 @@ static void invalidate_tlb(CPULoongArchState *env, int index) invalidate_tlb_entry(env, index); } -static void fill_tlb_entry(CPULoongArchState *env, int index) +static void fill_tlb_entry(CPULoongArchState *env, LoongArchTLB *tlb) { - LoongArchTLB *tlb = &env->tlb[index]; uint64_t lo0, lo1, csr_vppn; uint16_t csr_asid; uint8_t csr_ps; @@ -200,17 +182,8 @@ static void fill_tlb_entry(CPULoongArchState *env, int index) lo1 = env->CSR_TLBELO1; } - /*check csr_ps */ - if (!check_ps(env, csr_ps)) { - qemu_log_mask(LOG_GUEST_ERROR, "csr_ps %d is illegal\n", csr_ps); - return; - } - - /* Only MTLB has the ps fields */ - if (index >= LOONGARCH_STLB) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); - } - + /* Store page size in field PS */ + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn); tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1); csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); @@ -229,6 +202,83 @@ static uint32_t get_random_tlb(uint32_t low, uint32_t high) return val % (high - low + 1) + low; } +/* + * One TLB entry holds an adjacent even/odd page pair, so its vpn is the + * virtual page number divided by 2: the compare vpn is bit[47:15] for a + * 16KiB page, while the vppn field in the TLB entry contains bit[47:13] + * and needs adjusting.
+ * virt_vpn = vaddr[47:13] + */ +static LoongArchTLB *loongarch_tlb_search_cb(CPULoongArchState *env, + vaddr vaddr, int csr_asid, + tlb_match func) +{ + LoongArchTLB *tlb; + uint16_t tlb_asid, stlb_idx; + uint8_t tlb_e, tlb_ps, stlb_ps; + bool tlb_g; + int i, compare_shift; + uint64_t vpn, tlb_vppn; + + stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); + stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ + compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + + /* Search STLB */ + for (i = 0; i < 8; ++i) { + tlb = &env->tlb[i * 256 + stlb_idx]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (tlb_e) { + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = !!FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + + if (func(tlb_g, csr_asid, tlb_asid) && + (vpn == (tlb_vppn >> compare_shift))) { + return tlb; + } + } + } + + /* Search MTLB */ + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { + tlb = &env->tlb[i]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (tlb_e) { + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); + if (func(tlb_g, csr_asid, tlb_asid) && + (vpn == (tlb_vppn >> compare_shift))) { + return tlb; + } + } + } + return NULL; +} + +static bool loongarch_tlb_search(CPULoongArchState *env, vaddr vaddr, + int *index) +{ + int csr_asid; + tlb_match func; + LoongArchTLB *tlb; + + func = tlb_match_any; + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + tlb = loongarch_tlb_search_cb(env, vaddr, csr_asid, func); + if (tlb) { + *index = tlb - env->tlb; + return true; + } + + return false; +} + void helper_tlbsrch(CPULoongArchState *env) { int index, match; @@ -256,12 +306,7 @@ void helper_tlbrd(CPULoongArchState *env) index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); tlb = &env->tlb[index]; - - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); if (!tlb_e) { @@ -287,61 +332,107 @@ void helper_tlbwr(CPULoongArchState *env) { int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + LoongArchTLB *old, new = {}; + bool skip_inv = false; + uint8_t tlb_v0, tlb_v1; - invalidate_tlb(env, index); - + old = env->tlb + index; if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) { - env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc, - TLB_MISC, E, 0); + invalidate_tlb(env, index); return; } - fill_tlb_entry(env, index); + fill_tlb_entry(env, &new); + /* Check whether ASID/VPPN is the same */ + if (old->tlb_misc == new.tlb_misc) { + /* Check whether both even/odd pages are the same or invalid */ + tlb_v0 = FIELD_EX64(old->tlb_entry0, TLBENTRY, V); + tlb_v1 = FIELD_EX64(old->tlb_entry1, TLBENTRY, V); + if ((!tlb_v0 || new.tlb_entry0 == old->tlb_entry0) && + (!tlb_v1 || new.tlb_entry1 == old->tlb_entry1)) { + skip_inv = true; + } + } + + /* Flush the TLB before updating the entry */ + if (!skip_inv) { + invalidate_tlb(env, index); + } + + *old = new; } void helper_tlbfill(CPULoongArchState *env) { uint64_t address, 
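/*
 * Editor's note: a worked example of the STLB lookup math in
 * loongarch_tlb_search_cb() above, assuming the default 16KiB STLB page
 * (stlb_ps = 14).  vpn = vaddr >> 15 is the even/odd pair number;
 * stlb_idx = vpn & 0xff picks one of the 256 indexed lines (8 ways each);
 * compare_shift = 14 + 1 - 13 = 2 discards the two low VPPN bits so vpn
 * can be compared against the stored bit[47:13] field:
 *
 *   vaddr    = 0x12345678;
 *   vpn      = vaddr >> 15;            // 0x2468
 *   stlb_idx = vpn & 0xff;             // 0x68
 *   match    = vpn == (tlb_vppn >> 2);
 */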
entryhi; - int index, set, stlb_idx; + int index, set, i, stlb_idx; uint16_t pagesize, stlb_ps; + uint16_t asid, tlb_asid; + LoongArchTLB *tlb; + uint8_t tlb_e; if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { entryhi = env->CSR_TLBREHI; + /* Validity of pagesize is checked in helper_ldpte() */ pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); } else { entryhi = env->CSR_TLBEHI; + /* Validity of pagesize is checked in helper_tlbrd() */ pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); } - if (!check_ps(env, pagesize)) { - qemu_log_mask(LOG_GUEST_ERROR, "pagesize %d is illegal\n", pagesize); - return; - } - + /* Validity of stlb_ps is checked in helper_csrwr_stlbps() */ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - if (!check_ps(env, stlb_ps)) { - qemu_log_mask(LOG_GUEST_ERROR, "stlb_ps %d is illegal\n", stlb_ps); - return; - } - + asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); if (pagesize == stlb_ps) { /* Only write into STLB bits [47:13] */ address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT); - - /* Choose one set ramdomly */ - set = get_random_tlb(0, 7); - - /* Index in one set */ + set = -1; stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */ + for (i = 0; i < 8; ++i) { + tlb = &env->tlb[i * 256 + stlb_idx]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (!tlb_e) { + set = i; + break; + } + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + if (asid != tlb_asid) { + set = i; + } + } + + /* Choose one set randomly */ + if (set < 0) { + set = get_random_tlb(0, 7); + } index = set * 256 + stlb_idx; } else { /* Only write into MTLB */ - index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); + index = -1; + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { + tlb = &env->tlb[i]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + + if (!tlb_e) { + index = i; + break; + } + + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + if (asid != tlb_asid) { + index = i; + } + } + + if (index < 0) { + index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); + } } invalidate_tlb(env, index); - fill_tlb_entry(env, index); + fill_tlb_entry(env, env->tlb + index); } void helper_tlbclr(CPULoongArchState *env) @@ -443,67 +534,29 @@ void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info) void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info, target_ulong addr) { - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - - if (!tlb_e) { - continue; - } - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + int asid = info & 0x3ff; + LoongArchTLB *tlb; + tlb_match func; - if (!tlb_g && (tlb_asid == asid) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } + func = tlb_match_asid; + tlb = loongarch_tlb_search_cb(env, addr, asid, func); + if (tlb) { + invalidate_tlb(env, tlb - env->tlb); } - tlb_flush(env_cpu(env)); } void helper_invtlb_page_asid_or_g(CPULoongArchState *env, 
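/*
 * Editor's note on the helper_tlbfill() rework above: instead of always
 * evicting a random victim, the fill now scans the candidate ways first
 * and prefers (1) an empty entry, else (2) an entry whose ASID differs
 * from the current one, and only falls back to get_random_tlb() when
 * every way is live under the current ASID -- presumably to avoid
 * kicking out hot same-ASID translations while stale cross-ASID entries
 * are still available.  The same policy is applied to both the STLB set
 * and the MTLB range.
 */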
target_ulong info, target_ulong addr) { - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - - if (!tlb_e) { - continue; - } - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + int asid = info & 0x3ff; + LoongArchTLB *tlb; + tlb_match func; - if ((tlb_g || (tlb_asid == asid)) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } + func = tlb_match_any; + tlb = loongarch_tlb_search_cb(env, addr, asid, func); + if (tlb) { + invalidate_tlb(env, tlb - env->tlb); } - tlb_flush(env_cpu(env)); } bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, @@ -513,13 +566,15 @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPULoongArchState *env = cpu_env(cs); hwaddr physical; int prot; - int ret; + MMUContext context; + TLBRet ret; /* Data access */ - ret = get_physical_address(env, &physical, &prot, address, - access_type, mmu_idx, 0); - + context.addr = address; + ret = get_physical_address(env, &context, access_type, mmu_idx, 0); if (ret == TLBRET_MATCH) { + physical = context.physical; + prot = context.prot; tlb_set_page(cs, address & TARGET_PAGE_MASK, physical & TARGET_PAGE_MASK, prot, mmu_idx, TARGET_PAGE_SIZE); @@ -578,10 +633,11 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, uint32_t mem_idx) { CPUState *cs = env_cpu(env); - target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; + target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, badv; uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); uint64_t dir_base, dir_width; + uint8_t ps; /* * The parameter "base" has only two types, @@ -618,6 +674,11 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, if (odd) { tmp0 += MAKE_64BIT_MASK(ps, 1); } + + if (!check_ps(env, ps)) { + qemu_log_mask(LOG_GUEST_ERROR, "Illegal huge pagesize %d\n", ps); + return; + } } else { badv = env->CSR_TLBRBADV; @@ -637,3 +698,33 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, } env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps); } + +static TLBRet loongarch_map_tlb_entry(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int index, + int mmu_idx) +{ + LoongArchTLB *tlb = &env->tlb[index]; + uint8_t tlb_ps, n; + + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + n = (context->addr >> tlb_ps) & 0x1;/* Odd or even */ + context->pte = n ? 
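/*
 * Editor's note: in loongarch_map_tlb_entry() above, bit tlb_ps of the
 * address is the low bit of the virtual page number, so it selects
 * between the pair held by one entry: with 16KiB pages (tlb_ps = 14),
 * vaddr 0x4000 has bit 14 set and maps through tlb_entry1 (odd), while
 * vaddr 0x0 maps through tlb_entry0 (even).
 */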
tlb->tlb_entry1 : tlb->tlb_entry0; + context->ps = tlb_ps; + return loongarch_check_pte(env, context, access_type, mmu_idx); +} + +TLBRet loongarch_get_addr_from_tlb(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx) +{ + int index, match; + + match = loongarch_tlb_search(env, context->addr, &index); + if (match) { + return loongarch_map_tlb_entry(env, context, access_type, index, + mmu_idx); + } + + return TLBRET_NOMATCH; +} diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c index e59e4ed..53a0b4c 100644 --- a/target/loongarch/tcg/translate.c +++ b/target/loongarch/tcg/translate.c @@ -11,6 +11,7 @@ #include "tcg/tcg-op-gvec.h" #include "exec/translation-block.h" #include "exec/translator.h" +#include "exec/target_page.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/log.h" diff --git a/target/loongarch/tcg/vec_helper.c b/target/loongarch/tcg/vec_helper.c index 3faf52c..a270998 100644 --- a/target/loongarch/tcg/vec_helper.c +++ b/target/loongarch/tcg/vec_helper.c @@ -7,7 +7,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "internals.h" diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h index 195f535..bbe015b 100644 --- a/target/loongarch/translate.h +++ b/target/loongarch/translate.h @@ -14,6 +14,10 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \ { return avail_##AVAIL(ctx) && FUNC(ctx, a, __VA_ARGS__); } +#define TRANS64(NAME, AVAIL, FUNC, ...) \ + static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \ + { return avail_64(ctx) && avail_##AVAIL(ctx) && FUNC(ctx, a, __VA_ARGS__); } + #define avail_ALL(C) true #define avail_64(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, ARCH) == \ CPUCFG1_ARCH_LA64) @@ -25,6 +29,7 @@ #define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX)) #define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX)) #define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR)) +#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC)) /* * If an operation is being performed on less than TARGET_LONG_BITS, diff --git a/target/m68k/cpu-param.h b/target/m68k/cpu-param.h index 7afbf6d..256a2b5 100644 --- a/target/m68k/cpu-param.h +++ b/target/m68k/cpu-param.h @@ -17,4 +17,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +#define TARGET_INSN_START_EXTRA_WORDS 1 + #endif diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index 0065e1c..f1b6731 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -23,6 +23,8 @@ #include "cpu.h" #include "migration/vmstate.h" #include "fpu/softfloat.h" +#include "exec/translation-block.h" +#include "accel/tcg/cpu-ops.h" static void m68k_cpu_set_pc(CPUState *cs, vaddr value) { @@ -38,6 +40,24 @@ static vaddr m68k_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState m68k_get_tb_cpu_state(CPUState *cs) +{ + CPUM68KState *env = cpu_env(cs); + uint32_t flags; + + flags = (env->macsr >> 4) & TB_FLAGS_MACSR; + if (env->sr & SR_S) { + flags |= TB_FLAGS_MSR_S; + flags |= (env->sfc << (TB_FLAGS_SFC_S_BIT - 2)) & TB_FLAGS_SFC_S; + flags |= (env->dfc << (TB_FLAGS_DFC_S_BIT - 2)) & TB_FLAGS_DFC_S; + } + if (M68K_SR_TRACE(env->sr) == M68K_SR_TRACE_ANY_INS) { + flags |= TB_FLAGS_TRACE; + } + + return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; +} + static void m68k_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data) @@ -54,7 +74,7 
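/*
 * Editor's note: several targets in this section (loongarch, m68k,
 * microblaze, mips) convert the old cpu_get_tb_cpu_state() out-parameter
 * inline into a TCGCPUOps.get_tb_cpu_state hook returning a value
 * struct.  Judging from the initializers used here, the struct is shaped
 * roughly like the sketch below (the field types are my guess, not
 * quoted from the tree):
 *
 *   typedef struct TCGTBCPUState {
 *       vaddr pc;
 *       uint32_t flags;
 *       uint64_t cs_base;
 *   } TCGTBCPUState;
 *
 * Returning by value lets targets that need no cs_base simply omit that
 * designated initializer, as m68k does below.
 */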
@@ static void m68k_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool m68k_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ @@ -586,23 +606,29 @@ static const struct SysemuCPUOps m68k_sysemu_ops = { }; #endif /* !CONFIG_USER_ONLY */ -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps m68k_tcg_ops = { + /* MTTCG not yet supported: require strict ordering */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = false, + .initialize = m68k_tcg_init, .translate_code = m68k_translate_code, + .get_tb_cpu_state = m68k_get_tb_cpu_state, .restore_state_to_opc = m68k_restore_state_to_opc, + .mmu_index = m68k_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = m68k_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = m68k_cpu_exec_interrupt, .cpu_exec_halt = m68k_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = m68k_cpu_do_interrupt, .do_transaction_failed = m68k_cpu_transaction_failed, #endif /* !CONFIG_USER_ONLY */ }; -static void m68k_cpu_class_init(ObjectClass *c, void *data) +static void m68k_cpu_class_init(ObjectClass *c, const void *data) { M68kCPUClass *mcc = M68K_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); @@ -615,7 +641,6 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) &mcc->parent_phases); cc->class_by_name = m68k_cpu_class_by_name; - cc->mmu_index = m68k_cpu_mmu_index; cc->dump_state = m68k_cpu_dump_state; cc->set_pc = m68k_cpu_set_pc; cc->get_pc = m68k_cpu_get_pc; @@ -630,7 +655,7 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) cc->tcg_ops = &m68k_tcg_ops; } -static void m68k_cpu_class_init_cf_core(ObjectClass *c, void *data) +static void m68k_cpu_class_init_cf_core(ObjectClass *c, const void *data) { CPUClass *cc = CPU_CLASS(c); @@ -645,7 +670,7 @@ static void m68k_cpu_class_init_cf_core(ObjectClass *c, void *data) .class_init = m68k_cpu_class_init_cf_core \ } -static void m68k_cpu_class_init_m68k_core(ObjectClass *c, void *data) +static void m68k_cpu_class_init_m68k_core(ObjectClass *c, const void *data) { CPUClass *cc = CPU_CLASS(c); diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h index ddb0f29..d9db6a4 100644 --- a/target/m68k/cpu.h +++ b/target/m68k/cpu.h @@ -21,7 +21,9 @@ #ifndef M68K_CPU_H #define M68K_CPU_H +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" #include "cpu-qom.h" @@ -76,8 +78,6 @@ #define M68K_MAX_TTR 2 #define TTR(type, index) ttr[((type & ACCESS_CODE) == ACCESS_CODE) * 2 + index] -#define TARGET_INSN_START_EXTRA_WORDS 1 - typedef CPU_LDoubleU FPReg; typedef struct CPUArchState { @@ -594,8 +594,6 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, MemTxResult response, uintptr_t retaddr); #endif -#include "exec/cpu-all.h" - /* TB flags */ #define TB_FLAGS_MACSR 0x0f #define TB_FLAGS_MSR_S_BIT 13 @@ -607,22 +605,6 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, #define TB_FLAGS_TRACE 16 #define TB_FLAGS_TRACE_BIT (1 << TB_FLAGS_TRACE) -static inline void cpu_get_tb_cpu_state(CPUM68KState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *cs_base = 0; - *flags = (env->macsr >> 4) & TB_FLAGS_MACSR; - if (env->sr & SR_S) { - *flags |= TB_FLAGS_MSR_S; - *flags |= (env->sfc << (TB_FLAGS_SFC_S_BIT - 2)) & TB_FLAGS_SFC_S; - *flags |= (env->dfc << (TB_FLAGS_DFC_S_BIT - 2)) & TB_FLAGS_DFC_S; - } - if 
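/*
 * Editor's note: the has_work conversions in this series replace direct
 * reads of cs->interrupt_request with cpu_test_interrupt().  Its
 * definition is not shown in this diff; presumably it is a small wrapper
 * along the lines of:
 *
 *   static inline bool cpu_test_interrupt(CPUState *cpu, int mask)
 *   {
 *       return qatomic_load_acquire(&cpu->interrupt_request) & mask;
 *   }
 *
 * i.e. the same mask test as before, but with an explicitly atomic load
 * so callers running outside the BQL read a consistent value.
 */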
(M68K_SR_TRACE(env->sr) == M68K_SR_TRACE_ANY_INS) { - *flags |= TB_FLAGS_TRACE; - } -} - void dump_mmu(CPUM68KState *env); #endif diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c index eb1cb8c..5601286 100644 --- a/target/m68k/fpu_helper.c +++ b/target/m68k/fpu_helper.c @@ -21,8 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "softfloat.h" /* diff --git a/target/m68k/helper.c b/target/m68k/helper.c index 0bf5748..15f110f 100644 --- a/target/m68k/helper.c +++ b/target/m68k/helper.c @@ -21,10 +21,11 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "exec/gdbstub.h" #include "exec/helper-proto.h" +#include "system/memory.h" #include "gdbstub/helpers.h" #include "fpu/softfloat.h" #include "qemu/qemu-print.h" @@ -290,7 +291,6 @@ void HELPER(m68k_movec_to)(CPUM68KState *env, uint32_t reg, uint32_t val) /* Invalid control registers will generate an exception. */ raise_exception_ra(env, EXCP_ILLEGAL, 0); - return; } uint32_t HELPER(m68k_movec_from)(CPUM68KState *env, uint32_t reg) diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c index 15bad5d..f29ae12 100644 --- a/target/m68k/op_helper.c +++ b/target/m68k/op_helper.c @@ -20,8 +20,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "semihosting/semihost.h" #if !defined(CONFIG_USER_ONLY) diff --git a/target/m68k/translate.c b/target/m68k/translate.c index dec2967..97afceb 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "tcg/tcg-op.h" #include "qemu/log.h" #include "qemu/qemu-print.h" diff --git a/target/microblaze/cpu-param.h b/target/microblaze/cpu-param.h index c866ec6..e0a3794 100644 --- a/target/microblaze/cpu-param.h +++ b/target/microblaze/cpu-param.h @@ -27,7 +27,6 @@ /* FIXME: MB uses variable pages down to 1K but linux only uses 4k. */ #define TARGET_PAGE_BITS 12 -/* MicroBlaze is always in-order. */ -#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL +#define TARGET_INSN_START_EXTRA_WORDS 1 #endif diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index f3bebea..22231f0 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -27,11 +27,11 @@ #include "cpu.h" #include "qemu/module.h" #include "hw/qdev-properties.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/gdbstub.h" #include "exec/translation-block.h" #include "fpu/softfloat-helpers.h" +#include "accel/tcg/cpu-ops.h" #include "tcg/tcg.h" static const struct { @@ -95,6 +95,17 @@ static vaddr mb_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState mb_get_tb_cpu_state(CPUState *cs) +{ + CPUMBState *env = cpu_env(cs); + + return (TCGTBCPUState){ + .pc = env->pc, + .flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK), + .cs_base = (env->iflags & IMM_FLAG ? 
env->imm : 0), + }; +} + static void mb_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -118,7 +129,7 @@ static void mb_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool mb_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } #endif /* !CONFIG_USER_ONLY */ @@ -252,6 +263,11 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) return; } + gdb_register_coprocessor(cs, mb_cpu_gdb_read_stack_protect, + mb_cpu_gdb_write_stack_protect, + gdb_find_static_feature("microblaze-stack-protect.xml"), + 0); + qemu_init_vcpu(cs); version = cpu->cfg.version ? cpu->cfg.version : DEFAULT_CPU_VERSION; @@ -324,20 +340,13 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) static void mb_cpu_initfn(Object *obj) { - MicroBlazeCPU *cpu = MICROBLAZE_CPU(obj); - - gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect, - mb_cpu_gdb_write_stack_protect, - gdb_find_static_feature("microblaze-stack-protect.xml"), - 0); - #ifndef CONFIG_USER_ONLY /* Inbound IRQ and FIR lines */ - qdev_init_gpio_in(DEVICE(cpu), microblaze_cpu_set_irq, 2); - qdev_init_gpio_in_named(DEVICE(cpu), mb_cpu_ns_axi_dp, "ns_axi_dp", 1); - qdev_init_gpio_in_named(DEVICE(cpu), mb_cpu_ns_axi_ip, "ns_axi_ip", 1); - qdev_init_gpio_in_named(DEVICE(cpu), mb_cpu_ns_axi_dc, "ns_axi_dc", 1); - qdev_init_gpio_in_named(DEVICE(cpu), mb_cpu_ns_axi_ic, "ns_axi_ic", 1); + qdev_init_gpio_in(DEVICE(obj), microblaze_cpu_set_irq, 2); + qdev_init_gpio_in_named(DEVICE(obj), mb_cpu_ns_axi_dp, "ns_axi_dp", 1); + qdev_init_gpio_in_named(DEVICE(obj), mb_cpu_ns_axi_ip, "ns_axi_ip", 1); + qdev_init_gpio_in_named(DEVICE(obj), mb_cpu_ns_axi_dc, "ns_axi_dc", 1); + qdev_init_gpio_in_named(DEVICE(obj), mb_cpu_ns_axi_ic, "ns_axi_ic", 1); #endif /* Restricted 'endianness' property is equivalent of 'little-endian' */ @@ -424,25 +433,31 @@ static const struct SysemuCPUOps mb_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps mb_tcg_ops = { + /* MicroBlaze is always in-order. 
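 * (Editor's note: this series moves each target's memory-model constant
 * out of cpu-param.h -- the old TCG_GUEST_DEFAULT_MO define, removed for
 * microblaze above and for mips further down -- and into TCGCPUOps as
 * .guest_default_memory_order, paired with .mttcg_supported, so the
 * ordering requirement and the MTTCG capability are declared in one
 * place per target.)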
*/ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = true, + .initialize = mb_tcg_init, .translate_code = mb_translate_code, + .get_tb_cpu_state = mb_get_tb_cpu_state, .synchronize_from_tb = mb_cpu_synchronize_from_tb, .restore_state_to_opc = mb_restore_state_to_opc, + .mmu_index = mb_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = mb_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = mb_cpu_exec_interrupt, .cpu_exec_halt = mb_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = mb_cpu_do_interrupt, .do_transaction_failed = mb_cpu_transaction_failed, .do_unaligned_access = mb_cpu_do_unaligned_access, #endif /* !CONFIG_USER_ONLY */ }; -static void mb_cpu_class_init(ObjectClass *oc, void *data) +static void mb_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -455,7 +470,6 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data) &mcc->parent_phases); cc->class_by_name = mb_cpu_class_by_name; - cc->mmu_index = mb_cpu_mmu_index; cc->dump_state = mb_cpu_dump_state; cc->set_pc = mb_cpu_set_pc; cc->get_pc = mb_cpu_get_pc; diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index e44ddd5..3ce28b3 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -21,8 +21,10 @@ #define MICROBLAZE_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" +#include "exec/cpu-interrupt.h" typedef struct CPUArchState CPUMBState; #if !defined(CONFIG_USER_ONLY) @@ -231,8 +233,6 @@ typedef struct CPUArchState CPUMBState; #define STREAM_CONTROL (1 << 3) #define STREAM_NONBLOCK (1 << 4) -#define TARGET_INSN_START_EXTRA_WORDS 1 - /* use-non-secure property masks */ #define USE_NON_SECURE_M_AXI_DP_MASK 0x1 #define USE_NON_SECURE_M_AXI_IP_MASK 0x2 @@ -248,7 +248,7 @@ struct CPUArchState { uint32_t pc; uint32_t msr; /* All bits of MSR except MSR[C] and MSR[CC] */ uint32_t msr_c; /* MSR[C], in low bit; other bits must be 0 */ - target_ulong ear; + uint64_t ear; uint32_t esr; uint32_t fsr; uint32_t btr; @@ -409,8 +409,6 @@ void mb_translate_code(CPUState *cs, TranslationBlock *tb, #define MMU_USER_IDX 2 /* See NB_MMU_MODES in cpu-defs.h. */ -#include "exec/cpu-all.h" - /* Ensure there is no overlap between the two masks. */ QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK); @@ -421,14 +419,6 @@ static inline bool mb_cpu_is_big_endian(CPUState *cs) return !cpu->cfg.endi; } -static inline void cpu_get_tb_cpu_state(CPUMBState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK); - *cs_base = (*flags & IMM_FLAG ? env->imm : 0); -} - #if !defined(CONFIG_USER_ONLY) bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index 27fc929..ef0e2f9 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -21,11 +21,56 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/cputlb.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "qemu/host-utils.h" #include "exec/log.h" +#include "exec/helper-proto.h" + + +G_NORETURN +static void mb_unaligned_access_internal(CPUState *cs, uint64_t addr, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + uint32_t esr, iflags; + + /* Recover the pc and iflags from the corresponding insn_start. 
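 * (Editor's note: this works because microblaze now defines
 * TARGET_INSN_START_EXTRA_WORDS as 1 in cpu-param.h and the translator
 * records iflags -- including the ESR_ESS bits for the faulting insn --
 * via tcg_set_insn_start_param(); cpu_restore_state() presumably replays
 * that extra word back into env before the ESR value is built below.)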
*/ + cpu_restore_state(cs, retaddr); + iflags = env->iflags; + + qemu_log_mask(CPU_LOG_INT, + "Unaligned access addr=0x%" PRIx64 " pc=%x iflags=%x\n", + addr, env->pc, iflags); + + esr = ESR_EC_UNALIGNED_DATA; + if (likely(iflags & ESR_ESS_FLAG)) { + esr |= iflags & ESR_ESS_MASK; + } else { + qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); + } + + env->ear = addr; + env->esr = esr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit(cs); +} + +void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + mb_unaligned_access_internal(cs, addr, retaddr); +} #ifndef CONFIG_USER_ONLY + +void HELPER(unaligned_access)(CPUMBState *env, uint64_t addr) +{ + mb_unaligned_access_internal(env_cpu(env), addr, GETPC()); +} + static bool mb_cpu_access_is_secure(MicroBlazeCPU *cpu, MMUAccessType access_type) { @@ -267,31 +312,3 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } #endif /* !CONFIG_USER_ONLY */ - -void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) -{ - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - uint32_t esr, iflags; - - /* Recover the pc and iflags from the corresponding insn_start. */ - cpu_restore_state(cs, retaddr); - iflags = cpu->env.iflags; - - qemu_log_mask(CPU_LOG_INT, - "Unaligned access addr=" TARGET_FMT_lx " pc=%x iflags=%x\n", - (target_ulong)addr, cpu->env.pc, iflags); - - esr = ESR_EC_UNALIGNED_DATA; - if (likely(iflags & ESR_ESS_FLAG)) { - esr |= iflags & ESR_ESS_MASK; - } else { - qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); - } - - cpu->env.ear = addr; - cpu->env.esr = esr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit(cs); -} diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h index f740835..ef4fad9 100644 --- a/target/microblaze/helper.h +++ b/target/microblaze/helper.h @@ -20,12 +20,22 @@ DEF_HELPER_FLAGS_3(fcmp_ne, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_3(fcmp_ge, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32) -#if !defined(CONFIG_USER_ONLY) -DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) -#endif - DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, tl) - DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32) DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32) + +#ifndef CONFIG_USER_ONLY +DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(unaligned_access, TCG_CALL_NO_WG, noreturn, env, i64) +DEF_HELPER_FLAGS_2(lbuea, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_3(sbea, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_le, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_le, TCG_CALL_NO_WG, void, env, i32, i64) +#endif diff --git a/target/microblaze/machine.c b/target/microblaze/machine.c index 51705e4..a4cf38d 100644 --- a/target/microblaze/machine.c +++ 
b/target/microblaze/machine.c @@ -93,7 +93,7 @@ static const VMStateDescription vmstate_env = { }; static const VMStateField vmstate_cpu_fields[] = { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, MicroBlazeCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, MicroBlazeCPU, 1, vmstate_env, CPUMBState), VMSTATE_END_OF_LIST() }; diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index f8587d5..8703ff5 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -22,7 +22,9 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/cputlb.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/page-protection.h" +#include "exec/target_page.h" static unsigned int tlb_decode_size(unsigned int f) { @@ -170,7 +172,8 @@ unsigned int mmu_translate(MicroBlazeCPU *cpu, MicroBlazeMMULookup *lu, } done: qemu_log_mask(CPU_LOG_MMU, - "MMU vaddr=%" PRIx64 " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", + "MMU vaddr=0x" TARGET_FMT_lx + " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", vaddr, rw, tlb_wr, tlb_ex, hit); return hit; } diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c index f637803..b8365b3 100644 --- a/target/microblaze/op_helper.c +++ b/target/microblaze/op_helper.c @@ -23,8 +23,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "fpu/softfloat.h" void helper_put(uint32_t id, uint32_t ctrl, uint32_t data) @@ -383,6 +382,8 @@ void helper_stackprot(CPUMBState *env, target_ulong addr) } #if !defined(CONFIG_USER_ONLY) +#include "system/memory.h" + /* Writes/reads to the MMU's special regs end up here. */ uint32_t helper_mmu_read(CPUMBState *env, uint32_t ext, uint32_t rn) { @@ -394,38 +395,90 @@ void helper_mmu_write(CPUMBState *env, uint32_t ext, uint32_t rn, uint32_t v) mmu_write(env, ext, rn, v); } +static void mb_transaction_failed_internal(CPUState *cs, hwaddr physaddr, + uint64_t addr, unsigned size, + MMUAccessType access_type, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + MicroBlazeCPU *cpu = env_archcpu(env); + const char *access_name = "INVALID"; + bool take = env->msr & MSR_EE; + uint32_t esr = ESR_EC_DATA_BUS; + + switch (access_type) { + case MMU_INST_FETCH: + access_name = "INST_FETCH"; + esr = ESR_EC_INSN_BUS; + take &= cpu->cfg.iopb_bus_exception; + break; + case MMU_DATA_LOAD: + access_name = "DATA_LOAD"; + take &= cpu->cfg.dopb_bus_exception; + break; + case MMU_DATA_STORE: + access_name = "DATA_STORE"; + take &= cpu->cfg.dopb_bus_exception; + break; + } + + qemu_log_mask(CPU_LOG_INT, "Transaction failed: addr 0x%" PRIx64 + " physaddr 0x" HWADDR_FMT_plx " size %d access-type %s (%s)\n", + addr, physaddr, size, access_name, + take ? "TAKEN" : "DROPPED"); + + if (take) { + env->esr = esr; + env->ear = addr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, retaddr); + } +} + void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr) { - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - CPUMBState *env = &cpu->env; + mb_transaction_failed_internal(cs, physaddr, addr, size, + access_type, retaddr); +} - qemu_log_mask(CPU_LOG_INT, "Transaction failed: vaddr 0x%" VADDR_PRIx " physaddr 0x" HWADDR_FMT_plx " size %d access type %s\n", addr, physaddr, size, access_type == MMU_INST_FETCH ? "INST_FETCH" : (access_type == MMU_DATA_LOAD ? 
"DATA_LOAD" : "DATA_STORE")); +#define LD_EA(NAME, TYPE, FUNC) \ +uint32_t HELPER(NAME)(CPUMBState *env, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + TYPE ret = FUNC(cs->as, ea, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_LOAD, GETPC()); \ + } \ + return ret; \ +} - if (!(env->msr & MSR_EE)) { - return; - } +LD_EA(lbuea, uint8_t, address_space_ldub) +LD_EA(lhuea_be, uint16_t, address_space_lduw_be) +LD_EA(lhuea_le, uint16_t, address_space_lduw_le) +LD_EA(lwea_be, uint32_t, address_space_ldl_be) +LD_EA(lwea_le, uint32_t, address_space_ldl_le) + +#define ST_EA(NAME, TYPE, FUNC) \ +void HELPER(NAME)(CPUMBState *env, uint32_t data, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + FUNC(cs->as, ea, data, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_STORE, GETPC()); \ + } \ +} - if (access_type == MMU_INST_FETCH) { - if (!cpu->cfg.iopb_bus_exception) { - return; - } - env->esr = ESR_EC_INSN_BUS; - } else { - if (!cpu->cfg.dopb_bus_exception) { - return; - } - env->esr = ESR_EC_DATA_BUS; - } +ST_EA(sbea, uint8_t, address_space_stb) +ST_EA(shea_be, uint16_t, address_space_stw_be) +ST_EA(shea_le, uint16_t, address_space_stw_le) +ST_EA(swea_be, uint32_t, address_space_stl_be) +ST_EA(swea_le, uint32_t, address_space_stl_le) - env->ear = addr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit_restore(cs, retaddr); -} #endif diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index b54e5ac..5098a1d 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -20,13 +20,13 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/translator.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "qemu/qemu-print.h" #include "exec/log.h" @@ -63,9 +63,6 @@ typedef struct DisasContext { DisasContextBase base; const MicroBlazeCPUConfig *cfg; - TCGv_i32 r0; - bool r0_set; - /* Decoder. */ uint32_t ext_imm; unsigned int tb_flags; @@ -179,14 +176,7 @@ static TCGv_i32 reg_for_read(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (!dc->r0_set) { - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - tcg_gen_movi_i32(dc->r0, 0); - dc->r0_set = true; - } - return dc->r0; + return tcg_constant_i32(0); } static TCGv_i32 reg_for_write(DisasContext *dc, int reg) @@ -194,10 +184,7 @@ static TCGv_i32 reg_for_write(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - return dc->r0; + return tcg_temp_new_i32(); } static bool do_typea(DisasContext *dc, arg_typea *arg, bool side_effects, @@ -310,11 +297,7 @@ static void gen_add(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_addc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); - TCGv_i32 tmp = tcg_temp_new_i32(); - - tcg_gen_add2_i32(tmp, cpu_msr_c, ina, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, ina, inb, cpu_msr_c); } /* Input carry, but no output carry. 
*/ @@ -543,12 +526,10 @@ static void gen_rsub(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_rsubc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_not_i32(tmp, ina); - tcg_gen_add2_i32(tmp, cpu_msr_c, tmp, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, tmp, inb, cpu_msr_c); } /* No input or output carry. */ @@ -625,19 +606,18 @@ DO_TYPEBI(xori, false, tcg_gen_xori_i32) static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) { - TCGv ret = tcg_temp_new(); + TCGv ret; /* If any of the regs is r0, set t to the value of the other reg. */ if (ra && rb) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_add_i32(tmp, cpu_R[ra], cpu_R[rb]); - tcg_gen_extu_i32_tl(ret, tmp); + ret = tcg_temp_new_i32(); + tcg_gen_add_i32(ret, cpu_R[ra], cpu_R[rb]); } else if (ra) { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); + ret = cpu_R[ra]; } else if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + ret = cpu_R[rb]; } else { - tcg_gen_movi_tl(ret, 0); + ret = tcg_constant_i32(0); } if ((ra == 1 || rb == 1) && dc->cfg->stackprot) { @@ -648,15 +628,16 @@ static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) { - TCGv ret = tcg_temp_new(); + TCGv ret; /* If any of the regs is r0, set t to the value of the other reg. */ - if (ra) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_addi_i32(tmp, cpu_R[ra], imm); - tcg_gen_extu_i32_tl(ret, tmp); + if (ra && imm) { + ret = tcg_temp_new_i32(); + tcg_gen_addi_i32(ret, cpu_R[ra], imm); + } else if (ra) { + ret = cpu_R[ra]; } else { - tcg_gen_movi_tl(ret, (uint32_t)imm); + ret = tcg_constant_i32(imm); } if (ra == 1 && dc->cfg->stackprot) { @@ -666,23 +647,23 @@ static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) } #ifndef CONFIG_USER_ONLY -static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) +static TCGv_i64 compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) { int addr_size = dc->cfg->addr_size; - TCGv ret = tcg_temp_new(); + TCGv_i64 ret = tcg_temp_new_i64(); if (addr_size == 32 || ra == 0) { if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + tcg_gen_extu_i32_i64(ret, cpu_R[rb]); } else { - tcg_gen_movi_tl(ret, 0); + return tcg_constant_i64(0); } } else { if (rb) { tcg_gen_concat_i32_i64(ret, cpu_R[rb], cpu_R[ra]); } else { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); - tcg_gen_shli_tl(ret, ret, 32); + tcg_gen_extu_i32_i64(ret, cpu_R[ra]); + tcg_gen_shli_i64(ret, ret, 32); } if (addr_size < 64) { /* Mask off out of range bits. 
*/ @@ -706,6 +687,20 @@ static void record_unaligned_ess(DisasContext *dc, int rd, tcg_set_insn_start_param(dc->base.insn_start, 1, iflags); } + +static void gen_alignment_check_ea(DisasContext *dc, TCGv_i64 ea, int rb, + int rd, MemOp size, bool store) +{ + if (rb && (dc->tb_flags & MSR_EE) && dc->cfg->unaligned_exceptions) { + TCGLabel *over = gen_new_label(); + + record_unaligned_ess(dc, rd, size, store); + + tcg_gen_brcondi_i64(TCG_COND_TSTEQ, ea, (1 << size) - 1, over); + gen_helper_unaligned_access(tcg_env, ea); + gen_set_label(over); + } +} #endif static inline MemOp mo_endian(DisasContext *dc) @@ -771,10 +766,11 @@ static bool trans_lbuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_lbuea(reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -802,10 +798,13 @@ static bool trans_lhuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, false); + (mo_endian(dc) == MO_BE ? gen_helper_lhuea_be : gen_helper_lhuea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -833,10 +832,13 @@ static bool trans_lwea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, false); + (mo_endian(dc) == MO_BE ? gen_helper_lwea_be : gen_helper_lwea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -924,10 +926,11 @@ static bool trans_sbea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_sbea(tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -955,10 +958,13 @@ static bool trans_shea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, true); + (mo_endian(dc) == MO_BE ? 
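/*
 * Editor's note: in gen_alignment_check_ea() above, TCG_COND_TSTEQ
 * branches when (ea & ((1 << size) - 1)) == 0, i.e. aligned addresses
 * jump over the helper call and only a misaligned ea falls through into
 * gen_helper_unaligned_access().  For a 32-bit access (MO_32, size = 2)
 * the tested mask is 0x3.
 */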
gen_helper_shea_be : gen_helper_shea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -986,10 +992,13 @@ static bool trans_swea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, true); + (mo_endian(dc) == MO_BE ? gen_helper_swea_be : gen_helper_swea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -1613,8 +1622,6 @@ static void mb_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs) dc->cfg = &cpu->cfg; dc->tb_flags = dc->base.tb->flags; dc->ext_imm = dc->base.tb->cs_base; - dc->r0 = NULL; - dc->r0_set = false; dc->mem_index = cpu_mmu_index(cs, false); dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER; dc->jmp_dest = -1; @@ -1653,11 +1660,6 @@ static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs) trap_illegal(dc, true); } - if (dc->r0) { - dc->r0 = NULL; - dc->r0_set = false; - } - /* Discard the imm global when its contents cannot be used. */ if ((dc->tb_flags & ~dc->tb_flags_to_set) & IMM_FLAG) { tcg_gen_discard_i32(cpu_imm); @@ -1835,7 +1837,7 @@ void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags) } qemu_fprintf(f, "\nesr=0x%04x fsr=0x%02x btr=0x%08x edr=0x%x\n" - "ear=0x" TARGET_FMT_lx " slr=0x%x shr=0x%x\n", + "ear=0x%" PRIx64 " slr=0x%x shr=0x%x\n", env->esr, env->fsr, env->btr, env->edr, env->ear, env->slr, env->shr); diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc index 922fc39..d93b9d3 100644 --- a/target/mips/cpu-defs.c.inc +++ b/target/mips/cpu-defs.c.inc @@ -756,8 +756,9 @@ const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) | + (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | + (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, @@ -796,8 +797,9 @@ const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) | + (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | + (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, diff --git a/target/mips/cpu-param.h b/target/mips/cpu-param.h index 8fcb1b4..58f4508 100644 --- a/target/mips/cpu-param.h +++ b/target/mips/cpu-param.h @@ -20,6 +20,6 @@ #endif #define TARGET_PAGE_BITS 12 -#define TCG_GUEST_DEFAULT_MO (0) +#define TARGET_INSN_START_EXTRA_WORDS 2 #endif diff --git a/target/mips/cpu.c b/target/mips/cpu.c index b207106..5989c3b 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -29,11 +29,12 @@ #include "qemu/module.h" #include 
"system/kvm.h" #include "system/qtest.h" -#include "exec/exec-all.h" #include "hw/qdev-properties.h" #include "hw/qdev-clock.h" -#include "semihosting/semihost.h" #include "fpu_helper.h" +#ifndef CONFIG_USER_ONLY +#include "semihosting/semihost.h" +#endif const char regnames[32][3] = { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3", @@ -144,7 +145,7 @@ static bool mips_cpu_has_work(CPUState *cs) * check for interrupts that can be taken. For pre-release 6 CPUs, * check for CP0 Config7 'Wait IE ignore' bit. */ - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_mips_hw_interrupts_pending(env)) { if (cpu_mips_hw_interrupts_enabled(env) || (env->CP0_Config7 & (1 << CP0C7_WII)) || @@ -159,7 +160,7 @@ static bool mips_cpu_has_work(CPUState *cs) * The QEMU model will issue an _WAKE request whenever the CPUs * should be woken up. */ - if (cs->interrupt_request & CPU_INTERRUPT_WAKE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_WAKE)) { has_work = true; } @@ -169,7 +170,7 @@ static bool mips_cpu_has_work(CPUState *cs) } /* MIPS Release 6 has the ability to halt the CPU. */ if (env->CP0_Config5 & (1 << CP0C5_VP)) { - if (cs->interrupt_request & CPU_INTERRUPT_WAKE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_WAKE)) { has_work = true; } if (!mips_vp_active(env)) { @@ -180,11 +181,6 @@ static bool mips_cpu_has_work(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -static int mips_cpu_mmu_index(CPUState *cs, bool ifunc) -{ - return mips_env_mmu_index(cpu_env(cs)); -} - #include "cpu-defs.c.inc" static void mips_cpu_reset_hold(Object *obj, ResetType type) @@ -415,12 +411,11 @@ static void mips_cpu_reset_hold(Object *obj, ResetType type) restore_pamask(env); cs->exception_index = EXCP_NONE; +#ifndef CONFIG_USER_ONLY if (semihosting_get_argc()) { /* UHI interface can be used to obtain argc and argv */ env->active_tc.gpr[4] = -1; } - -#ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_mips_reset_vcpu(cpu); } @@ -548,16 +543,48 @@ static const Property mips_cpu_properties[] = { #ifdef CONFIG_TCG #include "accel/tcg/cpu-ops.h" + +static int mips_cpu_mmu_index(CPUState *cs, bool ifunc) +{ + return mips_env_mmu_index(cpu_env(cs)); +} + +static TCGTBCPUState mips_get_tb_cpu_state(CPUState *cs) +{ + CPUMIPSState *env = cpu_env(cs); + + return (TCGTBCPUState){ + .pc = env->active_tc.PC, + .flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK | + MIPS_HFLAG_HWRENA_ULR), + }; +} + +#ifndef CONFIG_USER_ONLY +static vaddr mips_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & MIPS_HFLAG_AWRAP ? 
(int32_t)result : result; +} +#endif + static const TCGCPUOps mips_tcg_ops = { + .mttcg_supported = TARGET_LONG_BITS == 32, + .guest_default_memory_order = 0, + .initialize = mips_tcg_init, .translate_code = mips_translate_code, + .get_tb_cpu_state = mips_get_tb_cpu_state, .synchronize_from_tb = mips_cpu_synchronize_from_tb, .restore_state_to_opc = mips_restore_state_to_opc, + .mmu_index = mips_cpu_mmu_index, #if !defined(CONFIG_USER_ONLY) .tlb_fill = mips_cpu_tlb_fill, + .pointer_wrap = mips_pointer_wrap, .cpu_exec_interrupt = mips_cpu_exec_interrupt, .cpu_exec_halt = mips_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = mips_cpu_do_interrupt, .do_transaction_failed = mips_cpu_do_transaction_failed, .do_unaligned_access = mips_cpu_do_unaligned_access, @@ -566,7 +593,7 @@ static const TCGCPUOps mips_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void mips_cpu_class_init(ObjectClass *c, void *data) +static void mips_cpu_class_init(ObjectClass *c, const void *data) { MIPSCPUClass *mcc = MIPS_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); @@ -580,7 +607,6 @@ static void mips_cpu_class_init(ObjectClass *c, void *data) &mcc->parent_phases); cc->class_by_name = mips_cpu_class_by_name; - cc->mmu_index = mips_cpu_mmu_index; cc->dump_state = mips_cpu_dump_state; cc->set_pc = mips_cpu_set_pc; cc->get_pc = mips_cpu_get_pc; @@ -608,7 +634,7 @@ static const TypeInfo mips_cpu_type_info = { .class_init = mips_cpu_class_init, }; -static void mips_cpu_cpudef_class_init(ObjectClass *oc, void *data) +static void mips_cpu_cpudef_class_init(ObjectClass *oc, const void *data) { MIPSCPUClass *mcc = MIPS_CPU_CLASS(oc); mcc->cpu_def = data; @@ -621,7 +647,7 @@ static void mips_register_cpudef_type(const struct mips_def_t *def) .name = typename, .parent = TYPE_MIPS_CPU, .class_init = mips_cpu_cpudef_class_init, - .class_data = (void *)def, + .class_data = def, }; type_register_static(&ti); diff --git a/target/mips/cpu.h b/target/mips/cpu.h index f6877ec..5cd4c6c 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -2,9 +2,11 @@ #define MIPS_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #ifndef CONFIG_USER_ONLY -#include "exec/memory.h" +#include "system/memory.h" #endif #include "fpu/softfloat-types.h" #include "hw/clock.h" @@ -98,8 +100,6 @@ struct CPUMIPSFPUContext { #define FP_UNIMPLEMENTED 32 }; -#define TARGET_INSN_START_EXTRA_WORDS 2 - typedef struct CPUMIPSMVPContext CPUMIPSMVPContext; struct CPUMIPSMVPContext { int32_t CP0_MVPControl; @@ -1256,8 +1256,6 @@ static inline int mips_env_mmu_index(CPUMIPSState *env) return hflags_mmu_index(env->hflags); } -#include "exec/cpu-all.h" - /* Exceptions */ enum { EXCP_NONE = -1, @@ -1368,15 +1366,6 @@ void cpu_mips_clock_init(MIPSCPU *cpu); /* helper.c */ target_ulong exception_resume_pc(CPUMIPSState *env); -static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->active_tc.PC; - *cs_base = 0; - *flags = env->hflags & (MIPS_HFLAG_TMASK | MIPS_HFLAG_BMASK | - MIPS_HFLAG_HWRENA_ULR); -} - /** * mips_cpu_create_with_clock: * @typename: a MIPS CPU type. 
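Editor's note: the helper.h hunk just below declares crc32/crc32c helpers for the R6 CRC instructions, and meson.build starts linking zlib into the target. The helper bodies themselves are not part of this section; a minimal sketch of what a zlib-backed variant could look like (the function name and exact masking here are assumptions, not quoted from the tree):

#include <zlib.h>   /* crc32() */

static target_ulong do_crc32(target_ulong val, target_ulong m, uint32_t sz)
{
    uint8_t buf[8];
    uint64_t mask = (sz == 8) ? ~0ULL : (1ULL << (sz * 8)) - 1;

    stq_le_p(buf, m & mask);            /* message bytes, little-endian */
    /* zlib's crc32() is the reflected CRC-32 polynomial; the pre/post
       inversion matches the running-CRC convention of the insns. */
    return (int32_t)(crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff);
}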
diff --git a/target/mips/helper.h b/target/mips/helper.h index 7e40041..b6cd53c 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(dbitswap, TCG_CALL_NO_RWG_SE, tl, tl) #endif +DEF_HELPER_3(crc32, tl, tl, tl, i32) +DEF_HELPER_3(crc32c, tl, tl, tl, i32) DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32) /* microMIPS functions */ diff --git a/target/mips/kvm.c b/target/mips/kvm.c index d67b7c1..912cd5d 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { CPUMIPSState *env = cpu_env(cs); @@ -139,7 +144,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) bql_lock(); - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_mips_io_interrupts_pending(cpu)) { intr.cpu = -1; intr.irq = 2; @@ -585,7 +590,7 @@ static void kvm_mips_update_state(void *opaque, bool running, RunState state) } } -static int kvm_mips_put_fpu_registers(CPUState *cs, int level) +static int kvm_mips_put_fpu_registers(CPUState *cs, KvmPutState level) { CPUMIPSState *env = cpu_env(cs); int err, ret = 0; @@ -744,7 +749,7 @@ static int kvm_mips_get_fpu_registers(CPUState *cs) } -static int kvm_mips_put_cp0_registers(CPUState *cs, int level) +static int kvm_mips_put_cp0_registers(CPUState *cs, KvmPutState level) { CPUMIPSState *env = cpu_env(cs); int err, ret = 0; @@ -1172,7 +1177,7 @@ static int kvm_mips_get_cp0_registers(CPUState *cs) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { CPUMIPSState *env = cpu_env(cs); struct kvm_regs regs; diff --git a/target/mips/meson.build b/target/mips/meson.build index 247979a..abf0ce3 100644 --- a/target/mips/meson.build +++ b/target/mips/meson.build @@ -7,6 +7,7 @@ mips_ss.add(files( 'gdbstub.c', 'msa.c', )) +mips_ss.add(zlib) if have_system subdir('system') diff --git a/target/mips/system/mips-qmp-cmds.c b/target/mips/system/mips-qmp-cmds.c index 7340ac7..b6a2874 100644 --- a/target/mips/system/mips-qmp-cmds.c +++ b/target/mips/system/mips-qmp-cmds.c @@ -7,9 +7,20 @@ */ #include "qemu/osdep.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qemu/target-info.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void mips_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -30,7 +41,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_MIPS_CPU, false); + list = object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, mips_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/mips/system/physaddr.c b/target/mips/system/physaddr.c index 505781d..b8e1a5a 100644 --- a/target/mips/system/physaddr.c +++ b/target/mips/system/physaddr.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "../internal.h" diff --git 
a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c index 1a8902e..d32bceb 100644 --- a/target/mips/tcg/exception.c +++ b/target/mips/tcg/exception.c @@ -23,7 +23,6 @@ #include "cpu.h" #include "internal.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" target_ulong exception_resume_pc(CPUMIPSState *env) diff --git a/target/mips/tcg/fpu_helper.c b/target/mips/tcg/fpu_helper.c index 45d593d..36af980 100644 --- a/target/mips/tcg/fpu_helper.c +++ b/target/mips/tcg/fpu_helper.c @@ -24,7 +24,6 @@ #include "cpu.h" #include "internal.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "fpu/softfloat.h" #include "fpu_helper.h" diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c index f92a923..10319bf 100644 --- a/target/mips/tcg/ldst_helper.c +++ b/target/mips/tcg/ldst_helper.c @@ -23,8 +23,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/memop.h" #include "internal.h" diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc index c479bec..8fda7c8 100644 --- a/target/mips/tcg/micromips_translate.c.inc +++ b/target/mips/tcg/micromips_translate.c.inc @@ -1795,7 +1795,7 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) return; case LSA: check_insn(ctx, ISA_MIPS_R6); - gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2)); + gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2) + 1); break; case ALIGN: check_insn(ctx, ISA_MIPS_R6); diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc index a9af8f1..97da345 100644 --- a/target/mips/tcg/mips16e_translate.c.inc +++ b/target/mips/tcg/mips16e_translate.c.inc @@ -306,7 +306,7 @@ static void gen_mips16_restore(DisasContext *ctx, int astatic; TCGv t0 = tcg_temp_new(); - gen_op_addr_addi(ctx, t0, cpu_gpr[29], -framesize); + gen_op_addr_addi(ctx, t0, cpu_gpr[29], framesize); if (do_ra) { decr_and_load(ctx, 31, t0); @@ -386,7 +386,7 @@ static void gen_mips16_restore(DisasContext *ctx, } } - gen_op_addr_addi(ctx, cpu_gpr[29], cpu_gpr[29], -framesize); + gen_op_addr_addi(ctx, cpu_gpr[29], cpu_gpr[29], framesize); } #if defined(TARGET_MIPS64) diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c index 74fb80c..f554b3d 100644 --- a/target/mips/tcg/msa_helper.c +++ b/target/mips/tcg/msa_helper.c @@ -21,10 +21,11 @@ #include "cpu.h" #include "internal.h" #include "tcg/tcg.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/helper-proto.h" #include "exec/memop.h" +#include "exec/target_page.h" #include "fpu/softfloat.h" #include "fpu_helper.h" @@ -8211,7 +8212,6 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd, /* Element-by-element access macros */ #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df)) -#if TARGET_BIG_ENDIAN static inline uint64_t bswap16x4(uint64_t x) { uint64_t m = 0x00ff00ff00ff00ffull; @@ -8222,7 +8222,6 @@ static inline uint64_t bswap32x2(uint64_t x) { return ror64(bswap64(x), 32); } -#endif void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd, target_ulong addr) @@ -8251,10 +8250,10 @@ void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd, */ d0 = cpu_ldq_le_data_ra(env, addr + 0, ra); d1 = cpu_ldq_le_data_ra(env, addr + 8, ra); -#if TARGET_BIG_ENDIAN - d0 = bswap16x4(d0); - d1 = 
bswap16x4(d1); -#endif + if (mips_env_is_bigendian(env)) { + d0 = bswap16x4(d0); + d1 = bswap16x4(d1); + } pwd->d[0] = d0; pwd->d[1] = d1; } @@ -8272,10 +8271,10 @@ void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd, */ d0 = cpu_ldq_le_data_ra(env, addr + 0, ra); d1 = cpu_ldq_le_data_ra(env, addr + 8, ra); -#if TARGET_BIG_ENDIAN - d0 = bswap32x2(d0); - d1 = bswap32x2(d1); -#endif + if (mips_env_is_bigendian(env)) { + d0 = bswap32x2(d0); + d1 = bswap32x2(d1); + } pwd->d[0] = d0; pwd->d[1] = d1; } @@ -8338,10 +8337,10 @@ void helper_msa_st_h(CPUMIPSState *env, uint32_t wd, /* Store 8 bytes at a time. See helper_msa_ld_h. */ d0 = pwd->d[0]; d1 = pwd->d[1]; -#if TARGET_BIG_ENDIAN - d0 = bswap16x4(d0); - d1 = bswap16x4(d1); -#endif + if (mips_env_is_bigendian(env)) { + d0 = bswap16x4(d0); + d1 = bswap16x4(d1); + } cpu_stq_le_data_ra(env, addr + 0, d0, ra); cpu_stq_le_data_ra(env, addr + 8, d1, ra); } @@ -8359,10 +8358,10 @@ void helper_msa_st_w(CPUMIPSState *env, uint32_t wd, /* Store 8 bytes at a time. See helper_msa_ld_w. */ d0 = pwd->d[0]; d1 = pwd->d[1]; -#if TARGET_BIG_ENDIAN - d0 = bswap32x2(d0); - d1 = bswap32x2(d1); -#endif + if (mips_env_is_bigendian(env)) { + d0 = bswap32x2(d0); + d1 = bswap32x2(d1); + } cpu_stq_le_data_ra(env, addr + 0, d0, ra); cpu_stq_le_data_ra(env, addr + 8, d1, ra); } diff --git a/target/mips/tcg/msa_translate.c b/target/mips/tcg/msa_translate.c index 75cf80a..82b1499 100644 --- a/target/mips/tcg/msa_translate.c +++ b/target/mips/tcg/msa_translate.c @@ -780,7 +780,7 @@ TRANS_DF_iv(ST, trans_msa_ldst, gen_helper_msa_st); static bool trans_LSA(DisasContext *ctx, arg_r *a) { - return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } static bool trans_DLSA(DisasContext *ctx, arg_r *a) @@ -788,5 +788,5 @@ static bool trans_DLSA(DisasContext *ctx, arg_r *a) if (TARGET_LONG_BITS != 64) { return false; } - return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index 1e27414..9d4e0be 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -3626,12 +3626,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) gen_p_lsx(ctx, rd, rs, rt); break; case NM_LSA: - /* - * In nanoMIPS, the shift field directly encodes the shift - * amount, meaning that the supported shift values are in - * the range 0 to 3 (instead of 1 to 4 in MIPSR6). - */ - gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2) - 1); + gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2)); break; case NM_EXTW: gen_ext(ctx, 32, rd, rs, rt, extract32(ctx->opcode, 6, 5)); diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index 0c787cb..102a058 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -1,6 +1,7 @@ # Octeon Architecture Module instruction set # # Copyright (C) 2022 Pavel Dovgalyuk +# Copyright (C) 2024 Philippe Mathieu-Daudé # # SPDX-License-Identifier: LGPL-2.1-or-later # @@ -39,3 +40,10 @@ CINS 011100 ..... ..... ..... ..... 11001 . @bitfield POP 011100 rs:5 00000 rd:5 00000 10110 dw:1 SEQNE 011100 rs:5 rt:5 rd:5 00000 10101 ne:1 SEQNEI 011100 rs:5 rt:5 imm:s10 10111 ne:1 + +&lx base index rd +@lx ...... base:5 index:5 rd:5 ...... ..... &lx +LWX 011111 ..... ..... ..... 00000 001010 @lx +LHX 011111 ..... ..... ..... 00100 001010 @lx +LBUX 011111 ..... ..... ..... 
00110 001010 @lx +LDX 011111 ..... ..... ..... 01000 001010 @lx diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index d9eb437..b2eca29 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -174,3 +174,15 @@ static bool trans_SEQNEI(DisasContext *ctx, arg_SEQNEI *a) } return true; } + +static bool trans_lx(DisasContext *ctx, arg_lx *a, MemOp mop) +{ + gen_lx(ctx, a->rd, a->base, a->index, mop); + + return true; +} + +TRANS(LBUX, trans_lx, MO_UB); +TRANS(LHX, trans_lx, MO_SW); +TRANS(LWX, trans_lx, MO_SL); +TRANS(LDX, trans_lx, MO_UQ); diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c index 65403f1..4502ae2 100644 --- a/target/mips/tcg/op_helper.c +++ b/target/mips/tcg/op_helper.c @@ -22,9 +22,10 @@ #include "cpu.h" #include "internal.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "exec/memop.h" #include "fpu_helper.h" +#include "qemu/crc32c.h" +#include <zlib.h> static inline target_ulong bitswap(target_ulong v) { @@ -143,6 +144,30 @@ target_ulong helper_rotx(target_ulong rs, uint32_t shift, uint32_t shiftx, return (int64_t)(int32_t)(uint32_t)tmp5; } +/* these crc32 functions are based on target/loongarch/tcg/op_helper.c */ +target_ulong helper_crc32(target_ulong val, target_ulong m, uint32_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? + (target_ulong) -1ULL : + ((1ULL << (sz * 8)) - 1); + + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff); +} + +target_ulong helper_crc32c(target_ulong val, target_ulong m, uint32_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? + (target_ulong) -1ULL : + ((1ULL << (sz * 8)) - 1); + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff); +} + void helper_fork(target_ulong arg1, target_ulong arg2) { /* diff --git a/target/mips/tcg/rel6.decode b/target/mips/tcg/rel6.decode index d6989cf..7fbcb10 100644 --- a/target/mips/tcg/rel6.decode +++ b/target/mips/tcg/rel6.decode @@ -16,11 +16,16 @@ &r rs rt rd sa +&special3_crc rs rt c sz + @lsa ...... rs:5 rt:5 rd:5 ... sa:2 ...... &r +@crc32 ...... rs:5 rt:5 ..... c:3 sz:2 ...... &special3_crc LSA 000000 ..... ..... ..... 000 .. 000101 @lsa DLSA 000000 ..... ..... ..... 000 .. 010101 @lsa +CRC32 011111 ..... ..... 00000 ... .. 
001111 @crc32 + REMOVED 010011 ----- ----- ----- ----- ------ # COP1X (COP3) REMOVED 011100 ----- ----- ----- ----- ------ # SPECIAL2 diff --git a/target/mips/tcg/rel6_translate.c b/target/mips/tcg/rel6_translate.c index 59f237b..4c05662 100644 --- a/target/mips/tcg/rel6_translate.c +++ b/target/mips/tcg/rel6_translate.c @@ -23,7 +23,7 @@ bool trans_REMOVED(DisasContext *ctx, arg_REMOVED *a) static bool trans_LSA(DisasContext *ctx, arg_r *a) { - return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } static bool trans_DLSA(DisasContext *ctx, arg_r *a) @@ -31,5 +31,17 @@ static bool trans_DLSA(DisasContext *ctx, arg_r *a) if (TARGET_LONG_BITS != 64) { return false; } - return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); +} + +static bool trans_CRC32(DisasContext *ctx, arg_special3_crc *a) +{ + if (unlikely(!ctx->crcp) + || unlikely((a->sz == 3) && (!(ctx->hflags & MIPS_HFLAG_64))) + || unlikely((a->c >= 2))) { + gen_reserved_instruction(ctx); + return true; + } + gen_crc32(ctx, a->rt, a->rs, a->rt, a->sz, a->c); + return true; } diff --git a/target/mips/tcg/system/cp0_helper.c b/target/mips/tcg/system/cp0_helper.c index 78e422b..b69e70d 100644 --- a/target/mips/tcg/system/cp0_helper.c +++ b/target/mips/tcg/system/cp0_helper.c @@ -28,6 +28,7 @@ #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" +#include "exec/target_page.h" /* SMP helpers. */ @@ -1561,12 +1562,14 @@ target_ulong helper_dvpe(CPUMIPSState *env) CPUState *other_cs = first_cpu; target_ulong prev = env->mvp->CP0_MVPControl; - CPU_FOREACH(other_cs) { - MIPSCPU *other_cpu = MIPS_CPU(other_cs); - /* Turn off all VPEs except the one executing the dvpe. */ - if (&other_cpu->env != env) { - other_cpu->env.mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP); - mips_vpe_sleep(other_cpu); + if (env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) { + CPU_FOREACH(other_cs) { + MIPSCPU *other_cpu = MIPS_CPU(other_cs); + /* Turn off all VPEs except the one executing the dvpe. */ + if (&other_cpu->env != env) { + other_cpu->env.mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP); + mips_vpe_sleep(other_cpu); + } } } return prev; @@ -1577,15 +1580,17 @@ target_ulong helper_evpe(CPUMIPSState *env) CPUState *other_cs = first_cpu; target_ulong prev = env->mvp->CP0_MVPControl; - CPU_FOREACH(other_cs) { - MIPSCPU *other_cpu = MIPS_CPU(other_cs); + if (env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) { + CPU_FOREACH(other_cs) { + MIPSCPU *other_cpu = MIPS_CPU(other_cs); - if (&other_cpu->env != env - /* If the VPE is WFI, don't disturb its sleep. */ - && !mips_vpe_is_wfi(other_cpu)) { - /* Enable the VPE. */ - other_cpu->env.mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP); - mips_vpe_wake(other_cpu); /* And wake it up. */ + if (&other_cpu->env != env + /* If the VPE is WFI, don't disturb its sleep. */ + && !mips_vpe_is_wfi(other_cpu)) { + /* Enable the VPE. */ + other_cpu->env.mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP); + mips_vpe_wake(other_cpu); /* And wake it up. 
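This path is now reached only when the executing VPE holds the MVP bit in CP0_VPEConf0 (the guard added above), so a non-master VPE can no longer enable or wake the others.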
*/ + } } } return prev; diff --git a/target/mips/tcg/system/mips-semi.c b/target/mips/tcg/system/mips-semi.c index df0c325..e822a42 100644 --- a/target/mips/tcg/system/mips-semi.c +++ b/target/mips/tcg/system/mips-semi.c @@ -374,5 +374,4 @@ void mips_semihosting(CPUMIPSState *env) error_report("Unknown UHI operation %d", op); abort(); } - return; } diff --git a/target/mips/tcg/system/special_helper.c b/target/mips/tcg/system/special_helper.c index 3ce3ae1..b54cbe8 100644 --- a/target/mips/tcg/system/special_helper.c +++ b/target/mips/tcg/system/special_helper.c @@ -22,7 +22,6 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "internal.h" diff --git a/target/mips/tcg/system/tlb_helper.c b/target/mips/tcg/system/tlb_helper.c index df80301..1e89015 100644 --- a/target/mips/tcg/system/tlb_helper.c +++ b/target/mips/tcg/system/tlb_helper.c @@ -22,9 +22,9 @@ #include "cpu.h" #include "internal.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" -#include "exec/cpu_ldst.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/log.h" #include "exec/helper-proto.h" @@ -652,7 +652,7 @@ static int walk_directory(CPUMIPSState *env, uint64_t *vaddr, return 0; } - if ((entry & (1 << psn)) && hugepg) { + if (extract64(entry, psn, 1) && hugepg) { *huge_page = true; *hgpg_directory_hit = true; entry = get_tlb_entry_layout(env, entry, leaf_mop, pf_ptew); diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 78b848a..d91d6ef 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -27,6 +27,7 @@ #include "internal.h" #include "exec/helper-proto.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "semihosting/semihost.h" #include "trace.h" #include "fpu_helper.h" @@ -1956,6 +1957,17 @@ void gen_base_offset_addr(DisasContext *ctx, TCGv addr, int base, int offset) } } +void gen_base_index_addr(DisasContext *ctx, TCGv addr, int base, int index) +{ + if (base == 0) { + gen_load_gpr(addr, index); + } else if (index == 0) { + gen_load_gpr(addr, base); + } else { + gen_op_addr_add(ctx, addr, cpu_gpr[base], cpu_gpr[index]); + } +} + static target_ulong pc_relative_pc(DisasContext *ctx) { target_ulong pc = ctx->base.pc_next; @@ -2024,6 +2036,15 @@ static void gen_lxr(DisasContext *ctx, TCGv reg, TCGv addr, tcg_gen_or_tl(reg, t0, t1); } +void gen_lx(DisasContext *ctx, int rd, int base, int index, MemOp mop) +{ + TCGv t0 = tcg_temp_new(); + + gen_base_index_addr(ctx, t0, base, index); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | mop); + gen_store_gpr(t0, rd); +} + /* Load */ static void gen_ld(DisasContext *ctx, uint32_t opc, int rt, int base, int offset) @@ -10545,13 +10566,7 @@ static void gen_flt3_ldst(DisasContext *ctx, uint32_t opc, { TCGv t0 = tcg_temp_new(); - if (base == 0) { - gen_load_gpr(t0, index); - } else if (index == 0) { - gen_load_gpr(t0, base); - } else { - gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[index]); - } + gen_base_index_addr(ctx, t0, base, index); /* * Don't do NOP if destination is zero: we must perform the actual * memory access. @@ -11322,47 +11337,6 @@ enum { /* MIPSDSP functions. 
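The indexed loads that used to live here (gen_mips_lx) were never DSP-specific; they are also used by the Octeon LBUX/LHX/LWX/LDX decodetree path, so this patch consolidates them into gen_lx() above.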
*/ -/* Indexed load is not for DSP only */ -static void gen_mips_lx(DisasContext *ctx, uint32_t opc, - int rd, int base, int offset) -{ - TCGv t0; - - if (!(ctx->insn_flags & INSN_OCTEON)) { - check_dsp(ctx); - } - t0 = tcg_temp_new(); - - if (base == 0) { - gen_load_gpr(t0, offset); - } else if (offset == 0) { - gen_load_gpr(t0, base); - } else { - gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[offset]); - } - - switch (opc) { - case OPC_LBUX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB); - gen_store_gpr(t0, rd); - break; - case OPC_LHX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_SW); - gen_store_gpr(t0, rd); - break; - case OPC_LWX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_SL); - gen_store_gpr(t0, rd); - break; -#if defined(TARGET_MIPS64) - case OPC_LDX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_UQ); - gen_store_gpr(t0, rd); - break; -#endif - } -} - static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, int ret, int v1, int v2) { @@ -13448,6 +13422,29 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) } } +void gen_crc32(DisasContext *ctx, int rd, int rs, int rt, int sz, + int crc32c) +{ + TCGv t0; + TCGv t1; + TCGv_i32 tsz = tcg_constant_i32(1 << sz); + if (rd == 0) { + /* Treat as NOP. */ + return; + } + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + gen_load_gpr(t0, rt); + gen_load_gpr(t1, rs); + + if (crc32c) { + gen_helper_crc32c(cpu_gpr[rd], t0, t1, tsz); + } else { + gen_helper_crc32(cpu_gpr[rd], t0, t1, tsz); + } +} + static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) { int rs, rt, rd, sa; @@ -13610,15 +13607,22 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_LX_DSP: + check_dsp(ctx); op2 = MASK_LX(ctx->opcode); switch (op2) { #if defined(TARGET_MIPS64) case OPC_LDX: + gen_lx(ctx, rd, rs, rt, MO_UQ); + break; #endif case OPC_LBUX: + gen_lx(ctx, rd, rs, rt, MO_UB); + break; case OPC_LHX: + gen_lx(ctx, rd, rs, rt, MO_SW); + break; case OPC_LWX: - gen_mips_lx(ctx, op2, rd, rs, rt); + gen_lx(ctx, rd, rs, rt, MO_SL); break; default: /* Invalid */ MIPS_INVAL("MASK LX"); @@ -15094,6 +15098,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->abs2008 = (env->active_fpu.fcr31 >> FCR31_ABS2008) & 1; ctx->mi = (env->CP0_Config5 >> CP0C5_MI) & 1; ctx->gi = (env->CP0_Config5 >> CP0C5_GI) & 3; + ctx->crcp = (env->CP0_Config5 >> CP0C5_CRCP) & 1; restore_cpu_state(env, ctx); #ifdef CONFIG_USER_ONLY ctx->mem_idx = MIPS_HFLAG_UM; diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h index 1bf153d..89dde1e 100644 --- a/target/mips/tcg/translate.h +++ b/target/mips/tcg/translate.h @@ -51,6 +51,7 @@ typedef struct DisasContext { bool abs2008; bool mi; int gi; + bool crcp; } DisasContext; #define DISAS_STOP DISAS_TARGET_0 @@ -153,6 +154,7 @@ void check_cp1_registers(DisasContext *ctx, int regs); void check_cop1x(DisasContext *ctx); void gen_base_offset_addr(DisasContext *ctx, TCGv addr, int base, int offset); +void gen_base_index_addr(DisasContext *ctx, TCGv addr, int base, int index); void gen_move_low32(TCGv ret, TCGv_i64 arg); void gen_move_high32(TCGv ret, TCGv_i64 arg); void gen_load_gpr(TCGv t, int reg); @@ -167,6 +169,7 @@ void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg); void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg); int get_fp_bit(int cc); +void gen_lx(DisasContext *ctx, int rd, int base, int index, MemOp mop); void 
gen_ldxs(DisasContext *ctx, int base, int index, int rd); void gen_align(DisasContext *ctx, int wordsz, int rd, int rs, int rt, int bp); void gen_addiupc(DisasContext *ctx, int rx, int imm, @@ -181,6 +184,7 @@ bool gen_lsa(DisasContext *ctx, int rd, int rt, int rs, int sa); bool gen_dlsa(DisasContext *ctx, int rd, int rt, int rs, int sa); void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel); +void gen_crc32(DisasContext *ctx, int rd, int rs, int rt, int sz, int crc32c); extern TCGv cpu_gpr[32], cpu_PC; #if defined(TARGET_MIPS64) diff --git a/target/mips/tcg/translate_addr_const.c b/target/mips/tcg/translate_addr_const.c index 6f4b39f..1d140e9 100644 --- a/target/mips/tcg/translate_addr_const.c +++ b/target/mips/tcg/translate_addr_const.c @@ -26,7 +26,7 @@ bool gen_lsa(DisasContext *ctx, int rd, int rt, int rs, int sa) t1 = tcg_temp_new(); gen_load_gpr(t0, rs); gen_load_gpr(t1, rt); - tcg_gen_shli_tl(t0, t0, sa + 1); + tcg_gen_shli_tl(t0, t0, sa); tcg_gen_add_tl(cpu_gpr[rd], t0, t1); tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); return true; @@ -47,7 +47,7 @@ bool gen_dlsa(DisasContext *ctx, int rd, int rt, int rs, int sa) t1 = tcg_temp_new(); gen_load_gpr(t0, rs); gen_load_gpr(t1, rt); - tcg_gen_shli_tl(t0, t0, sa + 1); + tcg_gen_shli_tl(t0, t0, sa); tcg_gen_add_tl(cpu_gpr[rd], t0, t1); return true; } diff --git a/target/openrisc/cpu-param.h b/target/openrisc/cpu-param.h index 37627f2..b4f57bb 100644 --- a/target/openrisc/cpu-param.h +++ b/target/openrisc/cpu-param.h @@ -12,6 +12,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 -#define TCG_GUEST_DEFAULT_MO (0) +#define TARGET_INSN_START_EXTRA_WORDS 1 #endif diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index e8abf1f..9bbfe22 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -21,9 +21,9 @@ #include "qapi/error.h" #include "qemu/qemu-print.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "fpu/softfloat-helpers.h" +#include "accel/tcg/cpu-ops.h" #include "tcg/tcg.h" static void openrisc_cpu_set_pc(CPUState *cs, vaddr value) @@ -41,6 +41,18 @@ static vaddr openrisc_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState openrisc_get_tb_cpu_state(CPUState *cs) +{ + CPUOpenRISCState *env = cpu_env(cs); + + return (TCGTBCPUState){ + .pc = env->pc, + .flags = ((env->dflag ? TB_FLAGS_DFLAG : 0) + | (cpu_get_gpr(env, 0) ? 
0 : TB_FLAGS_R0_0) + | (env->sr & (SR_SM | SR_DME | SR_IME | SR_OVE))), + }; +} + static void openrisc_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -66,8 +78,7 @@ static void openrisc_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool openrisc_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | - CPU_INTERRUPT_TIMER); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER); } #endif /* !CONFIG_USER_ONLY */ @@ -240,23 +251,28 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps openrisc_tcg_ops = { + .guest_default_memory_order = 0, + .mttcg_supported = true, + .initialize = openrisc_translate_init, .translate_code = openrisc_translate_code, + .get_tb_cpu_state = openrisc_get_tb_cpu_state, .synchronize_from_tb = openrisc_cpu_synchronize_from_tb, .restore_state_to_opc = openrisc_restore_state_to_opc, + .mmu_index = openrisc_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = openrisc_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = openrisc_cpu_exec_interrupt, .cpu_exec_halt = openrisc_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = openrisc_cpu_do_interrupt, #endif /* !CONFIG_USER_ONLY */ }; -static void openrisc_cpu_class_init(ObjectClass *oc, void *data) +static void openrisc_cpu_class_init(ObjectClass *oc, const void *data) { OpenRISCCPUClass *occ = OPENRISC_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(occ); @@ -269,7 +285,6 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data) &occ->parent_phases); cc->class_by_name = openrisc_cpu_class_by_name; - cc->mmu_index = openrisc_cpu_mmu_index; cc->dump_state = openrisc_cpu_dump_state; cc->set_pc = openrisc_cpu_set_pc; cc->get_pc = openrisc_cpu_get_pc; diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index b97d2ff..f4bcf00 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -21,7 +21,9 @@ #define OPENRISC_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "fpu/softfloat-types.h" /** @@ -38,8 +40,6 @@ struct OpenRISCCPUClass { ResettablePhases parent_phases; }; -#define TARGET_INSN_START_EXTRA_WORDS 1 - enum { MMU_NOMMU_IDX = 0, MMU_SUPERVISOR_IDX = 1, @@ -332,8 +332,6 @@ void cpu_openrisc_count_stop(OpenRISCCPU *cpu); #define CPU_RESOLVING_TYPE TYPE_OPENRISC_CPU -#include "exec/cpu-all.h" - #define TB_FLAGS_SM SR_SM #define TB_FLAGS_DME SR_DME #define TB_FLAGS_IME SR_IME @@ -351,16 +349,6 @@ static inline void cpu_set_gpr(CPUOpenRISCState *env, int i, uint32_t val) env->shadow_gpr[0][i] = val; } -static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *cs_base = 0; - *flags = (env->dflag ? TB_FLAGS_DFLAG : 0) - | (cpu_get_gpr(env, 0) ? 
0 : TB_FLAGS_R0_0) - | (env->sr & (SR_SM | SR_DME | SR_IME | SR_OVE)); -} - static inline uint32_t cpu_get_sr(const CPUOpenRISCState *env) { return (env->sr diff --git a/target/openrisc/exception.c b/target/openrisc/exception.c index 8699c3d..e213be3 100644 --- a/target/openrisc/exception.c +++ b/target/openrisc/exception.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exception.h" G_NORETURN void raise_exception(OpenRISCCPU *cpu, uint32_t excp) diff --git a/target/openrisc/exception_helper.c b/target/openrisc/exception_helper.c index 1f5be4b..c2c9d13 100644 --- a/target/openrisc/exception_helper.c +++ b/target/openrisc/exception_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "exception.h" diff --git a/target/openrisc/fpu_helper.c b/target/openrisc/fpu_helper.c index 8b81d2f..dba9972 100644 --- a/target/openrisc/fpu_helper.c +++ b/target/openrisc/fpu_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c index b3b5b40..4868230 100644 --- a/target/openrisc/interrupt.c +++ b/target/openrisc/interrupt.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "gdbstub/helpers.h" #include "qemu/host-utils.h" #ifndef CONFIG_USER_ONLY diff --git a/target/openrisc/interrupt_helper.c b/target/openrisc/interrupt_helper.c index ab4ea88..1553ebc 100644 --- a/target/openrisc/interrupt_helper.c +++ b/target/openrisc/interrupt_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" void HELPER(rfe)(CPUOpenRISCState *env) diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c index 3574e57..081c706 100644 --- a/target/openrisc/machine.c +++ b/target/openrisc/machine.c @@ -136,7 +136,7 @@ const VMStateDescription vmstate_openrisc_cpu = { .minimum_version_id = 1, .post_load = cpu_post_load, .fields = (const VMStateField[]) { - VMSTATE_CPU(), + VMSTATE_STRUCT(parent_obj, OpenRISCCPU, 0, vmstate_cpu_common, CPUState), VMSTATE_STRUCT(env, OpenRISCCPU, 1, vmstate_env, CPUOpenRISCState), VMSTATE_END_OF_LIST() } diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c index 47ac783..acea50c 100644 --- a/target/openrisc/mmu.c +++ b/target/openrisc/mmu.c @@ -23,6 +23,7 @@ #include "cpu.h" #include "exec/cputlb.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "gdbstub/helpers.h" #include "qemu/host-utils.h" #include "hw/loader.h" diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index 21bc137..b091a9c 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" +#include "exec/target_page.h" #include "exec/helper-proto.h" #include "exception.h" #ifndef CONFIG_USER_ONLY @@ -196,7 +196,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) env->ttmr = (rb & ~TTMR_IP) | ip; } else { /* Clear IP bit. 
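Clearing TTMR_IP must also retire any pending timer interrupt; this now goes through the cpu_reset_interrupt() helper rather than clearing cs->interrupt_request by hand.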
*/ env->ttmr = rb & ~TTMR_IP; - cs->interrupt_request &= ~CPU_INTERRUPT_TIMER; + cpu_reset_interrupt(cs, CPU_INTERRUPT_TIMER); } cpu_openrisc_timer_update(cpu); bql_unlock(); @@ -218,7 +218,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, { OpenRISCCPU *cpu = env_archcpu(env); #ifndef CONFIG_USER_ONLY - uint64_t data[TARGET_INSN_START_WORDS]; + uint64_t data[INSN_START_WORDS]; MachineState *ms = MACHINE(qdev_get_machine()); CPUState *cs = env_cpu(env); int idx; diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index 7a6af18..5ab3bc7 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -20,14 +20,14 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-mmu-index.h" #include "tcg/tcg-op.h" #include "qemu/log.h" #include "qemu/bitops.h" #include "qemu/qemu-print.h" #include "exec/translator.h" #include "exec/translation-block.h" - +#include "exec/target_page.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -220,8 +220,7 @@ static void gen_addc(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) TCGv t0 = tcg_temp_new(); TCGv res = tcg_temp_new(); - tcg_gen_add2_tl(res, cpu_sr_cy, srca, dc->zero, cpu_sr_cy, dc->zero); - tcg_gen_add2_tl(res, cpu_sr_cy, res, cpu_sr_cy, srcb, dc->zero); + tcg_gen_addcio_tl(res, cpu_sr_cy, srca, srcb, cpu_sr_cy); tcg_gen_xor_tl(cpu_sr_ov, srca, srcb); tcg_gen_xor_tl(t0, res, srcb); tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index ece3481..89ae763 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -32,17 +32,20 @@ /* PowerPC CPU definitions */ #define POWERPC_DEF_PREFIX(pvr, svr, type) \ glue(glue(glue(glue(pvr, _), svr), _), type) -#define POWERPC_DEF_SVR(_name, _desc, _pvr, _svr, _type) \ +#define POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, _svr, _type, _deprecation_note) \ static void \ glue(POWERPC_DEF_PREFIX(_pvr, _svr, _type), _cpu_class_init) \ - (ObjectClass *oc, void *data) \ + (ObjectClass *oc, const void *data) \ { \ DeviceClass *dc = DEVICE_CLASS(oc); \ + CPUClass *cc = CPU_CLASS(oc); \ PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); \ \ pcc->pvr = _pvr; \ pcc->svr = _svr; \ dc->desc = _desc; \ + \ + cc->deprecation_note = _deprecation_note; \ } \ \ static const TypeInfo \ @@ -63,6 +66,13 @@ type_init( \ glue(POWERPC_DEF_PREFIX(_pvr, _svr, _type), _cpu_register_types)) +#define POWERPC_DEF_SVR(_name, _desc, _pvr, _svr, _type) \ + POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, _svr, _type, NULL) + +#define POWERPC_DEPRECATED_CPU(_name, _pvr, _type, _desc, _deprecation_note)\ + POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type, \ + _deprecation_note) + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) @@ -116,6 +126,13 @@ NULL) POWERPC_DEF("x2vp20", CPU_POWERPC_X2VP20, 405, NULL) + /* PPE42 Embedded Controllers */ + POWERPC_DEF("PPE42", CPU_POWERPC_PPE42, ppe42, + "Generic PPE 42") + POWERPC_DEF("PPE42X", CPU_POWERPC_PPE42X, ppe42x, + "Generic PPE 42X") + POWERPC_DEF("PPE42XM", CPU_POWERPC_PPE42XM, ppe42xm, + "Generic PPE 42XM") /* PowerPC 440 family */ #if defined(TODO_USER_ONLY) POWERPC_DEF("440", CPU_POWERPC_440, 440GP, @@ -722,12 +739,12 @@ "POWER7 v2.3") POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, "POWER7+ v2.1") - POWERPC_DEF("power8e_v2.1", CPU_POWERPC_POWER8E_v21, POWER8, - "POWER8E v2.1") + POWERPC_DEPRECATED_CPU("power8e_v2.1", CPU_POWERPC_POWER8E_v21, POWER8, 
+ "POWER8E v2.1", "CPU is unmaintained.") POWERPC_DEF("power8_v2.0", CPU_POWERPC_POWER8_v20, POWER8, "POWER8 v2.0") - POWERPC_DEF("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, - "POWER8NVL v1.0") + POWERPC_DEPRECATED_CPU("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, + "POWER8NVL v1.0", "CPU is unmaintained.") POWERPC_DEF("power9_v2.0", CPU_POWERPC_POWER9_DD20, POWER9, "POWER9 v2.0") POWERPC_DEF("power9_v2.2", CPU_POWERPC_POWER9_DD22, POWER9, diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h index 72ad31b..c6cd27f 100644 --- a/target/ppc/cpu-models.h +++ b/target/ppc/cpu-models.h @@ -69,6 +69,10 @@ enum { /* Xilinx cores */ CPU_POWERPC_X2VP4 = 0x20010820, CPU_POWERPC_X2VP20 = 0x20010860, + /* IBM PPE42 Family */ + CPU_POWERPC_PPE42 = 0x42000000, + CPU_POWERPC_PPE42X = 0x42100000, + CPU_POWERPC_PPE42XM = 0x42200000, /* PowerPC 440 family */ /* Generic PowerPC 440 */ #define CPU_POWERPC_440 CPU_POWERPC_440GXf diff --git a/target/ppc/cpu-param.h b/target/ppc/cpu-param.h index 6c4525f..e4ed908 100644 --- a/target/ppc/cpu-param.h +++ b/target/ppc/cpu-param.h @@ -33,11 +33,10 @@ #ifdef CONFIG_USER_ONLY /* Allow user-only to vary page size from 4k */ # define TARGET_PAGE_BITS_VARY -# define TARGET_PAGE_BITS_MIN 12 #else # define TARGET_PAGE_BITS 12 #endif -#define TCG_GUEST_DEFAULT_MO 0 +#define TARGET_INSN_START_EXTRA_WORDS 0 #endif diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c index bfcc695..4d8faad 100644 --- a/target/ppc/cpu.c +++ b/target/ppc/cpu.c @@ -22,6 +22,7 @@ #include "cpu-models.h" #include "cpu-qom.h" #include "exec/log.h" +#include "exec/watchpoint.h" #include "fpu/softfloat-helpers.h" #include "mmu-hash64.h" #include "helper_regs.h" diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 3ee8351..787020f 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -22,7 +22,9 @@ #include "qemu/int128.h" #include "qemu/cpu-float.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "cpu-qom.h" #include "qom/object.h" #include "hw/registerfields.h" @@ -218,6 +220,8 @@ typedef enum powerpc_excp_t { POWERPC_EXCP_POWER10, /* POWER11 exception model */ POWERPC_EXCP_POWER11, + /* PPE42 exception model */ + POWERPC_EXCP_PPE42, } powerpc_excp_t; /*****************************************************************************/ @@ -280,6 +284,8 @@ typedef enum powerpc_input_t { PPC_FLAGS_INPUT_POWER9, /* Freescale RCPU bus */ PPC_FLAGS_INPUT_RCPU, + /* PPE42 bus */ + PPC_FLAGS_INPUT_PPE42, } powerpc_input_t; #define PPC_INPUT(env) ((env)->bus_model) @@ -431,39 +437,64 @@ typedef enum { #define MSR_TM PPC_BIT_NR(31) /* Transactional Memory Available (Book3s) */ #define MSR_CM PPC_BIT_NR(32) /* Computation mode for BookE hflags */ #define MSR_ICM PPC_BIT_NR(33) /* Interrupt computation mode for BookE */ +#define MSR_SEM0 PPC_BIT_NR(33) /* SIB Error Mask Bit 0 (PPE42) */ +#define MSR_SEM1 PPC_BIT_NR(34) /* SIB Error Mask Bit 1 (PPE42) */ +#define MSR_SEM2 PPC_BIT_NR(35) /* SIB Error Mask Bit 2 (PPE42) */ #define MSR_GS PPC_BIT_NR(35) /* guest state for BookE */ +#define MSR_SEM3 PPC_BIT_NR(36) /* SIB Error Mask Bit 3 (PPE42) */ +#define MSR_SEM4 PPC_BIT_NR(37) /* SIB Error Mask Bit 4 (PPE42) */ #define MSR_UCLE PPC_BIT_NR(37) /* User-mode cache lock enable for BookE */ #define MSR_VR PPC_BIT_NR(38) /* altivec available x hflags */ #define MSR_SPE PPC_BIT_NR(38) /* SPE enable for BookE x hflags */ +#define MSR_SEM5 PPC_BIT_NR(38) /* SIB Error Mask Bit 5 (PPE42) */ +#define MSR_SEM6 PPC_BIT_NR(39) /* SIB Error Mask Bit 6 (PPE42) 
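SEM0..SEM6 occupy MSR bits 33-39 in IBM numbering, reusing positions also assigned to BookE bits such as GS, UCLE and SPE/VR.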
*/ #define MSR_VSX PPC_BIT_NR(40) /* Vector Scalar Extension (>= 2.06)x hflags */ +#define MSR_IS0 PPC_BIT_NR(40) /* Instance Specific Bit 0 (PPE42) */ #define MSR_S PPC_BIT_NR(41) /* Secure state */ +#define MSR_SIBRC0 PPC_BIT_NR(41) /* Last SIB return code Bit 0 (PPE42) */ +#define MSR_SIBRC1 PPC_BIT_NR(42) /* Last SIB return code Bit 1 (PPE42) */ +#define MSR_SIBRC2 PPC_BIT_NR(43) /* Last SIB return code Bit 2 (PPE42) */ +#define MSR_LP PPC_BIT_NR(44) /* Low Priority (PPE42) */ #define MSR_KEY PPC_BIT_NR(44) /* key bit on 603e */ #define MSR_POW PPC_BIT_NR(45) /* Power management */ #define MSR_WE PPC_BIT_NR(45) /* Wait State Enable on 405 */ +#define MSR_IS1 PPC_BIT_NR(46) /* Instance Specific Bit 1 (PPE42) */ #define MSR_TGPR PPC_BIT_NR(46) /* TGPR usage on 602/603 x */ #define MSR_CE PPC_BIT_NR(46) /* Critical int. enable on embedded PPC x */ #define MSR_ILE PPC_BIT_NR(47) /* Interrupt little-endian mode */ +#define MSR_UIE PPC_BIT_NR(47) /* Unmaskable Interrupt Enable (PPE42) */ #define MSR_EE PPC_BIT_NR(48) /* External interrupt enable */ #define MSR_PR PPC_BIT_NR(49) /* Problem state hflags */ #define MSR_FP PPC_BIT_NR(50) /* Floating point available hflags */ #define MSR_ME PPC_BIT_NR(51) /* Machine check interrupt enable */ #define MSR_FE0 PPC_BIT_NR(52) /* Floating point exception mode 0 */ +#define MSR_IS2 PPC_BIT_NR(52) /* Instance Specific Bit 2 (PPE42) */ +#define MSR_IS3 PPC_BIT_NR(53) /* Instance Specific Bit 3 (PPE42) */ #define MSR_SE PPC_BIT_NR(53) /* Single-step trace enable x hflags */ #define MSR_DWE PPC_BIT_NR(53) /* Debug wait enable on 405 x */ #define MSR_UBLE PPC_BIT_NR(53) /* User BTB lock enable on e500 x */ #define MSR_BE PPC_BIT_NR(54) /* Branch trace enable x hflags */ #define MSR_DE PPC_BIT_NR(54) /* Debug int. enable on embedded PPC x */ #define MSR_FE1 PPC_BIT_NR(55) /* Floating point exception mode 1 */ +#define MSR_IPE PPC_BIT_NR(55) /* Imprecise Mode Enable (PPE42) */ #define MSR_AL PPC_BIT_NR(56) /* AL bit on POWER */ +#define MSR_SIBRCA0 PPC_BIT_NR(56) /* SIB Return Code Accumulator 0 (PPE42) */ +#define MSR_SIBRCA1 PPC_BIT_NR(57) /* SIB Return Code Accumulator 1 (PPE42) */ #define MSR_EP PPC_BIT_NR(57) /* Exception prefix on 601 */ #define MSR_IR PPC_BIT_NR(58) /* Instruction relocate */ #define MSR_IS PPC_BIT_NR(58) /* Instruction address space (BookE) */ +#define MSR_SIBRCA2 PPC_BIT_NR(58) /* SIB Return Code Accumulator 2 (PPE42) */ +#define MSR_SIBRCA3 PPC_BIT_NR(59) /* SIB Return Code Accumulator 3 (PPE42) */ #define MSR_DR PPC_BIT_NR(59) /* Data relocate */ #define MSR_DS PPC_BIT_NR(59) /* Data address space (BookE) */ #define MSR_PE PPC_BIT_NR(60) /* Protection enable on 403 */ +#define MSR_SIBRCA4 PPC_BIT_NR(60) /* SIB Return Code Accumulator 4 (PPE42) */ +#define MSR_SIBRCA5 PPC_BIT_NR(61) /* SIB Return Code Accumulator 5 (PPE42) */ #define MSR_PX PPC_BIT_NR(61) /* Protection exclusive on 403 x */ #define MSR_PMM PPC_BIT_NR(61) /* Performance monitor mark on POWER x */ #define MSR_RI PPC_BIT_NR(62) /* Recoverable interrupt 1 */ +#define MSR_SIBRCA6 PPC_BIT_NR(62) /* SIB Return Code Accumulator 6 (PPE42) */ +#define MSR_SIBRCA7 PPC_BIT_NR(63) /* SIB Return Code Accumulator 7 (PPE42) */ #define MSR_LE PPC_BIT_NR(63) /* Little-endian mode 1 hflags */ FIELD(MSR, SF, MSR_SF, 1) @@ -515,6 +546,9 @@ FIELD(MSR, PX, MSR_PX, 1) FIELD(MSR, PMM, MSR_PMM, 1) FIELD(MSR, RI, MSR_RI, 1) FIELD(MSR, LE, MSR_LE, 1) +FIELD(MSR, SEM, MSR_SEM6, 7) +FIELD(MSR, SIBRC, MSR_SIBRC2, 3) +FIELD(MSR, SIBRCA, MSR_SIBRCA7, 8) /* * FE0 and FE1 bits are not side-by-side @@ 
-728,6 +762,31 @@ FIELD(MSR, LE, MSR_LE, 1) #define ESR_VLEMI PPC_BIT(58) /* VLE operation */ #define ESR_MIF PPC_BIT(62) /* Misaligned instruction (VLE) */ +/* PPE42 Interrupt Status Register bits */ +#define PPE42_ISR_SRSMS0 PPC_BIT_NR(48) /* Sys Reset State Machine State 0 */ +#define PPE42_ISR_SRSMS1 PPC_BIT_NR(49) /* Sys Reset State Machine State 1 */ +#define PPE42_ISR_SRSMS2 PPC_BIT_NR(50) /* Sys Reset State Machine State 2 */ +#define PPE42_ISR_SRSMS3 PPC_BIT_NR(51) /* Sys Reset State Machine State 3 */ +#define PPE42_ISR_EP PPC_BIT_NR(53) /* MSR[EE] Maskable Event Pending */ +#define PPE42_ISR_PTR PPC_BIT_NR(56) /* Program Interrupt from trap */ +#define PPE42_ISR_ST PPC_BIT_NR(57) /* Data Interrupt caused by store */ +#define PPE42_ISR_MFE PPC_BIT_NR(60) /* Multiple Fault Error */ +#define PPE42_ISR_MCS0 PPC_BIT_NR(61) /* Machine Check Status bit0 */ +#define PPE42_ISR_MCS1 PPC_BIT_NR(62) /* Machine Check Status bit1 */ +#define PPE42_ISR_MCS2 PPC_BIT_NR(63) /* Machine Check Status bit2 */ +FIELD(PPE42_ISR, SRSMS, PPE42_ISR_SRSMS3, 4) +FIELD(PPE42_ISR, MCS, PPE42_ISR_MCS2, 3) + +/* PPE42 Machine Check Status field values */ +#define PPE42_ISR_MCS_INSTRUCTION 0 +#define PPE42_ISR_MCS_DATA_LOAD 1 +#define PPE42_ISR_MCS_DATA_PRECISE_STORE 2 +#define PPE42_ISR_MCS_DATA_IMPRECISE_STORE 3 +#define PPE42_ISR_MCS_PROGRAM 4 +#define PPE42_ISR_MCS_ISI 5 +#define PPE42_ISR_MCS_ALIGNMENT 6 +#define PPE42_ISR_MCS_DSI 7 + /* Transaction EXception And Summary Register bits */ #define TEXASR_FAILURE_PERSISTENT (63 - 7) #define TEXASR_DISALLOWED (63 - 8) @@ -783,6 +842,8 @@ enum { POWERPC_FLAG_SMT_1LPAR = 0x00800000, /* Has BHRB */ POWERPC_FLAG_BHRB = 0x01000000, + /* Use PPE42-specific behavior */ + POWERPC_FLAG_PPE42 = 0x02000000, }; /* @@ -1520,6 +1581,10 @@ struct PowerPCCPUClass { void (*init_proc)(CPUPPCState *env); int (*check_pow)(CPUPPCState *env); int (*check_attn)(CPUPPCState *env); + + /* Handlers to be set by the machine initialising the chips */ + uint64_t (*load_sprd)(CPUPPCState *env); + void (*store_sprd)(CPUPPCState *env, uint64_t val); }; static inline bool ppc_cpu_core_single_threaded(CPUState *cs) @@ -1608,8 +1673,6 @@ void ppc_store_dawrx1(CPUPPCState *env, uint32_t value); #endif /* !defined(CONFIG_USER_ONLY) */ void ppc_store_msr(CPUPPCState *env, target_ulong value); -void ppc_cpu_list(void); - /* Time-base and decrementer management */ uint64_t cpu_ppc_load_tbl(CPUPPCState *env); uint32_t cpu_ppc_load_tbu(CPUPPCState *env); @@ -1671,8 +1734,6 @@ static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn) int ppc_dcr_read(ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp); int ppc_dcr_write(ppc_dcr_t *dcr_env, int dcrn, uint32_t val); -#define cpu_list ppc_cpu_list - /* MMU modes definitions */ #define MMU_USER_IDX 0 static inline int ppc_env_mmu_index(CPUPPCState *env, bool ifetch) @@ -1702,8 +1763,6 @@ void ppc_compat_add_property(Object *obj, const char *name, uint32_t *compat_pvr, const char *basedesc); #endif /* defined(TARGET_PPC64) */ -#include "exec/cpu-all.h" - /*****************************************************************************/ /* CRF definitions */ #define CRF_LT_BIT 3 @@ -1754,9 +1813,12 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_CSRR0 (0x03A) #define SPR_BOOKE_CSRR1 (0x03B) #define SPR_BOOKE_DEAR (0x03D) +#define SPR_PPE42_EDR (0x03D) #define SPR_IAMR (0x03D) #define SPR_BOOKE_ESR (0x03E) +#define SPR_PPE42_ISR (0x03E) #define SPR_BOOKE_IVPR (0x03F) +#define SPR_PPE42_IVPR (0x03F) #define SPR_MPC_EIE (0x050) 
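/* The PPE42 SPR names above alias already-assigned numbers (EDR at 0x03D with DEAR/IAMR, ISR at 0x03E with ESR, IVPR at 0x03F); the SPR number space is reused per CPU family, see register_ppe42_sprs() in cpu_init.c. */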
#define SPR_MPC_EID (0x051) #define SPR_MPC_NRI (0x052) @@ -1822,6 +1884,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_TBU40 (0x11E) #define SPR_SVR (0x11E) #define SPR_BOOKE_PIR (0x11E) +#define SPR_PPE42_PIR (0x11E) #define SPR_PVR (0x11F) #define SPR_HSPRG0 (0x130) #define SPR_BOOKE_DBSR (0x130) @@ -1831,6 +1894,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_EPCR (0x133) #define SPR_SPURR (0x134) #define SPR_BOOKE_DBCR0 (0x134) +#define SPR_PPE42_DBCR (0x134) #define SPR_IBCR (0x135) #define SPR_PURR (0x135) #define SPR_BOOKE_DBCR1 (0x135) @@ -1848,6 +1912,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_HSRR1 (0x13B) #define SPR_BOOKE_IAC4 (0x13B) #define SPR_BOOKE_DAC1 (0x13C) +#define SPR_PPE42_DACR (0x13C) #define SPR_MMCRH (0x13C) #define SPR_DABR2 (0x13D) #define SPR_BOOKE_DAC2 (0x13D) @@ -1857,12 +1922,14 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_DVC2 (0x13F) #define SPR_LPIDR (0x13F) #define SPR_BOOKE_TSR (0x150) +#define SPR_PPE42_TSR (0x150) #define SPR_HMER (0x150) #define SPR_HMEER (0x151) #define SPR_PCR (0x152) #define SPR_HEIR (0x153) #define SPR_BOOKE_LPIDR (0x152) #define SPR_BOOKE_TCR (0x154) +#define SPR_PPE42_TCR (0x154) #define SPR_BOOKE_TLB0PS (0x158) #define SPR_BOOKE_TLB1PS (0x159) #define SPR_BOOKE_TLB2PS (0x15A) @@ -2532,6 +2599,12 @@ enum { PPC2_MEM_LWSYNC = 0x0000000000200000ULL, /* ISA 2.06 BCD assist instructions */ PPC2_BCDA_ISA206 = 0x0000000000400000ULL, + /* PPE42 instructions */ + PPC2_PPE42 = 0x0000000000800000ULL, + /* PPE42X instructions */ + PPC2_PPE42X = 0x0000000001000000ULL, + /* PPE42XM instructions */ + PPC2_PPE42XM = 0x0000000002000000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2541,7 +2614,8 @@ enum { PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \ PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC | \ - PPC2_BCDA_ISA206) + PPC2_BCDA_ISA206 | PPC2_PPE42 | PPC2_PPE42X | \ + PPC2_PPE42XM) }; /*****************************************************************************/ @@ -2755,19 +2829,6 @@ void cpu_write_xer(CPUPPCState *env, target_ulong xer); */ #define is_book3s_arch2x(ctx) (!!((ctx)->insns_flags & PPC_SEGMENT_64B)) -#ifdef CONFIG_DEBUG_TCG -void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags); -#else -static inline void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->nip; - *cs_base = 0; - *flags = env->hflags; -} -#endif - G_NORETURN void raise_exception_err_ra(CPUPPCState *env, uint32_t exception, uint32_t error_code, uintptr_t raddr); @@ -3051,7 +3112,8 @@ static inline int check_attn_none(CPUPPCState *env) #define POWERPC_FAMILY(_name) \ static void \ - glue(glue(ppc_, _name), _cpu_family_class_init)(ObjectClass *, void *); \ + glue(glue(ppc_, _name), _cpu_family_class_init)(ObjectClass *, \ + const void *); \ \ static const TypeInfo \ glue(glue(ppc_, _name), _cpu_family_type_info) = { \ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 7decc09..3aa3aef 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -40,12 +40,11 @@ #include "qemu/cutils.h" #include "disas/capstone.h" #include "fpu/softfloat.h" - +#include "exec/watchpoint.h" #include "helper_regs.h" #include "internal.h" #include "spr_common.h" #include "power8-pmu.h" - #ifndef 
CONFIG_USER_ONLY #include "hw/boards.h" #include "hw/intc/intc.h" @@ -1654,6 +1653,47 @@ static void register_8xx_sprs(CPUPPCState *env) * ... and more (thermal management, performance counters, ...) */ +static void register_ppe42_sprs(CPUPPCState *env) +{ + spr_register(env, SPR_PPE42_EDR, "EDR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + spr_register(env, SPR_PPE42_ISR, "ISR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + spr_register(env, SPR_PPE42_IVPR, "IVPR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, SPR_NOACCESS, + 0xfff80000); + spr_register(env, SPR_PPE42_PIR, "PIR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_pir, + 0x00000000); + spr_register(env, SPR_PPE42_DBCR, "DBCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_40x_dbcr0, + 0x00000000); + spr_register(env, SPR_PPE42_DACR, "DACR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* Timer */ + spr_register(env, SPR_DECR, "DECR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_decr, &spr_write_decr, + 0x00000000); + spr_register(env, SPR_PPE42_TSR, "TSR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_booke_tsr, + 0x00000000); + spr_register(env, SPR_BOOKE_TCR, "TCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_booke_tcr, + 0x00000000); +} + /*****************************************************************************/ /* Exception vectors models */ static void init_excp_4xx(CPUPPCState *env) @@ -1680,6 +1720,30 @@ static void init_excp_4xx(CPUPPCState *env) #endif } +static void init_excp_ppe42(CPUPPCState *env) +{ +#if !defined(CONFIG_USER_ONLY) + /* Machine Check vector changed after version 0 */ + if (((env->spr[SPR_PVR] & 0xf00000ul) >> 20) == 0) { + env->excp_vectors[POWERPC_EXCP_MCHECK] = 0x00000000; + } else { + env->excp_vectors[POWERPC_EXCP_MCHECK] = 0x00000020; + } + env->excp_vectors[POWERPC_EXCP_RESET] = 0x00000040; + env->excp_vectors[POWERPC_EXCP_DSI] = 0x00000060; + env->excp_vectors[POWERPC_EXCP_ISI] = 0x00000080; + env->excp_vectors[POWERPC_EXCP_EXTERNAL] = 0x000000A0; + env->excp_vectors[POWERPC_EXCP_ALIGN] = 0x000000C0; + env->excp_vectors[POWERPC_EXCP_PROGRAM] = 0x000000E0; + env->excp_vectors[POWERPC_EXCP_DECR] = 0x00000100; + env->excp_vectors[POWERPC_EXCP_FIT] = 0x00000120; + env->excp_vectors[POWERPC_EXCP_WDT] = 0x00000140; + env->ivpr_mask = 0xFFFFFE00UL; + /* Hardware reset vector */ + env->hreset_vector = 0x00000040UL; +#endif +} + static void init_excp_MPC5xx(CPUPPCState *env) { #if !defined(CONFIG_USER_ONLY) @@ -2167,7 +2231,7 @@ static void init_proc_405(CPUPPCState *env) SET_WDT_PERIOD(16, 20, 24, 28); } -POWERPC_FAMILY(405)(ObjectClass *oc, void *data) +POWERPC_FAMILY(405)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2201,6 +2265,80 @@ POWERPC_FAMILY(405)(ObjectClass *oc, void *data) POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; } +static void init_proc_ppe42(CPUPPCState *env) +{ + register_ppe42_sprs(env); + + init_excp_ppe42(env); + env->dcache_line_size = 32; + env->icache_line_size = 32; + /* Allocate hardware IRQ controller */ + ppc40x_irq_init(env_archcpu(env)); + + SET_FIT_PERIOD(8, 12, 16, 20); + SET_WDT_PERIOD(16, 20, 24, 28); +} + +static void ppe42_class_common_init(PowerPCCPUClass *pcc) +{ + pcc->init_proc = init_proc_ppe42; + pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; + pcc->insns_flags = 
PPC_INSNS_BASE | + PPC_WRTEE | + PPC_CACHE | + PPC_CACHE_DCBZ | + PPC_MEM_SYNC; + pcc->msr_mask = R_MSR_SEM_MASK | + (1ull << MSR_IS0) | + R_MSR_SIBRC_MASK | + (1ull << MSR_LP) | + (1ull << MSR_WE) | + (1ull << MSR_IS1) | + (1ull << MSR_UIE) | + (1ull << MSR_EE) | + (1ull << MSR_ME) | + (1ull << MSR_IS2) | + (1ull << MSR_IS3) | + (1ull << MSR_IPE) | + R_MSR_SIBRCA_MASK; + pcc->mmu_model = POWERPC_MMU_REAL; + pcc->excp_model = POWERPC_EXCP_PPE42; + pcc->bus_model = PPC_FLAGS_INPUT_PPE42; + pcc->bfd_mach = bfd_mach_ppc_403; + pcc->flags = POWERPC_FLAG_PPE42 | POWERPC_FLAG_BUS_CLK; +} + +POWERPC_FAMILY(ppe42)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42"; + pcc->insns_flags2 = PPC2_PPE42; + ppe42_class_common_init(pcc); +} + +POWERPC_FAMILY(ppe42x)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42X"; + pcc->insns_flags2 = PPC2_PPE42 | PPC2_PPE42X; + ppe42_class_common_init(pcc); +} + +POWERPC_FAMILY(ppe42xm)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42XM"; + pcc->insns_flags2 = PPC2_PPE42 | PPC2_PPE42X | PPC2_PPE42XM; + ppe42_class_common_init(pcc); +} + static void init_proc_440EP(CPUPPCState *env) { register_BookE_sprs(env, 0x000000000000FFFFULL); @@ -2235,7 +2373,7 @@ static void init_proc_440EP(CPUPPCState *env) SET_WDT_PERIOD(20, 24, 28, 32); } -POWERPC_FAMILY(440EP)(ObjectClass *oc, void *data) +POWERPC_FAMILY(440EP)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2274,7 +2412,7 @@ POWERPC_FAMILY(440EP)(ObjectClass *oc, void *data) POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; } -POWERPC_FAMILY(460EX)(ObjectClass *oc, void *data) +POWERPC_FAMILY(460EX)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2329,7 +2467,7 @@ static void init_proc_440GP(CPUPPCState *env) SET_WDT_PERIOD(20, 24, 28, 32); } -POWERPC_FAMILY(440GP)(ObjectClass *oc, void *data) +POWERPC_FAMILY(440GP)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2399,7 +2537,7 @@ static void init_proc_440x5(CPUPPCState *env) SET_WDT_PERIOD(20, 24, 28, 32); } -POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data) +POWERPC_FAMILY(440x5)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2435,7 +2573,7 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data) POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; } -POWERPC_FAMILY(440x5wDFPU)(ObjectClass *oc, void *data) +POWERPC_FAMILY(440x5wDFPU)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2484,7 +2622,7 @@ static void init_proc_MPC5xx(CPUPPCState *env) /* XXX: TODO: allocate internal IRQ controller */ } -POWERPC_FAMILY(MPC5xx)(ObjectClass *oc, void *data) +POWERPC_FAMILY(MPC5xx)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2527,7 +2665,7 @@ static void init_proc_MPC8xx(CPUPPCState *env) /* XXX: TODO: allocate internal IRQ controller */ } -POWERPC_FAMILY(MPC8xx)(ObjectClass *oc, void *data) +POWERPC_FAMILY(MPC8xx)(ObjectClass *oc, const void *data) { DeviceClass *dc = 
DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2578,7 +2716,7 @@ static void init_proc_G2(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(G2)(ObjectClass *oc, void *data) +POWERPC_FAMILY(G2)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2617,7 +2755,7 @@ POWERPC_FAMILY(G2)(ObjectClass *oc, void *data) POWERPC_FLAG_BE | POWERPC_FLAG_BUS_CLK; } -POWERPC_FAMILY(G2LE)(ObjectClass *oc, void *data) +POWERPC_FAMILY(G2LE)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -2752,7 +2890,7 @@ static void init_proc_e200(CPUPPCState *env) /* XXX: TODO: allocate internal IRQ controller */ } -POWERPC_FAMILY(e200)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e200)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3045,7 +3183,7 @@ static void init_proc_e500v1(CPUPPCState *env) init_proc_e500(env, fsl_e500v1); } -POWERPC_FAMILY(e500v1)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e500v1)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3089,7 +3227,7 @@ static void init_proc_e500v2(CPUPPCState *env) init_proc_e500(env, fsl_e500v2); } -POWERPC_FAMILY(e500v2)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e500v2)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3133,7 +3271,7 @@ static void init_proc_e500mc(CPUPPCState *env) init_proc_e500(env, fsl_e500mc); } -POWERPC_FAMILY(e500mc)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e500mc)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3180,7 +3318,7 @@ static void init_proc_e5500(CPUPPCState *env) init_proc_e500(env, fsl_e5500); } -POWERPC_FAMILY(e5500)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e5500)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3229,7 +3367,7 @@ static void init_proc_e6500(CPUPPCState *env) init_proc_e500(env, fsl_e6500); } -POWERPC_FAMILY(e6500)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e6500)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3292,7 +3430,7 @@ static void init_proc_603(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(603)(ObjectClass *oc, void *data) +POWERPC_FAMILY(603)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3332,7 +3470,7 @@ POWERPC_FAMILY(603)(ObjectClass *oc, void *data) POWERPC_FLAG_BE | POWERPC_FLAG_BUS_CLK; } -POWERPC_FAMILY(603E)(ObjectClass *oc, void *data) +POWERPC_FAMILY(603E)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3378,7 +3516,7 @@ static void init_proc_e300(CPUPPCState *env) register_e300_sprs(env); } -POWERPC_FAMILY(e300)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e300)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3434,7 +3572,7 @@ static void init_proc_604(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(604)(ObjectClass *oc, void *data) +POWERPC_FAMILY(604)(ObjectClass *oc, const void *data) 
{ DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3480,7 +3618,7 @@ static void init_proc_604E(CPUPPCState *env) register_604e_sprs(env); } -POWERPC_FAMILY(604E)(ObjectClass *oc, void *data) +POWERPC_FAMILY(604E)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3537,7 +3675,7 @@ static void init_proc_740(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(740)(ObjectClass *oc, void *data) +POWERPC_FAMILY(740)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3603,7 +3741,7 @@ static void init_proc_750(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(750)(ObjectClass *oc, void *data) +POWERPC_FAMILY(750)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3750,7 +3888,7 @@ static void init_proc_750cl(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(750cl)(ObjectClass *oc, void *data) +POWERPC_FAMILY(750cl)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3858,7 +3996,7 @@ static void init_proc_750cx(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(750cx)(ObjectClass *oc, void *data) +POWERPC_FAMILY(750cx)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -3931,7 +4069,7 @@ static void init_proc_750fx(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(750fx)(ObjectClass *oc, void *data) +POWERPC_FAMILY(750fx)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4004,7 +4142,7 @@ static void init_proc_750gx(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(750gx)(ObjectClass *oc, void *data) +POWERPC_FAMILY(750gx)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4064,7 +4202,7 @@ static void init_proc_745(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(745)(ObjectClass *oc, void *data) +POWERPC_FAMILY(745)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4110,7 +4248,7 @@ static void init_proc_755(CPUPPCState *env) register_755_sprs(env); } -POWERPC_FAMILY(755)(ObjectClass *oc, void *data) +POWERPC_FAMILY(755)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4177,7 +4315,7 @@ static void init_proc_7400(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7400)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7400)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4257,7 +4395,7 @@ static void init_proc_7410(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7410)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7410)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4358,7 +4496,7 @@ static void init_proc_7440(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7440)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7440)(ObjectClass *oc, const void *data) { DeviceClass *dc = 
DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4481,7 +4619,7 @@ static void init_proc_7450(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7450)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7450)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4611,7 +4749,7 @@ static void init_proc_7445(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7445)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7445)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4743,7 +4881,7 @@ static void init_proc_7455(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7455)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7455)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -4895,7 +5033,7 @@ static void init_proc_7457(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(7457)(ObjectClass *oc, void *data) +POWERPC_FAMILY(7457)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -5030,7 +5168,7 @@ static void init_proc_e600(CPUPPCState *env) ppc6xx_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(e600)(ObjectClass *oc, void *data) +POWERPC_FAMILY(e600)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -5801,6 +5939,18 @@ static void register_power9_book4_sprs(CPUPPCState *env) &spr_read_generic, &spr_write_generic, &spr_read_generic, &spr_write_generic, 0x00000000); + + /* SPRC/SPRD exist in earlier CPUs but only tested on POWER9/10 */ + spr_register_hv(env, SPR_POWER_SPRC, "SPRC", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sprc, + 0x00000000); + spr_register_hv(env, SPR_POWER_SPRD, "SPRD", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_sprd, &spr_write_sprd, + 0x00000000); #endif } @@ -5822,17 +5972,6 @@ static void register_power8_book4_sprs(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_WORT, 0); - /* SPRC/SPRD exist in earlier CPUs but only tested on POWER9/10 */ - spr_register_hv(env, SPR_POWER_SPRC, "SPRC", - SPR_NOACCESS, SPR_NOACCESS, - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_sprc, - 0x00000000); - spr_register_hv(env, SPR_POWER_SPRD, "SPRD", - SPR_NOACCESS, SPR_NOACCESS, - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_sprd, &spr_write_sprd, - 0x00000000); #endif } @@ -5994,7 +6133,7 @@ static void init_proc_970(CPUPPCState *env) ppc970_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(970)(ObjectClass *oc, void *data) +POWERPC_FAMILY(970)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6069,7 +6208,7 @@ static void init_proc_power5plus(CPUPPCState *env) ppc970_irq_init(env_archcpu(env)); } -POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER5P)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6175,7 +6314,7 @@ static bool ppc_pvr_match_power7(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return true; } -POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER7)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); 
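The void *data to const void *data change repeated across these POWERPC_FAMILY hooks is a mechanical constification of QOM class_init callbacks. A minimal stand-alone sketch of what the const buys, using stub types rather than QEMU's (ObjectClass and my_class_init here are illustrative only, not code from this patch):

/*
 * Illustrative-only stub of the class_init signature change: class_data
 * is now handed to the hook read-only, so an accidental write through it
 * no longer compiles silently.
 */
#include <stdio.h>

typedef struct ObjectClass { const char *name; } ObjectClass;

static void my_class_init(ObjectClass *oc, const void *data)
{
    const int *cfg = data;   /* read-only view of class_data */
    /* *(int *)data = 0;        a write now requires a loud cast */
    printf("%s configured with %d\n", oc->name, cfg ? *cfg : 0);
}

int main(void)
{
    ObjectClass oc = { "demo" };
    int cfg = 42;
    my_class_init(&oc, &cfg);
    return 0;
}

Class data is shared by every instance of the class, so a stray store through it would corrupt all CPUs of that model; the const parameter turns that mistake into a compile error.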
PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6339,7 +6478,7 @@ static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return true; } -POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER8)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6548,7 +6687,7 @@ static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return false; } -POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER9)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6646,7 +6785,7 @@ static bool ppc_pvr_match_power10(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return false; } -POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER10)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6706,7 +6845,7 @@ static bool ppc_pvr_match_power11(PowerPCCPUClass *pcc, uint32_t pvr, bool best) return false; } -POWERPC_FAMILY(POWER11)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER11)(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -6802,53 +6941,64 @@ static void init_ppc_proc(PowerPCCPU *cpu) /* MSR bits & flags consistency checks */ if (env->msr_mask & (1 << 25)) { - switch (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE)) { + switch (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_SPE: case POWERPC_FLAG_VRE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_SPE or POWERPC_FLAG_VRE\n"); + "Should define POWERPC_FLAG_SPE or POWERPC_FLAG_VRE\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE)) { + } else if (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_SPE nor POWERPC_FLAG_VRE\n"); + "Should not define POWERPC_FLAG_SPE nor POWERPC_FLAG_VRE\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 17)) { - switch (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE)) { + switch (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_TGPR: case POWERPC_FLAG_CE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_TGPR or POWERPC_FLAG_CE\n"); + "Should define POWERPC_FLAG_TGPR or POWERPC_FLAG_CE\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE)) { + } else if (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_TGPR nor POWERPC_FLAG_CE\n"); + "Should not define POWERPC_FLAG_TGPR nor POWERPC_FLAG_CE\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 10)) { switch (env->flags & (POWERPC_FLAG_SE | POWERPC_FLAG_DWE | - POWERPC_FLAG_UBLE)) { + POWERPC_FLAG_UBLE | POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_SE: case POWERPC_FLAG_DWE: case POWERPC_FLAG_UBLE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" "Should define POWERPC_FLAG_SE or POWERPC_FLAG_DWE or " - 
"POWERPC_FLAG_UBLE\n"); + "POWERPC_FLAG_UBLE or POWERPC_FLAG_PPE42\n"); exit(1); } } else if (env->flags & (POWERPC_FLAG_SE | POWERPC_FLAG_DWE | - POWERPC_FLAG_UBLE)) { + POWERPC_FLAG_UBLE | POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" "Should not define POWERPC_FLAG_SE nor POWERPC_FLAG_DWE nor " - "POWERPC_FLAG_UBLE\n"); + "POWERPC_FLAG_UBLE nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 9)) { @@ -6867,18 +7017,23 @@ static void init_ppc_proc(PowerPCCPU *cpu) exit(1); } if (env->msr_mask & (1 << 2)) { - switch (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM)) { + switch (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_PX: case POWERPC_FLAG_PMM: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_PX or POWERPC_FLAG_PMM\n"); + "Should define POWERPC_FLAG_PX or POWERPC_FLAG_PMM\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM)) { + } else if (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_PX nor POWERPC_FLAG_PMM\n"); + "Should not define POWERPC_FLAG_PX nor POWERPC_FLAG_PMM\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if ((env->flags & POWERPC_FLAG_BUS_CLK) == 0) { @@ -7081,7 +7236,7 @@ ObjectClass *ppc_cpu_class_by_name(const char *name) if (strcmp(name, "max") == 0) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); if (mc) { - return object_class_by_name(mc->default_cpu_type); + return object_class_by_name(machine_class_default_cpu_type(mc)); } } #endif @@ -7114,7 +7269,7 @@ PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) } /* Sort by PVR, ordering special case "host" last. 
*/ -static gint ppc_cpu_list_compare(gconstpointer a, gconstpointer b) +static gint ppc_cpu_list_compare(gconstpointer a, gconstpointer b, gpointer d) { ObjectClass *oc_a = (ObjectClass *)a; ObjectClass *oc_b = (ObjectClass *)b; @@ -7143,6 +7298,7 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); DeviceClass *family = DEVICE_CLASS(ppc_cpu_get_family_class(pcc)); const char *typename = object_class_get_name(oc); char *name; @@ -7153,7 +7309,11 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) } name = cpu_model_from_type(typename); - qemu_printf(" %-16s PVR %08x\n", name, pcc->pvr); + if (cc->deprecation_note) { + qemu_printf(" %-16s PVR %08x (deprecated)\n", name, pcc->pvr); + } else { + qemu_printf(" %-16s PVR %08x\n", name, pcc->pvr); + } for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { PowerPCCPUAlias *alias = &ppc_cpu_aliases[i]; ObjectClass *alias_oc = ppc_cpu_class_by_name(alias->model); @@ -7176,13 +7336,13 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) g_free(name); } -void ppc_cpu_list(void) +static void ppc_cpu_list(void) { GSList *list; qemu_printf("Available CPUs:\n"); list = object_class_get_list(TYPE_POWERPC_CPU, false); - list = g_slist_sort(list, ppc_cpu_list_compare); + list = g_slist_sort_with_data(list, ppc_cpu_list_compare, NULL); g_slist_foreach(list, ppc_cpu_list_entry, NULL); g_slist_free(list); @@ -7215,20 +7375,20 @@ static void ppc_restore_state_to_opc(CPUState *cs, cpu->env.nip = data[0]; } + +static int ppc_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return ppc_env_mmu_index(cpu_env(cs), ifetch); +} #endif /* CONFIG_TCG */ #ifndef CONFIG_USER_ONLY static bool ppc_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ -static int ppc_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - return ppc_env_mmu_index(cpu_env(cs), ifetch); -} - static void ppc_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -7243,39 +7403,40 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) } msr = (target_ulong)0; - msr |= (target_ulong)MSR_HVB; - msr |= (target_ulong)1 << MSR_EP; + if (!(env->flags & POWERPC_FLAG_PPE42)) { + msr |= (target_ulong)MSR_HVB; + msr |= (target_ulong)1 << MSR_EP; #if defined(DO_SINGLE_STEP) && 0 - /* Single step trace mode */ - msr |= (target_ulong)1 << MSR_SE; - msr |= (target_ulong)1 << MSR_BE; + /* Single step trace mode */ + msr |= (target_ulong)1 << MSR_SE; + msr |= (target_ulong)1 << MSR_BE; #endif #if defined(CONFIG_USER_ONLY) - msr |= (target_ulong)1 << MSR_FP; /* Allow floating point usage */ - msr |= (target_ulong)1 << MSR_FE0; /* Allow floating point exceptions */ - msr |= (target_ulong)1 << MSR_FE1; - msr |= (target_ulong)1 << MSR_VR; /* Allow altivec usage */ - msr |= (target_ulong)1 << MSR_VSX; /* Allow VSX usage */ - msr |= (target_ulong)1 << MSR_SPE; /* Allow SPE usage */ - msr |= (target_ulong)1 << MSR_PR; + msr |= (target_ulong)1 << MSR_FP; /* Allow floating point usage */ + msr |= (target_ulong)1 << MSR_FE0; /* Allow floating point exceptions */ + msr |= (target_ulong)1 << MSR_FE1; + msr |= (target_ulong)1 << MSR_VR; /* Allow altivec usage */ + msr |= (target_ulong)1 << MSR_VSX; /* Allow VSX usage */ + msr |= (target_ulong)1 << MSR_SPE; /* Allow SPE usage */ + msr |= (target_ulong)1 << MSR_PR; #if defined(TARGET_PPC64) - msr |= 
(target_ulong)1 << MSR_TM; /* Transactional memory */ + msr |= (target_ulong)1 << MSR_TM; /* Transactional memory */ #endif #if !TARGET_BIG_ENDIAN - msr |= (target_ulong)1 << MSR_LE; /* Little-endian user mode */ - if (!((env->msr_mask >> MSR_LE) & 1)) { - fprintf(stderr, "Selected CPU does not support little-endian.\n"); - exit(1); - } + msr |= (target_ulong)1 << MSR_LE; /* Little-endian user mode */ + if (!((env->msr_mask >> MSR_LE) & 1)) { + fprintf(stderr, "Selected CPU does not support little-endian.\n"); + exit(1); + } #endif #endif #if defined(TARGET_PPC64) - if (mmu_is_64bit(env->mmu_model)) { - msr |= (1ULL << MSR_SF); - } + if (mmu_is_64bit(env->mmu_model)) { + msr |= (1ULL << MSR_SF); + } #endif - + } hreg_store_msr(env, msr, 1); #if !defined(CONFIG_USER_ONLY) @@ -7386,6 +7547,12 @@ static void ppc_cpu_exec_exit(CPUState *cs) cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); } } + +static vaddr ppc_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return (cpu_env(cs)->hflags >> HFLAGS_64) & 1 ? result : (uint32_t)result; +} #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ @@ -7478,16 +7645,22 @@ static const struct SysemuCPUOps ppc_sysemu_ops = { #include "accel/tcg/cpu-ops.h" static const TCGCPUOps ppc_tcg_ops = { + .mttcg_supported = TARGET_LONG_BITS == 64, + .guest_default_memory_order = 0, .initialize = ppc_translate_init, .translate_code = ppc_translate_code, + .get_tb_cpu_state = ppc_get_tb_cpu_state, .restore_state_to_opc = ppc_restore_state_to_opc, + .mmu_index = ppc_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = ppc_cpu_record_sigsegv, #else .tlb_fill = ppc_cpu_tlb_fill, + .pointer_wrap = ppc_pointer_wrap, .cpu_exec_interrupt = ppc_cpu_exec_interrupt, .cpu_exec_halt = ppc_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = ppc_cpu_do_interrupt, .cpu_exec_enter = ppc_cpu_exec_enter, .cpu_exec_exit = ppc_cpu_exec_exit, @@ -7500,7 +7673,7 @@ static const TCGCPUOps ppc_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void ppc_cpu_class_init(ObjectClass *oc, void *data) +static void ppc_cpu_class_init(ObjectClass *oc, const void *data) { PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -7517,7 +7690,7 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) &pcc->parent_phases); cc->class_by_name = ppc_cpu_class_by_name; - cc->mmu_index = ppc_cpu_mmu_index; + cc->list_cpus = ppc_cpu_list; cc->dump_state = ppc_cpu_dump_state; cc->set_pc = ppc_cpu_set_pc; cc->get_pc = ppc_cpu_get_pc; @@ -7566,7 +7739,7 @@ static const TypeInfo ppc_cpu_type_info = { .class_size = sizeof(PowerPCCPUClass), .class_init = ppc_cpu_class_init, #ifndef CONFIG_USER_ONLY - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_INTERRUPT_STATS_PROVIDER }, { } }, @@ -7713,6 +7886,18 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, int flags) * they can be read with "p $ivor0", "p $ivor1", etc. 
*/ break; + case POWERPC_EXCP_PPE42: + qemu_fprintf(f, "SRR0 " TARGET_FMT_lx " SRR1 " TARGET_FMT_lx "\n", + env->spr[SPR_SRR0], env->spr[SPR_SRR1]); + + qemu_fprintf(f, " TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx + " ISR " TARGET_FMT_lx " EDR " TARGET_FMT_lx "\n", + env->spr[SPR_PPE42_TCR], env->spr[SPR_PPE42_TSR], + env->spr[SPR_PPE42_ISR], env->spr[SPR_PPE42_EDR]); + + qemu_fprintf(f, " PIR " TARGET_FMT_lx " IVPR " TARGET_FMT_lx "\n", + env->spr[SPR_PPE42_PIR], env->spr[SPR_PPE42_IVPR]); + break; case POWERPC_EXCP_40x: qemu_fprintf(f, " TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " ESR " TARGET_FMT_lx " DEAR " TARGET_FMT_lx "\n", diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index c941c89..d8bca19 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -19,11 +19,11 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/log.h" +#include "system/memory.h" #include "system/tcg.h" #include "system/system.h" #include "system/runstate.h" #include "cpu.h" -#include "exec/exec-all.h" #include "internal.h" #include "helper_regs.h" #include "hw/ppc/ppc.h" @@ -949,6 +949,125 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) powerpc_set_excp_state(cpu, vector, new_msr); } +static void powerpc_excp_ppe42(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + target_ulong msr, new_msr, vector; + target_ulong mcs = PPE42_ISR_MCS_INSTRUCTION; + bool promote_unmaskable; + + msr = env->msr; + + /* + * New interrupt handler msr preserves SIBRC and ME unless explicitly + * overridden by the exception. All other MSR bits are zeroed out. + */ + new_msr = env->msr & (((target_ulong)1 << MSR_ME) | R_MSR_SIBRC_MASK); + + /* HV emu assistance interrupt only exists on server arch 2.05 or later */ + if (excp == POWERPC_EXCP_HV_EMU) { + excp = POWERPC_EXCP_PROGRAM; + } + + /* + * Unmaskable interrupts (Program, ISI, Alignment and DSI) are promoted to + * machine check if MSR_UIE is 0. + */ + promote_unmaskable = !(msr & ((target_ulong)1 << MSR_UIE)); + + + switch (excp) { + case POWERPC_EXCP_MCHECK: /* Machine check exception */ + break; + case POWERPC_EXCP_DSI: /* Data storage exception */ + trace_ppc_excp_dsi(env->spr[SPR_PPE42_ISR], env->spr[SPR_PPE42_EDR]); + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_DSI; + } + break; + case POWERPC_EXCP_ISI: /* Instruction storage exception */ + trace_ppc_excp_isi(msr, env->nip); + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_ISI; + } + break; + case POWERPC_EXCP_EXTERNAL: /* External input */ + break; + case POWERPC_EXCP_ALIGN: /* Alignment exception */ + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_ALIGNMENT; + } + break; + case POWERPC_EXCP_PROGRAM: /* Program exception */ + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_PROGRAM; + } + switch (env->error_code & ~0xF) { + case POWERPC_EXCP_INVAL: + trace_ppc_excp_inval(env->nip); + env->spr[SPR_PPE42_ISR] &= ~((target_ulong)1 << PPE42_ISR_PTR); + break; + case POWERPC_EXCP_TRAP: + env->spr[SPR_PPE42_ISR] |= ((target_ulong)1 << PPE42_ISR_PTR); + break; + default: + /* Should never occur */ + cpu_abort(env_cpu(env), "Invalid program exception %d. 
Aborting\n", + env->error_code); + break; + } +#ifdef CONFIG_TCG + env->spr[SPR_PPE42_EDR] = ppc_ldl_code(env, env->nip); +#endif + break; + case POWERPC_EXCP_DECR: /* Decrementer exception */ + break; + case POWERPC_EXCP_FIT: /* Fixed-interval timer interrupt */ + trace_ppc_excp_print("FIT"); + break; + case POWERPC_EXCP_WDT: /* Watchdog timer interrupt */ + trace_ppc_excp_print("WDT"); + break; + case POWERPC_EXCP_RESET: /* System reset exception */ + /* reset exceptions don't have ME set */ + new_msr &= ~((target_ulong)1 << MSR_ME); + break; + default: + cpu_abort(env_cpu(env), "Invalid PPE42 exception %d. Aborting\n", + excp); + break; + } + + env->spr[SPR_SRR0] = env->nip; + env->spr[SPR_SRR1] = msr; + + vector = env->excp_vectors[excp]; + if (vector == (target_ulong)-1ULL) { + cpu_abort(env_cpu(env), + "Raised an exception without defined vector %d\n", excp); + } + vector |= env->spr[SPR_PPE42_IVPR]; + + if (excp == POWERPC_EXCP_MCHECK) { + /* Also set the Machine Check Status (MCS) */ + env->spr[SPR_PPE42_ISR] &= ~R_PPE42_ISR_MCS_MASK; + env->spr[SPR_PPE42_ISR] |= (mcs & R_PPE42_ISR_MCS_MASK); + env->spr[SPR_PPE42_ISR] &= ~((target_ulong)1 << PPE42_ISR_MFE); + + /* Machine checks halt execution if MSR_ME is 0 */ + powerpc_mcheck_checkstop(env); + + /* machine check exceptions don't have ME set */ + new_msr &= ~((target_ulong)1 << MSR_ME); + } + + powerpc_set_excp_state(cpu, vector, new_msr); +} + static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) { CPUPPCState *env = &cpu->env; @@ -1589,6 +1708,9 @@ void powerpc_excp(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_POWER11: powerpc_excp_books(cpu, excp); break; + case POWERPC_EXCP_PPE42: + powerpc_excp_ppe42(cpu, excp); + break; default: g_assert_not_reached(); } @@ -1945,6 +2067,43 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env, } #endif /* TARGET_PPC64 */ +static int ppe42_next_unmasked_interrupt(CPUPPCState *env) +{ + bool async_deliver; + + /* External reset */ + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; + } + /* Machine check exception */ + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + + async_deliver = FIELD_EX64(env->msr, MSR, EE); + + if (async_deliver != 0) { + /* Watchdog timer */ + if (env->pending_interrupts & PPC_INTERRUPT_WDT) { + return PPC_INTERRUPT_WDT; + } + /* External Interrupt */ + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { + return PPC_INTERRUPT_EXT; + } + /* Fixed interval timer */ + if (env->pending_interrupts & PPC_INTERRUPT_FIT) { + return PPC_INTERRUPT_FIT; + } + /* Decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; + } + } + + return 0; +} + static int ppc_next_unmasked_interrupt(CPUPPCState *env) { uint32_t pending_interrupts = env->pending_interrupts; @@ -1970,6 +2129,10 @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) } #endif + if (env->excp_model == POWERPC_EXCP_PPE42) { + return ppe42_next_unmasked_interrupt(env); + } + /* External reset */ if (pending_interrupts & PPC_INTERRUPT_RESET) { return PPC_INTERRUPT_RESET; diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index d93cfed..850aca6 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "internal.h" #include "fpu/softfloat.h" @@ -563,14 +562,14 @@ uint64_t helper_##op(CPUPPCState *env, float64 arg) \ return ret; \ } 
-FPU_FCTI(fctiw, int32, 0x80000000U) -FPU_FCTI(fctiwz, int32_round_to_zero, 0x80000000U) -FPU_FCTI(fctiwu, uint32, 0x00000000U) -FPU_FCTI(fctiwuz, uint32_round_to_zero, 0x00000000U) -FPU_FCTI(fctid, int64, 0x8000000000000000ULL) -FPU_FCTI(fctidz, int64_round_to_zero, 0x8000000000000000ULL) -FPU_FCTI(fctidu, uint64, 0x0000000000000000ULL) -FPU_FCTI(fctiduz, uint64_round_to_zero, 0x0000000000000000ULL) +FPU_FCTI(FCTIW, int32, 0x80000000U) +FPU_FCTI(FCTIWZ, int32_round_to_zero, 0x80000000U) +FPU_FCTI(FCTIWU, uint32, 0x00000000U) +FPU_FCTI(FCTIWUZ, uint32_round_to_zero, 0x00000000U) +FPU_FCTI(FCTID, int64, 0x8000000000000000ULL) +FPU_FCTI(FCTIDZ, int64_round_to_zero, 0x8000000000000000ULL) +FPU_FCTI(FCTIDU, uint64, 0x0000000000000000ULL) +FPU_FCTI(FCTIDUZ, uint64_round_to_zero, 0x0000000000000000ULL) #define FPU_FCFI(op, cvtr, is_single) \ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ @@ -587,10 +586,10 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ return farg.ll; \ } -FPU_FCFI(fcfid, int64_to_float64, 0) -FPU_FCFI(fcfids, int64_to_float32, 1) -FPU_FCFI(fcfidu, uint64_to_float64, 0) -FPU_FCFI(fcfidus, uint64_to_float32, 1) +FPU_FCFI(FCFID, int64_to_float64, 0) +FPU_FCFI(FCFIDS, int64_to_float32, 1) +FPU_FCFI(FCFIDU, uint64_to_float64, 0) +FPU_FCFI(FCFIDUS, uint64_to_float32, 1) static uint64_t do_fri(CPUPPCState *env, uint64_t arg, FloatRoundMode rounding_mode) @@ -614,22 +613,22 @@ static uint64_t do_fri(CPUPPCState *env, uint64_t arg, return arg; } -uint64_t helper_frin(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIN(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_ties_away); } -uint64_t helper_friz(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIZ(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_to_zero); } -uint64_t helper_frip(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIP(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_up); } -uint64_t helper_frim(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIM(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_down); } @@ -698,7 +697,7 @@ static uint64_t do_frsp(CPUPPCState *env, uint64_t arg, uintptr_t retaddr) return helper_todouble(f32); } -uint64_t helper_frsp(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRSP(CPUPPCState *env, uint64_t arg) { return do_frsp(env, arg, GETPC()); } @@ -872,7 +871,7 @@ uint32_t helper_FTSQRT(uint64_t frb) return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 
2 : 0); } -void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, +void helper_FCMPU(CPUPPCState *env, uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; @@ -903,7 +902,7 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, } } -void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2, +void helper_FCMPO(CPUPPCState *env, uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ca414f2..e99c8c8 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -94,26 +94,26 @@ DEF_HELPER_2(fpscr_setbit, void, env, i32) DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32) DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64) -DEF_HELPER_4(fcmpo, void, env, i64, i64, i32) -DEF_HELPER_4(fcmpu, void, env, i64, i64, i32) +DEF_HELPER_4(FCMPO, void, env, i64, i64, i32) +DEF_HELPER_4(FCMPU, void, env, i64, i64, i32) -DEF_HELPER_2(fctiw, i64, env, i64) -DEF_HELPER_2(fctiwu, i64, env, i64) -DEF_HELPER_2(fctiwz, i64, env, i64) -DEF_HELPER_2(fctiwuz, i64, env, i64) -DEF_HELPER_2(fcfid, i64, env, i64) -DEF_HELPER_2(fcfidu, i64, env, i64) -DEF_HELPER_2(fcfids, i64, env, i64) -DEF_HELPER_2(fcfidus, i64, env, i64) -DEF_HELPER_2(fctid, i64, env, i64) -DEF_HELPER_2(fctidu, i64, env, i64) -DEF_HELPER_2(fctidz, i64, env, i64) -DEF_HELPER_2(fctiduz, i64, env, i64) -DEF_HELPER_2(frsp, i64, env, i64) -DEF_HELPER_2(frin, i64, env, i64) -DEF_HELPER_2(friz, i64, env, i64) -DEF_HELPER_2(frip, i64, env, i64) -DEF_HELPER_2(frim, i64, env, i64) +DEF_HELPER_2(FCTIW, i64, env, i64) +DEF_HELPER_2(FCTIWU, i64, env, i64) +DEF_HELPER_2(FCTIWZ, i64, env, i64) +DEF_HELPER_2(FCTIWUZ, i64, env, i64) +DEF_HELPER_2(FCFID, i64, env, i64) +DEF_HELPER_2(FCFIDU, i64, env, i64) +DEF_HELPER_2(FCFIDS, i64, env, i64) +DEF_HELPER_2(FCFIDUS, i64, env, i64) +DEF_HELPER_2(FCTID, i64, env, i64) +DEF_HELPER_2(FCTIDU, i64, env, i64) +DEF_HELPER_2(FCTIDZ, i64, env, i64) +DEF_HELPER_2(FCTIDUZ, i64, env, i64) +DEF_HELPER_2(FRSP, i64, env, i64) +DEF_HELPER_2(FRIN, i64, env, i64) +DEF_HELPER_2(FRIZ, i64, env, i64) +DEF_HELPER_2(FRIP, i64, env, i64) +DEF_HELPER_2(FRIM, i64, env, i64) DEF_HELPER_3(FADD, f64, env, f64, f64) DEF_HELPER_3(FADDS, f64, env, f64, f64) diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index f211bc9..a07e6a7 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -27,6 +27,8 @@ #include "power8-pmu.h" #include "cpu-models.h" #include "spr_common.h" +#include "accel/tcg/cpu-ops.h" +#include "internal.h" /* Swap temporary saved registers with GPRs */ void hreg_swap_gpr_tgpr(CPUPPCState *env) @@ -184,6 +186,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env) if (env->spr[SPR_LPCR] & LPCR_HR) { hflags |= 1 << HFLAGS_HR; } + if (unlikely(ppc_flags & POWERPC_FLAG_PPE42)) { + /* PPE42 has a single address space and no problem state */ + msr = 0; + } #ifndef CONFIG_USER_ONLY if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) { @@ -255,26 +261,24 @@ void hreg_update_pmu_hflags(CPUPPCState *env) env->hflags |= hreg_compute_pmu_hflags_value(env); } -#ifdef CONFIG_DEBUG_TCG -void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) +TCGTBCPUState ppc_get_tb_cpu_state(CPUState *cs) { + CPUPPCState *env = cpu_env(cs); uint32_t hflags_current = env->hflags; - uint32_t hflags_rebuilt; - - *pc = env->nip; - *cs_base = 0; - *flags = hflags_current; - hflags_rebuilt = hreg_compute_hflags_value(env); +#ifdef CONFIG_DEBUG_TCG + 
uint32_t hflags_rebuilt = hreg_compute_hflags_value(env); if (unlikely(hflags_current != hflags_rebuilt)) { cpu_abort(env_cpu(env), "TCG hflags mismatch (current:0x%08x rebuilt:0x%08x)\n", hflags_current, hflags_rebuilt); } -} #endif + return (TCGTBCPUState){ .pc = env->nip, .flags = hflags_current }; +} + +#ifndef CONFIG_USER_ONLY void cpu_interrupt_exittb(CPUState *cs) { /* @@ -286,6 +290,7 @@ void cpu_interrupt_exittb(CPUState *cs) cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); } } +#endif int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) { @@ -307,9 +312,6 @@ int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) value &= ~(1 << MSR_ME); value |= env->msr & (1 << MSR_ME); } - if ((value ^ env->msr) & (R_MSR_IR_MASK | R_MSR_DR_MASK)) { - cpu_interrupt_exittb(cs); - } if ((env->mmu_model == POWERPC_MMU_BOOKE || env->mmu_model == POWERPC_MMU_BOOKE206) && ((value ^ env->msr) & R_MSR_GS_MASK)) { @@ -320,8 +322,14 @@ int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) /* Swap temporary saved registers with GPRs */ hreg_swap_gpr_tgpr(env); } - if (unlikely((value ^ env->msr) & R_MSR_EP_MASK)) { - env->excp_prefix = FIELD_EX64(value, MSR, EP) * 0xFFF00000; + /* PPE42 uses IR, DR and EP MSR bits for other purposes */ + if (likely(!(env->flags & POWERPC_FLAG_PPE42))) { + if ((value ^ env->msr) & (R_MSR_IR_MASK | R_MSR_DR_MASK)) { + cpu_interrupt_exittb(cs); + } + if (unlikely((value ^ env->msr) & R_MSR_EP_MASK)) { + env->excp_prefix = FIELD_EX64(value, MSR, EP) * 0xFFF00000; + } } /* * If PR=1 then EE, IR and DR must be 1 @@ -463,6 +471,23 @@ void register_generic_sprs(PowerPCCPU *cpu) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); + + spr_register(env, SPR_PVR, "PVR", + /* Linux permits userspace to read PVR */ +#if defined(CONFIG_LINUX_USER) + &spr_read_generic, +#else + SPR_NOACCESS, +#endif + SPR_NOACCESS, + &spr_read_generic, SPR_NOACCESS, + pcc->pvr); + + /* PPE42 doesn't support SPRG1-3, SVR or TB regs */ + if (env->insns_flags2 & PPC2_PPE42) { + return; + } + spr_register(env, SPR_SPRG1, "SPRG1", SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, @@ -476,17 +501,6 @@ void register_generic_sprs(PowerPCCPU *cpu) &spr_read_generic, &spr_write_generic, 0x00000000); - spr_register(env, SPR_PVR, "PVR", - /* Linux permits userspace to read PVR */ -#if defined(CONFIG_LINUX_USER) - &spr_read_generic, -#else - SPR_NOACCESS, -#endif - SPR_NOACCESS, - &spr_read_generic, SPR_NOACCESS, - pcc->pvr); - /* Register SVR if it's defined to anything else than POWERPC_SVR_NONE */ if (pcc->svr != POWERPC_SVR_NONE) { if (pcc->svr & POWERPC_SVR_E500) { diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h index 8196c13..b928c2c 100644 --- a/target/ppc/helper_regs.h +++ b/target/ppc/helper_regs.h @@ -20,6 +20,8 @@ #ifndef HELPER_REGS_H #define HELPER_REGS_H +#include "target/ppc/cpu.h" + void hreg_swap_gpr_tgpr(CPUPPCState *env); void hreg_compute_hflags(CPUPPCState *env); void hreg_update_pmu_hflags(CPUPPCState *env); diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index e53fd28..0e9c68f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -58,6 +58,10 @@ %ds_rtp 22:4 !function=times_2 @DS_rtp ...... ....0 ra:5 .............. .. &D rt=%ds_rtp si=%ds_si +%dd_si 3:s13 +&DD rt ra si:int64_t +@DD ...... rt:5 ra:5 ............. . .. &DD si=%dd_si + &DX_b vrt b %dx_b 6:10 16:5 0:1 @DX_b ...... vrt:5 ..... .......... ..... . 
&DX_b b=%dx_b @@ -66,6 +70,11 @@ %dx_d 6:s10 16:5 0:1 @DX ...... rt:5 ..... .......... ..... . &DX d=%dx_d +%md_sh 1:1 11:5 +%md_mb 5:1 6:5 +&MD rs ra sh mb rc +@MD ...... rs:5 ra:5 ..... ...... ... . rc:1 &MD sh=%md_sh mb=%md_mb + &VA vrt vra vrb rc @VA ...... vrt:5 vra:5 vrb:5 rc:5 ...... &VA @@ -322,6 +331,13 @@ LDUX 011111 ..... ..... ..... 0000110101 - @X LQ 111000 ..... ..... ............ ---- @DQ_rtp +LVD 000101 ..... ..... ................ @D +LVDU 001001 ..... ..... ................ @D +LVDX 011111 ..... ..... ..... 0000010001 - @X +LSKU 111010 ..... ..... ............. 0 11 @DD +LCXU 111010 ..... ..... ............. 1 11 @DD + + ### Fixed-Point Store Instructions STB 100110 ..... ..... ................ @D @@ -346,6 +362,11 @@ STDUX 011111 ..... ..... ..... 0010110101 - @X STQ 111110 ..... ..... ..............10 @DS_rtp +STVDU 010110 ..... ..... ................ @D +STVDX 011111 ..... ..... ..... 0010010001 - @X +STSKU 111110 ..... ..... ............. 0 11 @DD +STCXU 111110 ..... ..... ............. 1 11 @DD + ### Fixed-Point Compare Instructions CMP 011111 ... - . ..... ..... 0000000000 - @X_bfl @@ -461,8 +482,14 @@ PRTYD 011111 ..... ..... ----- 0010111010 - @X_sa BPERMD 011111 ..... ..... ..... 0011111100 - @X CFUGED 011111 ..... ..... ..... 0011011100 - @X -CNTLZDM 011111 ..... ..... ..... 0000111011 - @X -CNTTZDM 011111 ..... ..... ..... 1000111011 - @X +{ + SLVD 011111 ..... ..... ..... 0000111011 . @X_rc + CNTLZDM 011111 ..... ..... ..... 0000111011 - @X +} +{ + SRVD 011111 ..... ..... ..... 1000111011 . @X_rc + CNTTZDM 011111 ..... ..... ..... 1000111011 - @X +} PDEPD 011111 ..... ..... ..... 0010011100 - @X PEXTD 011111 ..... ..... ..... 0010111100 - @X @@ -503,6 +530,17 @@ STFDU 110111 ..... ...... ............... @D STFDX 011111 ..... ...... .... 1011010111 - @X STFDUX 011111 ..... ...... .... 1011110111 - @X +### Floating-Point Move Instructions + +FMR 111111 ..... ----- ..... 0001001000 . @X_tb_rc +FNEG 111111 ..... ----- ..... 0000101000 . @X_tb_rc +FABS 111111 ..... ----- ..... 0100001000 . @X_tb_rc +FNABS 111111 ..... ----- ..... 0010001000 . @X_tb_rc + +FCPSGN 111111 ..... ..... ..... 0000001000 . @X_rc +FMRGEW 111111 ..... ..... ..... 1111000110 - @X +FMRGOW 111111 ..... ..... ..... 1101000110 - @X + ### Floating-Point Arithmetic Instructions FADD 111111 ..... ..... ..... ----- 10101 . @A_tab @@ -541,6 +579,35 @@ FNMADDS 111011 ..... ..... ..... ..... 11111 . @A FNMSUB 111111 ..... ..... ..... ..... 11110 . @A FNMSUBS 111011 ..... ..... ..... ..... 11110 . @A +### Floating-Point Rounding and Conversion Instructions + +FRSP 111111 ..... ----- ..... 0000001100 . @X_tb_rc + +FRIN 111111 ..... ----- ..... 0110001000 . @X_tb_rc +FRIZ 111111 ..... ----- ..... 0110101000 . @X_tb_rc +FRIP 111111 ..... ----- ..... 0111001000 . @X_tb_rc +FRIM 111111 ..... ----- ..... 0111101000 . @X_tb_rc + +FCTIW 111111 ..... ----- ..... 0000001110 . @X_tb_rc +FCTIWU 111111 ..... ----- ..... 0010001110 . @X_tb_rc +FCTIWZ 111111 ..... ----- ..... 0000001111 . @X_tb_rc +FCTIWUZ 111111 ..... ----- ..... 0010001111 . @X_tb_rc + +FCTID 111111 ..... ----- ..... 1100101110 . @X_tb_rc +FCTIDU 111111 ..... ----- ..... 1110101110 . @X_tb_rc +FCTIDZ 111111 ..... ----- ..... 1100101111 . @X_tb_rc +FCTIDUZ 111111 ..... ----- ..... 1110101111 . @X_tb_rc + +FCFID 111111 ..... ----- ..... 1101001110 . @X_tb_rc +FCFIDS 111011 ..... ----- ..... 1101001110 . @X_tb_rc +FCFIDU 111111 ..... ----- ..... 1111001110 . @X_tb_rc +FCFIDUS 111011 ..... ----- ..... 1111001110 . 
@X_tb_rc + +### Floating-Point Compare Instructions + +FCMPU 111111 ... -- ..... ..... 0000000000 - @X_bf +FCMPO 111111 ... -- ..... ..... 0000100000 - @X_bf + ### Floating-Point Select Instruction FSEL 111111 ..... ..... ..... ..... 10111 . @A @@ -981,8 +1048,16 @@ LXSSP 111001 ..... ..... .............. 11 @DS STXSSP 111101 ..... ..... .............. 11 @DS LXV 111101 ..... ..... ............ . 001 @DQ_TSX STXV 111101 ..... ..... ............ . 101 @DQ_TSX -LXVP 000110 ..... ..... ............ 0000 @DQ_TSXP -STXVP 000110 ..... ..... ............ 0001 @DQ_TSXP + +# STVD PPE instruction overlaps with the LXVP and STXVP instructions +{ + STVD 000110 ..... ..... ................ @D + [ + LXVP 000110 ..... ..... ............ 0000 @DQ_TSXP + STXVP 000110 ..... ..... ............ 0001 @DQ_TSXP + ] +} + LXVX 011111 ..... ..... ..... 0100 - 01100 . @X_TSX STXVX 011111 ..... ..... ..... 0110001100 . @X_TSX LXVPX 011111 ..... ..... ..... 0101001101 - @X_TSXP @@ -1300,3 +1375,26 @@ CLRBHRB 011111 ----- ----- ----- 0110101110 - ## Misc POWER instructions ATTN 000000 00000 00000 00000 0100000000 0 + +# Fused compare-branch instructions for PPE only +%fcb_bdx 1:s10 !function=times_4 +&FCB px:bool ra rb:uint64_t bdx lk:bool +@FCB ...... .. px:1 .. ra:5 rb:5 .......... lk:1 &FCB bdx=%fcb_bdx +&FCB_bix px:bool bix ra rb:uint64_t bdx lk:bool +@FCB_bix ...... .. px:1 bix:2 ra:5 rb:5 .......... lk:1 &FCB_bix bdx=%fcb_bdx + +CMPWBC 000001 00 . .. ..... ..... .......... . @FCB_bix +CMPLWBC 000001 01 . .. ..... ..... .......... . @FCB_bix +CMPWIBC 000001 10 . .. ..... ..... .......... . @FCB_bix +BNBWI 000001 11 . 00 ..... ..... .......... . @FCB +BNBW 000001 11 . 01 ..... ..... .......... . @FCB +CLRBWIBC 000001 11 . 10 ..... ..... .......... . @FCB +CLRBWBC 000001 11 . 11 ..... ..... .......... . @FCB + +# Data Cache Block Query for PPE only +DCBQ 011111 ..... ..... ..... 0110010110 - @X + +# Rotate Doubleword Instructions for PPE only +RLDICL 011110 ..... ..... ..... ...... 000 . . @MD +RLDICR 011110 ..... ..... ..... ...... 001 . . @MD +RLDIMI 011110 ..... ..... ..... ...... 011 . . 
@MD diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 9012d38..7723350 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -21,6 +21,7 @@ #include "exec/breakpoint.h" #include "hw/registerfields.h" #include "exec/page-protection.h" +#include "accel/tcg/tb-cpu-state.h" /* PM instructions */ typedef enum { @@ -308,4 +309,6 @@ static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk) return msk; } +TCGTBCPUState ppc_get_tb_cpu_state(CPUState *cs); + #endif /* PPC_INTERNAL_H */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 992356c..cd60893 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -41,7 +41,7 @@ #include "trace.h" #include "gdbstub/enums.h" #include "exec/memattrs.h" -#include "exec/ram_addr.h" +#include "system/ram_addr.h" #include "system/hostmem.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" @@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) } } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -902,7 +907,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; @@ -1332,7 +1337,6 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { - return; } MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) @@ -1350,7 +1354,7 @@ static int kvmppc_handle_halt(PowerPCCPU *cpu) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (!cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && FIELD_EX64(env->msr, MSR, EE)) { cs->halted = 1; cs->exception_index = EXCP_HLT; @@ -2384,7 +2388,7 @@ static bool kvmppc_cpu_realize(CPUState *cs, Error **errp) return true; } -static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) +static void kvmppc_host_cpu_class_init(ObjectClass *oc, const void *data) { PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); @@ -2756,11 +2760,11 @@ int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid, Error **errp) { - struct kvm_get_htab_header *buf; - size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64; + size_t chunksize = sizeof(struct kvm_get_htab_header) + + n_valid * HASH_PTE_SIZE_64; + g_autofree struct kvm_get_htab_header *buf = g_malloc(chunksize); ssize_t rc; - buf = alloca(chunksize); buf->index = index; buf->n_valid = n_valid; buf->n_invalid = n_invalid; @@ -3005,7 +3009,7 @@ void kvm_arch_accel_class_init(ObjectClass *oc) { } -static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +static void kvm_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index a8768c1..a1d9ce9 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -221,7 +221,6 @@ static inline int kvmppc_smt_threads(void) static inline void kvmppc_error_append_smt_possible_hint(Error *const *errp) { - return; } static inline int kvmppc_set_smt_threads(int smt) @@ -259,7 +258,6 @@ static inline target_ulong 
kvmppc_configure_v3_mmu(PowerPCCPU *cpu, static inline void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online) { - return; } static inline void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) @@ -456,7 +454,6 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) static inline void kvmppc_check_papr_resize_hpt(Error **errp) { - return; } static inline int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 98df5b4..d72e5ec 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -1,6 +1,5 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "system/kvm.h" #include "system/tcg.h" #include "helper_regs.h" diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 51b137f..6ab71a6 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -19,11 +19,13 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "helper_regs.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" +#include "accel/tcg/probe.h" #include "internal.h" #include "qemu/atomic128.h" diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 2d9512c..0e625cb 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" #include "exec/helper-proto.h" #include "qemu/error-report.h" @@ -329,62 +328,22 @@ target_ulong helper_load_sprd(CPUPPCState *env) * accessed by powernv machines. */ PowerPCCPU *cpu = env_archcpu(env); - PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; - target_ulong sprc = env->spr[SPR_POWER_SPRC]; - - switch (sprc & 0x3e0) { - case 0: /* SCRATCH0-3 */ - case 1: /* SCRATCH4-7 */ - return pc->scratch[(sprc >> 3) & 0x7]; - - case 0x1e0: /* core thread state */ - if (env->excp_model == POWERPC_EXCP_POWER9) { - /* - * Only implement for POWER9 because skiboot uses it to check - * big-core mode. Other bits are unimplemented so we would - * prefer to get unimplemented message on POWER10 if it were - * used anywhere. - */ - if (pc->big_core) { - return PPC_BIT(63); - } else { - return 0; - } - } - /* fallthru */ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - default: - qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" - TARGET_FMT_lx"\n", sprc); - break; + if (pcc->load_sprd) { + return pcc->load_sprd(env); } + return 0; } void helper_store_sprd(CPUPPCState *env, target_ulong val) { - target_ulong sprc = env->spr[SPR_POWER_SPRC]; PowerPCCPU *cpu = env_archcpu(env); - PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; - int nr; - - switch (sprc & 0x3e0) { - case 0: /* SCRATCH0-3 */ - case 1: /* SCRATCH4-7 */ - /* - * Log stores to SCRATCH, because some firmware uses these for - * debugging and logging, but they would normally be read by the BMC, - * which is not implemented in QEMU yet. This gives a way to get at the - * information. Could also dump these upon checkstop. 
- */ - nr = (sprc >> 3) & 0x7; - qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr); - pc->scratch[nr] = val; - break; - default: - qemu_log_mask(LOG_UNIMP, "mtSPRD: Unimplemented SPRC:0x" - TARGET_FMT_lx"\n", sprc); - break; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (pcc->store_sprd) { + return pcc->store_sprd(env, val); } } diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index a812cb5..3865556 100644 --- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "system/memory.h" #include "cpu.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 1f791a7..8b980a5 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "system/kvm.h" #include "kvm_ppc.h" #include "internal.h" diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 2838de0..04c23ea 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -3,6 +3,8 @@ #ifndef CONFIG_USER_ONLY +#include "system/memory.h" + bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, bool guest_visible); diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 5ca4fae..dd33755 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -20,11 +20,11 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "system/hw_accel.h" +#include "system/memory.h" #include "kvm_ppc.h" #include "mmu-hash64.h" #include "exec/log.h" diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 461eda4..33ac341 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -19,10 +19,10 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "qemu/error-report.h" #include "system/kvm.h" +#include "system/memory.h" #include "kvm_ppc.h" #include "exec/log.h" #include "internal.h" diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index fb62b94..52d4861 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -24,8 +24,8 @@ #include "kvm_ppc.h" #include "mmu-hash64.h" #include "mmu-hash32.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "exec/log.h" #include "helper_regs.h" #include "qemu/error-report.h" diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index ad9ba82..ac60705 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -25,8 +25,8 @@ #include "mmu-hash64.h" #include "mmu-hash32.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "exec/log.h" #include "helper_regs.h" #include "qemu/error-report.h" @@ -36,7 +36,7 @@ #include "mmu-radix64.h" #include "mmu-booke.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" /* #define FLUSH_ALL_TLBS */ diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c index db9ee8e..2a7a5b4 100644 --- a/target/ppc/power8-pmu.c +++ b/target/ppc/power8-pmu.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "cpu.h" 
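One cleanup in the kvm.c hunk above deserves a note: kvmppc_load_htab_chunk's variable-length alloca() becomes a g_autofree heap buffer, so a large chunk no longer risks blowing the stack and is still released on every return path. A self-contained sketch of the pattern, assuming a placeholder header struct rather than the real KVM HTAB wire format:

/*
 * Sketch of the alloca() -> g_autofree conversion: GLib's g_autofree
 * attribute frees the heap buffer automatically when it goes out of
 * scope.  Build with: cc demo.c $(pkg-config --cflags --libs glib-2.0)
 */
#include <glib.h>

struct chunk_header { guint32 index; guint16 n_valid, n_invalid; };

static gssize write_chunk(guint32 index, guint16 n_valid, gsize pte_size)
{
    gsize chunksize = sizeof(struct chunk_header) + n_valid * pte_size;
    g_autofree struct chunk_header *buf = g_malloc0(chunksize);

    buf->index = index;
    buf->n_valid = n_valid;
    /* ... fill PTEs and write buf out; buf is freed on return ... */
    return (gssize)chunksize;
}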
#include "helper_regs.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "qemu/error-report.h" #include "qemu/timer.h" diff --git a/target/ppc/ppc-qmp-cmds.c b/target/ppc/ppc-qmp-cmds.c index a25d86a..7022564 100644 --- a/target/ppc/ppc-qmp-cmds.c +++ b/target/ppc/ppc-qmp-cmds.c @@ -28,7 +28,8 @@ #include "qemu/ctype.h" #include "monitor/hmp-target.h" #include "monitor/hmp.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-models.h" #include "cpu-qom.h" @@ -175,6 +176,15 @@ int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) return -EINVAL; } +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void ppc_cpu_defs_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; diff --git a/target/ppc/tcg-excp_helper.c b/target/ppc/tcg-excp_helper.c index 5a189dc..edecfb8 100644 --- a/target/ppc/tcg-excp_helper.c +++ b/target/ppc/tcg-excp_helper.c @@ -19,8 +19,8 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/log.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "target/ppc/cpu.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "system/runstate.h" @@ -229,6 +229,18 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, case POWERPC_MMU_BOOKE206: env->spr[SPR_BOOKE_DEAR] = vaddr; break; + case POWERPC_MMU_REAL: + if (env->flags & POWERPC_FLAG_PPE42) { + env->spr[SPR_PPE42_EDR] = vaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_PPE42_ISR] |= PPE42_ISR_ST; + } else { + env->spr[SPR_PPE42_ISR] &= ~PPE42_ISR_ST; + } + } else { + env->spr[SPR_DAR] = vaddr; + } + break; default: env->spr[SPR_DAR] = vaddr; break; diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index 7312032..7209b41 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -20,7 +20,6 @@ #include "cpu.h" #include "hw/ppc/ppc.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "qemu/log.h" #include "qemu/main-loop.h" diff --git a/target/ppc/translate.c b/target/ppc/translate.c index a52cbc8..d422789 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -21,7 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internal.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "qemu/host-utils.h" @@ -209,6 +209,11 @@ struct DisasContext { #define DISAS_CHAIN DISAS_TARGET_2 /* lookup next tb, pc updated */ #define DISAS_CHAIN_UPDATE DISAS_TARGET_3 /* lookup next tb, pc stale */ +static inline bool is_ppe(const DisasContext *ctx) +{ + return !!(ctx->flags & POWERPC_FLAG_PPE42); +} + /* Return true iff byteswap is needed in a scalar memop */ static inline bool need_byteswap(const DisasContext *ctx) { @@ -556,11 +561,8 @@ void spr_access_nop(DisasContext *ctx, int sprn, int gprn) #endif -/* SPR common to all PowerPC */ -/* XER */ -void spr_read_xer(DisasContext *ctx, int gprn, int sprn) +static void gen_get_xer(DisasContext *ctx, TCGv dst) { - TCGv dst = cpu_gpr[gprn]; TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); TCGv t2 = tcg_temp_new(); @@ -579,9 +581,16 @@ void spr_read_xer(DisasContext *ctx, int gprn, int sprn) } } -void spr_write_xer(DisasContext *ctx, int sprn, int gprn) +/* SPR common to all 
PowerPC */ +/* XER */ +void spr_read_xer(DisasContext *ctx, int gprn, int sprn) +{ + TCGv dst = cpu_gpr[gprn]; + gen_get_xer(ctx, dst); +} + +static void gen_set_xer(DisasContext *ctx, TCGv src) { - TCGv src = cpu_gpr[gprn]; /* Write all flags, while reading back check for isa300 */ tcg_gen_andi_tl(cpu_xer, src, ~((1u << XER_SO) | @@ -594,6 +603,12 @@ void spr_write_xer(DisasContext *ctx, int sprn, int gprn) tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1); } +void spr_write_xer(DisasContext *ctx, int sprn, int gprn) +{ + TCGv src = cpu_gpr[gprn]; + gen_set_xer(ctx, src); +} + /* LR */ void spr_read_lr(DisasContext *ctx, int gprn, int sprn) { @@ -1745,11 +1760,10 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ca32, ca); } } else { - TCGv zero = tcg_constant_tl(0); if (add_ca) { - tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero); - tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero); + tcg_gen_addcio_tl(t0, ca, arg1, arg2, ca); } else { + TCGv zero = tcg_constant_tl(0); tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero); } gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0); @@ -1948,11 +1962,9 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(cpu_ca32, cpu_ca); } } else if (add_ca) { - TCGv zero, inv1 = tcg_temp_new(); + TCGv inv1 = tcg_temp_new(); tcg_gen_not_tl(inv1, arg1); - zero = tcg_constant_tl(0); - tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); - tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); + tcg_gen_addcio_tl(t0, cpu_ca, arg2, inv1, cpu_ca); gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, cpu_ca32, 0); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); @@ -3627,7 +3639,6 @@ static void pmu_count_insns(DisasContext *ctx) #else static void pmu_count_insns(DisasContext *ctx) { - return; } #endif /* #if defined(TARGET_PPC64) */ @@ -4268,8 +4279,10 @@ static void gen_mtmsr(DisasContext *ctx) /* L=1 form only updates EE and RI */ mask &= (1ULL << MSR_RI) | (1ULL << MSR_EE); } else { - /* mtmsr does not alter S, ME, or LE */ - mask &= ~((1ULL << MSR_LE) | (1ULL << MSR_ME) | (1ULL << MSR_S)); + if (likely(!(ctx->insns_flags2 & PPC2_PPE42))) { + /* mtmsr does not alter S, ME, or LE */ + mask &= ~((1ULL << MSR_LE) | (1ULL << MSR_ME) | (1ULL << MSR_S)); + } /* * XXX: we need to update nip before the store if we enter @@ -5757,6 +5770,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a) #include "translate/bhrb-impl.c.inc" +#include "translate/ppe-impl.c.inc" + /* Handles lfdp */ static void gen_dform39(DisasContext *ctx) { diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index a66b833..464fb1d 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -98,28 +98,26 @@ static bool do_helper_ac(DisasContext *ctx, arg_A_tac *a, return true; } -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rB(ctx->opcode)); \ - gen_helper_f##name(t1, tcg_env, t0); \ - set_fpr(rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_helper_compute_fprf_float64(tcg_env, t1); \ - } \ - gen_helper_float_check_status(tcg_env); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ +static bool do_round_convert(DisasContext 
*ctx, arg_X_tb_rc *a, + void (*helper)(TCGv_i64, TCGv_env, TCGv_i64), + bool set_fprf) +{ + TCGv_i64 t0, t1; + REQUIRE_FPU(ctx); + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_reset_fpstatus(); + get_fpr(t0, a->rb); + helper(t1, tcg_env, t0); + set_fpr(a->rt, t1); + if (set_fprf) { + gen_helper_compute_fprf_float64(tcg_env, t1); + } + gen_helper_float_check_status(tcg_env); + if (unlikely(a->rc)) { + gen_set_cr1_from_fpscr(ctx); + } + return true; } static bool do_helper_bs(DisasContext *ctx, arg_A_tb *a, @@ -213,41 +211,26 @@ TRANS(FSQRT, do_helper_fsqrt, gen_helper_FSQRT); TRANS(FSQRTS, do_helper_fsqrt, gen_helper_FSQRTS); /*** Floating-Point round & convert ***/ -/* fctiw */ -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT); -/* fctiwu */ -GEN_FLOAT_B(ctiwu, 0x0E, 0x04, 0, PPC2_FP_CVT_ISA206); -/* fctiwz */ -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT); -/* fctiwuz */ -GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206); -/* frsp */ -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT); -/* fcfid */ -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC2_FP_CVT_S64); -/* fcfids */ -GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206); -/* fcfidu */ -GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fcfidus */ -GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fctid */ -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206); -/* fctidz */ -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206); - -/* frin */ -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT); -/* friz */ -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT); -/* frip */ -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT); -/* frim */ -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); +TRANS_FLAGS(FLOAT, FRSP, do_round_convert, gen_helper_FRSP, true); +TRANS_FLAGS(FLOAT_EXT, FRIN, do_round_convert, gen_helper_FRIN, true); +TRANS_FLAGS(FLOAT_EXT, FRIZ, do_round_convert, gen_helper_FRIZ, true); +TRANS_FLAGS(FLOAT_EXT, FRIP, do_round_convert, gen_helper_FRIP, true); +TRANS_FLAGS(FLOAT_EXT, FRIM, do_round_convert, gen_helper_FRIM, true); + +TRANS_FLAGS(FLOAT, FCTIW, do_round_convert, gen_helper_FCTIW, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIWU, do_round_convert, gen_helper_FCTIWU, false); +TRANS_FLAGS(FLOAT, FCTIWZ, do_round_convert, gen_helper_FCTIWZ, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIWUZ, do_round_convert, gen_helper_FCTIWUZ, false); + +TRANS_FLAGS2(FP_CVT_S64, FCTID, do_round_convert, gen_helper_FCTID, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIDU, do_round_convert, gen_helper_FCTIDU, false); +TRANS_FLAGS2(FP_CVT_S64, FCTIDZ, do_round_convert, gen_helper_FCTIDZ, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIDUZ, do_round_convert, gen_helper_FCTIDUZ, false); + +TRANS_FLAGS2(FP_CVT_S64, FCFID, do_round_convert, gen_helper_FCFID, true); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDS, do_round_convert, gen_helper_FCFIDS, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDU, do_round_convert, gen_helper_FCFIDU, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDUS, do_round_convert, gen_helper_FCFIDUS, false); static bool trans_FTDIV(DisasContext *ctx, arg_X_bf *a) { @@ -274,183 +257,117 @@ static bool trans_FTSQRT(DisasContext *ctx, arg_X_bf_b *a) } /*** Floating-Point compare ***/ - -/* fcmpo */ -static void gen_fcmpo(DisasContext *ctx) +static bool do_helper_cmp(DisasContext *ctx, arg_X_bf *a, + void (*helper)(TCGv_env, TCGv_i64, TCGv_i64, + TCGv_i32)) { TCGv_i32 crf; - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { 
- gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); gen_reset_fpstatus(); - crf = tcg_constant_i32(crfD(ctx->opcode)); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); - gen_helper_fcmpo(tcg_env, t0, t1, crf); + crf = tcg_constant_i32(a->bf); + get_fpr(t0, a->ra); + get_fpr(t1, a->rb); + helper(tcg_env, t0, t1, crf); gen_helper_float_check_status(tcg_env); + return true; } -/* fcmpu */ -static void gen_fcmpu(DisasContext *ctx) -{ - TCGv_i32 crf; - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - gen_reset_fpstatus(); - crf = tcg_constant_i32(crfD(ctx->opcode)); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); - gen_helper_fcmpu(tcg_env, t0, t1, crf); - gen_helper_float_check_status(tcg_env); -} +TRANS(FCMPU, do_helper_cmp, gen_helper_FCMPU); +TRANS(FCMPO, do_helper_cmp, gen_helper_FCMPO); /*** Floating-point move ***/ -/* fabs */ -/* XXX: beware that fabs never checks for NaNs nor update FPSCR */ -static void gen_fabs(DisasContext *ctx) -{ - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_andi_i64(t1, t0, ~(1ULL << 63)); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} /* fmr - fmr. */ /* XXX: beware that fmr never checks for NaNs nor update FPSCR */ -static void gen_fmr(DisasContext *ctx) +static bool trans_FMR(DisasContext *ctx, arg_FMR *a) { TCGv_i64 t0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - set_fpr(rD(ctx->opcode), t0); - if (unlikely(Rc(ctx->opcode))) { + get_fpr(t0, a->rb); + set_fpr(a->rt, t0); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -/* fnabs */ -/* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ -static void gen_fnabs(DisasContext *ctx) +/* XXX: beware that f{neg, abs, nabs} never checks for NaNs nor update FPSCR */ +static bool do_move_b(DisasContext *ctx, arg_X_tb_rc *a, int64_t val, + void (*tcg_op)(TCGv_i64, TCGv_i64, int64_t)) { - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_ori_i64(t1, t0, 1ULL << 63); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { + get_fpr(t0, a->rb); + tcg_op(t1, t0, val); + set_fpr(a->rt, t1); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -/* fneg */ -/* XXX: beware that fneg never checks for NaNs nor update FPSCR */ -static void gen_fneg(DisasContext *ctx) -{ - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_xori_i64(t1, t0, 1ULL << 63); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} +TRANS(FNEG, do_move_b, 1ULL << 63, tcg_gen_xori_i64); 
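The one-line TRANS registrations above and below lean on QEMU's decodetree glue; as a rough sketch (the real definitions live in target/ppc/translate.c), the macros expand along these lines:

#define TRANS(NAME, FUNC, ...) \
    static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \
    { return FUNC(ctx, a, __VA_ARGS__); }

#define TRANS_FLAGS(FLAGS, NAME, FUNC, ...) \
    static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \
    { \
        REQUIRE_INSNS_FLAGS(ctx, FLAGS); \
        return FUNC(ctx, a, __VA_ARGS__); \
    }

so TRANS(FNEG, do_move_b, 1ULL << 63, tcg_gen_xori_i64) defines a trans_FNEG() that forwards to do_move_b(). The constants are plain IEEE-754 sign-bit arithmetic on bit 63 of the double: xori flips the sign (0x3FF0000000000000, i.e. 1.0, becomes 0xBFF0000000000000, i.e. -1.0), andi with ~(1ULL << 63) clears it for FABS, and ori sets it for FNABS, which is why no softfloat helper runs and no FPSCR bits change.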
+TRANS(FABS, do_move_b, ~(1ULL << 63), tcg_gen_andi_i64); +TRANS(FNABS, do_move_b, 1ULL << 63, tcg_gen_ori_i64); /* fcpsgn: PowerPC 2.05 specification */ /* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ -static void gen_fcpsgn(DisasContext *ctx) +static bool trans_FCPSGN(DisasContext *ctx, arg_FCPSGN *a) { - TCGv_i64 t0; - TCGv_i64 t1; - TCGv_i64 t2; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, ISA205); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); + get_fpr(t0, a->ra); + get_fpr(t1, a->rb); tcg_gen_deposit_i64(t2, t0, t1, 0, 63); - set_fpr(rD(ctx->opcode), t2); - if (unlikely(Rc(ctx->opcode))) { + set_fpr(a->rt, t2); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -static void gen_fmrgew(DisasContext *ctx) +static bool trans_FMRGEW(DisasContext *ctx, arg_FMRGEW *a) { - TCGv_i64 b0; - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - b0 = tcg_temp_new_i64(); + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, VSX207); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_shri_i64(b0, t0, 32); - get_fpr(t0, rA(ctx->opcode)); - tcg_gen_deposit_i64(t1, t0, b0, 0, 32); - set_fpr(rD(ctx->opcode), t1); + t2 = tcg_temp_new_i64(); + get_fpr(t1, a->rb); + tcg_gen_shri_i64(t0, t1, 32); + get_fpr(t1, a->ra); + tcg_gen_deposit_i64(t2, t1, t0, 0, 32); + set_fpr(a->rt, t2); + return true; } -static void gen_fmrgow(DisasContext *ctx) +static bool trans_FMRGOW(DisasContext *ctx, arg_FMRGOW *a) { - TCGv_i64 t0; - TCGv_i64 t1; - TCGv_i64 t2; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, VSX207); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - get_fpr(t1, rA(ctx->opcode)); + get_fpr(t0, a->rb); + get_fpr(t1, a->ra); tcg_gen_deposit_i64(t2, t0, t1, 32, 32); - set_fpr(rD(ctx->opcode), t2); + set_fpr(a->rt, t2); + return true; } /*** Floating-Point status & ctrl register ***/ @@ -479,7 +396,7 @@ static void gen_mcrfs(DisasContext *ctx) tcg_gen_extu_tl_i64(tnew_fpscr, cpu_fpscr); /* Only the exception bits (including FX) should be cleared if read */ tcg_gen_andi_i64(tnew_fpscr, tnew_fpscr, - ~((0xF << shift) & FP_EX_CLEAR_BITS)); + ~(MAKE_64BIT_MASK(shift, 4) & FP_EX_CLEAR_BITS)); /* FEX and VX need to be updated, so don't set fpscr directly */ tmask = tcg_constant_i32(1 << nibble); gen_helper_store_fpscr(tcg_env, tnew_fpscr, tmask); @@ -1051,8 +968,6 @@ TRANS(STFDX, do_lsfp_X, false, true, false) TRANS(STFDUX, do_lsfp_X, true, true, false) TRANS(PSTFD, do_lsfp_PLS_D, false, true, false) -#undef GEN_FLOAT_B - #undef GEN_LDF #undef GEN_LDUF #undef GEN_LDUXF diff --git a/target/ppc/translate/fp-ops.c.inc b/target/ppc/translate/fp-ops.c.inc index cef4b5d..9bc9c3a 100644 --- a/target/ppc/translate/fp-ops.c.inc +++ b/target/ppc/translate/fp-ops.c.inc @@ -1,24 +1,3 @@ -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -GEN_HANDLER(f##name, 0x3F, op2, op3, 0x001F0000, type) - -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwuz, 
0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT), -GEN_HANDLER_E(fcfid, 0x3F, 0x0E, 0x1A, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctidz, 0x3F, 0x0F, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT), - GEN_HANDLER_E(lfdepx, 0x1F, 0x1F, 0x12, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206), @@ -31,15 +10,6 @@ GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), -GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT), -GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207), GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ppe-impl.c.inc new file mode 100644 index 0000000..0a05903 --- /dev/null +++ b/target/ppc/translate/ppe-impl.c.inc @@ -0,0 +1,609 @@ +/* + * IBM PPE Instructions + * + * Copyright (c) 2025, IBM Corporation. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +static bool vdr_is_valid(uint32_t vdr) +{ + const uint32_t valid_bitmap = 0xf00003ff; + return !!((1ul << (vdr & 0x1f)) & valid_bitmap); +} + +static bool ppe_gpr_is_valid(uint32_t reg) +{ + const uint32_t valid_bitmap = 0xf00027ff; + return !!((1ul << (reg & 0x1f)) & valid_bitmap); +} + +#define CHECK_VDR(CTX, VDR) \ + do { \ + if (unlikely(!vdr_is_valid(VDR))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +#define CHECK_PPE_GPR(CTX, REG) \ + do { \ + if (unlikely(!ppe_gpr_is_valid(REG))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f) + +#define CHECK_PPE_LEVEL(CTX, LVL) \ + do { \ + if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a) +{ + int i; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + const uint8_t vd_list[] = {9, 7, 5, 3, 0}; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || (a->si < 0xB))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + + tcg_gen_addi_tl(base, cpu_gpr[a->ra], a->si * 8); + gen_store_spr(SPR_PPE42_EDR, base); + + t8 = tcg_temp_new_i64(); + + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[31], cpu_gpr[30], t8); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[29], cpu_gpr[28], t8); + + lo = tcg_temp_new(); + hi = tcg_temp_new(); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + gen_store_spr(SPR_SRR0, hi); + gen_store_spr(SPR_SRR1, lo); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + gen_set_xer(ctx, hi); + tcg_gen_mov_tl(cpu_ctr, lo); + + for (i = 0; i < sizeof(vd_list); i++) { + int vd = vd_list[i]; + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd], t8); + } + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + tcg_gen_shri_tl(hi, hi, 28); + tcg_gen_trunc_tl_i32(cpu_crf[0], hi); + gen_store_spr(SPR_SPRG0, lo); + + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], base); + return true; +} + +static bool trans_LSKU(DisasContext *ctx, arg_LSKU *a) +{ + int64_t n; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || + (a->si & PPC_BIT(0)) || (a->si == 0))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + gen_addr_register(ctx, base); + + + tcg_gen_addi_tl(base, base, a->si * 8); + gen_store_spr(SPR_PPE42_EDR, base); + + n = a->si - 1; + t8 = tcg_temp_new_i64(); + if (n > 0) { + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + hi 
= cpu_gpr[30]; + lo = cpu_gpr[31]; + tcg_gen_extr_i64_tl(lo, hi, t8); + } + if (n > 1) { + tcg_gen_addi_tl(EA, base, -16); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + hi = cpu_gpr[28]; + lo = cpu_gpr[29]; + tcg_gen_extr_i64_tl(lo, hi, t8); + } + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], base); + return true; +} + +static bool trans_STCXU(DisasContext *ctx, arg_STCXU *a) +{ + TCGv EA; + TCGv lo, hi; + TCGv_i64 t8; + int i; + const uint8_t vd_list[] = {9, 7, 5, 3, 0}; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], 4); + tcg_gen_qemu_st_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + + gen_store_spr(SPR_PPE42_EDR, cpu_gpr[a->ra]); + + t8 = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(t8, cpu_gpr[31], cpu_gpr[30]); + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + tcg_gen_concat_tl_i64(t8, cpu_gpr[29], cpu_gpr[28]); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + lo = tcg_temp_new(); + hi = tcg_temp_new(); + + gen_load_spr(hi, SPR_SRR0); + gen_load_spr(lo, SPR_SRR1); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + gen_get_xer(ctx, hi); + tcg_gen_mov_tl(lo, cpu_ctr); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + for (i = 0; i < sizeof(vd_list); i++) { + int vd = vd_list[i]; + tcg_gen_concat_tl_i64(t8, cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd]); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + } + + gen_load_spr(lo, SPR_SPRG0); + tcg_gen_extu_i32_tl(hi, cpu_crf[0]); + tcg_gen_shli_tl(hi, hi, 28); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], a->si * 8); + tcg_gen_qemu_st_tl(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) | + MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], EA); + return true; +} + +static bool trans_STSKU(DisasContext *ctx, arg_STSKU *a) +{ + int64_t n; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + gen_addr_register(ctx, base); + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_st_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + + gen_store_spr(SPR_PPE42_EDR, base); + + n = ~(a->si); + + t8 = tcg_temp_new_i64(); + if (n > 0) { + hi = cpu_gpr[30]; + lo = cpu_gpr[31]; + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + } + if (n > 1) { + hi = cpu_gpr[28]; + lo = cpu_gpr[29]; + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, base, -16); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, 
DEF_MEMOP(MO_64) | MO_ALIGN); + } + + tcg_gen_addi_tl(EA, base, a->si * 8); + tcg_gen_qemu_st_tl(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) | + MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], EA); + return true; +} + +static bool do_ppe_ldst(DisasContext *ctx, int rt, int ra, TCGv disp, + bool update, bool store) +{ + TCGv ea; + int rt_lo; + TCGv_i64 t8; + + CHECK_VDR(ctx, rt); + CHECK_PPE_GPR(ctx, ra); + rt_lo = VDR_PAIR_REG(rt); + if (update && (ra == 0 || (!store && ((ra == rt) || (ra == rt_lo))))) { + gen_invalid(ctx); + return true; + } + gen_set_access_type(ctx, ACCESS_INT); + + ea = do_ea_calc(ctx, ra, disp); + t8 = tcg_temp_new_i64(); + if (store) { + tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[rt]); + tcg_gen_qemu_st_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64)); + } else { + tcg_gen_qemu_ld_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64)); + tcg_gen_extr_i64_tl(cpu_gpr[rt_lo], cpu_gpr[rt], t8); + } + if (update) { + tcg_gen_mov_tl(cpu_gpr[ra], ea); + } + return true; +} + +static bool do_ppe_ldst_D(DisasContext *ctx, arg_D *a, bool update, bool store) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + return do_ppe_ldst(ctx, a->rt, a->ra, tcg_constant_tl(a->si), update, + store); +} + +static bool do_ppe_ldst_X(DisasContext *ctx, arg_X *a, bool store) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_GPR(ctx, a->rb); + return do_ppe_ldst(ctx, a->rt, a->ra, cpu_gpr[a->rb], false, store); +} + +TRANS(LVD, do_ppe_ldst_D, false, false) +TRANS(LVDU, do_ppe_ldst_D, true, false) +TRANS(STVD, do_ppe_ldst_D, false, true) +TRANS(STVDU, do_ppe_ldst_D, true, true) +TRANS(LVDX, do_ppe_ldst_X, false) +TRANS(STVDX, do_ppe_ldst_X, true) + + +static bool do_fcb(DisasContext *ctx, TCGv ra_val, TCGv rb_val, int bix, + int32_t bdx, bool s, bool px, bool lk) +{ + TCGCond cond; + uint32_t mask; + TCGLabel *no_branch; + target_ulong dest; + + /* Update CR0 */ + gen_op_cmp32(ra_val, rb_val, s, 0); + + if (lk) { + gen_setlr(ctx, ctx->base.pc_next); + } + + + mask = PPC_BIT32(28 + bix); + cond = (px) ? 
TCG_COND_TSTEQ : TCG_COND_TSTNE;
+    no_branch = gen_new_label();
+    dest = ctx->cia + bdx;
+
+    /* Do the branch if CR0[bix] == PX */
+    tcg_gen_brcondi_i32(cond, cpu_crf[0], mask, no_branch);
+    gen_goto_tb(ctx, 0, dest);
+    gen_set_label(no_branch);
+    gen_goto_tb(ctx, 1, ctx->base.pc_next);
+    ctx->base.is_jmp = DISAS_NORETURN;
+    return true;
+}
+
+static bool do_cmp_branch(DisasContext *ctx, arg_FCB_bix *a, bool s,
+                          bool rb_is_gpr)
+{
+    TCGv old_ra;
+    TCGv rb_val;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_GPR(ctx, a->ra);
+    if (rb_is_gpr) {
+        CHECK_PPE_GPR(ctx, a->rb);
+        rb_val = cpu_gpr[a->rb];
+    } else {
+        rb_val = tcg_constant_tl(a->rb);
+    }
+    if (a->bix == 3) {
+        old_ra = tcg_temp_new();
+        tcg_gen_mov_tl(old_ra, cpu_gpr[a->ra]);
+        tcg_gen_sub_tl(cpu_gpr[a->ra], cpu_gpr[a->ra], rb_val);
+        return do_fcb(ctx, old_ra, rb_val, 2,
+                      a->bdx, s, a->px, a->lk);
+    } else {
+        return do_fcb(ctx, cpu_gpr[a->ra], rb_val, a->bix,
+                      a->bdx, s, a->px, a->lk);
+    }
+}
+
+TRANS(CMPWBC, do_cmp_branch, true, true)
+TRANS(CMPLWBC, do_cmp_branch, false, true)
+TRANS(CMPWIBC, do_cmp_branch, true, false)
+
+static bool do_mask_branch(DisasContext *ctx, arg_FCB *a, bool invert,
+                           bool update, bool rb_is_gpr)
+{
+    TCGv r;
+    TCGv mask, shift;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_GPR(ctx, a->ra);
+    if (rb_is_gpr) {
+        CHECK_PPE_GPR(ctx, a->rb);
+        mask = tcg_temp_new();
+        shift = tcg_temp_new();
+        tcg_gen_andi_tl(shift, cpu_gpr[a->rb], 0x1f);
+        tcg_gen_shr_tl(mask, tcg_constant_tl(0x80000000), shift);
+    } else {
+        /* use a writable temp: constants must not be written by not_tl */
+        mask = tcg_temp_new();
+        tcg_gen_movi_tl(mask, PPC_BIT32(a->rb));
+    }
+    if (invert) {
+        tcg_gen_not_tl(mask, mask);
+    }
+
+    /* apply mask to ra */
+    r = tcg_temp_new();
+    tcg_gen_and_tl(r, cpu_gpr[a->ra], mask);
+    if (update) {
+        tcg_gen_mov_tl(cpu_gpr[a->ra], r);
+    }
+    return do_fcb(ctx, r, tcg_constant_tl(0), 2,
+                  a->bdx, false, a->px, a->lk);
+}
+
+TRANS(BNBWI, do_mask_branch, false, false, false)
+TRANS(BNBW, do_mask_branch, false, false, true)
+TRANS(CLRBWIBC, do_mask_branch, true, true, false)
+TRANS(CLRBWBC, do_mask_branch, true, true, true)
+
+static void gen_set_Rc0_i64(DisasContext *ctx, TCGv_i64 reg)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_movi_i64(t0, CRF_EQ);
+    tcg_gen_movi_i64(t1, CRF_LT);
+    tcg_gen_movcond_i64(TCG_COND_LT, t0, reg, tcg_constant_i64(0), t1, t0);
+    tcg_gen_movi_i64(t1, CRF_GT);
+    tcg_gen_movcond_i64(TCG_COND_GT, t0, reg, tcg_constant_i64(0), t1, t0);
+    tcg_gen_extrl_i64_i32(t, t0);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+    tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], t);
+}
+
+static bool do_shift64(DisasContext *ctx, arg_X_rc *a, bool left)
+{
+    int rt_lo, ra_lo;
+    TCGv_i64 t0, t8;
+
+    if (unlikely(!is_ppe(ctx))) {
+        return false;
+    }
+    CHECK_PPE_LEVEL(ctx, PPC2_PPE42X);
+    CHECK_VDR(ctx, a->rt);
+    CHECK_VDR(ctx, a->ra);
+    CHECK_PPE_GPR(ctx, a->rb);
+    rt_lo = VDR_PAIR_REG(a->rt);
+    ra_lo = VDR_PAIR_REG(a->ra);
+    t8 = tcg_temp_new_i64();
+
+    /* AND rt with a mask that is 0 when rb >= 0x40 */
+    t0 = tcg_temp_new_i64();
+    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
+    tcg_gen_shli_i64(t0, t0, 0x39);
+    tcg_gen_sari_i64(t0, t0, 0x3f);
+
+    /* form 64bit value from two 32bit regs */
+    tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[a->rt]);
+
+    /* apply mask */
+    tcg_gen_andc_i64(t8, t8, t0);
+
+    /* do the shift */
+    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]);
+    tcg_gen_andi_i64(t0, t0, 0x3f);
+    if (left) {
+        tcg_gen_shl_i64(t8, t8, t0);
+    } else {
+        tcg_gen_shr_i64(t8, t8, t0);
+    }
+
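/*
 * Why the shli-by-0x39/sari-by-0x3f pair above works: SLVD/SRVD operate on
 * the 64-bit value formed by the VDR pair cpu_gpr[rt] (high word) and
 * cpu_gpr[(rt + 1) & 0x1f] (low word). A shift amount of 0x40 or more must
 * produce zero, but the TCG shift only sees rb & 0x3f, so bit 6 of rb is
 * first turned into an all-ones/all-zeroes mask: shifting rb left by 57
 * places bit 6 at bit 63, and the arithmetic shift right by 63 replicates
 * it. For example, rb = 0x45 yields mask = -1, the andc clears the source,
 * and the subsequent shift by 0x45 & 0x3f = 5 leaves zero, as architected.
 */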
+ /* split the 64bit word back into two 32bit regs */ + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8); + + /* update CR0 if requested */ + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t8); + } + return true; +} + +TRANS(SRVD, do_shift64, false) +TRANS(SLVD, do_shift64, true) + +static bool trans_DCBQ(DisasContext *ctx, arg_DCBQ * a) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_GPR(ctx, a->rt); + CHECK_PPE_GPR(ctx, a->ra); + CHECK_PPE_GPR(ctx, a->rb); + + /* No cache exists, so just set RT to 0 */ + tcg_gen_movi_tl(cpu_gpr[a->rt], 0); + return true; +} + +static bool trans_RLDIMI(DisasContext *ctx, arg_RLDIMI *a) +{ + TCGv_i64 t_rs, t_ra; + int ra_lo, rs_lo; + uint32_t sh = a->sh; + uint32_t mb = a->mb; + uint32_t me = 63 - sh; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_VDR(ctx, a->rs); + CHECK_VDR(ctx, a->ra); + + rs_lo = VDR_PAIR_REG(a->rs); + ra_lo = VDR_PAIR_REG(a->ra); + + t_rs = tcg_temp_new_i64(); + t_ra = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(t_rs, cpu_gpr[rs_lo], cpu_gpr[a->rs]); + tcg_gen_concat_tl_i64(t_ra, cpu_gpr[ra_lo], cpu_gpr[a->ra]); + + if (mb <= me) { + tcg_gen_deposit_i64(t_ra, t_ra, t_rs, sh, me - mb + 1); + } else { + uint64_t mask = mask_u64(mb, me); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_rotli_i64(t1, t_rs, sh); + tcg_gen_andi_i64(t1, t1, mask); + tcg_gen_andi_i64(t_ra, t_ra, ~mask); + tcg_gen_or_i64(t_ra, t_ra, t1); + } + + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t_ra); + + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t_ra); + } + return true; +} + + +static bool gen_rldinm_i64(DisasContext *ctx, arg_MD *a, int mb, int me, int sh) +{ + int len = me - mb + 1; + int rsh = (64 - sh) & 63; + int ra_lo, rs_lo; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_VDR(ctx, a->rs); + CHECK_VDR(ctx, a->ra); + + rs_lo = VDR_PAIR_REG(a->rs); + ra_lo = VDR_PAIR_REG(a->ra); + t8 = tcg_temp_new_i64(); + tcg_gen_concat_tl_i64(t8, cpu_gpr[rs_lo], cpu_gpr[a->rs]); + if (sh != 0 && len > 0 && me == (63 - sh)) { + tcg_gen_deposit_z_i64(t8, t8, sh, len); + } else if (me == 63 && rsh + len <= 64) { + tcg_gen_extract_i64(t8, t8, rsh, len); + } else { + tcg_gen_rotli_i64(t8, t8, sh); + tcg_gen_andi_i64(t8, t8, mask_u64(mb, me)); + } + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8); + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t8); + } + return true; +} + +TRANS(RLDICL, gen_rldinm_i64, a->mb, 63, a->sh) +TRANS(RLDICR, gen_rldinm_i64, 0, a->mb, a->sh) diff --git a/target/ppc/user_only_helper.c b/target/ppc/user_only_helper.c index a4d07a0..ae210eb 100644 --- a/target/ppc/user_only_helper.c +++ b/target/ppc/user_only_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "internal.h" void ppc_cpu_record_sigsegv(CPUState *cs, vaddr address, diff --git a/target/riscv/bitmanip_helper.c b/target/riscv/bitmanip_helper.c index b99c4a3..e9c8d7f 100644 --- a/target/riscv/bitmanip_helper.c +++ b/target/riscv/bitmanip_helper.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" +#include "exec/target_long.h" #include "exec/helper-proto.h" #include "tcg/tcg.h" diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.c index 7c8a59e..aeaeb88 100644 --- a/target/riscv/common-semi-target.h +++ b/target/riscv/common-semi-target.c @@ -8,43 +8,42 @@ * SPDX-License-Identifier: 
GPL-2.0-or-later */ -#ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H -#define TARGET_RISCV_COMMON_SEMI_TARGET_H +#include "qemu/osdep.h" +#include "cpu.h" +#include "semihosting/common-semi.h" -static inline target_ulong common_semi_arg(CPUState *cs, int argno) +uint64_t common_semi_arg(CPUState *cs, int argno) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; return env->gpr[xA0 + argno]; } -static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +void common_semi_set_ret(CPUState *cs, uint64_t ret) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; env->gpr[xA0] = ret; } -static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +bool is_64bit_semihosting(CPUArchState *env) { - return (nr == TARGET_SYS_EXIT_EXTENDED || sizeof(target_ulong) == 8); + return riscv_cpu_mxl(env) != MXL_RV32; } -static inline bool is_64bit_semihosting(CPUArchState *env) +bool common_semi_sys_exit_is_extended(CPUState *cs) { - return riscv_cpu_mxl(env) != MXL_RV32; + return is_64bit_semihosting(cpu_env(cs)); } -static inline target_ulong common_semi_stack_bottom(CPUState *cs) +uint64_t common_semi_stack_bottom(CPUState *cs) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; return env->gpr[xSP]; } -static inline bool common_semi_has_synccache(CPUArchState *env) +bool common_semi_has_synccache(CPUArchState *env) { return true; } - -#endif diff --git a/target/riscv/cpu-param.h b/target/riscv/cpu-param.h index fba30e9..cfdc67c 100644 --- a/target/riscv/cpu-param.h +++ b/target/riscv/cpu-param.h @@ -16,6 +16,14 @@ # define TARGET_VIRT_ADDR_SPACE_BITS 32 /* sv32 */ #endif #define TARGET_PAGE_BITS 12 /* 4 KiB Pages */ + +/* + * RISC-V-specific extra insn start words: + * 1: Original instruction opcode + * 2: more information about instruction + */ +#define TARGET_INSN_START_EXTRA_WORDS 2 + /* * The current MMU Modes are: * - U mode 0b000 @@ -26,6 +34,4 @@ * - M mode HLV/HLVX/HSV 0b111 */ -#define TCG_GUEST_DEFAULT_MO 0 - #endif diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 4cfdb74..75f4e43 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -44,15 +44,18 @@ #define TYPE_RISCV_CPU_RVA23S64 RISCV_CPU_TYPE_NAME("rva23s64") #define TYPE_RISCV_CPU_IBEX RISCV_CPU_TYPE_NAME("lowrisc-ibex") #define TYPE_RISCV_CPU_SHAKTI_C RISCV_CPU_TYPE_NAME("shakti-c") +#define TYPE_RISCV_CPU_SIFIVE_E RISCV_CPU_TYPE_NAME("sifive-e") #define TYPE_RISCV_CPU_SIFIVE_E31 RISCV_CPU_TYPE_NAME("sifive-e31") #define TYPE_RISCV_CPU_SIFIVE_E34 RISCV_CPU_TYPE_NAME("sifive-e34") #define TYPE_RISCV_CPU_SIFIVE_E51 RISCV_CPU_TYPE_NAME("sifive-e51") +#define TYPE_RISCV_CPU_SIFIVE_U RISCV_CPU_TYPE_NAME("sifive-u") #define TYPE_RISCV_CPU_SIFIVE_U34 RISCV_CPU_TYPE_NAME("sifive-u34") #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") #define TYPE_RISCV_CPU_THEAD_C906 RISCV_CPU_TYPE_NAME("thead-c906") #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1") #define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_XIANGSHAN_NANHU RISCV_CPU_TYPE_NAME("xiangshan-nanhu") +#define TYPE_RISCV_CPU_XIANGSHAN_KMH RISCV_CPU_TYPE_NAME("xiangshan-kunminghu") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 09ded68..a877018 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -24,7 +24,6 @@ #include "cpu.h" #include "cpu_vendorid.h" #include "internals.h" -#include "exec/exec-all.h" 
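The conversion of common-semi-target.h above drops the static-inline-header pattern: the semihosting hooks are now ordinary external functions with fixed-width signatures, so the shared semihosting core can call them without knowing the target's target_ulong width. A sketch of the prototypes these definitions now satisfy (assumed to be declared in "semihosting/common-semi.h", which the new .c file includes):

uint64_t common_semi_arg(CPUState *cs, int argno);
void common_semi_set_ret(CPUState *cs, uint64_t ret);
bool is_64bit_semihosting(CPUArchState *env);
bool common_semi_sys_exit_is_extended(CPUState *cs);
uint64_t common_semi_stack_bottom(CPUState *cs);
bool common_semi_has_synccache(CPUArchState *env);

Note that the old TARGET_SYS_EXIT_EXTENDED comparison disappears from the target hook; common_semi_sys_exit_is_extended() now reports only whether the machine is 64-bit, with the nr-based case presumably folded into the common code.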
#include "qapi/error.h" #include "qapi/visitor.h" #include "qemu/error-report.h" @@ -74,6 +73,13 @@ bool riscv_cpu_option_set(const char *optname) return g_hash_table_contains(general_user_opts, optname); } +static void riscv_cpu_cfg_merge(RISCVCPUConfig *dest, const RISCVCPUConfig *src) +{ +#define BOOL_FIELD(x) dest->x |= src->x; +#define TYPED_FIELD(type, x, default_) if (src->x != default_) dest->x = src->x; +#include "cpu_cfg_fields.h.inc" +} + #define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \ {#_name, _min_ver, CPU_CFG_OFFSET(_prop)} @@ -121,8 +127,8 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo), ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_13_0, ext_zabha), ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), - ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc), + ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), @@ -183,6 +189,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), + ISA_EXT_DATA_ENTRY(sdtrig, PRIV_VERSION_1_12_0, debug), ISA_EXT_DATA_ENTRY(shcounterenw, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sha, PRIV_VERSION_1_12_0, ext_sha), ISA_EXT_DATA_ENTRY(shgatpa, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -210,6 +217,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ssnpm, PRIV_VERSION_1_13_0, ext_ssnpm), ISA_EXT_DATA_ENTRY(sspm, PRIV_VERSION_1_13_0, ext_sspm), ISA_EXT_DATA_ENTRY(ssstateen, PRIV_VERSION_1_12_0, ext_ssstateen), + ISA_EXT_DATA_ENTRY(ssstrict, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc), ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -222,6 +230,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval), ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot), ISA_EXT_DATA_ENTRY(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt), + ISA_EXT_DATA_ENTRY(svrsw60t59b, PRIV_VERSION_1_13_0, ext_svrsw60t59b), ISA_EXT_DATA_ENTRY(svukte, PRIV_VERSION_1_13_0, ext_svukte), ISA_EXT_DATA_ENTRY(svvptc, PRIV_VERSION_1_13_0, ext_svvptc), ISA_EXT_DATA_ENTRY(xtheadba, PRIV_VERSION_1_11_0, ext_xtheadba), @@ -357,7 +366,7 @@ void riscv_cpu_set_misa_ext(CPURISCVState *env, uint32_t ext) int riscv_cpu_max_xlen(RISCVCPUClass *mcc) { - return 16 << mcc->misa_mxl_max; + return 16 << mcc->def->misa_mxl_max; } #ifndef CONFIG_USER_ONLY @@ -390,7 +399,7 @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) g_assert_not_reached(); } -uint8_t satp_mode_max_from_map(uint32_t map) +static uint8_t satp_mode_max_from_map(uint32_t map) { /* * 'map = 0' will make us return (31 - 32), which C will @@ -434,17 +443,23 @@ const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit) g_assert_not_reached(); } -static void set_satp_mode_max_supported(RISCVCPU *cpu, - uint8_t satp_mode) +static bool get_satp_mode_supported(RISCVCPU *cpu, uint16_t *supported) { - bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; + bool rv32 = riscv_cpu_is_32bit(cpu); const bool *valid_vm = rv32 ? 
valid_vm_1_10_32 : valid_vm_1_10_64; + int satp_mode = cpu->cfg.max_satp_mode; + + if (satp_mode == -1) { + return false; + } + *supported = 0; for (int i = 0; i <= satp_mode; ++i) { if (valid_vm[i]) { - cpu->cfg.satp_mode.supported |= (1 << i); + *supported |= (1 << i); } } + return true; } /* Set the satp mode to the max supported */ @@ -453,382 +468,26 @@ static void set_satp_mode_default_map(RISCVCPU *cpu) /* * Bare CPUs do not default to the max available. * Users must set a valid satp_mode in the command - * line. + * line. Otherwise, leave the existing max_satp_mode + * in place. */ if (object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_BARE_CPU) != NULL) { warn_report("No satp mode set. Defaulting to 'bare'"); - cpu->cfg.satp_mode.map = (1 << VM_1_10_MBARE); - return; + cpu->cfg.max_satp_mode = VM_1_10_MBARE; } - - cpu->cfg.satp_mode.map = cpu->cfg.satp_mode.supported; -} -#endif - -static void riscv_max_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), - riscv_cpu_mxl(&RISCV_CPU(obj)->env) == MXL_RV32 ? - VM_1_10_SV32 : VM_1_10_SV57); -#endif -} - -#if defined(TARGET_RISCV64) -static void rv64_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); -#endif -} - -static void rv64_sifive_u_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV39); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; } - -static void rv64_sifive_e_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); #endif - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv64_thead_c906_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_11_0; - - cpu->cfg.ext_zfa = true; - cpu->cfg.ext_zfh = true; - cpu->cfg.mmu = true; - cpu->cfg.ext_xtheadba = true; - cpu->cfg.ext_xtheadbb = true; - cpu->cfg.ext_xtheadbs = true; - cpu->cfg.ext_xtheadcmo = true; - cpu->cfg.ext_xtheadcondmov = true; - cpu->cfg.ext_xtheadfmemidx = true; - cpu->cfg.ext_xtheadmac = true; - cpu->cfg.ext_xtheadmemidx = true; - cpu->cfg.ext_xtheadmempair = true; - cpu->cfg.ext_xtheadsync = true; - - cpu->cfg.mvendorid = THEAD_VENDOR_ID; #ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV39); - th_register_custom_csrs(cpu); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.pmp = true; -} - -static void rv64_veyron_v1_cpu_init(Object 
*obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH); - env->priv_ver = PRIV_VERSION_1_12_0; - - /* Enable ISA extensions */ - cpu->cfg.mmu = true; - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; - cpu->cfg.ext_zicbom = true; - cpu->cfg.cbom_blocksize = 64; - cpu->cfg.cboz_blocksize = 64; - cpu->cfg.ext_zicboz = true; - cpu->cfg.ext_smaia = true; - cpu->cfg.ext_ssaia = true; - cpu->cfg.ext_sscofpmf = true; - cpu->cfg.ext_sstc = true; - cpu->cfg.ext_svinval = true; - cpu->cfg.ext_svnapot = true; - cpu->cfg.ext_svpbmt = true; - cpu->cfg.ext_smstateen = true; - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbs = true; - cpu->cfg.ext_XVentanaCondOps = true; - - cpu->cfg.mvendorid = VEYRON_V1_MVENDORID; - cpu->cfg.marchid = VEYRON_V1_MARCHID; - cpu->cfg.mimpid = VEYRON_V1_MIMPID; - -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV48); -#endif -} - -/* Tenstorrent Ascalon */ -static void rv64_tt_ascalon_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV); - env->priv_ver = PRIV_VERSION_1_13_0; - - /* Enable ISA extensions */ - cpu->cfg.mmu = true; - cpu->cfg.vlenb = 256 >> 3; - cpu->cfg.elen = 64; - cpu->env.vext_ver = VEXT_VERSION_1_00_0; - cpu->cfg.rvv_ma_all_1s = true; - cpu->cfg.rvv_ta_all_1s = true; - cpu->cfg.misa_w = true; - cpu->cfg.pmp = true; - cpu->cfg.cbom_blocksize = 64; - cpu->cfg.cbop_blocksize = 64; - cpu->cfg.cboz_blocksize = 64; - cpu->cfg.ext_zic64b = true; - cpu->cfg.ext_zicbom = true; - cpu->cfg.ext_zicbop = true; - cpu->cfg.ext_zicboz = true; - cpu->cfg.ext_zicntr = true; - cpu->cfg.ext_zicond = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zihintntl = true; - cpu->cfg.ext_zihintpause = true; - cpu->cfg.ext_zihpm = true; - cpu->cfg.ext_zimop = true; - cpu->cfg.ext_zawrs = true; - cpu->cfg.ext_zfa = true; - cpu->cfg.ext_zfbfmin = true; - cpu->cfg.ext_zfh = true; - cpu->cfg.ext_zfhmin = true; - cpu->cfg.ext_zcb = true; - cpu->cfg.ext_zcmop = true; - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbs = true; - cpu->cfg.ext_zkt = true; - cpu->cfg.ext_zvbb = true; - cpu->cfg.ext_zvbc = true; - cpu->cfg.ext_zvfbfmin = true; - cpu->cfg.ext_zvfbfwma = true; - cpu->cfg.ext_zvfh = true; - cpu->cfg.ext_zvfhmin = true; - cpu->cfg.ext_zvkng = true; - cpu->cfg.ext_smaia = true; - cpu->cfg.ext_smstateen = true; - cpu->cfg.ext_ssaia = true; - cpu->cfg.ext_sscofpmf = true; - cpu->cfg.ext_sstc = true; - cpu->cfg.ext_svade = true; - cpu->cfg.ext_svinval = true; - cpu->cfg.ext_svnapot = true; - cpu->cfg.ext_svpbmt = true; - -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV57); -#endif -} - -static void rv64_xiangshan_nanhu_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVB | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_12_0; - - /* Enable ISA extensions */ - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbkb = true; - cpu->cfg.ext_zbkc = true; - cpu->cfg.ext_zbkx = true; - cpu->cfg.ext_zknd = true; - cpu->cfg.ext_zkne = true; - cpu->cfg.ext_zknh = true; - cpu->cfg.ext_zksed = true; - cpu->cfg.ext_zksh = true; - cpu->cfg.ext_svinval = true; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - -#ifndef 
CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV39); -#endif -} - -#ifdef CONFIG_TCG -static void rv128_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); -#endif -} -#endif /* CONFIG_TCG */ - -static void rv64i_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVI); -} - -static void rv64e_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVE); -} - -#endif /* !TARGET_RISCV64 */ - -#if defined(TARGET_RISCV32) || \ - (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - -static void rv32_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); -#endif -} - -static void rv32_sifive_u_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; -} - -static void rv32_sifive_e_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv32_ibex_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_12_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; - cpu->cfg.ext_smepmp = true; - - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbs = true; -} - -static void rv32_imafcu_nommu_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv32i_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVI); -} - -static void rv32e_bare_cpu_init(Object *obj) +static void riscv_register_custom_csrs(RISCVCPU *cpu, const RISCVCSR *csr_list) { - CPURISCVState *env 
= &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVE); + for (size_t i = 0; csr_list[i].csr_ops.name; i++) { + int csrno = csr_list[i].csrno; + const riscv_csr_operations *csr_ops = &csr_list[i].csr_ops; + if (!csr_list[i].insertion_test || csr_list[i].insertion_test(cpu)) { + riscv_set_csr_ops(csrno, csr_ops); + } + } } #endif @@ -945,7 +604,7 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } } - if (riscv_has_ext(env, RVV) && (flags & CPU_DUMP_VPU)) { + if (riscv_cpu_cfg(env)->ext_zve32x && (flags & CPU_DUMP_VPU)) { static const int dump_rvv_csrs[] = { CSR_VSTART, CSR_VXSAT, @@ -1021,11 +680,6 @@ bool riscv_cpu_has_work(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -static int riscv_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - return riscv_env_mmu_index(cpu_env(cs), ifetch); -} - static void riscv_cpu_reset_hold(Object *obj, ResetType type) { #ifndef CONFIG_USER_ONLY @@ -1041,7 +695,7 @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type) mcc->parent_phases.hold(obj, type); } #ifndef CONFIG_USER_ONLY - env->misa_mxl = mcc->misa_mxl_max; + env->misa_mxl = mcc->def->misa_mxl_max; env->priv = PRV_M; env->mstatus &= ~(MSTATUS_MIE | MSTATUS_MPRV); if (env->misa_mxl > MXL_RV32) { @@ -1178,18 +832,16 @@ static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info) static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) { bool rv32 = riscv_cpu_is_32bit(cpu); - uint8_t satp_mode_map_max, satp_mode_supported_max; + uint16_t supported; + uint8_t satp_mode_map_max; - /* The CPU wants the OS to decide which satp mode to use */ - if (cpu->cfg.satp_mode.supported == 0) { + if (!get_satp_mode_supported(cpu, &supported)) { + /* The CPU wants the hypervisor to decide which satp mode to allow */ return; } - satp_mode_supported_max = - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); - - if (cpu->cfg.satp_mode.map == 0) { - if (cpu->cfg.satp_mode.init == 0) { + if (cpu->satp_modes.map == 0) { + if (cpu->satp_modes.init == 0) { /* If unset by the user, we fallback to the default satp mode. */ set_satp_mode_default_map(cpu); } else { @@ -1199,27 +851,27 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) * valid_vm_1_10_32/64. 
*/ for (int i = 1; i < 16; ++i) { - if ((cpu->cfg.satp_mode.init & (1 << i)) && - (cpu->cfg.satp_mode.supported & (1 << i))) { + if ((cpu->satp_modes.init & (1 << i)) && + supported & (1 << i)) { for (int j = i - 1; j >= 0; --j) { - if (cpu->cfg.satp_mode.supported & (1 << j)) { - cpu->cfg.satp_mode.map |= (1 << j); - break; + if (supported & (1 << j)) { + cpu->cfg.max_satp_mode = j; + return; } } - break; } } } + return; } - satp_mode_map_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); + satp_mode_map_max = satp_mode_max_from_map(cpu->satp_modes.map); /* Make sure the user asked for a supported configuration (HW and qemu) */ - if (satp_mode_map_max > satp_mode_supported_max) { + if (satp_mode_map_max > cpu->cfg.max_satp_mode) { error_setg(errp, "satp_mode %s is higher than hw max capability %s", satp_mode_str(satp_mode_map_max, rv32), - satp_mode_str(satp_mode_supported_max, rv32)); + satp_mode_str(cpu->cfg.max_satp_mode, rv32)); return; } @@ -1229,9 +881,9 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) */ if (!rv32) { for (int i = satp_mode_map_max - 1; i >= 0; --i) { - if (!(cpu->cfg.satp_mode.map & (1 << i)) && - (cpu->cfg.satp_mode.init & (1 << i)) && - (cpu->cfg.satp_mode.supported & (1 << i))) { + if (!(cpu->satp_modes.map & (1 << i)) && + (cpu->satp_modes.init & (1 << i)) && + (supported & (1 << i))) { error_setg(errp, "cannot disable %s satp mode if %s " "is enabled", satp_mode_str(i, false), satp_mode_str(satp_mode_map_max, false)); @@ -1240,12 +892,7 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) } } - /* Finally expand the map so that all valid modes are set */ - for (int i = satp_mode_map_max - 1; i >= 0; --i) { - if (cpu->cfg.satp_mode.supported & (1 << i)) { - cpu->cfg.satp_mode.map |= (1 << i); - } - } + cpu->cfg.max_satp_mode = satp_mode_map_max; } #endif @@ -1323,11 +970,11 @@ bool riscv_cpu_accelerator_compatible(RISCVCPU *cpu) static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - RISCVSATPMap *satp_map = opaque; + RISCVSATPModes *satp_modes = opaque; uint8_t satp = satp_mode_from_str(name); bool value; - value = satp_map->map & (1 << satp); + value = satp_modes->map & (1 << satp); visit_type_bool(v, name, &value, errp); } @@ -1335,7 +982,7 @@ static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name, static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - RISCVSATPMap *satp_map = opaque; + RISCVSATPModes *satp_modes = opaque; uint8_t satp = satp_mode_from_str(name); bool value; @@ -1343,8 +990,8 @@ static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, return; } - satp_map->map = deposit32(satp_map->map, satp, 1, value); - satp_map->init |= 1 << satp; + satp_modes->map = deposit32(satp_modes->map, satp, 1, value); + satp_modes->init |= 1 << satp; } void riscv_add_satp_mode_properties(Object *obj) @@ -1353,16 +1000,16 @@ void riscv_add_satp_mode_properties(Object *obj) if (cpu->env.misa_mxl == MXL_RV32) { object_property_add(obj, "sv32", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); } else { object_property_add(obj, "sv39", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); object_property_add(obj, "sv48", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); 
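/*
 * Each sv* property above and below goes through cpu_riscv_get/set_satp():
 * the chosen bool is deposited into satp_modes.map and the touched bit is
 * recorded in satp_modes.init, letting riscv_cpu_satp_mode_finalize()
 * distinguish "left at default" (init bit clear) from "explicitly
 * disabled" (init bit set, map bit clear). With nothing explicitly
 * enabled, finalize walks down from a disabled mode to the next supported
 * one; e.g. sv48=off on an rv64 CPU yields SV39 as cfg.max_satp_mode.
 */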
object_property_add(obj, "sv57", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); object_property_add(obj, "sv64", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); } } @@ -1439,18 +1086,13 @@ static bool riscv_cpu_is_dynamic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } -static void riscv_cpu_post_init(Object *obj) -{ - accel_cpu_instance_init(CPU(obj)); -} - static void riscv_cpu_init(Object *obj) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(obj); RISCVCPU *cpu = RISCV_CPU(obj); CPURISCVState *env = &cpu->env; - env->misa_mxl = mcc->misa_mxl_max; + env->misa_mxl = mcc->def->misa_mxl_max; #ifndef CONFIG_USER_ONLY qdev_init_gpio_in(DEVICE(obj), riscv_cpu_set_irq, @@ -1468,8 +1110,8 @@ static void riscv_cpu_init(Object *obj) * for all CPUs. Each accelerator will decide what to do when * users disable them. */ - RISCV_CPU(obj)->cfg.ext_zicntr = true; - RISCV_CPU(obj)->cfg.ext_zihpm = true; + RISCV_CPU(obj)->cfg.ext_zicntr = !mcc->def->bare; + RISCV_CPU(obj)->cfg.ext_zihpm = !mcc->def->bare; /* Default values for non-bool cpu properties */ cpu->cfg.pmu_mask = MAKE_64BIT_MASK(3, 16); @@ -1478,35 +1120,30 @@ static void riscv_cpu_init(Object *obj) cpu->cfg.cbom_blocksize = 64; cpu->cfg.cbop_blocksize = 64; cpu->cfg.cboz_blocksize = 64; + cpu->cfg.pmp_regions = 16; cpu->env.vext_ver = VEXT_VERSION_1_00_0; -} + cpu->cfg.max_satp_mode = -1; -static void riscv_bare_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - - /* - * Bare CPUs do not inherit the timer and performance - * counters from the parent class (see riscv_cpu_init() - * for info on why the parent enables them). - * - * Users have to explicitly enable these counters for - * bare CPUs. - */ - cpu->cfg.ext_zicntr = false; - cpu->cfg.ext_zihpm = false; + if (mcc->def->profile) { + mcc->def->profile->enabled = true; + } - /* Set to QEMU's first supported priv version */ - cpu->env.priv_ver = PRIV_VERSION_1_10_0; + env->misa_ext_mask = env->misa_ext = mcc->def->misa_ext; + riscv_cpu_cfg_merge(&cpu->cfg, &mcc->def->cfg); - /* - * Support all available satp_mode settings. The default - * value will be set to MBARE if the user doesn't set - * satp_mode manually (see set_satp_mode_default()). - */ + if (mcc->def->priv_spec != RISCV_PROFILE_ATTR_UNUSED) { + cpu->env.priv_ver = mcc->def->priv_spec; + } + if (mcc->def->vext_spec != RISCV_PROFILE_ATTR_UNUSED) { + cpu->env.vext_ver = mcc->def->vext_spec; + } #ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV64); + if (mcc->def->custom_csrs) { + riscv_register_custom_csrs(cpu, mcc->def->custom_csrs); + } #endif + + accel_cpu_instance_init(CPU(obj)); } typedef struct misa_ext_info { @@ -1541,7 +1178,7 @@ static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc) CPUClass *cc = CPU_CLASS(mcc); /* Validate that MISA_MXL is set properly. 
*/ - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { #ifdef TARGET_RISCV64 case MXL_RV64: case MXL_RV128: @@ -1649,6 +1286,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + MULTI_EXT_CFG_BOOL("svrsw60t59b", ext_svrsw60t59b, false), MULTI_EXT_CFG_BOOL("svvptc", ext_svvptc, true), MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true), @@ -1742,31 +1380,24 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { * 'Named features' is the name we give to extensions that we * don't want to expose to users. They are either immutable * (always enabled/disable) or they'll vary depending on - * the resulting CPU state. They have riscv,isa strings - * and priv_ver like regular extensions. + * the resulting CPU state. + * + * Some of them are always enabled depending on priv version + * of the CPU and are declared directly in isa_edata_arr[]. + * The ones listed here have special checks during finalize() + * time and require their own flags like regular extensions. + * See riscv_cpu_update_named_features() for more info. */ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true), MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true), MULTI_EXT_CFG_BOOL("sha", ext_sha, true), - MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), - - { }, -}; -/* Deprecated entries marked for future removal */ -const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { - MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true), - MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true), - MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), - MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), - MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), - MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), - MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), - MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), - MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), - MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), - MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), + /* + * 'ziccrse' has its own flag because the KVM driver + * wants to enable/disable it on its own accord. 
+ */ + MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), { }, }; @@ -1935,6 +1566,46 @@ static const PropertyInfo prop_pmp = { .set = prop_pmp_set, }; +static void prop_num_pmp_regions_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + uint8_t value; + + visit_type_uint8(v, name, &value, errp); + + if (cpu->cfg.pmp_regions != value && riscv_cpu_is_vendor(obj)) { + cpu_set_prop_err(cpu, name, errp); + return; + } + + if (cpu->env.priv_ver < PRIV_VERSION_1_12_0 && value > OLD_MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } else if (value > MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } + + cpu_option_add_user_setting(name, value); + cpu->cfg.pmp_regions = value; +} + +static void prop_num_pmp_regions_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t value = RISCV_CPU(obj)->cfg.pmp_regions; + + visit_type_uint8(v, name, &value, errp); +} + +static const PropertyInfo prop_num_pmp_regions = { + .type = "uint8", + .description = "num-pmp-regions", + .get = prop_num_pmp_regions_get, + .set = prop_num_pmp_regions_set, +}; + static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; @@ -2934,6 +2605,7 @@ static const Property riscv_cpu_properties[] = { {.name = "mmu", .info = &prop_mmu}, {.name = "pmp", .info = &prop_pmp}, + {.name = "num-pmp-regions", .info = &prop_num_pmp_regions}, {.name = "priv_spec", .info = &prop_priv_spec}, {.name = "vext_spec", .info = &prop_vext_spec}, @@ -2962,6 +2634,7 @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false), DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false), + DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, false), /* * write_misa() is marked as experimental for now so mark @@ -2970,36 +2643,6 @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("x-misa-w", RISCVCPU, cfg.misa_w, false), }; -#if defined(TARGET_RISCV64) -static void rva22u64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA22U64.enabled = true; -} - -static void rva22s64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA22S64.enabled = true; -} - -static void rva23u64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA23U64.enabled = true; -} - -static void rva23s64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA23S64.enabled = true; -} -#endif - static const gchar *riscv_gdb_arch_name(CPUState *cs) { RISCVCPU *cpu = RISCV_CPU(cs); @@ -3035,7 +2678,7 @@ static const struct SysemuCPUOps riscv_sysemu_ops = { }; #endif -static void riscv_cpu_common_class_init(ObjectClass *c, void *data) +static void riscv_cpu_common_class_init(ObjectClass *c, const void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); @@ -3049,7 +2692,6 @@ static void riscv_cpu_common_class_init(ObjectClass *c, void *data) &mcc->parent_phases); cc->class_by_name = riscv_cpu_class_by_name; - cc->mmu_index = riscv_cpu_mmu_index; cc->dump_state = riscv_cpu_dump_state; cc->set_pc = riscv_cpu_set_pc; cc->get_pc = riscv_cpu_get_pc; @@ -3062,16 +2704,94 @@ static void riscv_cpu_common_class_init(ObjectClass *c, void *data) cc->get_arch_id = riscv_get_arch_id; #endif cc->gdb_arch_name = 
riscv_gdb_arch_name; +#ifdef CONFIG_TCG + cc->tcg_ops = &riscv_tcg_ops; +#endif /* CONFIG_TCG */ device_class_set_props(dc, riscv_cpu_properties); } -static void riscv_cpu_class_init(ObjectClass *c, void *data) +static bool profile_extends(RISCVCPUProfile *trial, RISCVCPUProfile *parent) +{ + RISCVCPUProfile *curr; + if (!parent) { + return true; + } + + curr = trial; + while (curr) { + if (curr == parent) { + return true; + } + curr = curr->u_parent; + } + + curr = trial; + while (curr) { + if (curr == parent) { + return true; + } + curr = curr->s_parent; + } + + return false; +} + +static void riscv_cpu_class_base_init(ObjectClass *c, const void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); + RISCVCPUClass *pcc = RISCV_CPU_CLASS(object_class_get_parent(c)); + + if (pcc->def) { + mcc->def = g_memdup2(pcc->def, sizeof(*pcc->def)); + } else { + mcc->def = g_new0(RISCVCPUDef, 1); + } + + if (data) { + const RISCVCPUDef *def = data; + mcc->def->bare |= def->bare; + if (def->profile) { + assert(profile_extends(def->profile, mcc->def->profile)); + assert(mcc->def->bare); + mcc->def->profile = def->profile; + } + if (def->misa_mxl_max) { + assert(def->misa_mxl_max <= MXL_RV128); + mcc->def->misa_mxl_max = def->misa_mxl_max; + +#ifndef CONFIG_USER_ONLY + /* + * Hack to simplify CPU class hierarchies that include both 32- and + * 64-bit models: reduce SV39/48/57/64 to SV32 for 32-bit models. + */ + if (mcc->def->misa_mxl_max == MXL_RV32 && + !valid_vm_1_10_32[mcc->def->cfg.max_satp_mode]) { + mcc->def->cfg.max_satp_mode = VM_1_10_SV32; + } +#endif + } + if (def->priv_spec != RISCV_PROFILE_ATTR_UNUSED) { + assert(def->priv_spec <= PRIV_VERSION_LATEST); + mcc->def->priv_spec = def->priv_spec; + } + if (def->vext_spec != RISCV_PROFILE_ATTR_UNUSED) { + assert(def->vext_spec != 0); + mcc->def->vext_spec = def->vext_spec; + } + mcc->def->misa_ext |= def->misa_ext; + + riscv_cpu_cfg_merge(&mcc->def->cfg, &def->cfg); - mcc->misa_mxl_max = (RISCVMXL)GPOINTER_TO_UINT(data); - riscv_cpu_validate_misa_mxl(mcc); + if (def->custom_csrs) { + assert(!mcc->def->custom_csrs); + mcc->def->custom_csrs = def->custom_csrs; + } + } + + if (!object_class_is_abstract(c)) { + riscv_cpu_validate_misa_mxl(mcc); + } } static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, @@ -3166,41 +2886,34 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename) } #endif -#define DEFINE_DYNAMIC_CPU(type_name, misa_mxl_max, initfn) \ +#define DEFINE_ABSTRACT_RISCV_CPU(type_name, parent_type_name, ...) \ { \ .name = (type_name), \ - .parent = TYPE_RISCV_DYNAMIC_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ + .parent = (parent_type_name), \ + .abstract = true, \ + .class_data = &(const RISCVCPUDef) { \ + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .cfg.max_satp_mode = -1, \ + __VA_ARGS__ \ + }, \ } -#define DEFINE_VENDOR_CPU(type_name, misa_mxl_max, initfn) \ +#define DEFINE_RISCV_CPU(type_name, parent_type_name, ...) 
\ { \ .name = (type_name), \ - .parent = TYPE_RISCV_VENDOR_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ + .parent = (parent_type_name), \ + .class_data = &(const RISCVCPUDef) { \ + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .cfg.max_satp_mode = -1, \ + __VA_ARGS__ \ + }, \ } -#define DEFINE_BARE_CPU(type_name, misa_mxl_max, initfn) \ - { \ - .name = (type_name), \ - .parent = TYPE_RISCV_BARE_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ - } - -#define DEFINE_PROFILE_CPU(type_name, misa_mxl_max, initfn) \ - { \ - .name = (type_name), \ - .parent = TYPE_RISCV_BARE_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ - } +#define DEFINE_PROFILE_CPU(type_name, parent_type_name, profile_) \ + DEFINE_RISCV_CPU(type_name, parent_type_name, \ + .profile = &(profile_)) static const TypeInfo riscv_cpu_type_infos[] = { { @@ -3209,67 +2922,370 @@ static const TypeInfo riscv_cpu_type_infos[] = { .instance_size = sizeof(RISCVCPU), .instance_align = __alignof(RISCVCPU), .instance_init = riscv_cpu_init, - .instance_post_init = riscv_cpu_post_init, .abstract = true, .class_size = sizeof(RISCVCPUClass), .class_init = riscv_cpu_common_class_init, + .class_base_init = riscv_cpu_class_base_init, }, - { - .name = TYPE_RISCV_DYNAMIC_CPU, - .parent = TYPE_RISCV_CPU, - .abstract = true, - }, - { - .name = TYPE_RISCV_VENDOR_CPU, - .parent = TYPE_RISCV_CPU, - .abstract = true, - }, - { - .name = TYPE_RISCV_BARE_CPU, - .parent = TYPE_RISCV_CPU, - .instance_init = riscv_bare_cpu_init, - .abstract = true, - }, + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_DYNAMIC_CPU, TYPE_RISCV_CPU, + .cfg.mmu = true, + .cfg.pmp = true, + .priv_spec = PRIV_VERSION_LATEST, + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_VENDOR_CPU, TYPE_RISCV_CPU), + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_BARE_CPU, TYPE_RISCV_CPU, + /* + * Bare CPUs do not inherit the timer and performance + * counters from the parent class (see riscv_cpu_init() + * for info on why the parent enables them). + * + * Users have to explicitly enable these counters for + * bare CPUs. + */ + .bare = true, + + /* Set to QEMU's first supported priv version */ + .priv_spec = PRIV_VERSION_1_10_0, + + /* + * Support all available satp_mode settings. By default + * only MBARE will be available if the user doesn't enable + * a mode manually (see riscv_cpu_satp_mode_finalize()). 
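With the per-model instance_init callbacks gone, a CPU model in this scheme is pure data: a RISCVCPUDef in class_data, merged down the class hierarchy by riscv_cpu_class_base_init(). A minimal sketch of what a new model would look like; the type name and feature mix are invented for illustration:

```c
/*
 * Hypothetical entry for riscv_cpu_type_infos[]: an RV64 IMC bare CPU
 * with 8 PMP regions. Anything not set here is inherited from
 * TYPE_RISCV_BARE_CPU's own class_data (bare = true,
 * priv_spec = PRIV_VERSION_1_10_0, and so on).
 */
DEFINE_RISCV_CPU("x-example-rv64", TYPE_RISCV_BARE_CPU,
    .misa_mxl_max = MXL_RV64,
    .misa_ext = RVI | RVM | RVC,
    .cfg.pmp = true,
    .cfg.pmp_regions = 8
),
```

Because class_base_init runs at every level of the hierarchy, an entry only has to state what differs from its parent, which is what lets the SiFive, bare and profile models below collapse into short declarations.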
+ */ +#ifdef TARGET_RISCV32 + .cfg.max_satp_mode = VM_1_10_SV32, +#else + .cfg.max_satp_mode = VM_1_10_SV57, +#endif + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_MAX, TYPE_RISCV_DYNAMIC_CPU, #if defined(TARGET_RISCV32) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV32, riscv_max_cpu_init), + .misa_mxl_max = MXL_RV32, + .cfg.max_satp_mode = VM_1_10_SV32, #elif defined(TARGET_RISCV64) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV64, riscv_max_cpu_init), + .misa_mxl_max = MXL_RV64, + .cfg.max_satp_mode = VM_1_10_SV57, #endif + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E, TYPE_RISCV_VENDOR_CPU, + .misa_ext = RVI | RVM | RVA | RVC | RVU, + .priv_spec = PRIV_VERSION_1_10_0, + .cfg.max_satp_mode = VM_1_10_MBARE, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.pmp_regions = 8 + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U, TYPE_RISCV_VENDOR_CPU, + .misa_ext = RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU, + .priv_spec = PRIV_VERSION_1_10_0, + + .cfg.max_satp_mode = VM_1_10_SV39, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.pmp_regions = 8 + ), #if defined(TARGET_RISCV32) || \ (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32, MXL_RV32, rv32_base_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_IBEX, MXL_RV32, rv32_ibex_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E31, MXL_RV32, rv32_sifive_e_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E34, MXL_RV32, rv32_imafcu_nommu_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U34, MXL_RV32, rv32_sifive_u_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32I, MXL_RV32, rv32i_bare_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32E, MXL_RV32, rv32e_bare_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE32, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV32, + .misa_mxl_max = MXL_RV32, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_IBEX, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVI | RVM | RVC | RVU, + .priv_spec = PRIV_VERSION_1_12_0, + .cfg.max_satp_mode = VM_1_10_MBARE, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.ext_smepmp = true, + + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbc = true, + .cfg.ext_zbs = true + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E31, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV32 + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E34, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVF, /* IMAFCU */ + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U34, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV32, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV32I, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVI + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV32E, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVE + ), #endif #if (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX32, MXL_RV32, riscv_max_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_MAX32, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV32, + .misa_mxl_max = MXL_RV32, + ), #endif #if defined(TARGET_RISCV64) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE64, MXL_RV64, rv64_base_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E51, MXL_RV64, rv64_sifive_e_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64, rv64_sifive_u_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C, MXL_RV64, rv64_sifive_u_cpu_init), - 
DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64, rv64_thead_c906_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64, rv64_tt_ascalon_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1, MXL_RV64, rv64_veyron_v1_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_XIANGSHAN_NANHU, - MXL_RV64, rv64_xiangshan_nanhu_cpu_init), -#ifdef CONFIG_TCG - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128, MXL_RV128, rv128_base_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE64, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV57, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E51, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV64 + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U54, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SHAKTI_C, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_THEAD_C906, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU, + .priv_spec = PRIV_VERSION_1_11_0, + + .cfg.ext_zfa = true, + .cfg.ext_zfh = true, + .cfg.mmu = true, + .cfg.ext_xtheadba = true, + .cfg.ext_xtheadbb = true, + .cfg.ext_xtheadbs = true, + .cfg.ext_xtheadcmo = true, + .cfg.ext_xtheadcondmov = true, + .cfg.ext_xtheadfmemidx = true, + .cfg.ext_xtheadmac = true, + .cfg.ext_xtheadmemidx = true, + .cfg.ext_xtheadmempair = true, + .cfg.ext_xtheadsync = true, + .cfg.pmp = true, + + .cfg.mvendorid = THEAD_VENDOR_ID, + + .cfg.max_satp_mode = VM_1_10_SV39, +#ifndef CONFIG_USER_ONLY + .custom_csrs = th_csr_list, +#endif + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_TT_ASCALON, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + .vext_spec = VEXT_VERSION_1_00_0, + + /* ISA extensions */ + .cfg.mmu = true, + .cfg.vlenb = 256 >> 3, + .cfg.elen = 64, + .cfg.rvv_ma_all_1s = true, + .cfg.rvv_ta_all_1s = true, + .cfg.misa_w = true, + .cfg.pmp = true, + .cfg.cbom_blocksize = 64, + .cfg.cbop_blocksize = 64, + .cfg.cboz_blocksize = 64, + .cfg.ext_zic64b = true, + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_zicntr = true, + .cfg.ext_zicond = true, + .cfg.ext_zicsr = true, + .cfg.ext_zifencei = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zihpm = true, + .cfg.ext_zimop = true, + .cfg.ext_zawrs = true, + .cfg.ext_zfa = true, + .cfg.ext_zfbfmin = true, + .cfg.ext_zfh = true, + .cfg.ext_zfhmin = true, + .cfg.ext_zcb = true, + .cfg.ext_zcmop = true, + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbs = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvbc = true, + .cfg.ext_zvfbfmin = true, + .cfg.ext_zvfbfwma = true, + .cfg.ext_zvfh = true, + .cfg.ext_zvfhmin = true, + .cfg.ext_zvkng = true, + .cfg.ext_smaia = true, + .cfg.ext_smstateen = true, + .cfg.ext_ssaia = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_sstc = true, + .cfg.ext_svade = true, + .cfg.ext_svinval = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + + .cfg.max_satp_mode = VM_1_10_SV57, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_VEYRON_V1, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU | RVH, + .priv_spec = PRIV_VERSION_1_12_0, + + /* ISA extensions */ + .cfg.mmu = true, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.ext_zicbom = true, + .cfg.cbom_blocksize = 64, + .cfg.cboz_blocksize = 64, + .cfg.ext_zicboz = true, + .cfg.ext_smaia = 
true, + .cfg.ext_ssaia = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_sstc = true, + .cfg.ext_svinval = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_smstateen = true, + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbc = true, + .cfg.ext_zbs = true, + .cfg.ext_XVentanaCondOps = true, + + .cfg.mvendorid = VEYRON_V1_MVENDORID, + .cfg.marchid = VEYRON_V1_MARCHID, + .cfg.mimpid = VEYRON_V1_MIMPID, + + .cfg.max_satp_mode = VM_1_10_SV48, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_NANHU, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU, + .priv_spec = PRIV_VERSION_1_12_0, + + /* ISA extensions */ + .cfg.ext_zbc = true, + .cfg.ext_zbkb = true, + .cfg.ext_zbkc = true, + .cfg.ext_zbkx = true, + .cfg.ext_zknd = true, + .cfg.ext_zkne = true, + .cfg.ext_zknh = true, + .cfg.ext_zksed = true, + .cfg.ext_zksh = true, + .cfg.ext_svinval = true, + + .cfg.mmu = true, + .cfg.pmp = true, + + .cfg.max_satp_mode = VM_1_10_SV39, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_KMH, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + /* + * The RISC-V Instruction Set Manual: Volume I + * Unprivileged Architecture + */ + .cfg.ext_zicntr = true, + .cfg.ext_zihpm = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zimop = true, + .cfg.ext_zcmop = true, + .cfg.ext_zicond = true, + .cfg.ext_zawrs = true, + .cfg.ext_zacas = true, + .cfg.ext_zfh = true, + .cfg.ext_zfa = true, + .cfg.ext_zcb = true, + .cfg.ext_zbc = true, + .cfg.ext_zvfh = true, + .cfg.ext_zkn = true, + .cfg.ext_zks = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvkt = true, + /* + * The RISC-V Instruction Set Manual: Volume II + * Privileged Architecture + */ + .cfg.ext_smstateen = true, + .cfg.ext_smcsrind = true, + .cfg.ext_sscsrind = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_svinval = true, + .cfg.ext_sstc = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_ssdbltrp = true, + .cfg.ext_ssnpm = true, + .cfg.ext_smnpm = true, + .cfg.ext_smmpm = true, + .cfg.ext_sspm = true, + .cfg.ext_supm = true, + /* The RISC-V Advanced Interrupt Architecture */ + .cfg.ext_smaia = true, + .cfg.ext_ssaia = true, + /* RVA23 Profiles */ + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_svade = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.max_satp_mode = VM_1_10_SV48, + ), + +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE128, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV57, + .misa_mxl_max = MXL_RV128, + ), #endif /* CONFIG_TCG */ - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64I, MXL_RV64, rv64i_bare_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64E, MXL_RV64, rv64e_bare_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64, MXL_RV64, rva22u64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64, MXL_RV64, rva22s64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, MXL_RV64, rva23u64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, MXL_RV64, rva23s64_profile_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV64I, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVI + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV64E, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVE + ), + + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64, TYPE_RISCV_CPU_RV64I, RVA22U64), + 
DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64, TYPE_RISCV_CPU_RV64I, RVA22S64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, TYPE_RISCV_CPU_RV64I, RVA23U64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, TYPE_RISCV_CPU_RV64I, RVA23S64), #endif /* TARGET_RISCV64 */ }; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 51e49e0..2c22664 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -23,7 +23,9 @@ #include "hw/core/cpu.h" #include "hw/registerfields.h" #include "hw/qdev-properties.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "exec/gdbstub.h" #include "qemu/cpu-float.h" #include "qom/object.h" @@ -44,12 +46,6 @@ typedef struct CPUArchState CPURISCVState; #endif /* - * RISC-V-specific extra insn start words: - * 1: Original instruction opcode - * 2: more information about instruction - */ -#define TARGET_INSN_START_EXTRA_WORDS 2 -/* * b0: Whether a instruction always raise a store AMO or not. */ #define RISCV_UW2_ALWAYS_STORE_AMO 1 @@ -79,13 +75,29 @@ const char *riscv_get_misa_ext_name(uint32_t bit); const char *riscv_get_misa_ext_description(uint32_t bit); #define CPU_CFG_OFFSET(_prop) offsetof(struct RISCVCPUConfig, _prop) +#define ENV_CSR_OFFSET(_csr) offsetof(CPURISCVState, _csr) typedef struct riscv_cpu_profile { struct riscv_cpu_profile *u_parent; struct riscv_cpu_profile *s_parent; const char *name; uint32_t misa_ext; + /* + * The profile is enabled/disabled via command line or + * via cpu_init(). Enabling a profile will add all its + * mandatory extensions in the CPU during init(). + */ bool enabled; + /* + * The profile is present in the CPU, i.e. the current set of + * CPU extensions complies with it. A profile can be enabled + * and not present (e.g. the user disabled a mandatory extension) + * and the other way around (e.g. all mandatory extensions are + * present in a non-profile CPU). + * + * QMP uses this flag. + */ + bool present; bool user_set; int priv_spec; int satp_mode; @@ -162,7 +174,8 @@ extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; #define MMU_USER_IDX 3 -#define MAX_RISCV_PMPS (16) +#define MAX_RISCV_PMPS (64) +#define OLD_MAX_RISCV_PMPS (16) #if !defined(CONFIG_USER_ONLY) #include "pmp.h" @@ -503,6 +516,19 @@ struct CPUArchState { }; /* + * map is a 16-bit bitmap: the most significant set bit in map is the maximum + * satp mode that is supported. It may be chosen by the user and must respect + * what qemu implements (valid_1_10_32/64) and what the hw is capable of + * (supported bitmap below). + * + * init is a 16-bit bitmap used to make sure the user selected a correct + * configuration as per the specification. + */ +typedef struct { + uint16_t map, init; +} RISCVSATPModes; + +/* * RISCVCPU: * @env: #CPURISCVState * @@ -518,6 +544,7 @@ struct ArchCPU { /* Configuration Settings */ RISCVCPUConfig cfg; + RISCVSATPModes satp_modes; QEMUTimer *pmu_timer; /* A bitmask of Available programmable counters */ @@ -527,6 +554,19 @@ struct ArchCPU { const GPtrArray *decoders; }; +typedef struct RISCVCSR RISCVCSR; + +typedef struct RISCVCPUDef { + RISCVMXL misa_mxl_max; /* max mxl for this cpu */ + RISCVCPUProfile *profile; + uint32_t misa_ext; + int priv_spec; + int32_t vext_spec; + RISCVCPUConfig cfg; + bool bare; + const RISCVCSR *custom_csrs; +} RISCVCPUDef; + /** * RISCVCPUClass: * @parent_realize: The parent class' realize handler. 
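The map/init bitmaps described above rely on a most-significant-set-bit convention. A sketch of how the maximum mode falls out of the 16-bit map, mirroring the satp_mode_max_from_map() helper whose declaration this diff drops from cpu.h (QEMU's clz32() is replaced by the GCC/Clang builtin so the snippet stands alone):

```c
#include <stdint.h>

/* Highest satp mode in a non-empty map: index of its top set bit. */
static inline uint8_t satp_mode_max_from_map(uint32_t map)
{
    /* e.g. a map with SV57, SV48 and SV39 set reports SV57 */
    return 31 - __builtin_clz(map);
}
```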
@@ -539,7 +579,7 @@ struct RISCVCPUClass { DeviceRealize parent_realize; ResettablePhases parent_phases; - RISCVMXL misa_mxl_max; /* max mxl for this cpu */ + RISCVCPUDef *def; }; static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext) @@ -552,6 +592,7 @@ static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext) extern const char * const riscv_int_regnames[]; extern const char * const riscv_int_regnamesh[]; extern const char * const riscv_fpr_regnames[]; +extern const char * const riscv_rvv_regnames[]; const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, @@ -632,8 +673,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env, target_ulong riscv_cpu_get_fflags(CPURISCVState *env); void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); -#include "exec/cpu-all.h" - FIELD(TB_FLAGS, MEM_IDX, 0, 3) FIELD(TB_FLAGS, FS, 3, 2) /* Vector flags */ @@ -808,9 +847,6 @@ static inline uint32_t vext_get_vlmax(uint32_t vlenb, uint32_t vsew, return vlen >> (vsew + 3 - lmul); } -void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags); - bool riscv_cpu_is_32bit(RISCVCPU *cpu); bool riscv_cpu_virt_mem_enabled(CPURISCVState *env); @@ -822,8 +858,8 @@ RISCVException riscv_csrr(CPURISCVState *env, int csrno, target_ulong *ret_value); RISCVException riscv_csrrw(CPURISCVState *env, int csrno, - target_ulong *ret_value, - target_ulong new_value, target_ulong write_mask); + target_ulong *ret_value, target_ulong new_value, + target_ulong write_mask, uintptr_t ra); RISCVException riscv_csrrw_debug(CPURISCVState *env, int csrno, target_ulong *ret_value, target_ulong new_value, @@ -832,13 +868,13 @@ RISCVException riscv_csrrw_debug(CPURISCVState *env, int csrno, static inline void riscv_csr_write(CPURISCVState *env, int csrno, target_ulong val) { - riscv_csrrw(env, csrno, NULL, val, MAKE_64BIT_MASK(0, TARGET_LONG_BITS)); + riscv_csrrw(env, csrno, NULL, val, MAKE_64BIT_MASK(0, TARGET_LONG_BITS), 0); } static inline target_ulong riscv_csr_read(CPURISCVState *env, int csrno) { target_ulong val = 0; - riscv_csrrw(env, csrno, &val, 0, 0); + riscv_csrr(env, csrno, &val); return val; } @@ -847,7 +883,8 @@ typedef RISCVException (*riscv_csr_predicate_fn)(CPURISCVState *env, typedef RISCVException (*riscv_csr_read_fn)(CPURISCVState *env, int csrno, target_ulong *ret_value); typedef RISCVException (*riscv_csr_write_fn)(CPURISCVState *env, int csrno, - target_ulong new_value); + target_ulong new_value, + uintptr_t ra); typedef RISCVException (*riscv_csr_op_fn)(CPURISCVState *env, int csrno, target_ulong *ret_value, target_ulong new_value, @@ -856,8 +893,8 @@ typedef RISCVException (*riscv_csr_op_fn)(CPURISCVState *env, int csrno, RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno, Int128 *ret_value); RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno, - Int128 *ret_value, - Int128 new_value, Int128 write_mask); + Int128 *ret_value, Int128 new_value, + Int128 write_mask, uintptr_t ra); typedef RISCVException (*riscv_csr_read128_fn)(CPURISCVState *env, int csrno, Int128 *ret_value); @@ -876,6 +913,12 @@ typedef struct { uint32_t min_priv_ver; } riscv_csr_operations; +struct RISCVCSR { + int csrno; + bool (*insertion_test)(RISCVCPU *cpu); + riscv_csr_operations csr_ops; +}; + /* CSR function table constants */ enum { CSR_TABLE_SIZE = 0x1000 @@ -910,7 +953,6 @@ extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; extern const RISCVCPUMultiExtConfig 
riscv_cpu_vendor_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_named_features[]; -extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; typedef struct isa_ext_data { const char *name; @@ -930,18 +972,17 @@ extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE]; extern const bool valid_vm_1_10_32[], valid_vm_1_10_64[]; void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops); -void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); +void riscv_set_csr_ops(int csrno, const riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); target_ulong riscv_new_csr_seed(target_ulong new_value, target_ulong write_mask); -uint8_t satp_mode_max_from_map(uint32_t map); const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); -/* Implemented in th_csr.c */ -void th_register_custom_csrs(RISCVCPU *cpu); +/* In th_csr.c */ +extern const RISCVCSR th_csr_list[]; const char *priv_spec_to_str(int priv_version); #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index a30317c..b62dd82 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -372,6 +372,18 @@ #define CSR_PMPCFG1 0x3a1 #define CSR_PMPCFG2 0x3a2 #define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af #define CSR_PMPADDR0 0x3b0 #define CSR_PMPADDR1 0x3b1 #define CSR_PMPADDR2 0x3b2 @@ -388,6 +400,54 @@ #define CSR_PMPADDR13 0x3bd #define CSR_PMPADDR14 0x3be #define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef /* RNMI */ #define CSR_MNSCRATCH 0x740 @@ -675,7 +735,8 @@ typedef enum { #define PTE_SOFT 0x300 /* Reserved for Software */ #define PTE_PBMT 0x6000000000000000ULL /* Page-based memory types */ #define PTE_N 0x8000000000000000ULL /* NAPOT translation */ -#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */ +#define 
PTE_RESERVED(svrsw60t59b) \ + (svrsw60t59b ? 0x07C0000000000000ULL : 0x1FC0000000000000ULL) /* Reserved bits */ #define PTE_ATTR (PTE_N | PTE_PBMT) /* All attributes bits */ /* Page table PPN shift amount */ diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 8a84348..aa28dc8 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -21,184 +21,10 @@ #ifndef RISCV_CPU_CFG_H #define RISCV_CPU_CFG_H -/* - * map is a 16-bit bitmap: the most significant set bit in map is the maximum - * satp mode that is supported. It may be chosen by the user and must respect - * what qemu implements (valid_1_10_32/64) and what the hw is capable of - * (supported bitmap below). - * - * init is a 16-bit bitmap used to make sure the user selected a correct - * configuration as per the specification. - * - * supported is a 16-bit bitmap used to reflect the hw capabilities. - */ -typedef struct { - uint16_t map, init, supported; -} RISCVSATPMap; - struct RISCVCPUConfig { - bool ext_zba; - bool ext_zbb; - bool ext_zbc; - bool ext_zbkb; - bool ext_zbkc; - bool ext_zbkx; - bool ext_zbs; - bool ext_zca; - bool ext_zcb; - bool ext_zcd; - bool ext_zce; - bool ext_zcf; - bool ext_zcmp; - bool ext_zcmt; - bool ext_zk; - bool ext_zkn; - bool ext_zknd; - bool ext_zkne; - bool ext_zknh; - bool ext_zkr; - bool ext_zks; - bool ext_zksed; - bool ext_zksh; - bool ext_zkt; - bool ext_zifencei; - bool ext_zicntr; - bool ext_zicsr; - bool ext_zicbom; - bool ext_zicbop; - bool ext_zicboz; - bool ext_zicfilp; - bool ext_zicfiss; - bool ext_zicond; - bool ext_zihintntl; - bool ext_zihintpause; - bool ext_zihpm; - bool ext_zimop; - bool ext_zcmop; - bool ext_ztso; - bool ext_smstateen; - bool ext_sstc; - bool ext_smcdeleg; - bool ext_ssccfg; - bool ext_smcntrpmf; - bool ext_smcsrind; - bool ext_sscsrind; - bool ext_ssdbltrp; - bool ext_smdbltrp; - bool ext_svadu; - bool ext_svinval; - bool ext_svnapot; - bool ext_svpbmt; - bool ext_svvptc; - bool ext_svukte; - bool ext_zdinx; - bool ext_zaamo; - bool ext_zacas; - bool ext_zama16b; - bool ext_zabha; - bool ext_zalrsc; - bool ext_zawrs; - bool ext_zfa; - bool ext_zfbfmin; - bool ext_zfh; - bool ext_zfhmin; - bool ext_zfinx; - bool ext_zhinx; - bool ext_zhinxmin; - bool ext_zve32f; - bool ext_zve32x; - bool ext_zve64f; - bool ext_zve64d; - bool ext_zve64x; - bool ext_zvbb; - bool ext_zvbc; - bool ext_zvkb; - bool ext_zvkg; - bool ext_zvkned; - bool ext_zvknha; - bool ext_zvknhb; - bool ext_zvksed; - bool ext_zvksh; - bool ext_zvkt; - bool ext_zvkn; - bool ext_zvknc; - bool ext_zvkng; - bool ext_zvks; - bool ext_zvksc; - bool ext_zvksg; - bool ext_zmmul; - bool ext_zvfbfmin; - bool ext_zvfbfwma; - bool ext_zvfh; - bool ext_zvfhmin; - bool ext_smaia; - bool ext_ssaia; - bool ext_smctr; - bool ext_ssctr; - bool ext_sscofpmf; - bool ext_smepmp; - bool ext_smrnmi; - bool ext_ssnpm; - bool ext_smnpm; - bool ext_smmpm; - bool ext_sspm; - bool ext_supm; - bool rvv_ta_all_1s; - bool rvv_ma_all_1s; - bool rvv_vl_half_avl; - - uint32_t mvendorid; - uint64_t marchid; - uint64_t mimpid; - - /* Named features */ - bool ext_svade; - bool ext_zic64b; - bool ext_ssstateen; - bool ext_sha; - - /* - * Always 'true' booleans for named features - * TCG always implement/can't be user disabled, - * based on spec version. 
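The two constants in the new PTE_RESERVED() above differ exactly in PTE bits 60:59, the pair that Svrsw60t59b hands over to software. A standalone check of that claim (example code, not part of the patch):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t reserved     = 0x1FC0000000000000ULL; /* bits 60..54 */
    uint64_t reserved_rsw = 0x07C0000000000000ULL; /* bits 58..54 */

    /* XOR keeps only the bits freed for software use: 60 and 59. */
    printf("freed: 0x%016llx\n",
           (unsigned long long)(reserved ^ reserved_rsw));
    /* prints: freed: 0x1800000000000000 */
    return 0;
}
```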
- */ - bool has_priv_1_13; - bool has_priv_1_12; - bool has_priv_1_11; - - /* Always enabled for TCG if has_priv_1_11 */ - bool ext_ziccrse; - - /* Vendor-specific custom extensions */ - bool ext_xtheadba; - bool ext_xtheadbb; - bool ext_xtheadbs; - bool ext_xtheadcmo; - bool ext_xtheadcondmov; - bool ext_xtheadfmemidx; - bool ext_xtheadfmv; - bool ext_xtheadmac; - bool ext_xtheadmemidx; - bool ext_xtheadmempair; - bool ext_xtheadsync; - bool ext_XVentanaCondOps; - - uint32_t pmu_mask; - uint16_t vlenb; - uint16_t elen; - uint16_t cbom_blocksize; - uint16_t cbop_blocksize; - uint16_t cboz_blocksize; - bool mmu; - bool pmp; - bool debug; - bool misa_w; - - bool short_isa_string; - -#ifndef CONFIG_USER_ONLY - RISCVSATPMap satp_mode; -#endif +#define BOOL_FIELD(x) bool x; +#define TYPED_FIELD(type, x, default) type x; +#include "cpu_cfg_fields.h.inc" }; typedef struct RISCVCPUConfig RISCVCPUConfig; diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc new file mode 100644 index 0000000..e2d116f --- /dev/null +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -0,0 +1,173 @@ +/* + * Required definitions before including this file: + * + * #define BOOL_FIELD(x) + * #define TYPED_FIELD(type, x, default) + */ + +BOOL_FIELD(ext_zba) +BOOL_FIELD(ext_zbb) +BOOL_FIELD(ext_zbc) +BOOL_FIELD(ext_zbkb) +BOOL_FIELD(ext_zbkc) +BOOL_FIELD(ext_zbkx) +BOOL_FIELD(ext_zbs) +BOOL_FIELD(ext_zca) +BOOL_FIELD(ext_zcb) +BOOL_FIELD(ext_zcd) +BOOL_FIELD(ext_zce) +BOOL_FIELD(ext_zcf) +BOOL_FIELD(ext_zcmp) +BOOL_FIELD(ext_zcmt) +BOOL_FIELD(ext_zk) +BOOL_FIELD(ext_zkn) +BOOL_FIELD(ext_zknd) +BOOL_FIELD(ext_zkne) +BOOL_FIELD(ext_zknh) +BOOL_FIELD(ext_zkr) +BOOL_FIELD(ext_zks) +BOOL_FIELD(ext_zksed) +BOOL_FIELD(ext_zksh) +BOOL_FIELD(ext_zkt) +BOOL_FIELD(ext_zifencei) +BOOL_FIELD(ext_zicntr) +BOOL_FIELD(ext_zicsr) +BOOL_FIELD(ext_zicbom) +BOOL_FIELD(ext_zicbop) +BOOL_FIELD(ext_zicboz) +BOOL_FIELD(ext_zicfilp) +BOOL_FIELD(ext_zicfiss) +BOOL_FIELD(ext_zicond) +BOOL_FIELD(ext_zihintntl) +BOOL_FIELD(ext_zihintpause) +BOOL_FIELD(ext_zihpm) +BOOL_FIELD(ext_zimop) +BOOL_FIELD(ext_zcmop) +BOOL_FIELD(ext_ztso) +BOOL_FIELD(ext_smstateen) +BOOL_FIELD(ext_sstc) +BOOL_FIELD(ext_smcdeleg) +BOOL_FIELD(ext_ssccfg) +BOOL_FIELD(ext_smcntrpmf) +BOOL_FIELD(ext_smcsrind) +BOOL_FIELD(ext_sscsrind) +BOOL_FIELD(ext_ssdbltrp) +BOOL_FIELD(ext_smdbltrp) +BOOL_FIELD(ext_svadu) +BOOL_FIELD(ext_svinval) +BOOL_FIELD(ext_svnapot) +BOOL_FIELD(ext_svpbmt) +BOOL_FIELD(ext_svrsw60t59b) +BOOL_FIELD(ext_svvptc) +BOOL_FIELD(ext_svukte) +BOOL_FIELD(ext_zdinx) +BOOL_FIELD(ext_zaamo) +BOOL_FIELD(ext_zacas) +BOOL_FIELD(ext_zama16b) +BOOL_FIELD(ext_zabha) +BOOL_FIELD(ext_zalrsc) +BOOL_FIELD(ext_zawrs) +BOOL_FIELD(ext_zfa) +BOOL_FIELD(ext_zfbfmin) +BOOL_FIELD(ext_zfh) +BOOL_FIELD(ext_zfhmin) +BOOL_FIELD(ext_zfinx) +BOOL_FIELD(ext_zhinx) +BOOL_FIELD(ext_zhinxmin) +BOOL_FIELD(ext_zve32f) +BOOL_FIELD(ext_zve32x) +BOOL_FIELD(ext_zve64f) +BOOL_FIELD(ext_zve64d) +BOOL_FIELD(ext_zve64x) +BOOL_FIELD(ext_zvbb) +BOOL_FIELD(ext_zvbc) +BOOL_FIELD(ext_zvkb) +BOOL_FIELD(ext_zvkg) +BOOL_FIELD(ext_zvkned) +BOOL_FIELD(ext_zvknha) +BOOL_FIELD(ext_zvknhb) +BOOL_FIELD(ext_zvksed) +BOOL_FIELD(ext_zvksh) +BOOL_FIELD(ext_zvkt) +BOOL_FIELD(ext_zvkn) +BOOL_FIELD(ext_zvknc) +BOOL_FIELD(ext_zvkng) +BOOL_FIELD(ext_zvks) +BOOL_FIELD(ext_zvksc) +BOOL_FIELD(ext_zvksg) +BOOL_FIELD(ext_zmmul) +BOOL_FIELD(ext_zvfbfmin) +BOOL_FIELD(ext_zvfbfwma) +BOOL_FIELD(ext_zvfh) +BOOL_FIELD(ext_zvfhmin) +BOOL_FIELD(ext_smaia) +BOOL_FIELD(ext_ssaia) +BOOL_FIELD(ext_smctr) +BOOL_FIELD(ext_ssctr) 
+BOOL_FIELD(ext_sscofpmf) +BOOL_FIELD(ext_smepmp) +BOOL_FIELD(ext_smrnmi) +BOOL_FIELD(ext_ssnpm) +BOOL_FIELD(ext_smnpm) +BOOL_FIELD(ext_smmpm) +BOOL_FIELD(ext_sspm) +BOOL_FIELD(ext_supm) +BOOL_FIELD(rvv_ta_all_1s) +BOOL_FIELD(rvv_ma_all_1s) +BOOL_FIELD(rvv_vl_half_avl) +BOOL_FIELD(rvv_vsetvl_x0_vill) +/* Named features */ +BOOL_FIELD(ext_svade) +BOOL_FIELD(ext_zic64b) +BOOL_FIELD(ext_ssstateen) +BOOL_FIELD(ext_sha) + +/* + * Always 'true' booleans for named features + * TCG always implement/can't be user disabled, + * based on spec version. + */ +BOOL_FIELD(has_priv_1_13) +BOOL_FIELD(has_priv_1_12) +BOOL_FIELD(has_priv_1_11) + +/* Always enabled for TCG if has_priv_1_11 */ +BOOL_FIELD(ext_ziccrse) + +/* Vendor-specific custom extensions */ +BOOL_FIELD(ext_xtheadba) +BOOL_FIELD(ext_xtheadbb) +BOOL_FIELD(ext_xtheadbs) +BOOL_FIELD(ext_xtheadcmo) +BOOL_FIELD(ext_xtheadcondmov) +BOOL_FIELD(ext_xtheadfmemidx) +BOOL_FIELD(ext_xtheadfmv) +BOOL_FIELD(ext_xtheadmac) +BOOL_FIELD(ext_xtheadmemidx) +BOOL_FIELD(ext_xtheadmempair) +BOOL_FIELD(ext_xtheadsync) +BOOL_FIELD(ext_XVentanaCondOps) + +BOOL_FIELD(mmu) +BOOL_FIELD(pmp) +BOOL_FIELD(debug) +BOOL_FIELD(misa_w) + +BOOL_FIELD(short_isa_string) + +TYPED_FIELD(uint32_t, mvendorid, 0) +TYPED_FIELD(uint64_t, marchid, 0) +TYPED_FIELD(uint64_t, mimpid, 0) + +TYPED_FIELD(uint32_t, pmu_mask, 0) +TYPED_FIELD(uint16_t, vlenb, 0) +TYPED_FIELD(uint16_t, elen, 0) +TYPED_FIELD(uint16_t, cbom_blocksize, 0) +TYPED_FIELD(uint16_t, cbop_blocksize, 0) +TYPED_FIELD(uint16_t, cboz_blocksize, 0) +TYPED_FIELD(uint8_t, pmp_regions, 0) + +TYPED_FIELD(int8_t, max_satp_mode, -1) + +#undef BOOL_FIELD +#undef TYPED_FIELD diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 6c4391d..3479a62 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -24,14 +24,15 @@ #include "internals.h" #include "pmu.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "system/memory.h" #include "instmap.h" #include "tcg/tcg-op.h" #include "accel/tcg/cpu-ops.h" #include "trace.h" #include "semihosting/common-semi.h" -#include "system/cpu-timers.h" +#include "exec/icount.h" #include "cpu_bits.h" #include "debug.h" #include "pmp.h" @@ -134,103 +135,6 @@ bool riscv_env_smode_dbltrp_enabled(CPURISCVState *env, bool virt) #endif } -void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - RISCVCPU *cpu = env_archcpu(env); - RISCVExtStatus fs, vs; - uint32_t flags = 0; - bool pm_signext = riscv_cpu_virt_mem_enabled(env); - - *pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc; - *cs_base = 0; - - if (cpu->cfg.ext_zve32x) { - /* - * If env->vl equals to VLMAX, we can use generic vector operation - * expanders (GVEC) to accerlate the vector operations. - * However, as LMUL could be a fractional number. The maximum - * vector size can be operated might be less than 8 bytes, - * which is not supported by GVEC. So we set vl_eq_vlmax flag to true - * only when maxsz >= 8 bytes. 
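cpu_cfg_fields.h.inc above is a classic X-macro list: each include site defines BOOL_FIELD/TYPED_FIELD to suit itself before expanding the same field inventory, so the struct layout and any per-field processing can never drift apart. A toy version of the two expansions (field names, types and the defaults helper are invented; the real header #undefs its macros at its end, as shown above):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Inline stand-in for cpu_cfg_fields.h.inc so the example compiles. */
#define TOY_FIELDS \
    BOOL_FIELD(ext_foo) \
    TYPED_FIELD(uint16_t, vlenb, 16)

/* Expansion 1: declare struct members (what cpu_cfg.h now does). */
typedef struct {
#define BOOL_FIELD(x) bool x;
#define TYPED_FIELD(type, x, def) type x;
    TOY_FIELDS
#undef BOOL_FIELD
#undef TYPED_FIELD
} ToyCfg;

/* Expansion 2: apply each field's declared default. */
static void toy_cfg_defaults(ToyCfg *c)
{
#define BOOL_FIELD(x) c->x = false;
#define TYPED_FIELD(type, x, def) c->x = (def);
    TOY_FIELDS
#undef BOOL_FIELD
#undef TYPED_FIELD
}

int main(void)
{
    ToyCfg c;
    toy_cfg_defaults(&c);
    printf("ext_foo=%d vlenb=%u\n", c.ext_foo, (unsigned)c.vlenb); /* 0 16 */
    return 0;
}
```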
- */ - - /* lmul encoded as in DisasContext::lmul */ - int8_t lmul = sextract32(FIELD_EX64(env->vtype, VTYPE, VLMUL), 0, 3); - uint32_t vsew = FIELD_EX64(env->vtype, VTYPE, VSEW); - uint32_t vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul); - uint32_t maxsz = vlmax << vsew; - bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl) && - (maxsz >= 8); - flags = FIELD_DP32(flags, TB_FLAGS, VILL, env->vill); - flags = FIELD_DP32(flags, TB_FLAGS, SEW, vsew); - flags = FIELD_DP32(flags, TB_FLAGS, LMUL, - FIELD_EX64(env->vtype, VTYPE, VLMUL)); - flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); - flags = FIELD_DP32(flags, TB_FLAGS, VTA, - FIELD_EX64(env->vtype, VTYPE, VTA)); - flags = FIELD_DP32(flags, TB_FLAGS, VMA, - FIELD_EX64(env->vtype, VTYPE, VMA)); - flags = FIELD_DP32(flags, TB_FLAGS, VSTART_EQ_ZERO, env->vstart == 0); - } else { - flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); - } - - if (cpu_get_fcfien(env)) { - /* - * For Forward CFI, only the expectation of a lpad at - * the start of the block is tracked via env->elp. env->elp - * is turned on during jalr translation. - */ - flags = FIELD_DP32(flags, TB_FLAGS, FCFI_LP_EXPECTED, env->elp); - flags = FIELD_DP32(flags, TB_FLAGS, FCFI_ENABLED, 1); - } - - if (cpu_get_bcfien(env)) { - flags = FIELD_DP32(flags, TB_FLAGS, BCFI_ENABLED, 1); - } - -#ifdef CONFIG_USER_ONLY - fs = EXT_STATUS_DIRTY; - vs = EXT_STATUS_DIRTY; -#else - flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv); - - flags |= riscv_env_mmu_index(env, 0); - fs = get_field(env->mstatus, MSTATUS_FS); - vs = get_field(env->mstatus, MSTATUS_VS); - - if (env->virt_enabled) { - flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1); - /* - * Merge DISABLED and !DIRTY states using MIN. - * We will set both fields when dirtying. - */ - fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS)); - vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS)); - } - - /* With Zfinx, floating point is enabled/disabled by Smstateen. */ - if (!riscv_has_ext(env, RVF)) { - fs = (smstateen_acc_ok(env, 0, SMSTATEEN0_FCSR) == RISCV_EXCP_NONE) - ? EXT_STATUS_DIRTY : EXT_STATUS_DISABLED; - } - - if (cpu->cfg.debug && !icount_enabled()) { - flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled); - } -#endif - - flags = FIELD_DP32(flags, TB_FLAGS, FS, fs); - flags = FIELD_DP32(flags, TB_FLAGS, VS, vs); - flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl); - flags = FIELD_DP32(flags, TB_FLAGS, AXL, cpu_address_xl(env)); - flags = FIELD_DP32(flags, TB_FLAGS, PM_PMM, riscv_pm_get_pmm(env)); - flags = FIELD_DP32(flags, TB_FLAGS, PM_SIGNEXTEND, pm_signext); - - *pflags = flags; -} - RISCVPmPmm riscv_pm_get_pmm(CPURISCVState *env) { #ifndef CONFIG_USER_ONLY @@ -1405,6 +1309,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, bool svade = riscv_cpu_cfg(env)->ext_svade; bool svadu = riscv_cpu_cfg(env)->ext_svadu; bool adue = svadu ? 
env->menvcfg & MENVCFG_ADUE : !svade; + bool svrsw60t59b = riscv_cpu_cfg(env)->ext_svrsw60t59b; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); @@ -1472,7 +1377,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, if (riscv_cpu_sxl(env) == MXL_RV32) { ppn = pte >> PTE_PPN_SHIFT; } else { - if (pte & PTE_RESERVED) { + if (pte & PTE_RESERVED(svrsw60t59b)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: " "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n", __func__, pte_addr, pte); @@ -1662,9 +1567,11 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, target_ulong *pte_pa = qemu_map_ram_ptr(mr->ram_block, addr1); target_ulong old_pte; if (riscv_cpu_sxl(env) == MXL_RV32) { - old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, pte, updated_pte); + old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, cpu_to_le32(pte), cpu_to_le32(updated_pte)); + old_pte = le32_to_cpu(old_pte); } else { - old_pte = qatomic_cmpxchg(pte_pa, pte, updated_pte); + old_pte = qatomic_cmpxchg(pte_pa, cpu_to_le64(pte), cpu_to_le64(updated_pte)); + old_pte = le64_to_cpu(old_pte); } if (old_pte != pte) { goto restart; diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c index bb084e0..a0fb54b 100644 --- a/target/riscv/crypto_helper.c +++ b/target/riscv/crypto_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "crypto/aes.h" #include "crypto/aes-round.h" diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 7948188..5c91658 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -24,12 +24,13 @@ #include "tcg/tcg-cpu.h" #include "pmu.h" #include "time_helper.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" -#include "exec/tb-flush.h" -#include "system/cpu-timers.h" +#include "exec/icount.h" +#include "accel/tcg/getpc.h" #include "qemu/guest-random.h" #include "qapi/error.h" +#include "tcg/insn-start-words.h" +#include "internals.h" #include <stdbool.h> /* CSR function table public API */ @@ -38,7 +39,7 @@ void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops) *ops = csr_ops[csrno & (CSR_TABLE_SIZE - 1)]; } -void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) +void riscv_set_csr_ops(int csrno, const riscv_csr_operations *ops) { csr_ops[csrno & (CSR_TABLE_SIZE - 1)] = *ops; } @@ -202,6 +203,8 @@ static RISCVException cfi_ss(CPURISCVState *env, int csrno) #if !defined(CONFIG_USER_ONLY) if (env->debugger) { return RISCV_EXCP_NONE; + } else if (env->virt_enabled) { + return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } #endif return RISCV_EXCP_ILLEGAL_INST; @@ -372,8 +375,11 @@ static RISCVException aia_smode(CPURISCVState *env, int csrno) static RISCVException aia_smode32(CPURISCVState *env, int csrno) { int ret; + int csr_priv = get_field(csrno, 0x300); - if (!riscv_cpu_cfg(env)->ext_ssaia) { + if (csr_priv == PRV_M && !riscv_cpu_cfg(env)->ext_smaia) { + return RISCV_EXCP_ILLEGAL_INST; + } else if (!riscv_cpu_cfg(env)->ext_ssaia) { return RISCV_EXCP_ILLEGAL_INST; } @@ -736,7 +742,10 @@ static RISCVException dbltrp_hmode(CPURISCVState *env, int csrno) static RISCVException pmp(CPURISCVState *env, int csrno) { if (riscv_cpu_cfg(env)->pmp) { - if (csrno <= CSR_PMPCFG3) { + int max_pmpcfg = (env->priv_ver >= PRIV_VERSION_1_12_0) ? 
+ CSR_PMPCFG15 : CSR_PMPCFG3; + + if (csrno <= max_pmpcfg) { uint32_t reg_index = csrno - CSR_PMPCFG0; /* TODO: RV128 restriction check */ @@ -830,13 +839,15 @@ static RISCVException seed(CPURISCVState *env, int csrno) } /* zicfiss CSR_SSP read and write */ -static int read_ssp(CPURISCVState *env, int csrno, target_ulong *val) +static RISCVException read_ssp(CPURISCVState *env, int csrno, + target_ulong *val) { *val = env->ssp; return RISCV_EXCP_NONE; } -static int write_ssp(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_ssp(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { env->ssp = val; return RISCV_EXCP_NONE; @@ -851,7 +862,7 @@ static RISCVException read_fflags(CPURISCVState *env, int csrno, } static RISCVException write_fflags(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) if (riscv_has_ext(env, RVF)) { @@ -870,7 +881,7 @@ static RISCVException read_frm(CPURISCVState *env, int csrno, } static RISCVException write_frm(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) if (riscv_has_ext(env, RVF)) { @@ -890,7 +901,7 @@ static RISCVException read_fcsr(CPURISCVState *env, int csrno, } static RISCVException write_fcsr(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) if (riscv_has_ext(env, RVF)) { @@ -942,7 +953,7 @@ static RISCVException read_vxrm(CPURISCVState *env, int csrno, } static RISCVException write_vxrm(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) env->mstatus |= MSTATUS_VS; @@ -959,7 +970,7 @@ static RISCVException read_vxsat(CPURISCVState *env, int csrno, } static RISCVException write_vxsat(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) env->mstatus |= MSTATUS_VS; @@ -976,7 +987,7 @@ static RISCVException read_vstart(CPURISCVState *env, int csrno, } static RISCVException write_vstart(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) env->mstatus |= MSTATUS_VS; @@ -997,7 +1008,7 @@ static RISCVException read_vcsr(CPURISCVState *env, int csrno, } static RISCVException write_vcsr(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { #if !defined(CONFIG_USER_ONLY) env->mstatus |= MSTATUS_VS; @@ -1055,7 +1066,7 @@ static RISCVException read_mcyclecfg(CPURISCVState *env, int csrno, } static RISCVException write_mcyclecfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t inh_avail_mask; @@ -1084,7 +1095,7 @@ static RISCVException read_mcyclecfgh(CPURISCVState *env, int csrno, } static RISCVException write_mcyclecfgh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { target_ulong inh_avail_mask = (target_ulong)(~MHPMEVENTH_FILTER_MASK | MCYCLECFGH_BIT_MINH); @@ -1109,7 +1120,7 @@ static RISCVException read_minstretcfg(CPURISCVState *env, int csrno, } static RISCVException write_minstretcfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t inh_avail_mask; @@ -1136,7 +1147,7 @@ static RISCVException read_minstretcfgh(CPURISCVState *env, int csrno, } static RISCVException write_minstretcfgh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t
ra) { target_ulong inh_avail_mask = (target_ulong)(~MHPMEVENTH_FILTER_MASK | MINSTRETCFGH_BIT_MINH); @@ -1163,7 +1174,7 @@ static RISCVException read_mhpmevent(CPURISCVState *env, int csrno, } static RISCVException write_mhpmevent(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { int evt_index = csrno - CSR_MCOUNTINHIBIT; uint64_t mhpmevt_val = val; @@ -1201,7 +1212,7 @@ static RISCVException read_mhpmeventh(CPURISCVState *env, int csrno, } static RISCVException write_mhpmeventh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { int evt_index = csrno - CSR_MHPMEVENT3H + 3; uint64_t mhpmevth_val; @@ -1343,14 +1354,16 @@ static RISCVException riscv_pmu_write_ctrh(CPURISCVState *env, target_ulong val, return RISCV_EXCP_NONE; } -static int write_mhpmcounter(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mhpmcounter(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { int ctr_idx = csrno - CSR_MCYCLE; return riscv_pmu_write_ctr(env, val, ctr_idx); } -static int write_mhpmcounterh(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { int ctr_idx = csrno - CSR_MCYCLEH; @@ -1661,7 +1674,7 @@ static RISCVException read_vstimecmph(CPURISCVState *env, int csrno, } static RISCVException write_vstimecmp(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (riscv_cpu_mxl(env) == MXL_RV32) { env->vstimecmp = deposit64(env->vstimecmp, 0, 32, (uint64_t)val); @@ -1676,7 +1689,7 @@ static RISCVException write_vstimecmp(CPURISCVState *env, int csrno, } static RISCVException write_vstimecmph(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vstimecmp = deposit64(env->vstimecmp, 32, 32, (uint64_t)val); riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, @@ -1710,13 +1723,13 @@ static RISCVException read_stimecmph(CPURISCVState *env, int csrno, } static RISCVException write_stimecmp(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (env->virt_enabled) { if (env->hvictl & HVICTL_VTI) { return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } - return write_vstimecmp(env, csrno, val); + return write_vstimecmp(env, csrno, val, ra); } if (riscv_cpu_mxl(env) == MXL_RV32) { @@ -1731,13 +1744,13 @@ static RISCVException write_stimecmp(CPURISCVState *env, int csrno, } static RISCVException write_stimecmph(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (env->virt_enabled) { if (env->hvictl & HVICTL_VTI) { return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } - return write_vstimecmph(env, csrno, val); + return write_vstimecmph(env, csrno, val, ra); } env->stimecmp = deposit64(env->stimecmp, 32, 32, (uint64_t)val); @@ -1842,7 +1855,7 @@ static RISCVException read_zero(CPURISCVState *env, int csrno, } static RISCVException write_ignore(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return RISCV_EXCP_NONE; } @@ -1906,8 +1919,13 @@ static RISCVException read_mstatus(CPURISCVState *env, int csrno, static bool validate_vm(CPURISCVState *env, target_ulong vm) { - uint64_t mode_supported = riscv_cpu_cfg(env)->satp_mode.map; - return get_field(mode_supported, (1 << vm)); + bool rv32 = riscv_cpu_mxl(env) == MXL_RV32; + RISCVCPU *cpu = env_archcpu(env); + int satp_mode_supported_max = cpu->cfg.max_satp_mode; + const bool 
*valid_vm = rv32 ? valid_vm_1_10_32 : valid_vm_1_10_64; + + assert(satp_mode_supported_max >= 0); + return vm <= satp_mode_supported_max && valid_vm[vm]; } static target_ulong legalize_xatp(CPURISCVState *env, target_ulong old_xatp, @@ -1963,7 +1981,7 @@ static target_ulong legalize_mpp(CPURISCVState *env, target_ulong old_mpp, } static RISCVException write_mstatus(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t mstatus = env->mstatus; uint64_t mask = 0; @@ -1987,7 +2005,8 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno, if (riscv_has_ext(env, RVF)) { mask |= MSTATUS_FS; } - if (riscv_has_ext(env, RVV)) { + + if (riscv_cpu_cfg(env)->ext_zve32x) { mask |= MSTATUS_VS; } @@ -2042,7 +2061,7 @@ static RISCVException read_mstatush(CPURISCVState *env, int csrno, } static RISCVException write_mstatush(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t valh = (uint64_t)val << 32; uint64_t mask = riscv_has_ext(env, RVH) ? MSTATUS_MPV | MSTATUS_GVA : 0; @@ -2095,8 +2114,21 @@ static RISCVException read_misa(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } +static target_ulong get_next_pc(CPURISCVState *env, uintptr_t ra) +{ + uint64_t data[INSN_START_WORDS]; + + /* Outside of a running cpu, env contains the next pc. */ + if (ra == 0 || !cpu_unwind_state_data(env_cpu(env), ra, data)) { + return env->pc; + } + + /* Within unwind data, [0] is pc and [1] is the opcode. */ + return data[0] + insn_len(data[1]); +} + static RISCVException write_misa(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { RISCVCPU *cpu = env_archcpu(env); uint32_t orig_misa_ext = env->misa_ext; @@ -2110,11 +2142,8 @@ static RISCVException write_misa(CPURISCVState *env, int csrno, /* Mask extensions that are not supported by this hart */ val &= env->misa_ext_mask; - /* - * Suppress 'C' if next instruction is not aligned - * TODO: this should check next_pc - */ - if ((val & RVC) && (GETPC() & ~3) != 0) { + /* Suppress 'C' if next instruction is not aligned. 
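get_next_pc() above only needs to decode the unwound opcode far enough to step over it, which on RISC-V is a two-bit test: non-compressed instructions have both low opcode bits set. A sketch of the insn_len() helper it relies on (QEMU keeps an equivalent in the riscv internals header):

```c
/*
 * Instruction length from the first halfword: RVC (compressed)
 * encodings use opcode[1:0] != 0b11; all 32-bit encodings use 0b11.
 */
static inline int insn_len(uint16_t first_word)
{
    return (first_word & 0x3) == 0x3 ? 4 : 2;
}
```

With that, write_misa() can check the alignment of the real next PC instead of the old GETPC()-based approximation it replaces.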
*/ + if ((val & RVC) && (get_next_pc(env, ra) & 3) != 0) { val &= ~RVC; } @@ -2146,8 +2175,6 @@ static RISCVException write_misa(CPURISCVState *env, int csrno, env->mstatus &= ~MSTATUS_FS; } - /* flush translation cache */ - tb_flush(env_cpu(env)); env->xl = riscv_cpu_mxl(env); return RISCV_EXCP_NONE; } @@ -2160,7 +2187,7 @@ static RISCVException read_medeleg(CPURISCVState *env, int csrno, } static RISCVException write_medeleg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->medeleg = (env->medeleg & ~DELEGABLE_EXCPS) | (val & DELEGABLE_EXCPS); return RISCV_EXCP_NONE; @@ -2955,7 +2982,7 @@ static RISCVException read_mtvec(CPURISCVState *env, int csrno, } static RISCVException write_mtvec(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { /* bits [1:0] encode mode; 0 = direct, 1 = vectored, 2 >= reserved */ if ((val & 3) < 2) { @@ -2974,7 +3001,7 @@ static RISCVException read_mcountinhibit(CPURISCVState *env, int csrno, } static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { int cidx; PMUCTRState *counter; @@ -3049,10 +3076,9 @@ static RISCVException read_scountinhibit(CPURISCVState *env, int csrno, } static RISCVException write_scountinhibit(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { - write_mcountinhibit(env, csrno, val & env->mcounteren); - return RISCV_EXCP_NONE; + return write_mcountinhibit(env, csrno, val & env->mcounteren, ra); } static RISCVException read_mcounteren(CPURISCVState *env, int csrno, @@ -3063,7 +3089,7 @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno, } static RISCVException write_mcounteren(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { RISCVCPU *cpu = env_archcpu(env); @@ -3097,7 +3123,7 @@ static RISCVException read_mscratch(CPURISCVState *env, int csrno, } static RISCVException write_mscratch(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->mscratch = val; return RISCV_EXCP_NONE; @@ -3106,14 +3132,14 @@ static RISCVException write_mscratch(CPURISCVState *env, int csrno, static RISCVException read_mepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->mepc; + *val = env->mepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_mepc(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { - env->mepc = val; + env->mepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -3125,7 +3151,7 @@ static RISCVException read_mcause(CPURISCVState *env, int csrno, } static RISCVException write_mcause(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->mcause = val; return RISCV_EXCP_NONE; @@ -3139,7 +3165,7 @@ static RISCVException read_mtval(CPURISCVState *env, int csrno, } static RISCVException write_mtval(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->mtval = val; return RISCV_EXCP_NONE; @@ -3154,13 +3180,14 @@ static RISCVException read_menvcfg(CPURISCVState *env, int csrno, } static RISCVException write_henvcfg(CPURISCVState *env, int csrno, - target_ulong val); + target_ulong val, uintptr_t ra); static RISCVException write_menvcfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | 
MENVCFG_CBCFE | MENVCFG_CBZE | MENVCFG_CDE; + bool stce_changed = false; if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | @@ -3186,11 +3213,19 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if ((val & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (val & MENVCFG_STCE))) { + stce_changed = true; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); - write_henvcfg(env, CSR_HENVCFG, env->henvcfg); - return RISCV_EXCP_NONE; + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(val & MENVCFG_STCE)); + } + + return write_henvcfg(env, CSR_HENVCFG, env->henvcfg, ra); } static RISCVException read_menvcfgh(CPURISCVState *env, int csrno, @@ -3201,9 +3236,9 @@ static RISCVException read_menvcfgh(CPURISCVState *env, int csrno, } static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, - target_ulong val); + target_ulong val, uintptr_t ra); static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | @@ -3212,15 +3247,24 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, (cfg->ext_smcdeleg ? MENVCFG_CDE : 0) | (cfg->ext_ssdbltrp ? MENVCFG_DTE : 0); uint64_t valh = (uint64_t)val << 32; + bool stce_changed = false; + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (valh & MENVCFG_STCE))) { + stce_changed = true; + } if ((valh & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); - write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32); - return RISCV_EXCP_NONE; + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(valh & MENVCFG_STCE)); + } + + return write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32, ra); } static RISCVException read_senvcfg(CPURISCVState *env, int csrno, @@ -3238,7 +3282,7 @@ static RISCVException read_senvcfg(CPURISCVState *env, int csrno, } static RISCVException write_senvcfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE | SENVCFG_CBZE; RISCVException ret; @@ -3295,10 +3339,12 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, } static RISCVException write_henvcfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { @@ -3324,6 +3370,11 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, get_field(val, HENVCFG_PMM) != PMM_FIELD_RESERVED) { mask |= HENVCFG_PMM; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (val & HENVCFG_STCE))) { + stce_changed = true; + } } env->henvcfg = val & mask; @@ -3331,6 +3382,10 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, env->vsstatus &= ~MSTATUS_SDT; } + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3350,21 +3405,34 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, } static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { + 
const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE | HENVCFG_DTE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { return ret; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (valh & HENVCFG_STCE))) { + stce_changed = true; + } + env->henvcfg = (env->henvcfg & 0xFFFFFFFF) | (valh & mask); if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } + + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3388,7 +3456,7 @@ static RISCVException write_mstateen(CPURISCVState *env, int csrno, } static RISCVException write_mstateen0(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; if (!riscv_has_ext(env, RVF)) { @@ -3420,7 +3488,7 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno, } static RISCVException write_mstateen_1_3(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { return write_mstateen(env, csrno, SMSTATEEN_STATEEN, new_val); } @@ -3447,7 +3515,7 @@ static RISCVException write_mstateenh(CPURISCVState *env, int csrno, } static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; @@ -3463,7 +3531,7 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, } static RISCVException write_mstateenh_1_3(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { return write_mstateenh(env, csrno, SMSTATEEN_STATEEN, new_val); } @@ -3492,7 +3560,7 @@ static RISCVException write_hstateen(CPURISCVState *env, int csrno, } static RISCVException write_hstateen0(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; @@ -3521,7 +3589,7 @@ static RISCVException write_hstateen0(CPURISCVState *env, int csrno, } static RISCVException write_hstateen_1_3(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { return write_hstateen(env, csrno, SMSTATEEN_STATEEN, new_val); } @@ -3552,7 +3620,7 @@ static RISCVException write_hstateenh(CPURISCVState *env, int csrno, } static RISCVException write_hstateen0h(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; @@ -3564,7 +3632,7 @@ static RISCVException write_hstateen0h(CPURISCVState *env, int csrno, } static RISCVException write_hstateenh_1_3(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { return write_hstateenh(env, csrno, SMSTATEEN_STATEEN, new_val); } @@ -3603,7 +3671,7 @@ static RISCVException write_sstateen(CPURISCVState *env, int csrno, } static RISCVException write_sstateen0(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, uintptr_t ra) { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; @@ -3615,7 +3683,7 @@ static RISCVException write_sstateen0(CPURISCVState *env, int csrno, } static RISCVException write_sstateen_1_3(CPURISCVState *env, int csrno, - target_ulong new_val) + target_ulong new_val, 
uintptr_t ra) { return write_sstateen(env, csrno, SMSTATEEN_STATEEN, new_val); } @@ -3635,7 +3703,14 @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, if (riscv_cpu_cfg(env)->ext_sstc && (env->priv == PRV_M) && get_field(env->menvcfg, MENVCFG_STCE)) { /* sstc extension forbids STIP & VSTIP to be writeable in mip */ - mask = mask & ~(MIP_STIP | MIP_VSTIP); + + /* STIP is not writable when menvcfg.STCE is enabled. */ + mask = mask & ~MIP_STIP; + + /* VSTIP is not writable when both [mh]envcfg.STCE are enabled. */ + if (get_field(env->henvcfg, HENVCFG_STCE)) { + mask = mask & ~MIP_VSTIP; + } } if (mask) { @@ -3866,7 +3941,7 @@ static RISCVException read_sstatus(CPURISCVState *env, int csrno, } static RISCVException write_sstatus(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { target_ulong mask = (sstatus_v1_10_mask); @@ -3883,7 +3958,7 @@ static RISCVException write_sstatus(CPURISCVState *env, int csrno, mask |= SSTATUS_SDT; } target_ulong newval = (env->mstatus & ~mask) | (val & mask); - return write_mstatus(env, CSR_MSTATUS, newval); + return write_mstatus(env, CSR_MSTATUS, newval, ra); } static RISCVException rmw_vsie64(CPURISCVState *env, int csrno, @@ -4035,7 +4110,7 @@ static RISCVException read_stvec(CPURISCVState *env, int csrno, } static RISCVException write_stvec(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { /* bits [1:0] encode mode; 0 = direct, 1 = vectored, 2 >= reserved */ if ((val & 3) < 2) { @@ -4054,7 +4129,7 @@ static RISCVException read_scounteren(CPURISCVState *env, int csrno, } static RISCVException write_scounteren(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { RISCVCPU *cpu = env_archcpu(env); @@ -4088,7 +4163,7 @@ static RISCVException read_sscratch(CPURISCVState *env, int csrno, } static RISCVException write_sscratch(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->sscratch = val; return RISCV_EXCP_NONE; @@ -4097,14 +4172,14 @@ static RISCVException write_sscratch(CPURISCVState *env, int csrno, static RISCVException read_sepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->sepc; + *val = env->sepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_sepc(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { - env->sepc = val; + env->sepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -4116,7 +4191,7 @@ static RISCVException read_scause(CPURISCVState *env, int csrno, } static RISCVException write_scause(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->scause = val; return RISCV_EXCP_NONE; @@ -4130,7 +4205,7 @@ static RISCVException read_stval(CPURISCVState *env, int csrno, } static RISCVException write_stval(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->stval = val; return RISCV_EXCP_NONE; @@ -4270,7 +4345,7 @@ static RISCVException read_satp(CPURISCVState *env, int csrno, } static RISCVException write_satp(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (!riscv_cpu_cfg(env)->mmu) { return RISCV_EXCP_NONE; @@ -4492,7 +4567,7 @@ static RISCVException read_hstatus(CPURISCVState *env, int csrno, } static RISCVException write_hstatus(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t mask = (target_ulong)-1; if 
(!env_archcpu(env)->cfg.ext_svukte) { @@ -4524,7 +4599,7 @@ static RISCVException read_hedeleg(CPURISCVState *env, int csrno, } static RISCVException write_hedeleg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->hedeleg = val & vs_delegable_excps; return RISCV_EXCP_NONE; @@ -4545,7 +4620,7 @@ static RISCVException read_hedelegh(CPURISCVState *env, int csrno, } static RISCVException write_hedelegh(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { RISCVException ret; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13); @@ -4808,7 +4883,7 @@ static RISCVException read_hcounteren(CPURISCVState *env, int csrno, } static RISCVException write_hcounteren(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { RISCVCPU *cpu = env_archcpu(env); @@ -4828,7 +4903,7 @@ static RISCVException read_hgeie(CPURISCVState *env, int csrno, } static RISCVException write_hgeie(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { /* Only GEILEN:1 bits implemented and BIT0 is never implemented */ val &= ((((target_ulong)1) << env->geilen) - 1) << 1; @@ -4847,7 +4922,7 @@ static RISCVException read_htval(CPURISCVState *env, int csrno, } static RISCVException write_htval(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->htval = val; return RISCV_EXCP_NONE; @@ -4861,7 +4936,7 @@ static RISCVException read_htinst(CPURISCVState *env, int csrno, } static RISCVException write_htinst(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return RISCV_EXCP_NONE; } @@ -4883,7 +4958,7 @@ static RISCVException read_hgatp(CPURISCVState *env, int csrno, } static RISCVException write_hgatp(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->hgatp = legalize_xatp(env, env->hgatp, val); return RISCV_EXCP_NONE; @@ -4901,7 +4976,7 @@ static RISCVException read_htimedelta(CPURISCVState *env, int csrno, } static RISCVException write_htimedelta(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (!env->rdtime_fn) { return RISCV_EXCP_ILLEGAL_INST; @@ -4933,7 +5008,7 @@ static RISCVException read_htimedeltah(CPURISCVState *env, int csrno, } static RISCVException write_htimedeltah(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (!env->rdtime_fn) { return RISCV_EXCP_ILLEGAL_INST; @@ -4957,7 +5032,7 @@ static RISCVException read_hvictl(CPURISCVState *env, int csrno, } static RISCVException write_hvictl(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->hvictl = val & HVICTL_VALID_MASK; return RISCV_EXCP_NONE; @@ -5022,7 +5097,7 @@ static RISCVException read_hviprio1(CPURISCVState *env, int csrno, } static RISCVException write_hviprio1(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return write_hvipriox(env, 0, env->hviprio, val); } @@ -5034,7 +5109,7 @@ static RISCVException read_hviprio1h(CPURISCVState *env, int csrno, } static RISCVException write_hviprio1h(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return write_hvipriox(env, 4, env->hviprio, val); } @@ -5046,7 +5121,7 @@ static RISCVException read_hviprio2(CPURISCVState *env, int csrno, } static RISCVException write_hviprio2(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return 
write_hvipriox(env, 8, env->hviprio, val); } @@ -5058,7 +5133,7 @@ static RISCVException read_hviprio2h(CPURISCVState *env, int csrno, } static RISCVException write_hviprio2h(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { return write_hvipriox(env, 12, env->hviprio, val); } @@ -5072,7 +5147,7 @@ static RISCVException read_vsstatus(CPURISCVState *env, int csrno, } static RISCVException write_vsstatus(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint64_t mask = (target_ulong)-1; if ((val & VSSTATUS64_UXL) == 0) { @@ -5097,7 +5172,7 @@ static RISCVException read_vstvec(CPURISCVState *env, int csrno, } static RISCVException write_vstvec(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { /* bits [1:0] encode mode; 0 = direct, 1 = vectored, 2 >= reserved */ if ((val & 3) < 2) { @@ -5116,7 +5191,7 @@ static RISCVException read_vsscratch(CPURISCVState *env, int csrno, } static RISCVException write_vsscratch(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vsscratch = val; return RISCV_EXCP_NONE; @@ -5130,7 +5205,7 @@ static RISCVException read_vsepc(CPURISCVState *env, int csrno, } static RISCVException write_vsepc(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vsepc = val; return RISCV_EXCP_NONE; @@ -5144,7 +5219,7 @@ static RISCVException read_vscause(CPURISCVState *env, int csrno, } static RISCVException write_vscause(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vscause = val; return RISCV_EXCP_NONE; @@ -5158,7 +5233,7 @@ static RISCVException read_vstval(CPURISCVState *env, int csrno, } static RISCVException write_vstval(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vstval = val; return RISCV_EXCP_NONE; @@ -5172,7 +5247,7 @@ static RISCVException read_vsatp(CPURISCVState *env, int csrno, } static RISCVException write_vsatp(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->vsatp = legalize_xatp(env, env->vsatp, val); return RISCV_EXCP_NONE; @@ -5186,7 +5261,7 @@ static RISCVException read_mtval2(CPURISCVState *env, int csrno, } static RISCVException write_mtval2(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->mtval2 = val; return RISCV_EXCP_NONE; @@ -5200,7 +5275,7 @@ static RISCVException read_mtinst(CPURISCVState *env, int csrno, } static RISCVException write_mtinst(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->mtinst = val; return RISCV_EXCP_NONE; @@ -5215,7 +5290,7 @@ static RISCVException read_mseccfg(CPURISCVState *env, int csrno, } static RISCVException write_mseccfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { mseccfg_csr_write(env, val); return RISCV_EXCP_NONE; @@ -5231,7 +5306,7 @@ static RISCVException read_pmpcfg(CPURISCVState *env, int csrno, } static RISCVException write_pmpcfg(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { uint32_t reg_index = csrno - CSR_PMPCFG0; @@ -5247,7 +5322,7 @@ static RISCVException read_pmpaddr(CPURISCVState *env, int csrno, } static RISCVException write_pmpaddr(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { pmpaddr_csr_write(env, csrno - CSR_PMPADDR0, val); return RISCV_EXCP_NONE; @@ -5261,7 +5336,7 @@ static 
RISCVException read_tselect(CPURISCVState *env, int csrno, } static RISCVException write_tselect(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { tselect_csr_write(env, val); return RISCV_EXCP_NONE; @@ -5285,7 +5360,7 @@ static RISCVException read_tdata(CPURISCVState *env, int csrno, } static RISCVException write_tdata(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { if (!tdata_available(env, csrno - CSR_TDATA1)) { return RISCV_EXCP_ILLEGAL_INST; @@ -5310,7 +5385,7 @@ static RISCVException read_mcontext(CPURISCVState *env, int csrno, } static RISCVException write_mcontext(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false; int32_t mask; @@ -5334,43 +5409,50 @@ static RISCVException read_mnscratch(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } -static int write_mnscratch(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mnscratch(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { env->mnscratch = val; return RISCV_EXCP_NONE; } -static int read_mnepc(CPURISCVState *env, int csrno, target_ulong *val) +static RISCVException read_mnepc(CPURISCVState *env, int csrno, + target_ulong *val) { *val = env->mnepc; return RISCV_EXCP_NONE; } -static int write_mnepc(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mnepc(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { env->mnepc = val; return RISCV_EXCP_NONE; } -static int read_mncause(CPURISCVState *env, int csrno, target_ulong *val) +static RISCVException read_mncause(CPURISCVState *env, int csrno, + target_ulong *val) { *val = env->mncause; return RISCV_EXCP_NONE; } -static int write_mncause(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mncause(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { env->mncause = val; return RISCV_EXCP_NONE; } -static int read_mnstatus(CPURISCVState *env, int csrno, target_ulong *val) +static RISCVException read_mnstatus(CPURISCVState *env, int csrno, + target_ulong *val) { *val = env->mnstatus; return RISCV_EXCP_NONE; } -static int write_mnstatus(CPURISCVState *env, int csrno, target_ulong val) +static RISCVException write_mnstatus(CPURISCVState *env, int csrno, + target_ulong val, uintptr_t ra) { target_ulong mask = (MNSTATUS_NMIE | MNSTATUS_MNPP); @@ -5498,7 +5580,7 @@ static inline RISCVException riscv_csrrw_check(CPURISCVState *env, csr_priv = get_field(csrno, 0x300); if (!env->debugger && (effective_priv < csr_priv)) { - if (csr_priv == (PRV_S + 1) && env->virt_enabled) { + if (csr_priv <= (PRV_S + 1) && env->virt_enabled) { return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } return RISCV_EXCP_ILLEGAL_INST; @@ -5510,7 +5592,8 @@ static inline RISCVException riscv_csrrw_check(CPURISCVState *env, static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, target_ulong *ret_value, target_ulong new_value, - target_ulong write_mask) + target_ulong write_mask, + uintptr_t ra) { RISCVException ret; target_ulong old_value = 0; @@ -5540,7 +5623,7 @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, if (write_mask) { new_value = (old_value & ~write_mask) | (new_value & write_mask); if (csr_ops[csrno].write) { - ret = csr_ops[csrno].write(env, csrno, new_value); + ret = csr_ops[csrno].write(env, csrno, new_value, ra); if (ret != RISCV_EXCP_NONE) { return ret; } @@ -5563,25 +5646,25 @@ 
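/*
 * Note the new trailing uintptr_t "ra" on the public accessors below: it
 * carries the host return address so a CSR write that must restart or
 * fault the guest instruction (e.g. write_misa's get_next_pc() use above)
 * can unwind precisely; callers with no unwind context pass 0. A sketch of
 * a TCG helper call site under this convention (illustrative only; the
 * actual helper lives in op_helper.c):
 *
 *     target_ulong helper_csrrw(CPURISCVState *env, int csr,
 *                               target_ulong src, target_ulong write_mask)
 *     {
 *         target_ulong val = 0;
 *         RISCVException ret =
 *             riscv_csrrw(env, csr, &val, src, write_mask, GETPC());
 *
 *         if (ret != RISCV_EXCP_NONE) {
 *             riscv_raise_exception(env, ret, GETPC());
 *         }
 *         return val;
 *     }
 */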
RISCVException riscv_csrr(CPURISCVState *env, int csrno, return ret; } - return riscv_csrrw_do64(env, csrno, ret_value, 0, 0); + return riscv_csrrw_do64(env, csrno, ret_value, 0, 0, 0); } RISCVException riscv_csrrw(CPURISCVState *env, int csrno, - target_ulong *ret_value, - target_ulong new_value, target_ulong write_mask) + target_ulong *ret_value, target_ulong new_value, + target_ulong write_mask, uintptr_t ra) { RISCVException ret = riscv_csrrw_check(env, csrno, true); if (ret != RISCV_EXCP_NONE) { return ret; } - return riscv_csrrw_do64(env, csrno, ret_value, new_value, write_mask); + return riscv_csrrw_do64(env, csrno, ret_value, new_value, write_mask, ra); } static RISCVException riscv_csrrw_do128(CPURISCVState *env, int csrno, Int128 *ret_value, Int128 new_value, - Int128 write_mask) + Int128 write_mask, uintptr_t ra) { RISCVException ret; Int128 old_value; @@ -5603,7 +5686,7 @@ static RISCVException riscv_csrrw_do128(CPURISCVState *env, int csrno, } } else if (csr_ops[csrno].write) { /* avoids having to write wrappers for all registers */ - ret = csr_ops[csrno].write(env, csrno, int128_getlo(new_value)); + ret = csr_ops[csrno].write(env, csrno, int128_getlo(new_value), ra); if (ret != RISCV_EXCP_NONE) { return ret; } @@ -5630,7 +5713,7 @@ RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno, if (csr_ops[csrno].read128) { return riscv_csrrw_do128(env, csrno, ret_value, - int128_zero(), int128_zero()); + int128_zero(), int128_zero(), 0); } /* @@ -5641,9 +5724,7 @@ RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno, * accesses */ target_ulong old_value; - ret = riscv_csrrw_do64(env, csrno, &old_value, - (target_ulong)0, - (target_ulong)0); + ret = riscv_csrrw_do64(env, csrno, &old_value, 0, 0, 0); if (ret == RISCV_EXCP_NONE && ret_value) { *ret_value = int128_make64(old_value); } @@ -5651,8 +5732,8 @@ RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno, } RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno, - Int128 *ret_value, - Int128 new_value, Int128 write_mask) + Int128 *ret_value, Int128 new_value, + Int128 write_mask, uintptr_t ra) { RISCVException ret; @@ -5662,7 +5743,8 @@ RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno, } if (csr_ops[csrno].read128) { - return riscv_csrrw_do128(env, csrno, ret_value, new_value, write_mask); + return riscv_csrrw_do128(env, csrno, ret_value, + new_value, write_mask, ra); } /* @@ -5675,7 +5757,7 @@ RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno, target_ulong old_value; ret = riscv_csrrw_do64(env, csrno, &old_value, int128_getlo(new_value), - int128_getlo(write_mask)); + int128_getlo(write_mask), ra); if (ret == RISCV_EXCP_NONE && ret_value) { *ret_value = int128_make64(old_value); } @@ -5698,7 +5780,7 @@ RISCVException riscv_csrrw_debug(CPURISCVState *env, int csrno, if (!write_mask) { ret = riscv_csrr(env, csrno, ret_value); } else { - ret = riscv_csrrw(env, csrno, ret_value, new_value, write_mask); + ret = riscv_csrrw(env, csrno, ret_value, new_value, write_mask, 0); } #if !defined(CONFIG_USER_ONLY) env->debugger = false; @@ -5714,7 +5796,7 @@ static RISCVException read_jvt(CPURISCVState *env, int csrno, } static RISCVException write_jvt(CPURISCVState *env, int csrno, - target_ulong val) + target_ulong val, uintptr_t ra) { env->jvt = val; return RISCV_EXCP_NONE; @@ -5783,8 +5865,8 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { NULL, read_mstatus_i128 }, [CSR_MISA] = { "misa", any, read_misa, write_misa, NULL, read_misa_i128 }, - [CSR_MIDELEG] = { "mideleg", any, NULL, NULL, 
rmw_mideleg }, - [CSR_MEDELEG] = { "medeleg", any, read_medeleg, write_medeleg }, + [CSR_MIDELEG] = { "mideleg", smode, NULL, NULL, rmw_mideleg }, + [CSR_MEDELEG] = { "medeleg", smode, read_medeleg, write_medeleg }, [CSR_MIE] = { "mie", any, NULL, NULL, rmw_mie }, [CSR_MTVEC] = { "mtvec", any, read_mtvec, write_mtvec }, [CSR_MCOUNTEREN] = { "mcounteren", umode, read_mcounteren, @@ -5792,7 +5874,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { "mstatush", any32, read_mstatush, write_mstatush }, - [CSR_MEDELEGH] = { "medelegh", any32, read_zero, write_ignore, + [CSR_MEDELEGH] = { "medelegh", smode32, read_zero, write_ignore, .min_priv_ver = PRIV_VERSION_1_13_0 }, [CSR_HEDELEGH] = { "hedelegh", hmode32, read_hedelegh, write_hedelegh, .min_priv_ver = PRIV_VERSION_1_13_0 }, @@ -5832,7 +5914,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MVIP] = { "mvip", aia_any, NULL, NULL, rmw_mvip }, /* Machine-Level High-Half CSRs (AIA) */ - [CSR_MIDELEGH] = { "midelegh", aia_any32, NULL, NULL, rmw_midelegh }, + [CSR_MIDELEGH] = { "midelegh", aia_smode32, NULL, NULL, rmw_midelegh }, [CSR_MIEH] = { "mieh", aia_any32, NULL, NULL, rmw_mieh }, [CSR_MVIENH] = { "mvienh", aia_any32, NULL, NULL, rmw_mvienh }, [CSR_MVIPH] = { "mviph", aia_any32, NULL, NULL, rmw_mviph }, @@ -6088,6 +6170,30 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPCFG1] = { "pmpcfg1", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG2] = { "pmpcfg2", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG3] = { "pmpcfg3", pmp, read_pmpcfg, write_pmpcfg }, + [CSR_PMPCFG4] = { "pmpcfg4", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG5] = { "pmpcfg5", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG6] = { "pmpcfg6", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG7] = { "pmpcfg7", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG8] = { "pmpcfg8", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG9] = { "pmpcfg9", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG10] = { "pmpcfg10", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG11] = { "pmpcfg11", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG12] = { "pmpcfg12", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG13] = { "pmpcfg13", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG14] = { "pmpcfg14", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG15] = { "pmpcfg15", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_PMPADDR0] = { "pmpaddr0", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR1] = { "pmpaddr1", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR2] = { "pmpaddr2", pmp, read_pmpaddr, write_pmpaddr }, @@ -6102,8 +6208,104 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPADDR11] = { "pmpaddr11", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR12] = { "pmpaddr12", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR13] = { "pmpaddr13", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR15] = { "pmpaddr15", 
pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR16] = { "pmpaddr16", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR17] = { "pmpaddr17", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR18] = { "pmpaddr18", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR19] = { "pmpaddr19", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR20] = { "pmpaddr20", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR21] = { "pmpaddr21", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR22] = { "pmpaddr22", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR23] = { "pmpaddr23", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR24] = { "pmpaddr24", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR25] = { "pmpaddr25", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR26] = { "pmpaddr26", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR27] = { "pmpaddr27", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR28] = { "pmpaddr28", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR29] = { "pmpaddr29", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR30] = { "pmpaddr30", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR31] = { "pmpaddr31", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR32] = { "pmpaddr32", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR33] = { "pmpaddr33", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR34] = { "pmpaddr34", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR35] = { "pmpaddr35", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR36] = { "pmpaddr36", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR37] = { "pmpaddr37", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR38] = { "pmpaddr38", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR39] = { "pmpaddr39", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR40] = { "pmpaddr40", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR41] = { "pmpaddr41", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR42] = { "pmpaddr42", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR43] = { "pmpaddr43", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR44] = { "pmpaddr44", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR45] = { "pmpaddr45", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR46] = { "pmpaddr46", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR47] = { "pmpaddr47", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 
}, + [CSR_PMPADDR48] = { "pmpaddr48", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR49] = { "pmpaddr49", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR50] = { "pmpaddr50", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR51] = { "pmpaddr51", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR52] = { "pmpaddr52", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR53] = { "pmpaddr53", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR54] = { "pmpaddr54", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR55] = { "pmpaddr55", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR56] = { "pmpaddr56", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR57] = { "pmpaddr57", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR58] = { "pmpaddr58", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR59] = { "pmpaddr59", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR60] = { "pmpaddr60", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR61] = { "pmpaddr61", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR62] = { "pmpaddr62", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR63] = { "pmpaddr63", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, /* Debug CSRs */ [CSR_TSELECT] = { "tselect", debug, read_tselect, write_tselect }, diff --git a/target/riscv/debug.c b/target/riscv/debug.c index 9db4048..5664466 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -28,9 +28,10 @@ #include "qapi/error.h" #include "cpu.h" #include "trace.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "exec/watchpoint.h" #include "system/cpu-timers.h" +#include "exec/icount.h" /* * The following M-mode trigger CSRs are implemented: @@ -551,8 +552,6 @@ static void type2_reg_write(CPURISCVState *env, target_ulong index, default: g_assert_not_reached(); } - - return; } /* type 6 trigger */ @@ -667,8 +666,6 @@ static void type6_reg_write(CPURISCVState *env, target_ulong index, default: g_assert_not_reached(); } - - return; } /* icount trigger type */ @@ -849,8 +846,6 @@ static void itrigger_reg_write(CPURISCVState *env, target_ulong index, default: g_assert_not_reached(); } - - return; } static int itrigger_get_adjust_count(CPURISCVState *env) diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 91b1a56..af40561 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "internals.h" @@ -756,6 +755,6 @@ uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1) uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(env, rs1); + float16 frs1 = check_nanbox_bf16(env, rs1); return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status)); } diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c index 18e88f4..1934f91 100644 --- 
a/target/riscv/gdbstub.c +++ b/target/riscv/gdbstub.c @@ -62,7 +62,7 @@ int riscv_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) return 0; } - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: return gdb_get_reg32(mem_buf, tmp); case MXL_RV64: @@ -82,7 +82,7 @@ int riscv_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) int length = 0; target_ulong tmp; - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: tmp = (int32_t)ldl_p(mem_buf); length = 4; @@ -359,7 +359,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs) ricsv_gen_dynamic_vector_feature(cs, cs->gdb_num_regs), 0); } - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: gdb_register_coprocessor(cs, riscv_gdb_get_virtual, riscv_gdb_set_virtual, diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 85d73e4..b785456 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -159,7 +159,7 @@ DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl) #endif /* Vector functions */ -DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_4(vsetvl, tl, env, tl, tl, tl) DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32) @@ -1101,14 +1101,14 @@ DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfslide1up_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfslide1up_vf_w, void, ptr, ptr, i64, ptr, env, i32) @@ -1284,3 +1284,8 @@ DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32) DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) + +/* CFI (zicfiss) helpers */ +#ifndef CONFIG_USER_ONLY +DEF_HELPER_1(ssamoswap_disabled, void, env) +#endif diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 6d1a13c..cd23b1f 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -703,14 +703,14 @@ vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm # Vector widening ordered and unordered float reduction sum vfwredusum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm vfwredosum_vs 110011 . ..... ..... 
001 ..... 1010111 @r_vm -vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r -vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r -vmandn_mm 011000 - ..... ..... 010 ..... 1010111 @r -vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r -vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r -vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r -vmorn_mm 011100 - ..... ..... 010 ..... 1010111 @r -vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r +vmand_mm 011001 1 ..... ..... 010 ..... 1010111 @r +vmnand_mm 011101 1 ..... ..... 010 ..... 1010111 @r +vmandn_mm 011000 1 ..... ..... 010 ..... 1010111 @r +vmxor_mm 011011 1 ..... ..... 010 ..... 1010111 @r +vmor_mm 011010 1 ..... ..... 010 ..... 1010111 @r +vmnor_mm 011110 1 ..... ..... 010 ..... 1010111 @r +vmorn_mm 011100 1 ..... ..... 010 ..... 1010111 @r +vmxnor_mm 011111 1 ..... ..... 010 ..... 1010111 @r vcpop_m 010000 . ..... 10000 010 ..... 1010111 @r2_vm vfirst_m 010000 . ..... 10001 010 ..... 1010111 @r2_vm vmsbf_m 010100 . ..... 00001 010 ..... 1010111 @r2_vm @@ -732,7 +732,7 @@ vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm vrgatherei16_vv 001110 . ..... ..... 000 ..... 1010111 @r_vm vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm -vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r +vcompress_vm 010111 1 ..... ..... 010 ..... 1010111 @r vmv1r_v 100111 1 ..... 00000 011 ..... 1010111 @r2rd vmv2r_v 100111 1 ..... 00001 011 ..... 1010111 @r2rd vmv4r_v 100111 1 ..... 00011 011 ..... 1010111 @r2rd diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc index 0a9cd1e..066dc36 100644 --- a/target/riscv/insn_trans/trans_rvbf16.c.inc +++ b/target/riscv/insn_trans/trans_rvbf16.c.inc @@ -119,8 +119,11 @@ static bool trans_vfwmaccbf16_vv(DisasContext *ctx, arg_vfwmaccbf16_vv *a) REQUIRE_FPU; REQUIRE_ZVFBFWMA(ctx); + uint8_t sew = ctx->sew; if (require_rvv(ctx) && vext_check_isa_ill(ctx) && (ctx->sew == MO_16) && - vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm)) { + vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs1, sew, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) { uint32_t data = 0; gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN); @@ -146,8 +149,10 @@ static bool trans_vfwmaccbf16_vf(DisasContext *ctx, arg_vfwmaccbf16_vf *a) REQUIRE_FPU; REQUIRE_ZVFBFWMA(ctx); + uint8_t sew = ctx->sew; if (require_rvv(ctx) && (ctx->sew == MO_16) && vext_check_isa_ill(ctx) && - vext_check_ds(ctx, a->rd, a->rs2, a->vm)) { + vext_check_ds(ctx, a->rd, a->rs2, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) { uint32_t data = 0; gen_set_rm(ctx, RISCV_FRM_DYN); diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index b9883a5..f4b5460 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s) } } -/* Destination vector register group cannot overlap source mask register. */ -static bool require_vm(int vm, int vd) +/* + * Source and destination vector register groups cannot overlap source mask + * register: + * + * A vector register cannot be used to provide source operands with more than + * one EEW for a single instruction. A mask register source is considered to + * have EEW=1 for this constraint. 
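+ * (So, for example, a masked instruction whose non-mask source group
+ * includes v0 is reserved: v0 would be read both at EEW=1 as the mask
+ * and at EEW=SEW as a source, which is the case vext_check_input_eew()
+ * below rejects via require_vm().)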
An encoding that would result in the same + * vector register being read with two or more different EEWs, including when + * the vector register appears at different positions within two or more vector + * register groups, is reserved. + * (Section 5.2) + * + * A destination vector register group can overlap a source vector + * register group only if one of the following holds: + * 1. The destination EEW equals the source EEW. + * 2. The destination EEW is smaller than the source EEW and the overlap + * is in the lowest-numbered part of the source register group. + * 3. The destination EEW is greater than the source EEW, the source EMUL + * is at least 1, and the overlap is in the highest-numbered part of + * the destination register group. + * For the purpose of determining register group overlap constraints, mask + * elements have EEW=1. + * (Section 5.2) + */ +static bool require_vm(int vm, int v) { - return (vm != 0 || vd != 0); + return (vm != 0 || v != 0); } static bool require_nf(int vd, int nf, int lmul) @@ -179,7 +202,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) s1 = get_gpr(s, rs1, EXT_ZERO); } - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0))); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); @@ -199,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) dst = dest_gpr(s, rd); - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0)); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); gen_update_pc(s, s->cur_insn_len); @@ -356,11 +379,41 @@ static bool vext_check_ld_index(DisasContext *s, int vd, int vs2, return ret; } +/* + * Check whether a vector register is used to provide source operands with + * more than one EEW for the vector instruction. 
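+ * eew, sew and lmul are all log2-encoded here (MO_8..MO_64, signed
+ * fractional LMUL), so EMUL = EEW - SEW + LMUL. For example, a widening
+ * source read at eew = MO_32 with sew = MO_16 and lmul = 0 (LMUL=1)
+ * gives emul = 2 - 1 + 0 = 1, i.e. a two-register group (1 << emul).
+ *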
+ * Returns true if the instruction has valid encoding + * Returns false if encoding violates the mismatched input EEWs constraint + */ +static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1, + int vs2, uint8_t eew_vs2, int vm) +{ + bool is_valid = true; + int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul; + int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul; + + /* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */ + if ((vs1 != -1 && !require_vm(vm, vs1)) || + (vs2 != -1 && !require_vm(vm, vs2))) { + is_valid = false; + } + + /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */ + if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) && + is_overlapped(vs1, 1 << MAX(emul_vs1, 0), + vs2, 1 << MAX(emul_vs2, 0))) { + is_valid = false; + } + + return is_valid; +} + static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) { return require_vm(vm, vd) && require_align(vd, s->lmul) && - require_align(vs, s->lmul); + require_align(vs, s->lmul) && + vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm); } /* @@ -379,6 +432,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ss(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) && require_align(vs1, s->lmul); } @@ -474,6 +528,7 @@ static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2, static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm) { return vext_wide_check_common(s, vd, vm) && + vext_check_input_eew(s, vs, s->sew, -1, 0, vm) && require_align(vs, s->lmul) && require_noover(vd, s->lmul + 1, vs, s->lmul); } @@ -481,6 +536,7 @@ static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm) static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) { return vext_wide_check_common(s, vd, vm) && + vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) && require_align(vs, s->lmul + 1); } @@ -499,6 +555,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) && require_align(vs1, s->lmul) && require_noover(vd, s->lmul + 1, vs1, s->lmul); } @@ -521,12 +578,14 @@ static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs1, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) && require_align(vs2, s->lmul + 1); } static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm) { - bool ret = vext_narrow_check_common(s, vd, vs, vm); + bool ret = vext_narrow_check_common(s, vd, vs, vm) && + vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm); if (vd != vs) { ret &= require_noover(vd, s->lmul, vs, s->lmul + 1); } @@ -549,6 +608,7 @@ static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm) static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_sd(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) && require_align(vs1, s->lmul); } @@ -584,7 +644,9 @@ static bool vext_check_slide(DisasContext *s, int vd, int vs2, { bool ret = require_align(vs2, s->lmul) && require_align(vd, s->lmul) && - require_vm(vm, vd); + require_vm(vm, vd) && + vext_check_input_eew(s, -1, 0, vs2, s->sew, vm); + if (is_over) { ret &= (vd != vs2); } @@ 
-981,7 +1043,8 @@ static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) { return require_rvv(s) && vext_check_isa_ill(s) && - vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew); + vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) && + vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm); } GEN_VEXT_TRANS(vlxei8_v, MO_8, rnfvm, ld_index_op, ld_index_check) @@ -1033,7 +1096,8 @@ static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) { return require_rvv(s) && vext_check_isa_ill(s) && - vext_check_st_index(s, a->rd, a->rs2, a->nf, eew); + vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) && + vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm); } GEN_VEXT_TRANS(vsxei8_v, MO_8, rnfvm, st_index_op, st_index_check) @@ -1063,6 +1127,12 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, fn(dest, mask, base, tcg_env, desc); finalize_rvv_inst(s); + + /* vector unit-stride fault-only-first load may modify vl CSR */ + gen_update_pc(s, s->cur_insn_len); + lookup_and_goto_ptr(s); + s->base.is_jmp = DISAS_NORETURN; + return true; } @@ -1100,25 +1170,86 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check) typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32); static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, - gen_helper_ldst_whole *fn, - DisasContext *s) + uint32_t log2_esz, gen_helper_ldst_whole *fn, + DisasContext *s, bool is_load) { - TCGv_ptr dest; - TCGv base; - TCGv_i32 desc; - - uint32_t data = FIELD_DP32(0, VDATA, NF, nf); - data = FIELD_DP32(data, VDATA, VM, 1); - dest = tcg_temp_new_ptr(); - desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, - s->cfg_ptr->vlenb, data)); - - base = get_gpr(s, rs1, EXT_NONE); - tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); - mark_vs_dirty(s); - fn(dest, base, tcg_env, desc); + /* + * Load/store multiple bytes per iteration. + * When possible do this atomically. + * Update vstart with the number of processed elements. + * Use the helper function if either: + * - vstart is not 0. + * - the target has 32 bit registers and we are loading/storing 64 bit long + * elements. This is to ensure that we process every element with a single + * memory instruction. 
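+ * Atomicity of the inline path: EEW=8 elements need no per-element
+ * guarantee, hence MO_ATOM_NONE below; for wider elements the bundled
+ * 64-bit (or 32-bit) host access uses MO_ATOM_IFALIGN_PAIR, so it may be
+ * treated as two naturally aligned halves that are each atomic, which is
+ * enough to keep the individual elements inside them from tearing.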
+ */ + + bool use_helper_fn = !(s->vstart_eq_zero) || + (TCG_TARGET_REG_BITS == 32 && log2_esz == 3); + + if (!use_helper_fn) { + TCGv addr = tcg_temp_new(); + uint32_t size = s->cfg_ptr->vlenb * nf; + TCGv_i64 t8 = tcg_temp_new_i64(); + TCGv_i32 t4 = tcg_temp_new_i32(); + MemOp atomicity = MO_ATOM_NONE; + if (log2_esz == 0) { + atomicity = MO_ATOM_NONE; + } else { + atomicity = MO_ATOM_IFALIGN_PAIR; + } + if (TCG_TARGET_REG_BITS == 64) { + for (int i = 0; i < size; i += 8) { + addr = get_address(s, rs1, i); + if (is_load) { + tcg_gen_qemu_ld_i64(t8, addr, s->mem_idx, + MO_LE | MO_64 | atomicity); + tcg_gen_st_i64(t8, tcg_env, vreg_ofs(s, vd) + i); + } else { + tcg_gen_ld_i64(t8, tcg_env, vreg_ofs(s, vd) + i); + tcg_gen_qemu_st_i64(t8, addr, s->mem_idx, + MO_LE | MO_64 | atomicity); + } + if (i == size - 8) { + tcg_gen_movi_tl(cpu_vstart, 0); + } else { + tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz); + } + } + } else { + for (int i = 0; i < size; i += 4) { + addr = get_address(s, rs1, i); + if (is_load) { + tcg_gen_qemu_ld_i32(t4, addr, s->mem_idx, + MO_LE | MO_32 | atomicity); + tcg_gen_st_i32(t4, tcg_env, vreg_ofs(s, vd) + i); + } else { + tcg_gen_ld_i32(t4, tcg_env, vreg_ofs(s, vd) + i); + tcg_gen_qemu_st_i32(t4, addr, s->mem_idx, + MO_LE | MO_32 | atomicity); + } + if (i == size - 4) { + tcg_gen_movi_tl(cpu_vstart, 0); + } else { + tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz); + } + } + } + } else { + TCGv_ptr dest; + TCGv base; + TCGv_i32 desc; + uint32_t data = FIELD_DP32(0, VDATA, NF, nf); + data = FIELD_DP32(data, VDATA, VM, 1); + dest = tcg_temp_new_ptr(); + desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, + s->cfg_ptr->vlenb, data)); + base = get_gpr(s, rs1, EXT_NONE); + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); + fn(dest, base, tcg_env, desc); + } finalize_rvv_inst(s); return true; @@ -1128,42 +1259,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, * load and store whole register instructions ignore vtype and vl setting. * Thus, we don't need to check vill bit. 
(Section 7.9) */ -#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \ -static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \ -{ \ - if (require_rvv(s) && \ - QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \ - return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \ - gen_helper_##NAME, s); \ - } \ - return false; \ -} - -GEN_LDST_WHOLE_TRANS(vl1re8_v, 1) -GEN_LDST_WHOLE_TRANS(vl1re16_v, 1) -GEN_LDST_WHOLE_TRANS(vl1re32_v, 1) -GEN_LDST_WHOLE_TRANS(vl1re64_v, 1) -GEN_LDST_WHOLE_TRANS(vl2re8_v, 2) -GEN_LDST_WHOLE_TRANS(vl2re16_v, 2) -GEN_LDST_WHOLE_TRANS(vl2re32_v, 2) -GEN_LDST_WHOLE_TRANS(vl2re64_v, 2) -GEN_LDST_WHOLE_TRANS(vl4re8_v, 4) -GEN_LDST_WHOLE_TRANS(vl4re16_v, 4) -GEN_LDST_WHOLE_TRANS(vl4re32_v, 4) -GEN_LDST_WHOLE_TRANS(vl4re64_v, 4) -GEN_LDST_WHOLE_TRANS(vl8re8_v, 8) -GEN_LDST_WHOLE_TRANS(vl8re16_v, 8) -GEN_LDST_WHOLE_TRANS(vl8re32_v, 8) -GEN_LDST_WHOLE_TRANS(vl8re64_v, 8) +#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD) \ +static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \ +{ \ + if (require_rvv(s) && \ + QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \ + return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)), \ + gen_helper_##NAME, s, IS_LOAD); \ + } \ + return false; \ +} + +GEN_LDST_WHOLE_TRANS(vl1re8_v, int8_t, 1, true) +GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true) +GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true) +GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true) +GEN_LDST_WHOLE_TRANS(vl2re8_v, int8_t, 2, true) +GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true) +GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true) +GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true) +GEN_LDST_WHOLE_TRANS(vl4re8_v, int8_t, 4, true) +GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true) +GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true) +GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true) +GEN_LDST_WHOLE_TRANS(vl8re8_v, int8_t, 8, true) +GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true) +GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true) +GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true) /* * The vector whole register store instructions are encoded similar to * unmasked unit-stride store of elements with EEW=8. 
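+ * Hence all four store variants below instantiate the macro with int8_t
+ * (log2_esz = 0): they copy raw bytes whatever the register count (nf).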
*/ -GEN_LDST_WHOLE_TRANS(vs1r_v, 1) -GEN_LDST_WHOLE_TRANS(vs2r_v, 2) -GEN_LDST_WHOLE_TRANS(vs4r_v, 4) -GEN_LDST_WHOLE_TRANS(vs8r_v, 8) +GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false) +GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false) +GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false) +GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false) /* *** Vector Integer Arithmetic Instructions @@ -1475,6 +1606,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +/* OPIVV with overwrite and WIDEN */ +static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, gen_helper_gvec_4_ptr *fn, bool (*checkfn)(DisasContext *, arg_rmrr *)) @@ -1522,6 +1663,14 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + #define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ { \ @@ -1993,13 +2142,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check) GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) /* Vector Widening Integer Multiply-Add Instructions */ -GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check) /* Vector Integer Merge and Move Instructions */ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) @@ -2340,6 +2489,17 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + /* OPFVV with WIDEN */ #define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ @@ -2379,11 +2539,21 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + 
vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + /* OPFVF with WIDEN */ -#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ { \ - if (opfvf_widen_check(s, a)) { \ + if (CHECK(s, a)) { \ uint32_t data = 0; \ static gen_helper_opfvf *const fns[2] = { \ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ @@ -2399,8 +2569,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ return false; \ } -GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) -GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) { @@ -2482,7 +2652,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) /* Vector Widening Floating-Point Multiply */ GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) @@ -2503,14 +2673,14 @@ GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ -GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) /* Vector Floating-Point Square-Root Instruction */ @@ -3391,7 +3561,6 @@ static bool slideup_check(DisasContext *s, arg_rmrr *a) } GEN_OPIVX_TRANS(vslideup_vx, slideup_check) -GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check) static bool slidedown_check(DisasContext *s, arg_rmrr *a) @@ -3402,9 +3571,56 @@ static bool slidedown_check(DisasContext *s, arg_rmrr *a) } GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check) -GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check) GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check) +typedef void gen_helper_vslide1_vx(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +#define GEN_OPIVX_VSLIDE1_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_vslide1_vx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + TCGv_ptr dest, src2, mask; \ + TCGv_i64 src1; \ + TCGv_i32 desc; \ + uint32_t data = 0; \ + \ + dest = tcg_temp_new_ptr(); \ + mask = tcg_temp_new_ptr(); \ + src2 = tcg_temp_new_ptr(); \ + src1 = tcg_temp_new_i64(); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, 
VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data)); \ + \ + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); \ + tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); \ + tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); \ + \ + fns[s->sew](dest, mask, src1, src2, tcg_env, desc); \ + \ + tcg_gen_movi_tl(cpu_vstart, 0); \ + finalize_rvv_inst(s); \ + \ + return true; \ + } \ + return false; \ +} + +GEN_OPIVX_VSLIDE1_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVX_VSLIDE1_TRANS(vslide1down_vx, slidedown_check) + /* Vector Floating-Point Slide Instructions */ static bool fslideup_check(DisasContext *s, arg_rmrr *a) { @@ -3426,6 +3642,7 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) && require_align(a->rd, s->lmul) && require_align(a->rs1, s->lmul) && require_align(a->rs2, s->lmul) && @@ -3438,6 +3655,7 @@ static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a) int8_t emul = MO_16 - s->sew + s->lmul; return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) && (emul >= -3 && emul <= 3) && require_align(a->rd, s->lmul) && require_align(a->rs1, emul) && @@ -3457,6 +3675,7 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) && require_align(a->rd, s->lmul) && require_align(a->rs2, s->lmul) && (a->rd != a->rs2) && @@ -3600,7 +3819,9 @@ static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div) require_align(a->rd, s->lmul) && require_align(a->rs2, s->lmul - div) && require_vm(a->vm, a->rd) && - require_noover(a->rd, s->lmul, a->rs2, s->lmul - div); + require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) && + vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm); + return ret; } diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc b/target/riscv/insn_trans/trans_rvzce.c.inc index c77c2b9..dd15af0 100644 --- a/target/riscv/insn_trans/trans_rvzce.c.inc +++ b/target/riscv/insn_trans/trans_rvzce.c.inc @@ -88,13 +88,13 @@ static bool trans_c_lbu(DisasContext *ctx, arg_c_lbu *a) static bool trans_c_lhu(DisasContext *ctx, arg_c_lhu *a) { REQUIRE_ZCB(ctx); - return gen_load(ctx, a, MO_UW); + return gen_load(ctx, a, MO_TEUW); } static bool trans_c_lh(DisasContext *ctx, arg_c_lh *a) { REQUIRE_ZCB(ctx); - return gen_load(ctx, a, MO_SW); + return gen_load(ctx, a, MO_TESW); } static bool trans_c_sb(DisasContext *ctx, arg_c_sb *a) @@ -106,7 +106,7 @@ static bool trans_c_sb(DisasContext *ctx, arg_c_sb *a) static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a) { REQUIRE_ZCB(ctx); - return gen_store(ctx, a, MO_UW); + return gen_store(ctx, a, MO_TEUW); } #define X_S0 8 diff --git a/target/riscv/insn_trans/trans_rvzicfiss.c.inc b/target/riscv/insn_trans/trans_rvzicfiss.c.inc index b0096ad..f4a1c12 100644 --- a/target/riscv/insn_trans/trans_rvzicfiss.c.inc +++ b/target/riscv/insn_trans/trans_rvzicfiss.c.inc @@ -40,6 +40,7 @@ static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a) tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip); tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), tcg_env, offsetof(CPURISCVState, sw_check_code)); + gen_update_pc(ctx, 0); gen_helper_raise_exception(tcg_env, 
tcg_constant_i32(RISCV_EXCP_SW_CHECK));
 
     gen_set_label(skip);
@@ -90,7 +91,11 @@ static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a)
     }
 
     if (!ctx->bcfi_enabled) {
+#ifndef CONFIG_USER_ONLY
+        gen_helper_ssamoswap_disabled(tcg_env);
+#else
         return false;
+#endif
     }
 
     TCGv dest = dest_gpr(ctx, a->rd);
@@ -115,7 +120,11 @@ static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a)
     }
 
     if (!ctx->bcfi_enabled) {
+#ifndef CONFIG_USER_ONLY
+        gen_helper_ssamoswap_disabled(tcg_env);
+#else
         return false;
+#endif
     }
 
     TCGv dest = dest_gpr(ctx, a->rd);
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 213aff3..172296f 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -142,6 +142,33 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
     }
 }
 
+static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f)
+{
+    /* Disable the nanbox check when zfinx is enabled */
+    if (env_archcpu(env)->cfg.ext_zfinx) {
+        return (uint16_t)f;
+    }
+
+    uint64_t mask = MAKE_64BIT_MASK(16, 48);
+
+    if (likely((f & mask) == mask)) {
+        return (uint16_t)f;
+    } else {
+        return 0x7FC0u; /* default qnan */
+    }
+}
+
+static inline target_ulong get_xepc_mask(CPURISCVState *env)
+{
+    /* When IALIGN=32, both low bits must be zero.
+     * When IALIGN=16 (has C extension), only bit 0 must be zero. */
+    if (riscv_has_ext(env, RVC)) {
+        return ~(target_ulong)1;
+    } else {
+        return ~(target_ulong)3;
+    }
+}
+
 #ifndef CONFIG_USER_ONLY
 /* Our implementation of SysemuCPUOps::has_work */
 bool riscv_cpu_has_work(CPUState *cs);
@@ -201,4 +228,9 @@ static inline target_ulong adjust_addr_virt(CPURISCVState *env,
     return adjust_addr_body(env, addr, true);
 }
 
+static inline int insn_len(uint16_t first_word)
+{
+    return (first_word & 3) == 3 ? 4 : 2;
+}
+
 #endif
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 0f4997a..75ca3fb 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -35,7 +35,7 @@
 #include "accel/accel-cpu-target.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
 #include "hw/intc/riscv_imsic.h"
@@ -58,33 +58,17 @@ void riscv_kvm_aplic_request(void *opaque, int irq, int level)
 
 static bool cap_has_mp_state;
 
-static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type,
-                                       uint64_t idx)
-{
-    uint64_t id = KVM_REG_RISCV | type | idx;
-
-    switch (riscv_cpu_mxl(env)) {
-    case MXL_RV32:
-        id |= KVM_REG_SIZE_U32;
-        break;
-    case MXL_RV64:
-        id |= KVM_REG_SIZE_U64;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    return id;
-}
+#define KVM_RISCV_REG_ID_U32(type, idx) (KVM_REG_RISCV | KVM_REG_SIZE_U32 | \
+                                         type | idx)
 
-static uint64_t kvm_riscv_reg_id_u32(uint64_t type, uint64_t idx)
-{
-    return KVM_REG_RISCV | KVM_REG_SIZE_U32 | type | idx;
-}
+#define KVM_RISCV_REG_ID_U64(type, idx) (KVM_REG_RISCV | KVM_REG_SIZE_U64 | \
+                                         type | idx)
 
-static uint64_t kvm_riscv_reg_id_u64(uint64_t type, uint64_t idx)
-{
-    return KVM_REG_RISCV | KVM_REG_SIZE_U64 | type | idx;
-}
+#if defined(TARGET_RISCV64)
+#define KVM_RISCV_REG_ID_ULONG(type, idx) KVM_RISCV_REG_ID_U64(type, idx)
+#else
+#define KVM_RISCV_REG_ID_ULONG(type, idx) KVM_RISCV_REG_ID_U32(type, idx)
+#endif
 
 static uint64_t kvm_encode_reg_size_id(uint64_t id, size_t size_b)
 {
@@ -107,45 +91,29 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu,
     return kvm_encode_reg_size_id(id, size_b);
 }
 
-#define RISCV_CORE_REG(env, name) \
-    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, \
                           KVM_REG_RISCV_CORE_REG(name))
+#define RISCV_CORE_REG(name) \
+    KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, \
+                           KVM_REG_RISCV_CORE_REG(name))
 
-#define RISCV_CSR_REG(env, name) \
-    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CSR, \
+#define RISCV_CSR_REG(name) \
+    KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CSR, \
                            KVM_REG_RISCV_CSR_REG(name))
 
-#define RISCV_CONFIG_REG(env, name) \
-    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, \
+#define RISCV_CONFIG_REG(name) \
+    KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG, \
                            KVM_REG_RISCV_CONFIG_REG(name))
 
-#define RISCV_TIMER_REG(name)  kvm_riscv_reg_id_u64(KVM_REG_RISCV_TIMER, \
+#define RISCV_TIMER_REG(name)  KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_TIMER, \
                                KVM_REG_RISCV_TIMER_REG(name))
 
-#define RISCV_FP_F_REG(idx)  kvm_riscv_reg_id_u32(KVM_REG_RISCV_FP_F, idx)
+#define RISCV_FP_F_REG(idx)  KVM_RISCV_REG_ID_U32(KVM_REG_RISCV_FP_F, idx)
 
-#define RISCV_FP_D_REG(idx)  kvm_riscv_reg_id_u64(KVM_REG_RISCV_FP_D, idx)
+#define RISCV_FP_D_REG(idx)  KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_FP_D, idx)
 
-#define RISCV_VECTOR_CSR_REG(env, name) \
-    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_VECTOR, \
+#define RISCV_VECTOR_CSR_REG(name) \
+    KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_VECTOR, \
                            KVM_REG_RISCV_VECTOR_CSR_REG(name))
 
-#define KVM_RISCV_GET_CSR(cs, env, csr, reg) \
-    do { \
-        int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), &reg); \
-        if (_ret) { \
-            return _ret; \
-        } \
-    } while (0)
-
-#define KVM_RISCV_SET_CSR(cs, env, csr, reg) \
-    do { \
-        int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), &reg); \
-        if (_ret) { \
-            return _ret; \
-        } \
-    } while (0)
-
 #define KVM_RISCV_GET_TIMER(cs, name, reg) \
     do { \
         int ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(name), &reg); \
@@ -167,6 +135,7 @@ typedef struct KVMCPUConfig {
     const char *description;
     target_ulong offset;
     uint64_t kvm_reg_id;
+    uint32_t prop_size;
     bool user_set;
     bool supported;
 } KVMCPUConfig;
@@ -248,7 +217,7 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
 
         /* If we're here we're going to disable the MISA bit */
         reg = 0;
-        id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
+        id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT,
                                     misa_cfg->kvm_reg_id);
         ret = kvm_set_one_reg(cs, id, &reg);
         if (ret != 0) {
@@ -267,6 +236,56 @@
     }
 }
 
+#define KVM_CSR_CFG(_name, _env_prop, reg_id) \
+    {.name = _name, .offset = ENV_CSR_OFFSET(_env_prop), \
+     .prop_size = sizeof(((CPURISCVState *)0)->_env_prop), \
+     .kvm_reg_id = reg_id}
+
+static KVMCPUConfig kvm_csr_cfgs[] = {
+    KVM_CSR_CFG("sstatus", mstatus, RISCV_CSR_REG(sstatus)),
+    KVM_CSR_CFG("sie", mie, RISCV_CSR_REG(sie)),
+    KVM_CSR_CFG("stvec", stvec, RISCV_CSR_REG(stvec)),
+    KVM_CSR_CFG("sscratch", sscratch, RISCV_CSR_REG(sscratch)),
+    KVM_CSR_CFG("sepc", sepc, RISCV_CSR_REG(sepc)),
+    KVM_CSR_CFG("scause", scause, RISCV_CSR_REG(scause)),
+    KVM_CSR_CFG("stval", stval, RISCV_CSR_REG(stval)),
+    KVM_CSR_CFG("sip", mip, RISCV_CSR_REG(sip)),
+    KVM_CSR_CFG("satp", satp, RISCV_CSR_REG(satp)),
+    KVM_CSR_CFG("scounteren", scounteren, RISCV_CSR_REG(scounteren)),
+    KVM_CSR_CFG("senvcfg", senvcfg, RISCV_CSR_REG(senvcfg)),
+};
+
+static void *kvmconfig_get_env_addr(RISCVCPU *cpu, KVMCPUConfig *csr_cfg)
+{
+    return (void *)&cpu->env + csr_cfg->offset;
+}
+
+static uint32_t kvm_cpu_csr_get_u32(RISCVCPU *cpu, KVMCPUConfig *csr_cfg)
+{
+    uint32_t *val32 = kvmconfig_get_env_addr(cpu, csr_cfg);
+    return *val32;
+}
+
+static uint64_t kvm_cpu_csr_get_u64(RISCVCPU *cpu, KVMCPUConfig *csr_cfg)
+{
+    uint64_t *val64 = kvmconfig_get_env_addr(cpu, csr_cfg);
+    return *val64;
+}
+
+static void kvm_cpu_csr_set_u32(RISCVCPU *cpu, KVMCPUConfig *csr_cfg,
+                                uint32_t val)
+{
+    uint32_t *val32 = kvmconfig_get_env_addr(cpu, csr_cfg);
+    *val32 = val;
+}
+
+static void kvm_cpu_csr_set_u64(RISCVCPU *cpu, KVMCPUConfig *csr_cfg,
+                                uint64_t val)
+{
+    uint64_t *val64 = kvmconfig_get_env_addr(cpu, csr_cfg);
+    *val64 = val;
+}
+
 #define KVM_EXT_CFG(_name, _prop, _reg_id) \
     {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
      .kvm_reg_id = _reg_id}
@@ -434,7 +453,6 @@ static KVMCPUConfig kvm_sbi_dbcn = {
 
 static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
 {
-    CPURISCVState *env = &cpu->env;
     uint64_t id, reg;
     int i, ret;
 
@@ -445,7 +463,7 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
             continue;
         }
 
-        id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
+        id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT,
                                     multi_ext_cfg->kvm_reg_id);
         reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
         ret = kvm_set_one_reg(cs, id, &reg);
@@ -570,14 +588,14 @@ static int kvm_riscv_get_regs_core(CPUState *cs)
     target_ulong reg;
     CPURISCVState *env = &RISCV_CPU(cs)->env;
 
-    ret = kvm_get_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
+    ret = kvm_get_one_reg(cs, RISCV_CORE_REG(regs.pc), &reg);
     if (ret) {
         return ret;
     }
     env->pc = reg;
 
     for (i = 1; i < 32; i++) {
-        uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
+        uint64_t id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, i);
         ret = kvm_get_one_reg(cs, id, &reg);
         if (ret) {
             return ret;
@@ -596,13 +614,13 @@ static int kvm_riscv_put_regs_core(CPUState *cs)
     CPURISCVState *env = &RISCV_CPU(cs)->env;
 
     reg = env->pc;
-    ret = kvm_set_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
+    ret = kvm_set_one_reg(cs, RISCV_CORE_REG(regs.pc), &reg);
     if (ret) {
         return ret;
     }
 
     for (i = 1; i < 32; i++) {
-        uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
+        uint64_t id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, i);
         reg = env->gpr[i];
         ret = kvm_set_one_reg(cs, id, &reg);
         if (ret) {
@@ -613,53 +631,81 @@
     return ret;
 }
 
-static void kvm_riscv_reset_regs_csr(CPURISCVState *env)
-{
-    env->mstatus = 0;
-    env->mie = 0;
-    env->stvec = 0;
-    env->sscratch = 0;
-    env->sepc = 0;
-    env->scause = 0;
-    env->stval = 0;
-    env->mip = 0;
-    env->satp = 0;
-}
-
 static int kvm_riscv_get_regs_csr(CPUState *cs)
 {
-    CPURISCVState *env = &RISCV_CPU(cs)->env;
+    RISCVCPU *cpu = RISCV_CPU(cs);
+    uint64_t reg;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) {
+        KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i];
+
+        if (!csr_cfg->supported) {
+            continue;
+        }
+
+        ret = kvm_get_one_reg(cs, csr_cfg->kvm_reg_id, &reg);
+        if (ret) {
+            return ret;
+        }
 
-    KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
-    KVM_RISCV_GET_CSR(cs, env, sie, env->mie);
-    KVM_RISCV_GET_CSR(cs, env, stvec, env->stvec);
-    KVM_RISCV_GET_CSR(cs, env, sscratch, env->sscratch);
-    KVM_RISCV_GET_CSR(cs, env, sepc, env->sepc);
-    KVM_RISCV_GET_CSR(cs, env, scause, env->scause);
-    KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
-    KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
-    KVM_RISCV_GET_CSR(cs, env, satp, env->satp);
+        if (csr_cfg->prop_size == sizeof(uint32_t)) {
+            kvm_cpu_csr_set_u32(cpu, csr_cfg, (uint32_t)reg);
+        } else if (csr_cfg->prop_size == sizeof(uint64_t)) {
+            kvm_cpu_csr_set_u64(cpu, csr_cfg, reg);
+        } else {
+            g_assert_not_reached();
+        }
+    }
 
     return 0;
 }
 
 static int kvm_riscv_put_regs_csr(CPUState *cs)
 {
-    CPURISCVState *env = &RISCV_CPU(cs)->env;
+    RISCVCPU *cpu = RISCV_CPU(cs);
+    uint64_t reg;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) {
+        KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i];
+
+        if (!csr_cfg->supported) {
+            continue;
+        }
+
+        if (csr_cfg->prop_size == sizeof(uint32_t)) {
+            reg = kvm_cpu_csr_get_u32(cpu, csr_cfg);
+        } else if (csr_cfg->prop_size == sizeof(uint64_t)) {
+            reg = kvm_cpu_csr_get_u64(cpu, csr_cfg);
+        } else {
+            g_assert_not_reached();
+        }
 
-    KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
-    KVM_RISCV_SET_CSR(cs, env, sie, env->mie);
-    KVM_RISCV_SET_CSR(cs, env, stvec, env->stvec);
-    KVM_RISCV_SET_CSR(cs, env, sscratch, env->sscratch);
-    KVM_RISCV_SET_CSR(cs, env, sepc, env->sepc);
-    KVM_RISCV_SET_CSR(cs, env, scause, env->scause);
-    KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
-    KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
-    KVM_RISCV_SET_CSR(cs, env, satp, env->satp);
+        ret = kvm_set_one_reg(cs, csr_cfg->kvm_reg_id, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
 
     return 0;
 }
 
+static void kvm_riscv_reset_regs_csr(CPURISCVState *env)
+{
+    env->mstatus = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
+    env->satp = 0;
+    env->scounteren = 0;
+    env->senvcfg = 0;
+}
+
 static int kvm_riscv_get_regs_fp(CPUState *cs)
 {
     int ret = 0;
@@ -800,26 +846,26 @@ static int kvm_riscv_get_regs_vector(CPUState *cs)
         return 0;
     }
 
-    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
+    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vstart), &reg);
     if (ret) {
         return ret;
     }
     env->vstart = reg;
 
-    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
+    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vl), &reg);
     if (ret) {
         return ret;
     }
     env->vl = reg;
 
-    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
+    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vtype), &reg);
     if (ret) {
         return ret;
    }
     env->vtype = reg;
 
     if (kvm_v_vlenb.supported) {
-        ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
+        ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vlenb), &reg);
         if (ret) {
             return ret;
         }
@@ -857,26 +903,26 @@ static int kvm_riscv_put_regs_vector(CPUState *cs)
     }
 
     reg = env->vstart;
-    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
+    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vstart), &reg);
     if (ret) {
         return ret;
     }
 
     reg = env->vl;
-    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
+    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vl), &reg);
     if (ret) {
         return ret;
     }
 
     reg = env->vtype;
-    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
+    ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vtype), &reg);
     if (ret) {
         return ret;
     }
 
     if (kvm_v_vlenb.supported) {
         reg = cpu->cfg.vlenb;
-        ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
+        ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vlenb), &reg);
 
         for (int i = 0; i < 32; i++) {
             /*
@@ -953,27 +999,39 @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch)
     close(scratch->kvmfd);
 }
 
+static void kvm_riscv_init_max_satp_mode(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
+{
+    struct kvm_one_reg reg;
+    int ret;
+
+    reg.id = RISCV_CONFIG_REG(satp_mode);
+    reg.addr = (uint64_t)&cpu->cfg.max_satp_mode;
+    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+    if (ret != 0) {
+        error_report("Unable to retrieve satp mode from host, error %d", ret);
+    }
+}
+
 static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
 {
-    CPURISCVState *env = &cpu->env;
     struct kvm_one_reg reg;
     int ret;
 
-    reg.id = RISCV_CONFIG_REG(env, mvendorid);
+    reg.id = RISCV_CONFIG_REG(mvendorid);
     reg.addr = (uint64_t)&cpu->cfg.mvendorid;
     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to retrieve mvendorid from host, error %d", ret);
     }
 
-    reg.id = RISCV_CONFIG_REG(env, marchid);
+    reg.id = RISCV_CONFIG_REG(marchid);
     reg.addr = (uint64_t)&cpu->cfg.marchid;
     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to retrieve marchid from host, error %d", ret);
     }
 
-    reg.id = RISCV_CONFIG_REG(env, mimpid);
+    reg.id = RISCV_CONFIG_REG(mimpid);
     reg.addr = (uint64_t)&cpu->cfg.mimpid;
     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
@@ -988,7 +1046,7 @@ static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu,
     struct kvm_one_reg reg;
     int ret;
 
-    reg.id = RISCV_CONFIG_REG(env, isa);
+    reg.id = RISCV_CONFIG_REG(isa);
     reg.addr = (uint64_t)&env->misa_ext_mask;
     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
 
@@ -1005,11 +1063,10 @@ static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
                                          KVMCPUConfig *cbomz_cfg)
 {
-    CPURISCVState *env = &cpu->env;
     struct kvm_one_reg reg;
     int ret;
 
-    reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
+    reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG,
                                     cbomz_cfg->kvm_reg_id);
     reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg);
     ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
@@ -1023,7 +1080,6 @@ static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu,
                                            KVMScratchCPU *kvmcpu)
 {
-    CPURISCVState *env = &cpu->env;
     uint64_t val;
     int i, ret;
 
@@ -1031,7 +1087,7 @@
         KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
         struct kvm_one_reg reg;
 
-        reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
+        reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT,
                                         multi_ext_cfg->kvm_reg_id);
         reg.addr = (uint64_t)&val;
         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
@@ -1061,6 +1117,32 @@
     }
 }
 
+static void kvm_riscv_read_csr_cfg_legacy(KVMScratchCPU *kvmcpu)
+{
+    uint64_t val;
+    int i, ret;
+
+    for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) {
+        KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i];
+        struct kvm_one_reg reg;
+
+        reg.id = csr_cfg->kvm_reg_id;
+        reg.addr = (uint64_t)&val;
+        ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
+        if (ret != 0) {
+            if (errno == EINVAL) {
+                csr_cfg->supported = false;
+            } else {
+                error_report("Unable to read KVM CSR %s: %s",
+                             csr_cfg->name, strerror(errno));
+                exit(EXIT_FAILURE);
+            }
+        } else {
+            csr_cfg->supported = true;
+        }
+    }
+}
+
 static int uint64_cmp(const void *a, const void *b)
 {
     uint64_t val1 = *(const uint64_t *)a;
@@ -1078,7 +1160,6 @@
 }
 
 static void kvm_riscv_check_sbi_dbcn_support(RISCVCPU *cpu,
-                                             KVMScratchCPU *kvmcpu,
                                              struct kvm_reg_list *reglist)
 {
     struct kvm_reg_list *reg_search;
@@ -1118,12 +1199,31 @@ static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
     }
 }
 
-static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
+static void kvm_riscv_read_csr_cfg(struct kvm_reg_list *reglist)
+{
+    struct kvm_reg_list *reg_search;
+    uint64_t reg_id;
+
+    for (int i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) {
+        KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i];
+
+        reg_id = csr_cfg->kvm_reg_id;
+        reg_search = bsearch(&reg_id, reglist->reg, reglist->n,
+                             sizeof(uint64_t), uint64_cmp);
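+        /*
+         * A CSR missing from the host's reg list keeps its default
+         * ->supported == false and is skipped on get/put.
+         */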
+        if (!reg_search) {
+            continue;
+        }
+
+        csr_cfg->supported = true;
+    }
+}
+
+static void kvm_riscv_init_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
 {
+    g_autofree struct kvm_reg_list *reglist = NULL;
     KVMCPUConfig *multi_ext_cfg;
     struct kvm_one_reg reg;
     struct kvm_reg_list rl_struct;
-    struct kvm_reg_list *reglist;
     uint64_t val, reg_id, *reg_search;
     int i, ret;
 
@@ -1135,7 +1235,9 @@
          * (EINVAL). Use read_legacy() in this case.
          */
        if (errno == EINVAL) {
-            return kvm_riscv_read_multiext_legacy(cpu, kvmcpu);
+            kvm_riscv_read_multiext_legacy(cpu, kvmcpu);
+            kvm_riscv_read_csr_cfg_legacy(kvmcpu);
+            return;
         } else if (errno != E2BIG) {
             /*
              * E2BIG is an expected error message for the API since we
@@ -1164,7 +1266,7 @@
 
     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
         multi_ext_cfg = &kvm_multi_ext_cfgs[i];
-        reg_id = kvm_riscv_reg_id_ulong(&cpu->env, KVM_REG_RISCV_ISA_EXT,
+        reg_id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT,
                                         multi_ext_cfg->kvm_reg_id);
         reg_search = bsearch(&reg_id, reglist->reg, reglist->n,
                              sizeof(uint64_t), uint64_cmp);
@@ -1197,7 +1299,8 @@
         kvm_riscv_read_vlenb(cpu, kvmcpu, reglist);
     }
 
-    kvm_riscv_check_sbi_dbcn_support(cpu, kvmcpu, reglist);
+    kvm_riscv_check_sbi_dbcn_support(cpu, reglist);
+    kvm_riscv_read_csr_cfg(reglist);
 }
 
 static void riscv_init_kvm_registers(Object *cpu_obj)
@@ -1211,7 +1314,8 @@
 
     kvm_riscv_init_machine_ids(cpu, &kvmcpu);
     kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu);
-    kvm_riscv_init_multiext_cfg(cpu, &kvmcpu);
+    kvm_riscv_init_cfg(cpu, &kvmcpu);
+    kvm_riscv_init_max_satp_mode(cpu, &kvmcpu);
 
     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
 }
@@ -1265,7 +1369,7 @@ int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
     return 0;
 }
 
-int kvm_arch_put_registers(CPUState *cs, int level, Error **errp)
+int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp)
 {
     int ret = 0;
 
@@ -1343,12 +1447,11 @@ void kvm_arch_init_irq_routing(KVMState *s)
 
 static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs)
 {
-    CPURISCVState *env = &cpu->env;
     target_ulong reg;
     uint64_t id;
     int ret;
 
-    id = RISCV_CONFIG_REG(env, mvendorid);
+    id = RISCV_CONFIG_REG(mvendorid);
     /*
      * cfg.mvendorid is an uint32 but a target_ulong will
      * be written. Assign it to a target_ulong var to avoid
@@ -1360,13 +1463,13 @@
         return ret;
     }
 
-    id = RISCV_CONFIG_REG(env, marchid);
+    id = RISCV_CONFIG_REG(marchid);
     ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid);
     if (ret != 0) {
         return ret;
     }
 
-    id = RISCV_CONFIG_REG(env, mimpid);
+    id = RISCV_CONFIG_REG(mimpid);
     ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid);
 
     return ret;
@@ -1383,6 +1486,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs)
     return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, &reg);
 }
 
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+    return 0;
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     int ret = 0;
@@ -1480,7 +1588,7 @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run)
              * Handle the case where a 32 bit CPU is running in a
             * 64 bit addressing env.
             */
-            if (riscv_cpu_mxl(&cpu->env) == MXL_RV32) {
+            if (riscv_cpu_is_32bit(cpu)) {
                 addr |= (uint64_t)run->riscv_sbi.args[2] << 32;
             }
@@ -1511,7 +1619,7 @@
         break;
     case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE:
         ch = run->riscv_sbi.args[0];
-        ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch));
+        ret = qemu_chr_fe_write_all(serial_hd(0)->be, &ch, sizeof(ch));
 
         if (ret < 0) {
             error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when "
@@ -1891,7 +1999,7 @@ static bool kvm_cpu_realize(CPUState *cs, Error **errp)
         }
     }
 
-   return true;
+    return true;
 }
 
 void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
@@ -1916,7 +2024,7 @@
 
     if (cpu->cfg.ext_zicbom &&
         riscv_cpu_option_set(kvm_cbom_blocksize.name)) {
-        reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
+        reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG,
                                         kvm_cbom_blocksize.kvm_reg_id);
         reg.addr = (uint64_t)&val;
         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
@@ -1935,7 +2043,7 @@
 
     if (cpu->cfg.ext_zicboz &&
         riscv_cpu_option_set(kvm_cboz_blocksize.name)) {
-        reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
+        reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG,
                                         kvm_cboz_blocksize.kvm_reg_id);
         reg.addr = (uint64_t)&val;
         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
@@ -1976,7 +2084,7 @@
     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
 }
 
-static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data)
+static void kvm_cpu_accel_class_init(ObjectClass *oc, const void *data)
 {
     AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
 
@@ -1997,22 +2105,25 @@ static void kvm_cpu_accel_register_types(void)
 }
 type_init(kvm_cpu_accel_register_types);
 
-static void riscv_host_cpu_class_init(ObjectClass *c, void *data)
-{
-    RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);
-
-#if defined(TARGET_RISCV32)
-    mcc->misa_mxl_max = MXL_RV32;
-#elif defined(TARGET_RISCV64)
-    mcc->misa_mxl_max = MXL_RV64;
-#endif
-}
-
 static const TypeInfo riscv_kvm_cpu_type_infos[] = {
     {
         .name = TYPE_RISCV_CPU_HOST,
         .parent = TYPE_RISCV_CPU,
-        .class_init = riscv_host_cpu_class_init,
+#if defined(TARGET_RISCV32)
+        .class_data = &(const RISCVCPUDef) {
+            .misa_mxl_max = MXL_RV32,
+            .priv_spec = RISCV_PROFILE_ATTR_UNUSED,
+            .vext_spec = RISCV_PROFILE_ATTR_UNUSED,
+            .cfg.max_satp_mode = -1,
+        },
+#elif defined(TARGET_RISCV64)
+        .class_data = &(const RISCVCPUDef) {
+            .misa_mxl_max = MXL_RV64,
+            .priv_spec = RISCV_PROFILE_ATTR_UNUSED,
+            .vext_spec = RISCV_PROFILE_ATTR_UNUSED,
+            .cfg.max_satp_mode = -1,
+        },
+#endif
     }
 };
diff --git a/target/riscv/m128_helper.c b/target/riscv/m128_helper.c
index ec14aaa..7d9b83b 100644
--- a/target/riscv/m128_helper.c
+++ b/target/riscv/m128_helper.c
@@ -19,7 +19,6 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
 target_ulong HELPER(divu_i128)(CPURISCVState *env,
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 889e2b6..18d790a 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -21,7 +21,7 @@
 #include "qemu/error-report.h"
 #include "system/kvm.h"
 #include "migration/cpu.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
 #include "debug.h"
 
 static bool pmp_needed(void *opaque)
@@ -36,8 +36,9 @@ static int pmp_post_load(void *opaque, int version_id)
     RISCVCPU *cpu = opaque;
     CPURISCVState *env =
&cpu->env; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { pmp_update_rule_addr(env, i); } pmp_update_rule_nums(env); @@ -130,7 +131,8 @@ static bool vector_needed(void *opaque) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; - return riscv_has_ext(env, RVV); + return kvm_enabled() ? riscv_has_ext(env, RVV) : + riscv_cpu_cfg(env)->ext_zve32x; } static const VMStateDescription vmstate_vector = { @@ -170,7 +172,7 @@ static bool rv128_needed(void *opaque) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(opaque); - return mcc->misa_mxl_max == MXL_RV128; + return mcc->def->misa_mxl_max == MXL_RV128; } static const VMStateDescription vmstate_rv128 = { @@ -399,6 +401,30 @@ static const VMStateDescription vmstate_ssp = { } }; +static bool sstc_timer_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + if (!cpu->cfg.ext_sstc) { + return false; + } + + return env->stimer != NULL || env->vstimer != NULL; +} + +static const VMStateDescription vmstate_sstc = { + .name = "cpu/timer", + .version_id = 1, + .minimum_version_id = 1, + .needed = sstc_timer_needed, + .fields = (const VMStateField[]) { + VMSTATE_TIMER_PTR(env.stimer, RISCVCPU), + VMSTATE_TIMER_PTR(env.vstimer, RISCVCPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_riscv_cpu = { .name = "cpu", .version_id = 10, @@ -475,6 +501,7 @@ const VMStateDescription vmstate_riscv_cpu = { &vmstate_elp, &vmstate_ssp, &vmstate_ctr, + &vmstate_sstc, NULL } }; diff --git a/target/riscv/meson.build b/target/riscv/meson.build index a4bd61e..fdefe88 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -8,6 +8,10 @@ gen = [ riscv_ss = ss.source_set() riscv_ss.add(gen) + +riscv_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: files('common-semi-target.c')) + riscv_ss.add(files( 'cpu.c', 'cpu_helper.c', diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 72dc48e..8382aa9 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -21,10 +21,11 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internals.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/helper-proto.h" +#include "exec/tlb-flags.h" #include "trace.h" /* Exceptions processing helpers */ @@ -70,7 +71,7 @@ target_ulong helper_csrr(CPURISCVState *env, int csr) void helper_csrw(CPURISCVState *env, int csr, target_ulong src) { target_ulong mask = env->xl == MXL_RV32 ? 
UINT32_MAX : (target_ulong)-1; - RISCVException ret = riscv_csrrw(env, csr, NULL, src, mask); + RISCVException ret = riscv_csrrw(env, csr, NULL, src, mask, GETPC()); if (ret != RISCV_EXCP_NONE) { riscv_raise_exception(env, ret, GETPC()); @@ -81,7 +82,7 @@ target_ulong helper_csrrw(CPURISCVState *env, int csr, target_ulong src, target_ulong write_mask) { target_ulong val = 0; - RISCVException ret = riscv_csrrw(env, csr, &val, src, write_mask); + RISCVException ret = riscv_csrrw(env, csr, &val, src, write_mask, GETPC()); if (ret != RISCV_EXCP_NONE) { riscv_raise_exception(env, ret, GETPC()); @@ -107,7 +108,7 @@ void helper_csrw_i128(CPURISCVState *env, int csr, { RISCVException ret = riscv_csrrw_i128(env, csr, NULL, int128_make128(srcl, srch), - UINT128_MAX); + UINT128_MAX, GETPC()); if (ret != RISCV_EXCP_NONE) { riscv_raise_exception(env, ret, GETPC()); @@ -115,13 +116,14 @@ void helper_csrw_i128(CPURISCVState *env, int csr, } target_ulong helper_csrrw_i128(CPURISCVState *env, int csr, - target_ulong srcl, target_ulong srch, - target_ulong maskl, target_ulong maskh) + target_ulong srcl, target_ulong srch, + target_ulong maskl, target_ulong maskh) { Int128 rv = int128_zero(); RISCVException ret = riscv_csrrw_i128(env, csr, &rv, int128_make128(srcl, srch), - int128_make128(maskl, maskh)); + int128_make128(maskl, maskh), + GETPC()); if (ret != RISCV_EXCP_NONE) { riscv_raise_exception(env, ret, GETPC()); @@ -278,7 +280,7 @@ target_ulong helper_sret(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } - target_ulong retpc = env->sepc; + target_ulong retpc = env->sepc & get_xepc_mask(env); if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { @@ -353,21 +355,22 @@ target_ulong helper_sret(CPURISCVState *env) } static void check_ret_from_m_mode(CPURISCVState *env, target_ulong retpc, - target_ulong prev_priv) + target_ulong prev_priv, + uintptr_t ra) { if (!(env->priv >= PRV_M)) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra); } if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { - riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, ra); } if (riscv_cpu_cfg(env)->pmp && !pmp_get_num_rules(env) && (prev_priv != PRV_M)) { - riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, ra); } } static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, @@ -389,11 +392,12 @@ static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, target_ulong helper_mret(CPURISCVState *env) { - target_ulong retpc = env->mepc; + target_ulong retpc = env->mepc & get_xepc_mask(env); uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); + uintptr_t ra = GETPC(); - check_ret_from_m_mode(env, retpc, prev_priv); + check_ret_from_m_mode(env, retpc, prev_priv, ra); target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV) && (prev_priv != PRV_M); @@ -441,8 +445,9 @@ target_ulong helper_mnret(CPURISCVState *env) target_ulong retpc = env->mnepc; target_ulong prev_priv = get_field(env->mnstatus, MNSTATUS_MNPP); target_ulong prev_virt; + uintptr_t ra = GETPC(); - check_ret_from_m_mode(env, retpc, prev_priv); + check_ret_from_m_mode(env, retpc, prev_priv, ra); prev_virt = get_field(env->mnstatus, MNSTATUS_MNPV) && 
(prev_priv != PRV_M);
@@ -712,4 +717,53 @@ target_ulong helper_hyp_hlvx_wu(CPURISCVState *env, target_ulong addr)
     return cpu_ldl_code_mmu(env, addr, oi, ra);
 }
 
+void helper_ssamoswap_disabled(CPURISCVState *env)
+{
+    int exception = RISCV_EXCP_ILLEGAL_INST;
+
+    /*
+     * Here we follow the RISC-V CFI spec [1] to implement the exception type
+     * of ssamoswap* instruction.
+     *
+     * [1] RISC-V CFI spec v1.0, ch2.7 Atomic Swap from a Shadow Stack Location
+     *
+     * Note: We have already checked some conditions in trans_* functions:
+     * 1. The effective priv mode is not M-mode.
+     * 2. The xSSE specific to the effective priv mode is disabled.
+     */
+    if (!get_field(env->menvcfg, MENVCFG_SSE)) {
+        /*
+         * Disabled M-mode SSE always triggers an illegal instruction when
+         * the current priv mode is not M-mode.
+         */
+        exception = RISCV_EXCP_ILLEGAL_INST;
+        goto done;
+    }
+
+    if (!riscv_has_ext(env, RVS)) {
+        /* S-mode is not implemented */
+        exception = RISCV_EXCP_ILLEGAL_INST;
+        goto done;
+    } else if (env->virt_enabled) {
+        /*
+         * VU/VS-mode with disabled xSSE will trigger the virtual instruction
+         * exception.
+         */
+        exception = RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
+        goto done;
+    } else {
+        /*
+         * U-mode with disabled S-mode SSE will trigger the illegal instruction
+         * exception.
+         *
+         * Note: S-mode is already handled in the disabled M-mode SSE case.
+         */
+        exception = RISCV_EXCP_ILLEGAL_INST;
+        goto done;
+    }
+
+done:
+    riscv_raise_exception(env, exception, GETPC());
+}
+
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index b0841d4..72f1372 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -26,12 +26,22 @@
 #include "trace.h"
 #include "exec/cputlb.h"
 #include "exec/page-protection.h"
+#include "exec/target_page.h"
 
 static bool pmp_write_cfg(CPURISCVState *env, uint32_t addr_index,
                           uint8_t val);
 static uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t addr_index);
 
 /*
+ * Convert the PMP permissions to match the truth table in the Smepmp spec.
+ */
+static inline uint8_t pmp_get_smepmp_operation(uint8_t cfg)
+{
+    return ((cfg & PMP_LOCK) >> 4) | ((cfg & PMP_READ) << 2) |
+           (cfg & PMP_WRITE) | ((cfg & PMP_EXEC) >> 2);
+}
+
+/*
  * Accessor method to extract address matching type 'a field' from cfg reg
  */
 static inline uint8_t pmp_get_a_field(uint8_t cfg)
@@ -45,21 +55,58 @@
  */
 static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index)
 {
-    /* mseccfg.RLB is set */
-    if (MSECCFG_RLB_ISSET(env)) {
-        return 0;
-    }
-
     if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) {
         return 1;
     }
 
-    /* Top PMP has no 'next' to check */
-    if ((pmp_index + 1u) >= MAX_RISCV_PMPS) {
+    return 0;
+}
+
+/*
+ * Check whether a PMP is locked for writing or not.
+ * (i.e.
has LOCK flag and mseccfg.RLB is unset) + */ +static int pmp_is_readonly(CPURISCVState *env, uint32_t pmp_index) +{ + return pmp_is_locked(env, pmp_index) && !MSECCFG_RLB_ISSET(env); +} + +/* + * Check whether `val` is an invalid Smepmp config value + */ +static int pmp_is_invalid_smepmp_cfg(CPURISCVState *env, uint8_t val) +{ + /* No check if mseccfg.MML is not set or if mseccfg.RLB is set */ + if (!MSECCFG_MML_ISSET(env) || MSECCFG_RLB_ISSET(env)) { return 0; } - return 0; + /* + * Adding a rule with executable privileges that either is M-mode-only + * or a locked Shared-Region is not possible + */ + switch (pmp_get_smepmp_operation(val)) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 12: + case 14: + case 15: + return 0; + case 9: + case 10: + case 11: + case 13: + return 1; + default: + g_assert_not_reached(); + } } /* @@ -75,7 +122,9 @@ uint32_t pmp_get_num_rules(CPURISCVState *env) */ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { return env->pmp_state.pmp[pmp_index].cfg_reg; } @@ -89,46 +138,21 @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) */ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { - if (pmp_index < MAX_RISCV_PMPS) { - bool locked = true; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (riscv_cpu_cfg(env)->ext_smepmp) { - /* mseccfg.RLB is set */ - if (MSECCFG_RLB_ISSET(env)) { - locked = false; - } - - /* mseccfg.MML is not set */ - if (!MSECCFG_MML_ISSET(env) && !pmp_is_locked(env, pmp_index)) { - locked = false; - } - - /* mseccfg.MML is set */ - if (MSECCFG_MML_ISSET(env)) { - /* not adding execute bit */ - if ((val & PMP_LOCK) != 0 && (val & PMP_EXEC) != PMP_EXEC) { - locked = false; - } - /* shared region and not adding X bit */ - if ((val & PMP_LOCK) != PMP_LOCK && - (val & 0x7) != (PMP_WRITE | PMP_EXEC)) { - locked = false; - } - } - } else { - if (!pmp_is_locked(env, pmp_index)) { - locked = false; - } + if (pmp_index < pmp_regions) { + if (env->pmp_state.pmp[pmp_index].cfg_reg == val) { + /* no change */ + return false; } - if (locked) { - qemu_log_mask(LOG_GUEST_ERROR, "ignoring pmpcfg write - locked\n"); - } else if (env->pmp_state.pmp[pmp_index].cfg_reg != val) { - /* If !mseccfg.MML then ignore writes with encoding RW=01 */ - if ((val & PMP_WRITE) && !(val & PMP_READ) && - !MSECCFG_MML_ISSET(env)) { - return false; - } + if (pmp_is_readonly(env, pmp_index)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ignoring pmpcfg write - read only\n"); + } else if (pmp_is_invalid_smepmp_cfg(env, val)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ignoring pmpcfg write - invalid\n"); + } else { env->pmp_state.pmp[pmp_index].cfg_reg = val; pmp_update_rule_addr(env, pmp_index); return true; @@ -187,11 +211,12 @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) break; case PMP_AMATCH_TOR: - sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ - ea = (this_addr << 2) - 1u; - if (sa > ea) { + if (prev_addr >= this_addr) { sa = ea = 0u; + break; } + sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr << 2) - 1u; break; case PMP_AMATCH_NA4: @@ -216,9 +241,10 @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) void pmp_update_rule_nums(CPURISCVState *env) { int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; env->pmp_state.num_rules = 
0; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); if (PMP_AMATCH_OFF != a_field) { @@ -312,6 +338,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, int pmp_size = 0; hwaddr s = 0; hwaddr e = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { @@ -336,7 +363,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, * 1.10 draft priv spec states there is an implicit order * from low to high */ - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { s = pmp_is_in_range(env, i, addr); e = pmp_is_in_range(env, i, addr + pmp_size - 1); @@ -352,16 +379,6 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); - /* - * Convert the PMP permissions to match the truth table in the - * Smepmp spec. - */ - const uint8_t smepmp_operation = - ((env->pmp_state.pmp[i].cfg_reg & PMP_LOCK) >> 4) | - ((env->pmp_state.pmp[i].cfg_reg & PMP_READ) << 2) | - (env->pmp_state.pmp[i].cfg_reg & PMP_WRITE) | - ((env->pmp_state.pmp[i].cfg_reg & PMP_EXEC) >> 2); - if (((s + e) == 2) && (PMP_AMATCH_OFF != a_field)) { /* * If the PMP entry is not off and the address is in range, @@ -380,6 +397,9 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, /* * If mseccfg.MML Bit set, do the enhanced pmp priv check */ + const uint8_t smepmp_operation = + pmp_get_smepmp_operation(env->pmp_state.pmp[i].cfg_reg); + if (mode == PRV_M) { switch (smepmp_operation) { case 0: @@ -514,35 +534,39 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, { trace_pmpaddr_csr_write(env->mhartid, addr_index, val); bool is_next_cfg_tor = false; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (addr_index < pmp_regions) { + if (env->pmp_state.pmp[addr_index].addr_reg == val) { + /* no change */ + return; + } - if (addr_index < MAX_RISCV_PMPS) { /* * In TOR mode, need to check the lock bit of the next pmp * (if there is a next). 
*/ - if (addr_index + 1 < MAX_RISCV_PMPS) { + if (addr_index + 1 < pmp_regions) { uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); - if (pmp_is_locked(env, addr_index + 1) && is_next_cfg_tor) { + if (pmp_is_readonly(env, addr_index + 1) && is_next_cfg_tor) { qemu_log_mask(LOG_GUEST_ERROR, - "ignoring pmpaddr write - pmpcfg + 1 locked\n"); + "ignoring pmpaddr write - pmpcfg+1 read only\n"); return; } } - if (!pmp_is_locked(env, addr_index)) { - if (env->pmp_state.pmp[addr_index].addr_reg != val) { - env->pmp_state.pmp[addr_index].addr_reg = val; - pmp_update_rule_addr(env, addr_index); - if (is_next_cfg_tor) { - pmp_update_rule_addr(env, addr_index + 1); - } - tlb_flush(env_cpu(env)); + if (!pmp_is_readonly(env, addr_index)) { + env->pmp_state.pmp[addr_index].addr_reg = val; + pmp_update_rule_addr(env, addr_index); + if (is_next_cfg_tor) { + pmp_update_rule_addr(env, addr_index + 1); } + tlb_flush(env_cpu(env)); } else { qemu_log_mask(LOG_GUEST_ERROR, - "ignoring pmpaddr write - locked\n"); + "ignoring pmpaddr write - read only\n"); } } else { qemu_log_mask(LOG_GUEST_ERROR, @@ -557,8 +581,9 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index) { target_ulong val = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { val = env->pmp_state.pmp[addr_index].addr_reg; trace_pmpaddr_csr_read(env->mhartid, addr_index, val); } else { @@ -576,6 +601,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) { int i; uint64_t mask = MSECCFG_MMWP | MSECCFG_MML; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Update PMM field only if the value is valid according to Zjpm v1.0 */ if (riscv_cpu_cfg(env)->ext_smmpm && riscv_cpu_mxl(env) == MXL_RV64 && @@ -587,7 +613,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) /* RLB cannot be enabled if it's already 0 and if any regions are locked */ if (!MSECCFG_RLB_ISSET(env)) { - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_is_locked(env, i)) { val &= ~MSECCFG_RLB; break; @@ -643,6 +669,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) hwaddr tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); hwaddr tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* * If PMP is not supported or there are no PMP rules, the TLB page will not @@ -653,7 +680,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) return TARGET_PAGE_SIZE; } - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg) == PMP_AMATCH_OFF) { continue; } diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c index 0408f96..a68809e 100644 --- a/target/riscv/pmu.c +++ b/target/riscv/pmu.c @@ -22,7 +22,7 @@ #include "qemu/timer.h" #include "cpu.h" #include "pmu.h" -#include "system/cpu-timers.h" +#include "exec/icount.h" #include "system/device_tree.h" #define RISCV_TIMEBASE_FREQ 1000000000 /* 1Ghz */ diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index d0a3243..c499f9b 100644 --- a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -25,12 +25,16 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "qobject/qbool.h" #include 
"qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qapi/visitor.h" #include "qom/qom-qobject.h" +#include "qemu/ctype.h" +#include "qemu/qemu-print.h" +#include "monitor/hmp.h" +#include "monitor/hmp-target.h" #include "system/kvm.h" #include "system/tcg.h" #include "cpu-qom.h" @@ -121,7 +125,7 @@ static void riscv_obj_add_profiles_qdict(Object *obj, QDict *qdict_out) for (int i = 0; riscv_profiles[i] != NULL; i++) { profile = riscv_profiles[i]; - value = QOBJECT(qbool_from_bool(profile->enabled)); + value = QOBJECT(qbool_from_bool(profile->present)); qdict_put_obj(qdict_out, profile->name, value); } @@ -240,3 +244,147 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, return expansion_info; } + +/* + * We have way too many potential CSRs and regs being added + * regularly to register them in a static array. + * + * Declare an empty array instead, making get_monitor_def() use + * the target_get_monitor_def() API directly. + */ +const MonitorDef monitor_defs[] = { { } }; +const MonitorDef *target_monitor_defs(void) +{ + return monitor_defs; +} + +static bool reg_is_ulong_integer(CPURISCVState *env, const char *name, + target_ulong *val, bool is_gprh) +{ + const char * const *reg_names; + target_ulong *vals; + + if (is_gprh) { + reg_names = riscv_int_regnamesh; + vals = env->gprh; + } else { + reg_names = riscv_int_regnames; + vals = env->gpr; + } + + for (int i = 0; i < 32; i++) { + g_autofree char *reg_name = g_strdup(reg_names[i]); + char *reg1 = strtok(reg_name, "/"); + char *reg2 = strtok(NULL, "/"); + + if (strcasecmp(reg1, name) == 0 || + (reg2 && strcasecmp(reg2, name) == 0)) { + *val = vals[i]; + return true; + } + } + + return false; +} + +static bool reg_is_u64_fpu(CPURISCVState *env, const char *name, uint64_t *val) +{ + if (qemu_tolower(name[0]) != 'f') { + return false; + } + + for (int i = 0; i < 32; i++) { + g_autofree char *reg_name = g_strdup(riscv_fpr_regnames[i]); + char *reg1 = strtok(reg_name, "/"); + char *reg2 = strtok(NULL, "/"); + + if (strcasecmp(reg1, name) == 0 || + (reg2 && strcasecmp(reg2, name) == 0)) { + *val = env->fpr[i]; + return true; + } + } + + return false; +} + +static bool reg_is_vreg(const char *name) +{ + if (qemu_tolower(name[0]) != 'v' || strlen(name) > 3) { + return false; + } + + for (int i = 0; i < 32; i++) { + if (strcasecmp(name, riscv_rvv_regnames[i]) == 0) { + return true; + } + } + + return false; +} + +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) +{ + CPURISCVState *env = &RISCV_CPU(cs)->env; + target_ulong val = 0; + uint64_t val64 = 0; + int i; + + if (reg_is_ulong_integer(env, name, &val, false) || + reg_is_ulong_integer(env, name, &val, true)) { + *pval = val; + return 0; + } + + if (reg_is_u64_fpu(env, name, &val64)) { + *pval = val64; + return 0; + } + + if (reg_is_vreg(name)) { + if (!riscv_cpu_cfg(env)->ext_zve32x) { + return -EINVAL; + } + + qemu_printf("Unable to print the value of vector " + "vreg '%s' from this API\n", name); + + /* + * We're returning 0 because returning -EINVAL triggers + * an 'unknown register' message in exp_unary() later, + * which feels ankward after our own error message. + */ + *pval = 0; + return 0; + } + + for (i = 0; i < ARRAY_SIZE(csr_ops); i++) { + RISCVException res; + int csrno = i; + + /* + * Early skip when possible since we're going + * through a lot of NULL entries. 
+ */ + if (csr_ops[csrno].predicate == NULL) { + continue; + } + + if (strcasecmp(csr_ops[csrno].name, name) != 0) { + continue; + } + + res = riscv_csrrw_debug(env, csrno, &val, 0, 0); + + /* + * Rely on the smode, hmode, etc, predicates within csr.c + * to do the filtering of the registers that are present. + */ + if (res == RISCV_EXCP_NONE) { + *pval = val; + return 0; + } + } + + return -EINVAL; +} diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 5aef9ee..1150bd1 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -18,10 +18,10 @@ */ #include "qemu/osdep.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "tcg-cpu.h" #include "cpu.h" +#include "exec/target_page.h" #include "internals.h" #include "pmu.h" #include "time_helper.h" @@ -35,6 +35,8 @@ #include "tcg/tcg.h" #ifndef CONFIG_USER_ONLY #include "hw/boards.h" +#include "system/tcg.h" +#include "exec/icount.h" #endif /* Hash that stores user set extensions */ @@ -91,6 +93,109 @@ static const char *cpu_priv_ver_to_str(int priv_ver) return priv_spec_str; } +static int riscv_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return riscv_env_mmu_index(cpu_env(cs), ifetch); +} + +static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) +{ + CPURISCVState *env = cpu_env(cs); + RISCVCPU *cpu = env_archcpu(env); + RISCVExtStatus fs, vs; + uint32_t flags = 0; + bool pm_signext = riscv_cpu_virt_mem_enabled(env); + + if (cpu->cfg.ext_zve32x) { + /* + * If env->vl equals to VLMAX, we can use generic vector operation + * expanders (GVEC) to accerlate the vector operations. + * However, as LMUL could be a fractional number. The maximum + * vector size can be operated might be less than 8 bytes, + * which is not supported by GVEC. So we set vl_eq_vlmax flag to true + * only when maxsz >= 8 bytes. + */ + + /* lmul encoded as in DisasContext::lmul */ + int8_t lmul = sextract32(FIELD_EX64(env->vtype, VTYPE, VLMUL), 0, 3); + uint32_t vsew = FIELD_EX64(env->vtype, VTYPE, VSEW); + uint32_t vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul); + uint32_t maxsz = vlmax << vsew; + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl) && + (maxsz >= 8); + flags = FIELD_DP32(flags, TB_FLAGS, VILL, env->vill); + flags = FIELD_DP32(flags, TB_FLAGS, SEW, vsew); + flags = FIELD_DP32(flags, TB_FLAGS, LMUL, + FIELD_EX64(env->vtype, VTYPE, VLMUL)); + flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + flags = FIELD_DP32(flags, TB_FLAGS, VTA, + FIELD_EX64(env->vtype, VTYPE, VTA)); + flags = FIELD_DP32(flags, TB_FLAGS, VMA, + FIELD_EX64(env->vtype, VTYPE, VMA)); + flags = FIELD_DP32(flags, TB_FLAGS, VSTART_EQ_ZERO, env->vstart == 0); + } else { + flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); + } + + if (cpu_get_fcfien(env)) { + /* + * For Forward CFI, only the expectation of a lpad at + * the start of the block is tracked via env->elp. env->elp + * is turned on during jalr translation. 
+ */ + flags = FIELD_DP32(flags, TB_FLAGS, FCFI_LP_EXPECTED, env->elp); + flags = FIELD_DP32(flags, TB_FLAGS, FCFI_ENABLED, 1); + } + + if (cpu_get_bcfien(env)) { + flags = FIELD_DP32(flags, TB_FLAGS, BCFI_ENABLED, 1); + } + +#ifdef CONFIG_USER_ONLY + fs = EXT_STATUS_DIRTY; + vs = EXT_STATUS_DIRTY; +#else + flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv); + + flags |= riscv_env_mmu_index(env, 0); + fs = get_field(env->mstatus, MSTATUS_FS); + vs = get_field(env->mstatus, MSTATUS_VS); + + if (env->virt_enabled) { + flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1); + /* + * Merge DISABLED and !DIRTY states using MIN. + * We will set both fields when dirtying. + */ + fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS)); + vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS)); + } + + /* With Zfinx, floating point is enabled/disabled by Smstateen. */ + if (!riscv_has_ext(env, RVF)) { + fs = (smstateen_acc_ok(env, 0, SMSTATEEN0_FCSR) == RISCV_EXCP_NONE) + ? EXT_STATUS_DIRTY : EXT_STATUS_DISABLED; + } + + if (cpu->cfg.debug && !icount_enabled()) { + flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled); + } +#endif + + flags = FIELD_DP32(flags, TB_FLAGS, FS, fs); + flags = FIELD_DP32(flags, TB_FLAGS, VS, vs); + flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl); + flags = FIELD_DP32(flags, TB_FLAGS, AXL, cpu_address_xl(env)); + flags = FIELD_DP32(flags, TB_FLAGS, PM_PMM, riscv_pm_get_pmm(env)); + flags = FIELD_DP32(flags, TB_FLAGS, PM_SIGNEXTEND, pm_signext); + + return (TCGTBCPUState){ + .pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc, + .flags = flags, + .cs_base = env->misa_ext, + }; +} + static void riscv_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -133,16 +238,48 @@ static void riscv_restore_state_to_opc(CPUState *cs, env->excp_uw2 = data[2]; } -static const TCGCPUOps riscv_tcg_ops = { +#ifndef CONFIG_USER_ONLY +static vaddr riscv_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + CPURISCVState *env = cpu_env(cs); + uint32_t pm_len; + bool pm_signext; + + if (cpu_address_xl(env) == MXL_RV32) { + return (uint32_t)result; + } + + pm_len = riscv_pm_get_pmlen(riscv_pm_get_pmm(env)); + if (pm_len == 0) { + return result; + } + + pm_signext = riscv_cpu_virt_mem_enabled(env); + if (pm_signext) { + return sextract64(result, 0, 64 - pm_len); + } + return extract64(result, 0, 64 - pm_len); +} +#endif + +const TCGCPUOps riscv_tcg_ops = { + .mttcg_supported = true, + .guest_default_memory_order = 0, + .initialize = riscv_translate_init, .translate_code = riscv_translate_code, + .get_tb_cpu_state = riscv_get_tb_cpu_state, .synchronize_from_tb = riscv_cpu_synchronize_from_tb, .restore_state_to_opc = riscv_restore_state_to_opc, + .mmu_index = riscv_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = riscv_cpu_tlb_fill, + .pointer_wrap = riscv_pointer_wrap, .cpu_exec_interrupt = riscv_cpu_exec_interrupt, .cpu_exec_halt = riscv_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = riscv_cpu_do_interrupt, .do_transaction_failed = riscv_cpu_do_transaction_failed, .do_unaligned_access = riscv_cpu_do_unaligned_access, @@ -280,12 +417,21 @@ static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp) static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, Error **errp) { + uint32_t min_vlen; uint32_t vlen = cfg->vlenb << 3; - if (vlen > RV_VLEN_MAX || vlen < 128) { + if (riscv_has_ext(env, RVV)) { + min_vlen = 128; + } else if (cfg->ext_zve64x) { + min_vlen = 64; + } else if (cfg->ext_zve32x) { 
+ min_vlen = 32; + } + + if (vlen > RV_VLEN_MAX || vlen < min_vlen) { error_setg(errp, "Vector extension implementation only supports VLEN " - "in the range [128, %d]", RV_VLEN_MAX); + "in the range [%d, %d]", min_vlen, RV_VLEN_MAX); return; } @@ -295,6 +441,12 @@ static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, "in the range [8, 64]"); return; } + + if (vlen < cfg->elen) { + error_setg(errp, "Vector extension implementation requires VLEN " + "to be greater than or equal to ELEN"); + return; + } } static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) @@ -315,6 +467,15 @@ static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) continue; } + /* + * cpu.debug = true is marked as 'sdtrig', priv spec 1.12. + * Skip this warning since existing CPUs with older priv + * spec and debug = true will be impacted. + */ + if (!strcmp(edata->name, "sdtrig")) { + continue; + } + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); /* @@ -515,7 +676,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (riscv_has_ext(env, RVV)) { + if (cpu->cfg.ext_zve32x) { riscv_cpu_validate_v(env, &cpu->cfg, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -581,7 +742,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (mcc->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { + if (mcc->def->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { error_setg(errp, "Zcf extension is only relevant to RV32"); return; } @@ -678,7 +839,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (mcc->misa_mxl_max == MXL_RV32 && cpu->cfg.ext_svukte) { + if (mcc->def->misa_mxl_max == MXL_RV32 && cpu->cfg.ext_svukte) { error_setg(errp, "svukte is not supported for RV32"); return; } @@ -694,6 +855,12 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_ssctr = false; } + if (cpu->cfg.ext_svrsw60t59b && + (!cpu->cfg.mmu || mcc->def->misa_mxl_max == MXL_RV32)) { + error_setg(errp, "svrsw60t59b is not supported on RV32 and MMU-less platforms"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. 
@@ -706,8 +873,9 @@ static bool riscv_cpu_validate_profile_satp(RISCVCPU *cpu, RISCVCPUProfile *profile, bool send_warn) { - int satp_max = satp_mode_max_from_map(cpu->cfg.satp_mode.supported); + int satp_max = cpu->cfg.max_satp_mode; + assert(satp_max >= 0); if (profile->satp_mode > satp_max) { if (send_warn) { bool is_32bit = riscv_cpu_is_32bit(cpu); @@ -730,16 +898,11 @@ static void riscv_cpu_check_parent_profile(RISCVCPU *cpu, RISCVCPUProfile *profile, RISCVCPUProfile *parent) { - const char *parent_name; - bool parent_enabled; - - if (!profile->enabled || !parent) { + if (!profile->present || !parent) { return; } - parent_name = parent->name; - parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL); - profile->enabled = parent_enabled; + profile->present = parent->present; } static void riscv_cpu_validate_profile(RISCVCPU *cpu, @@ -800,7 +963,7 @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu, } } - profile->enabled = profile_impl; + profile->present = profile_impl; riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent); riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent); @@ -915,7 +1078,7 @@ static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + if (riscv_has_ext(env, RVF) && mcc->def->misa_mxl_max == MXL_RV32) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); } } @@ -924,7 +1087,7 @@ static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + if (riscv_has_ext(env, RVF) && mcc->def->misa_mxl_max == MXL_RV32) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); } @@ -1029,6 +1192,70 @@ static bool riscv_cpu_is_generic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } +static void riscv_cpu_set_profile(RISCVCPU *cpu, + RISCVCPUProfile *profile, + bool enabled) +{ + int i, ext_offset; + + if (profile->u_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->u_parent, enabled); + } + + if (profile->s_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->s_parent, enabled); + } + + profile->enabled = enabled; + + if (profile->enabled) { + cpu->env.priv_ver = profile->priv_spec; + +#ifndef CONFIG_USER_ONLY + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(OBJECT(cpu), "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(OBJECT(cpu), satp_prop, true, NULL); + } +#endif + } + + for (i = 0; misa_bits[i] != 0; i++) { + uint32_t bit = misa_bits[i]; + + if (!(profile->misa_ext & bit)) { + continue; + } + + if (bit == RVI && !profile->enabled) { + /* + * Disabling profiles will not disable the base + * ISA RV64I. 
+ */ + continue; + } + + cpu_misa_ext_add_user_opt(bit, profile->enabled); + riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); + } + + for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { + ext_offset = profile->ext_offsets[i]; + + if (profile->enabled) { + if (cpu_cfg_offset_is_named_feat(ext_offset)) { + riscv_cpu_enable_named_feat(cpu, ext_offset); + } + + cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); + } + + cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); + isa_ext_update_enabled(cpu, ext_offset, profile->enabled); + } +} + /* * We'll get here via the following path: * @@ -1039,7 +1266,6 @@ static bool riscv_cpu_is_generic(Object *cpu_obj) static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) { RISCVCPU *cpu = RISCV_CPU(cs); - RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); if (!riscv_cpu_tcg_compatible(cpu)) { g_autofree char *name = riscv_cpu_get_name(cpu); @@ -1048,7 +1274,10 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) return false; } - if (mcc->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) { +#ifndef CONFIG_USER_ONLY + RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); + + if (mcc->def->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) { /* Missing 128-bit aligned atomics */ error_setg(errp, "128-bit RISC-V currently does not work with Multi " @@ -1056,7 +1285,6 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) return false; } -#ifndef CONFIG_USER_ONLY CPURISCVState *env = &cpu->env; tcg_cflags_set(CPU(cs), CF_PCREL); @@ -1194,7 +1422,6 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, RISCVCPUProfile *profile = opaque; RISCVCPU *cpu = RISCV_CPU(obj); bool value; - int i, ext_offset; if (riscv_cpu_is_vendor(obj)) { error_setg(errp, "Profile %s is not available for vendor CPUs", @@ -1213,64 +1440,8 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, } profile->user_set = true; - profile->enabled = value; - - if (profile->u_parent != NULL) { - object_property_set_bool(obj, profile->u_parent->name, - profile->enabled, NULL); - } - - if (profile->s_parent != NULL) { - object_property_set_bool(obj, profile->s_parent->name, - profile->enabled, NULL); - } - - if (profile->enabled) { - cpu->env.priv_ver = profile->priv_spec; - } - -#ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, profile->enabled, NULL); - } -#endif - - for (i = 0; misa_bits[i] != 0; i++) { - uint32_t bit = misa_bits[i]; - - if (!(profile->misa_ext & bit)) { - continue; - } - - if (bit == RVI && !profile->enabled) { - /* - * Disabling profiles will not disable the base - * ISA RV64I. 
- */ - continue; - } - - cpu_misa_ext_add_user_opt(bit, profile->enabled); - riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); - } - - for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { - ext_offset = profile->ext_offsets[i]; - - if (profile->enabled) { - if (cpu_cfg_offset_is_named_feat(ext_offset)) { - riscv_cpu_enable_named_feat(cpu, ext_offset); - } - cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); - } - - cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); - isa_ext_update_enabled(cpu, ext_offset, profile->enabled); - } + riscv_cpu_set_profile(cpu, profile, value); } static void cpu_get_profile(Object *obj, Visitor *v, const char *name, @@ -1285,7 +1456,7 @@ static void cpu_get_profile(Object *obj, Visitor *v, const char *name, static void riscv_cpu_add_profiles(Object *cpu_obj) { for (int i = 0; riscv_profiles[i] != NULL; i++) { - const RISCVCPUProfile *profile = riscv_profiles[i]; + RISCVCPUProfile *profile = riscv_profiles[i]; object_property_add(cpu_obj, profile->name, "bool", cpu_get_profile, cpu_set_profile, @@ -1297,30 +1468,11 @@ static void riscv_cpu_add_profiles(Object *cpu_obj) * case. */ if (profile->enabled) { - object_property_set_bool(cpu_obj, profile->name, true, NULL); + riscv_cpu_set_profile(RISCV_CPU(cpu_obj), profile, true); } } } -static bool cpu_ext_is_deprecated(const char *ext_name) -{ - return isupper(ext_name[0]); -} - -/* - * String will be allocated in the heap. Caller is responsible - * for freeing it. - */ -static char *cpu_ext_to_lower(const char *ext_name) -{ - char *ret = g_malloc0(strlen(ext_name) + 1); - - strcpy(ret, ext_name); - ret[0] = tolower(ret[0]); - - return ret; -} - static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -1333,13 +1485,6 @@ static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, return; } - if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { - g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); - - warn_report("CPU property '%s' is deprecated. Please use '%s' instead", - multi_ext_cfg->name, lower); - } - cpu_cfg_ext_add_user_opt(multi_ext_cfg->offset, value); prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); @@ -1375,14 +1520,13 @@ static void cpu_add_multi_ext_prop(Object *cpu_obj, const RISCVCPUMultiExtConfig *multi_cfg) { bool generic_cpu = riscv_cpu_is_generic(cpu_obj); - bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); object_property_add(cpu_obj, multi_cfg->name, "bool", cpu_get_multi_ext_cfg, cpu_set_multi_ext_cfg, NULL, (void *)multi_cfg); - if (!generic_cpu || deprecated_ext) { + if (!generic_cpu) { return; } @@ -1425,8 +1569,6 @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); - riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); - riscv_cpu_add_profiles(obj); } @@ -1468,6 +1610,8 @@ static void riscv_init_max_cpu_extensions(Object *obj) if (env->misa_mxl != MXL_RV32) { isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } else { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_svrsw60t59b), false); } /* @@ -1516,24 +1660,10 @@ static void riscv_tcg_cpu_instance_init(CPUState *cs) } } -static void riscv_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) -{ - /* - * All cpus use the same set of operations. 
- */ - cc->tcg_ops = &riscv_tcg_ops; -} - -static void riscv_tcg_cpu_class_init(CPUClass *cc) -{ - cc->init_accel_cpu = riscv_tcg_cpu_init_ops; -} - -static void riscv_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +static void riscv_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); - acc->cpu_class_init = riscv_tcg_cpu_class_init; acc->cpu_instance_init = riscv_tcg_cpu_instance_init; acc->cpu_target_realize = riscv_tcg_cpu_realize; } diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h index ce94253..a23716a 100644 --- a/target/riscv/tcg/tcg-cpu.h +++ b/target/riscv/tcg/tcg-cpu.h @@ -26,6 +26,8 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp); void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp); bool riscv_cpu_tcg_compatible(RISCVCPU *cpu); +extern const TCGCPUOps riscv_tcg_ops; + struct DisasContext; struct RISCVCPUConfig; typedef struct RISCVDecoder { diff --git a/target/riscv/th_csr.c b/target/riscv/th_csr.c index 6c970d4..49eb7bb 100644 --- a/target/riscv/th_csr.c +++ b/target/riscv/th_csr.c @@ -27,12 +27,6 @@ #define TH_SXSTATUS_MAEE BIT(21) #define TH_SXSTATUS_THEADISAEE BIT(22) -typedef struct { - int csrno; - int (*insertion_test)(RISCVCPU *cpu); - riscv_csr_operations csr_ops; -} riscv_csr; - static RISCVException smode(CPURISCVState *env, int csrno) { if (riscv_has_ext(env, RVS)) { @@ -42,13 +36,9 @@ static RISCVException smode(CPURISCVState *env, int csrno) return RISCV_EXCP_ILLEGAL_INST; } -static int test_thead_mvendorid(RISCVCPU *cpu) +static bool test_thead_mvendorid(RISCVCPU *cpu) { - if (cpu->cfg.mvendorid != THEAD_VENDOR_ID) { - return -1; - } - - return 0; + return cpu->cfg.mvendorid == THEAD_VENDOR_ID; } static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno, @@ -59,21 +49,11 @@ static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } -static riscv_csr th_csr_list[] = { +const RISCVCSR th_csr_list[] = { { .csrno = CSR_TH_SXSTATUS, .insertion_test = test_thead_mvendorid, .csr_ops = { "th.sxstatus", smode, read_th_sxstatus } - } + }, + { } }; - -void th_register_custom_csrs(RISCVCPU *cpu) -{ - for (size_t i = 0; i < ARRAY_SIZE(th_csr_list); i++) { - int csrno = th_csr_list[i].csrno; - riscv_csr_operations *csr_ops = &th_csr_list[i].csr_ops; - if (!th_csr_list[i].insertion_test(cpu)) { - riscv_set_csr_ops(csrno, csr_ops); - } - } -} diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index bc0d9a0..400e917 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -46,8 +46,23 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, { uint64_t diff, ns_diff, next; RISCVAclintMTimerState *mtimer = env->rdtime_fn_arg; - uint32_t timebase_freq = mtimer->timebase_freq; - uint64_t rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; + uint32_t timebase_freq; + uint64_t rtc_r; + + if (!riscv_cpu_cfg(env)->ext_sstc || !env->rdtime_fn || + !env->rdtime_fn_arg || !get_field(env->menvcfg, MENVCFG_STCE)) { + /* S/VS Timer IRQ depends on sstc extension, rdtime_fn(), and STCE. */ + return; + } + + if (timer_irq == MIP_VSTIP && + (!riscv_has_ext(env, RVH) || !get_field(env->henvcfg, HENVCFG_STCE))) { + /* VS Timer IRQ also depends on RVH and henvcfg.STCE. 
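+          *
+          * Schematically, the hardware-backed pending bits are gated as
+          * follows (a sketch of the gating, not the literal QEMU fields):
+          *
+          *   stip_hw  = ext_sstc && rdtime_fn && menvcfg.STCE
+          *   vstip_hw = stip_hw && RVH && henvcfg.STCE
+          *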
*/ + return; + } + + timebase_freq = mtimer->timebase_freq; + rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; if (timecmp <= rtc_r) { /* @@ -125,6 +140,52 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, timer_mod(timer, next); } +/* + * When disabling xenvcfg.STCE, the S/VS Timer may be disabled at the same time. + * It is safe to call this function regardless of whether the timer has been + * deleted or not. timer_del() will do nothing if the timer has already + * been deleted. + */ +static void riscv_timer_disable_timecmp(CPURISCVState *env, QEMUTimer *timer, + uint32_t timer_irq) +{ + /* Disable S-mode Timer IRQ and HW-based STIP */ + if ((timer_irq == MIP_STIP) && !get_field(env->menvcfg, MENVCFG_STCE)) { + riscv_cpu_update_mip(env, timer_irq, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } + + /* Disable VS-mode Timer IRQ and HW-based VSTIP */ + if ((timer_irq == MIP_VSTIP) && + (!get_field(env->menvcfg, MENVCFG_STCE) || + !get_field(env->henvcfg, HENVCFG_STCE))) { + env->vstime_irq = 0; + riscv_cpu_update_mip(env, 0, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } +} + +/* Enable or disable S/VS-mode Timer when xenvcfg.STCE is changed */ +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable) +{ + if (enable) { + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } else { + riscv_timer_disable_timecmp(env, env->vstimer, MIP_VSTIP); + } + + if (is_m_mode) { + if (enable) { + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + } else { + riscv_timer_disable_timecmp(env, env->stimer, MIP_STIP); + } + } +} + void riscv_timer_init(RISCVCPU *cpu) { CPURISCVState *env; diff --git a/target/riscv/time_helper.h b/target/riscv/time_helper.h index cacd79b..af1f634 100644 --- a/target/riscv/time_helper.h +++ b/target/riscv/time_helper.h @@ -25,6 +25,7 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, uint64_t timecmp, uint64_t delta, uint32_t timer_irq); +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable); void riscv_timer_init(RISCVCPU *cpu); #endif diff --git a/target/riscv/translate.c b/target/riscv/translate.c index d6651f2..6fc06c7 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -20,11 +20,11 @@ #include "qemu/log.h" #include "cpu.h" #include "tcg/tcg-op.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" - +#include "exec/target_page.h" #include "exec/translator.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/translation-block.h" #include "exec/log.h" #include "semihosting/semihost.h" @@ -1167,7 +1167,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) CPUState *cpu = ctx->cs; CPURISCVState *env = cpu_env(cpu); - return translator_ldl(env, &ctx->base, pc); + return cpu_ldl_code(env, pc); } #define SS_MMU_INDEX(ctx) (ctx->mem_idx | MMU_IDX_SS_WRITE) @@ -1210,11 +1210,6 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) /* The specification allows for longer insns, but not supported by qemu. */ #define MAX_INSN_LEN 4 -static inline int insn_len(uint16_t first_word) -{ - return (first_word & 3) == 3 ? 
4 : 2;
-}
-
 const RISCVDecoder decoder_table[] = {
     { always_true_p, decode_insn32 },
     { has_xthead_p, decode_xthead},
@@ -1223,13 +1218,35 @@ const RISCVDecoder decoder_table[] = {

 const size_t decoder_table_size = ARRAY_SIZE(decoder_table);

-static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
+static void decode_opc(CPURISCVState *env, DisasContext *ctx)
 {
+    uint32_t opcode;
+    bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
+
     ctx->virt_inst_excp = false;
-    ctx->cur_insn_len = insn_len(opcode);
+    if (pc_is_4byte_align) {
+        /*
+         * Load 4 bytes at once so that the instruction fetch is atomic.
+         *
+         * Note: when pc is 4-byte aligned, a 4-byte instruction cannot
+         * cross a page boundary, so we can preload 4 bytes whether the
+         * real instruction is 2 or 4 bytes long; the preload cannot
+         * trigger an additional page fault.
+         */
+        opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
+    } else {
+        /*
+         * For an unaligned pc, preloading 4 bytes could touch a second
+         * page and trigger an additional page fault, so only load
+         * 2 bytes here.
+         */
+        opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next);
+    }
+    ctx->ol = ctx->xl;
+
+    ctx->cur_insn_len = insn_len((uint16_t)opcode);
     /* Check for compressed insn */
     if (ctx->cur_insn_len == 2) {
-        ctx->opcode = opcode;
+        ctx->opcode = (uint16_t)opcode;
         /*
          * The Zca extension is added as a way to refer to instructions in the C
          * extension that do not include the floating-point loads and stores
@@ -1239,15 +1256,17 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
             return;
         }
     } else {
-        uint32_t opcode32 = opcode;
-        opcode32 = deposit32(opcode32, 16, 16,
-                             translator_lduw(env, &ctx->base,
-                                             ctx->base.pc_next + 2));
-        ctx->opcode = opcode32;
+        if (!pc_is_4byte_align) {
+            /* Load the last 2 bytes of the instruction here */
+            opcode = deposit32(opcode, 16, 16,
+                               translator_lduw(env, &ctx->base,
+                                               ctx->base.pc_next + 2));
+        }
+        ctx->opcode = opcode;

         for (guint i = 0; i < ctx->decoders->len; ++i) {
             riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i);
-            if (func(ctx, opcode32)) {
+            if (func(ctx, opcode)) {
                 return;
             }
         }
@@ -1282,7 +1301,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
     ctx->vstart_eq_zero = FIELD_EX32(tb_flags, TB_FLAGS, VSTART_EQ_ZERO);
     ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
-    ctx->misa_mxl_max = mcc->misa_mxl_max;
+    ctx->misa_mxl_max = mcc->def->misa_mxl_max;
     ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL);
     ctx->address_xl = FIELD_EX32(tb_flags, TB_FLAGS, AXL);
     ctx->cs = cs;
@@ -1325,10 +1344,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPURISCVState *env = cpu_env(cpu);
-    uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);

-    ctx->ol = ctx->xl;
-    decode_opc(env, ctx, opcode16);
+    decode_opc(env, ctx);
     ctx->base.pc_next += ctx->cur_insn_len;

     /*
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index 1526de9..9a0d9b4 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -26,7 +26,6 @@
 #include "crypto/aes-round.h"
 #include "crypto/sm4.h"
 #include "exec/memop.h"
-#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "internals.h"
 #include "vector_internals.h"
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 67b3baf..41ea223 100644
--- a/target/riscv/vector_helper.c
+++ 
b/target/riscv/vector_helper.c
@@ -21,10 +21,12 @@
 #include "qemu/bitops.h"
 #include "cpu.h"
 #include "exec/memop.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
 #include "exec/page-protection.h"
 #include "exec/helper-proto.h"
+#include "exec/tlb-flags.h"
+#include "exec/target_page.h"
 #include "fpu/softfloat.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "internals.h"
@@ -32,7 +34,7 @@
 #include <math.h>

 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
-                            target_ulong s2)
+                            target_ulong s2, target_ulong x0)
 {
     int vlmax, vl;
     RISCVCPU *cpu = env_archcpu(env);
@@ -80,6 +82,16 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     } else {
         vl = vlmax;
     }
+
+    if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) {
+        /* Only set the vill bit. */
+        env->vill = 1;
+        env->vtype = 0;
+        env->vl = 0;
+        env->vstart = 0;
+        return 0;
+    }
+
     env->vl = vl;
     env->vtype = s2;
     env->vstart = 0;
@@ -114,25 +126,42 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
  * It will trigger an exception if there is no mapping in the TLB
  * and the page table walk can't fill the TLB entry. Then the guest
  * software can return here after processing the exception, or never return.
+ *
+ * This function can also be used when direct access to the probe flags is
+ * needed. If a pointer to a flags operand is provided, the function calls
+ * probe_access_flags() instead, honouring nonfault and updating *host and
+ * *flags.
  */
-static void probe_pages(CPURISCVState *env, target_ulong addr,
-                        target_ulong len, uintptr_t ra,
-                        MMUAccessType access_type)
+static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len,
+                        uintptr_t ra, MMUAccessType access_type, int mmu_index,
+                        void **host, int *flags, bool nonfault)
 {
     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
     target_ulong curlen = MIN(pagelen, len);
-    int mmu_index = riscv_env_mmu_index(env, false);

-    probe_access(env, adjust_addr(env, addr), curlen, access_type,
-                 mmu_index, ra);
+    if (flags != NULL) {
+        *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
+                                    access_type, mmu_index, nonfault, host, ra);
+    } else {
+        probe_access(env, adjust_addr(env, addr), curlen, access_type,
+                     mmu_index, ra);
+    }
+
     if (len > curlen) {
         addr += curlen;
         curlen = len - curlen;
-        probe_access(env, adjust_addr(env, addr), curlen, access_type,
-                     mmu_index, ra);
+        if (flags != NULL) {
+            *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
+                                        access_type, mmu_index, nonfault,
+                                        host, ra);
+        } else {
+            probe_access(env, adjust_addr(env, addr), curlen, access_type,
+                         mmu_index, ra);
+        }
     }
 }
+
 static inline void vext_set_elem_mask(void *v0, int index, uint8_t value)
 {
@@ -332,8 +361,8 @@ vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
     MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;

     /* Check page permission/pmp/watchpoint/etc.
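      *
      * probe_pages() above splits the check at the page boundary:
      * -(addr | TARGET_PAGE_MASK) is the number of bytes left in the
      * current page. A worked example with hypothetical values, assuming
      * 4 KiB pages:
      *
      *   mask = ~(uint64_t)0xfff;              TARGET_PAGE_MASK
      *   -(0x12345678 | mask) == 0x988         4096 - 0x678 bytes left
      *
      * so the second probe_access_flags()/probe_access() call is only
      * needed when len exceeds that remainder.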
*/ - flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type, - mmu_index, true, &host, ra); + probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags, + true); if (flags == 0) { if (nf == 1) { @@ -632,7 +661,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t vma = vext_vma(desc); target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems; int mmu_index = riscv_env_mmu_index(env, false); - int flags; + int flags, probe_flags; void *host; VSTART_CHECK_EARLY_EXIT(env, env->vl); @@ -646,15 +675,15 @@ vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, } /* Check page permission/pmp/watchpoint/etc. */ - flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize, - MMU_DATA_LOAD, mmu_index, true, &host, ra); + probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host, + &flags, true); /* If we are crossing a page check also the second page. */ if (env->vl > elems) { addr_probe = addr + (elems << log2_esz); - flags |= probe_access_flags(env, adjust_addr(env, addr_probe), - elems * msize, MMU_DATA_LOAD, mmu_index, - true, &host, ra); + probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD, + mmu_index, &host, &probe_flags, true); + flags |= probe_flags; } if (flags & ~TLB_WATCHPOINT) { @@ -666,16 +695,16 @@ vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, addr_i = adjust_addr(env, base + i * (nf << log2_esz)); if (i == 0) { /* Allow fault on first element. */ - probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD); + probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD, + mmu_index, &host, NULL, false); } else { remain = nf << log2_esz; while (remain > 0) { offset = -(addr_i | TARGET_PAGE_MASK); /* Probe nonfault on subsequent elements. 
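      *
      * A nonfault probe reports an unmapped page by setting
      * TLB_INVALID_MASK in the returned flags, with *host left NULL,
      * instead of raising a guest exception. Roughly (a sketch, not
      * the exact code below):
      *
      *   flags = probe_access_flags(env, addr_i, offset, MMU_DATA_LOAD,
      *                              mmu_index, true, &host, 0);
      *   if (flags & TLB_INVALID_MASK) {
      *       stop here and shorten vl instead of trapping
      *   }
      *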
*/ - flags = probe_access_flags(env, addr_i, offset, - MMU_DATA_LOAD, mmu_index, true, - &host, 0); + probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD, + mmu_index, &host, &flags, true); /* * Stop if invalid (unmapped) or mmio (transaction may @@ -5113,9 +5142,11 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ \ for (i = i_max; i < vl; ++i) { \ - if (vm || vext_elem_mask(v0, i)) { \ - *((ETYPE *)vd + H(i)) = 0; \ + if (!vm && !vext_elem_mask(v0, i)) { \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ + continue; \ } \ + *((ETYPE *)vd + H(i)) = 0; \ } \ \ env->vstart = 0; \ @@ -5167,11 +5198,11 @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) -#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ @@ -5218,11 +5249,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ diff --git a/target/riscv/zce_helper.c b/target/riscv/zce_helper.c index b433bda..55221f5 100644 --- a/target/riscv/zce_helper.c +++ b/target/riscv/zce_helper.c @@ -18,9 +18,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" target_ulong HELPER(cm_jalt)(CPURISCVState *env, uint32_t index) { diff --git a/target/rx/cpu-param.h b/target/rx/cpu-param.h index ef1970a..84934f3 100644 --- a/target/rx/cpu-param.h +++ b/target/rx/cpu-param.h @@ -24,4 +24,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +#define TARGET_INSN_START_EXTRA_WORDS 0 + #endif diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 0ba0d55..da02ae7 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -24,9 +24,11 @@ #include "exec/cputlb.h" #include "exec/page-protection.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "hw/loader.h" #include "fpu/softfloat.h" #include "tcg/debug-assert.h" +#include "accel/tcg/cpu-ops.h" static void rx_cpu_set_pc(CPUState *cs, vaddr value) { @@ -42,6 +44,17 @@ static vaddr rx_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState rx_get_tb_cpu_state(CPUState *cs) +{ + CPURXState *env = cpu_env(cs); + uint32_t flags = 0; + + flags = FIELD_DP32(flags, PSW, PM, env->psw_pm); + flags = FIELD_DP32(flags, PSW, U, env->psw_u); + + return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; +} + static void rx_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -62,11 +75,10 @@ static void rx_restore_state_to_opc(CPUState *cs, static bool rx_cpu_has_work(CPUState *cs) { - return 
cs->interrupt_request & - (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR); } -static int riscv_cpu_mmu_index(CPUState *cs, bool ifunc) +static int rx_cpu_mmu_index(CPUState *cs, bool ifunc) { return 0; } @@ -200,21 +212,27 @@ static const struct SysemuCPUOps rx_sysemu_ops = { .get_phys_page_debug = rx_cpu_get_phys_page_debug, }; -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps rx_tcg_ops = { + /* MTTCG not yet supported: require strict ordering */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = false, + .initialize = rx_translate_init, .translate_code = rx_translate_code, + .get_tb_cpu_state = rx_get_tb_cpu_state, .synchronize_from_tb = rx_cpu_synchronize_from_tb, .restore_state_to_opc = rx_restore_state_to_opc, + .mmu_index = rx_cpu_mmu_index, .tlb_fill = rx_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = rx_cpu_exec_interrupt, .cpu_exec_halt = rx_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = rx_cpu_do_interrupt, }; -static void rx_cpu_class_init(ObjectClass *klass, void *data) +static void rx_cpu_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); CPUClass *cc = CPU_CLASS(klass); @@ -227,7 +245,6 @@ static void rx_cpu_class_init(ObjectClass *klass, void *data) &rcc->parent_phases); cc->class_by_name = rx_cpu_class_by_name; - cc->mmu_index = riscv_cpu_mmu_index; cc->dump_state = rx_cpu_dump_state; cc->set_pc = rx_cpu_set_pc; cc->get_pc = rx_cpu_get_pc; diff --git a/target/rx/cpu.h b/target/rx/cpu.h index 349d61c..ba5761b 100644 --- a/target/rx/cpu.h +++ b/target/rx/cpu.h @@ -23,7 +23,9 @@ #include "hw/registerfields.h" #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" #ifdef CONFIG_USER_ONLY @@ -145,23 +147,12 @@ void rx_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte); -#include "exec/cpu-all.h" - #define CPU_INTERRUPT_SOFT CPU_INTERRUPT_TGT_INT_0 #define CPU_INTERRUPT_FIR CPU_INTERRUPT_TGT_INT_1 #define RX_CPU_IRQ 0 #define RX_CPU_FIR 1 -static inline void cpu_get_tb_cpu_state(CPURXState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *cs_base = 0; - *flags = FIELD_DP32(0, PSW, PM, env->psw_pm); - *flags = FIELD_DP32(*flags, PSW, U, env->psw_u); -} - static inline uint32_t rx_cpu_pack_psw(CPURXState *env) { uint32_t psw = 0; diff --git a/target/rx/helper.c b/target/rx/helper.c index e8aabf4..41c9606 100644 --- a/target/rx/helper.c +++ b/target/rx/helper.c @@ -20,7 +20,7 @@ #include "qemu/bitops.h" #include "cpu.h" #include "exec/log.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "hw/irq.h" void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte) @@ -44,7 +44,7 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte) void rx_cpu_do_interrupt(CPUState *cs) { CPURXState *env = cpu_env(cs); - int do_irq = cs->interrupt_request & INT_FLAGS; + int do_irq = cpu_test_interrupt(cs, INT_FLAGS); uint32_t save_psw; env->in_sleep = 0; @@ -63,7 +63,7 @@ void rx_cpu_do_interrupt(CPUState *cs) env->bpsw = save_psw; env->pc = env->fintv; env->psw_ipl = 15; - cs->interrupt_request &= ~CPU_INTERRUPT_FIR; + cpu_reset_interrupt(cs, CPU_INTERRUPT_FIR); qemu_set_irq(env->ack, env->ack_irq); qemu_log_mask(CPU_LOG_INT, "fast interrupt raised\n"); } else if (do_irq & 
CPU_INTERRUPT_HARD) { @@ -73,7 +73,7 @@ void rx_cpu_do_interrupt(CPUState *cs) cpu_stl_data(env, env->isp, env->pc); env->pc = cpu_ldl_data(env, env->intb + env->ack_irq * 4); env->psw_ipl = env->ack_ipl; - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); qemu_set_irq(env->ack, env->ack_irq); qemu_log_mask(CPU_LOG_INT, "interrupt 0x%02x raised\n", env->ack_irq); diff --git a/target/rx/op_helper.c b/target/rx/op_helper.c index b3ed822..2b190a4 100644 --- a/target/rx/op_helper.c +++ b/target/rx/op_helper.c @@ -19,9 +19,8 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "fpu/softfloat.h" #include "tcg/debug-assert.h" diff --git a/target/rx/translate.c b/target/rx/translate.c index bbda703..19a9584 100644 --- a/target/rx/translate.c +++ b/target/rx/translate.c @@ -20,7 +20,6 @@ #include "qemu/bswap.h" #include "qemu/qemu-print.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" diff --git a/target/s390x/cpu-param.h b/target/s390x/cpu-param.h index 5c331ec..abfae3b 100644 --- a/target/s390x/cpu-param.h +++ b/target/s390x/cpu-param.h @@ -12,10 +12,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 64 #define TARGET_VIRT_ADDR_SPACE_BITS 64 -/* - * The z/Architecture has a strong memory model with some - * store-after-load re-ordering. - */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) +#define TARGET_INSN_START_EXTRA_WORDS 2 #endif diff --git a/target/s390x/cpu-system.c b/target/s390x/cpu-system.c index 9b380e3..f3a9ffb 100644 --- a/target/s390x/cpu-system.c +++ b/target/s390x/cpu-system.c @@ -49,7 +49,7 @@ bool s390_cpu_has_work(CPUState *cs) return false; } - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { + if (!cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { return false; } @@ -196,7 +196,7 @@ static bool disabled_wait(CPUState *cpu) (PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK)); } -static unsigned s390_count_running_cpus(void) +unsigned s390_count_running_cpus(void) { CPUState *cpu; int nr_running = 0; @@ -214,7 +214,7 @@ static unsigned s390_count_running_cpus(void) return nr_running; } -unsigned int s390_cpu_halt(S390CPU *cpu) +void s390_cpu_halt(S390CPU *cpu) { CPUState *cs = CPU(cpu); trace_cpu_halt(cs->cpu_index); @@ -223,8 +223,6 @@ unsigned int s390_cpu_halt(S390CPU *cpu) cs->halted = 1; cs->exception_index = EXCP_HLT; } - - return s390_count_running_cpus(); } void s390_cpu_unhalt(S390CPU *cpu) diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 1f75629..f05ce31 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -126,11 +126,6 @@ static vaddr s390_cpu_get_pc(CPUState *cs) return cpu->env.psw.addr; } -static int s390x_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - return s390x_env_mmu_index(cpu_env(cs), ifetch); -} - static void s390_query_cpu_fast(CPUState *cpu, CpuInfoFast *value) { S390CPU *s390_cpu = S390_CPU(cpu); @@ -307,10 +302,16 @@ static const Property s390x_cpu_properties[] = { #ifdef CONFIG_TCG #include "accel/tcg/cpu-ops.h" +#include "tcg/tcg_s390x.h" + +static int s390x_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return s390x_env_mmu_index(cpu_env(cs), ifetch); +} -void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) +static TCGTBCPUState s390x_get_tb_cpu_state(CPUState *cs) { + CPUS390XState *env = cpu_env(cs); uint32_t flags; if 
(env->psw.addr & 1) { @@ -322,9 +323,6 @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, tcg_s390_program_interrupt(env, PGM_SPECIFICATION, 0); } - *pc = env->psw.addr; - *cs_base = env->ex_value; - flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW; if (env->psw.mask & PSW_MASK_PER) { flags |= env->cregs[9] & (FLAG_MASK_PER_BRANCH | @@ -341,21 +339,46 @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, if (env->cregs[0] & CR0_VECTOR) { flags |= FLAG_MASK_VECTOR; } - *pflags = flags; + + return (TCGTBCPUState){ + .pc = env->psw.addr, + .flags = flags, + .cs_base = env->ex_value, + }; +} + +#ifndef CONFIG_USER_ONLY +static vaddr s390_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return wrap_address(cpu_env(cs), result); } +#endif static const TCGCPUOps s390_tcg_ops = { + .mttcg_supported = true, + .precise_smc = true, + /* + * The z/Architecture has a strong memory model with some + * store-after-load re-ordering. + */ + .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD, + .initialize = s390x_translate_init, .translate_code = s390x_translate_code, + .get_tb_cpu_state = s390x_get_tb_cpu_state, .restore_state_to_opc = s390x_restore_state_to_opc, + .mmu_index = s390x_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = s390_cpu_record_sigsegv, .record_sigbus = s390_cpu_record_sigbus, #else .tlb_fill = s390_cpu_tlb_fill, + .pointer_wrap = s390_pointer_wrap, .cpu_exec_interrupt = s390_cpu_exec_interrupt, .cpu_exec_halt = s390_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = s390_cpu_do_interrupt, .debug_excp_handler = s390x_cpu_debug_excp_handler, .do_unaligned_access = s390x_cpu_do_unaligned_access, @@ -363,7 +386,7 @@ static const TCGCPUOps s390_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void s390_cpu_class_init(ObjectClass *oc, void *data) +static void s390_cpu_class_init(ObjectClass *oc, const void *data) { S390CPUClass *scc = S390_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(scc); @@ -378,7 +401,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) &scc->parent_phases); cc->class_by_name = s390_cpu_class_by_name; - cc->mmu_index = s390x_cpu_mmu_index; + cc->list_cpus = s390_cpu_list; cc->dump_state = s390_cpu_dump_state; cc->query_cpu_fast = s390_query_cpu_fast; cc->set_pc = s390_cpu_set_pc; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 5b7992d..aa931cb 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -27,16 +27,14 @@ #include "cpu-qom.h" #include "cpu_models.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" #include "qapi/qapi-types-machine-common.h" #define ELF_MACHINE_UNAME "S390X" -#define TARGET_HAS_PRECISE_SMC - -#define TARGET_INSN_START_EXTRA_WORDS 2 - #define MMU_USER_IDX 0 #define S390_MAX_CPUS 248 @@ -413,15 +411,6 @@ static inline int s390x_env_mmu_index(CPUS390XState *env, bool ifetch) #endif } -#ifdef CONFIG_TCG - -#include "tcg/tcg_s390x.h" - -void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags); - -#endif /* CONFIG_TCG */ - /* PER bits from control register 9 */ #define PER_CR9_EVENT_BRANCH 0x80000000 #define PER_CR9_EVENT_IFETCH 0x40000000 @@ -900,13 +889,6 @@ static inline uint8_t s390_cpu_get_state(S390CPU *cpu) } -/* cpu_models.c */ -void s390_cpu_list(void); -#define cpu_list s390_cpu_list -void s390_set_qemu_cpu_model(uint16_t type, uint8_t gen, uint8_t ec_ga, - const S390FeatInit feat_init); - - /* helper.c */ #define CPU_RESOLVING_TYPE 
TYPE_S390_CPU @@ -946,6 +928,4 @@ uint64_t s390_cpu_get_psw_mask(CPUS390XState *env); /* outside of target/s390x/ */ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr); -#include "exec/cpu-all.h" - #endif diff --git a/target/s390x/cpu_features_def.h.inc b/target/s390x/cpu_features_def.h.inc index e23e603..c017bff 100644 --- a/target/s390x/cpu_features_def.h.inc +++ b/target/s390x/cpu_features_def.h.inc @@ -186,7 +186,7 @@ DEF_FEAT(PLO_CSO, "plo-cso", PLO, 25, "PLO Compare and swap (256 bit in paramete DEF_FEAT(PLO_DCSO, "plo-dcso", PLO, 26, "PLO Double compare and swap (256 bit in parameter list)") DEF_FEAT(PLO_CSSTO, "plo-cssto", PLO, 27, "PLO Compare and swap and store (256 bit in parameter list)") DEF_FEAT(PLO_CSDSTO, "plo-csdsto", PLO, 28, "PLO Compare and swap and double store (256 bit in parameter list)") -DEF_FEAT(PLO_CSTSTO, "plo-cststo", PLO, 29, "PLO Compare and swap and trible store (256 bit in parameter list)") +DEF_FEAT(PLO_CSTSTO, "plo-cststo", PLO, 29, "PLO Compare and swap and triple store (256 bit in parameter list)") DEF_FEAT(PLO_TCS, "plo-tcs", PLO, 30, "Triple compare and swap (32 bit in parameter list)") DEF_FEAT(PLO_TCSG, "plo-tcsg", PLO, 31, "Triple compare and swap (64 bit in parameter list)") DEF_FEAT(PLO_TCSX, "plo-tcsx", PLO, 32, "Triple compare and swap (128 bit in parameter list)") diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 93a05e4..954a7a9 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -373,7 +373,7 @@ static void s390_print_cpu_model_list_entry(gpointer data, gpointer user_data) g_free(name); } -static gint s390_cpu_list_compare(gconstpointer a, gconstpointer b) +static gint s390_cpu_list_compare(gconstpointer a, gconstpointer b, gpointer d) { const S390CPUClass *cc_a = S390_CPU_CLASS((ObjectClass *)a); const S390CPUClass *cc_b = S390_CPU_CLASS((ObjectClass *)b); @@ -415,7 +415,7 @@ void s390_cpu_list(void) qemu_printf("Available CPUs:\n"); list = object_class_get_list(TYPE_S390_CPU, false); - list = g_slist_sort(list, s390_cpu_list_compare); + list = g_slist_sort_with_data(list, s390_cpu_list_compare, NULL); g_slist_foreach(list, s390_print_cpu_model_list_entry, NULL); g_slist_free(list); @@ -578,7 +578,6 @@ static void check_compat_model_failed(Error **errp, error_setg(errp, "%s. 
Maximum supported model in the current configuration: \'%s\'", msg, max_model->def->name); error_append_hint(errp, "Consider a different accelerator, try \"-accel help\"\n"); - return; } static bool check_compatibility(const S390CPUModel *max_model, @@ -920,7 +919,7 @@ void s390_cpu_model_class_register_props(ObjectClass *oc) } #ifdef CONFIG_KVM -static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +static void s390_host_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -929,7 +928,7 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) } #endif -static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) +static void s390_base_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -940,7 +939,7 @@ static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) xcc->desc = xcc->cpu_def->desc; } -static void s390_cpu_model_class_init(ObjectClass *oc, void *data) +static void s390_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -950,7 +949,7 @@ static void s390_cpu_model_class_init(ObjectClass *oc, void *data) xcc->desc = xcc->cpu_def->desc; } -static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +static void s390_qemu_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -959,7 +958,7 @@ static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) qemu_hw_version()); } -static void s390_max_cpu_model_class_init(ObjectClass *oc, void *data) +static void s390_max_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -1073,7 +1072,7 @@ static void register_types(void) .instance_init = s390_cpu_model_initfn, .instance_finalize = s390_cpu_model_finalize, .class_init = s390_base_cpu_model_class_init, - .class_data = (void *) &s390_cpu_defs[i], + .class_data = &s390_cpu_defs[i], }; char *name = s390_cpu_type_name(s390_cpu_defs[i].name); TypeInfo ti = { @@ -1082,7 +1081,7 @@ static void register_types(void) .instance_init = s390_cpu_model_initfn, .instance_finalize = s390_cpu_model_finalize, .class_init = s390_cpu_model_class_init, - .class_data = (void *) &s390_cpu_defs[i], + .class_data = &s390_cpu_defs[i], }; type_register_static(&ti_base); diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h index 71d4bc2..f701bc0 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h @@ -113,6 +113,9 @@ static inline uint64_t s390_cpuid_from_cpu_model(const S390CPUModel *model) } S390CPUDef const *s390_find_cpu_def(uint16_t type, uint8_t gen, uint8_t ec_ga, S390FeatBitmap features); +void s390_set_qemu_cpu_model(uint16_t type, uint8_t gen, uint8_t ec_ga, + const S390FeatInit feat_init); +void s390_cpu_list(void); bool kvm_s390_cpu_models_supported(void); bool kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp); diff --git a/target/s390x/cpu_models_system.c b/target/s390x/cpu_models_system.c index 4351182..5b84604 100644 --- a/target/s390x/cpu_models_system.c +++ b/target/s390x/cpu_models_system.c @@ -19,7 +19,7 @@ #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" #include "qobject/qdict.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" static void list_add_feat(const char *name, void *opaque); @@ -252,6 +252,9 @@ CpuModelExpansionInfo 
*qmp_query_cpu_model_expansion(CpuModelExpansionType type, s390_feat_bitmap_to_ascii(deprecated_feats, &expansion_info->deprecated_props, list_add_feat); + + expansion_info->has_deprecated_props = !!expansion_info->deprecated_props; + return expansion_info; } diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 4184067..8218e64 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -844,13 +844,11 @@ static uint16_t default_GEN17_GA1[] = { /* QEMU (CPU model) features */ -static uint16_t qemu_V2_11[] = { +static uint16_t qemu_MIN[] = { + /* Features supported by the default CPU of the oldest machine type */ S390_FEAT_GROUP_PLO, S390_FEAT_ESAN3, S390_FEAT_ZARCH, -}; - -static uint16_t qemu_V3_1[] = { S390_FEAT_DAT_ENH, S390_FEAT_IDTE_SEGMENT, S390_FEAT_STFLE, @@ -880,18 +878,12 @@ static uint16_t qemu_V3_1[] = { S390_FEAT_ADAPTER_INT_SUPPRESSION, S390_FEAT_MSA_EXT_3, S390_FEAT_MSA_EXT_4, -}; - -static uint16_t qemu_V4_0[] = { /* * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not * implemented yet). */ S390_FEAT_FLOATING_POINT_EXT, S390_FEAT_ZPCI, -}; - -static uint16_t qemu_V4_1[] = { S390_FEAT_STFLE_53, S390_FEAT_VECTOR, }; @@ -1053,10 +1045,7 @@ static FeatGroupDefSpec FeatGroupDef[] = { * QEMU (CPU model) features *******************************/ static FeatGroupDefSpec QemuFeatDef[] = { - QEMU_FEAT_INITIALIZER(V2_11), - QEMU_FEAT_INITIALIZER(V3_1), - QEMU_FEAT_INITIALIZER(V4_0), - QEMU_FEAT_INITIALIZER(V4_1), + QEMU_FEAT_INITIALIZER(MIN), QEMU_FEAT_INITIALIZER(V6_0), QEMU_FEAT_INITIALIZER(V6_2), QEMU_FEAT_INITIALIZER(V7_0), diff --git a/target/s390x/helper.c b/target/s390x/helper.c index c689e11..184428c 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -24,9 +24,11 @@ #include "gdbstub/helpers.h" #include "qemu/timer.h" #include "hw/s390x/ioinst.h" -#include "target/s390x/kvm/pv.h" #include "system/hw_accel.h" +#include "system/memory.h" #include "system/runstate.h" +#include "exec/target_page.h" +#include "exec/watchpoint.h" void s390x_tod_timer(void *opaque) { @@ -89,7 +91,9 @@ void s390_handle_wait(S390CPU *cpu) { CPUState *cs = CPU(cpu); - if (s390_cpu_halt(cpu) == 0) { + s390_cpu_halt(cpu); + + if (s390_count_running_cpus() == 0) { if (is_special_wait_psw(cpu->env.psw.addr)) { qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { @@ -103,19 +107,23 @@ LowCore *cpu_map_lowcore(CPUS390XState *env) { LowCore *lowcore; hwaddr len = sizeof(LowCore); + CPUState *cs = env_cpu(env); + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; - lowcore = cpu_physical_memory_map(env->psa, &len, true); + lowcore = address_space_map(cs->as, env->psa, &len, true, attrs); if (len < sizeof(LowCore)) { - cpu_abort(env_cpu(env), "Could not map lowcore\n"); + cpu_abort(cs, "Could not map lowcore\n"); } return lowcore; } -void cpu_unmap_lowcore(LowCore *lowcore) +void cpu_unmap_lowcore(CPUS390XState *env, LowCore *lowcore) { - cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore)); + AddressSpace *as = env_cpu(env)->as; + + address_space_unmap(as, lowcore, sizeof(LowCore), true, sizeof(LowCore)); } void do_restart_interrupt(CPUS390XState *env) @@ -130,7 +138,7 @@ void do_restart_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->restart_new_psw.mask); addr = be64_to_cpu(lowcore->restart_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); env->pending_int &= ~INTERRUPT_RESTART; s390_cpu_set_psw(env, mask, addr); @@ -173,109 +181,3 @@ void 
s390_cpu_recompute_watchpoints(CPUState *cs) wp_flags, NULL); } } - -typedef struct SigpSaveArea { - uint64_t fprs[16]; /* 0x0000 */ - uint64_t grs[16]; /* 0x0080 */ - PSW psw; /* 0x0100 */ - uint8_t pad_0x0110[0x0118 - 0x0110]; /* 0x0110 */ - uint32_t prefix; /* 0x0118 */ - uint32_t fpc; /* 0x011c */ - uint8_t pad_0x0120[0x0124 - 0x0120]; /* 0x0120 */ - uint32_t todpr; /* 0x0124 */ - uint64_t cputm; /* 0x0128 */ - uint64_t ckc; /* 0x0130 */ - uint8_t pad_0x0138[0x0140 - 0x0138]; /* 0x0138 */ - uint32_t ars[16]; /* 0x0140 */ - uint64_t crs[16]; /* 0x0384 */ -} SigpSaveArea; -QEMU_BUILD_BUG_ON(sizeof(SigpSaveArea) != 512); - -int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) -{ - static const uint8_t ar_id = 1; - SigpSaveArea *sa; - hwaddr len = sizeof(*sa); - int i; - - /* For PVMs storing will occur when this cpu enters SIE again */ - if (s390_is_pv()) { - return 0; - } - - sa = cpu_physical_memory_map(addr, &len, true); - if (!sa) { - return -EFAULT; - } - if (len != sizeof(*sa)) { - cpu_physical_memory_unmap(sa, len, 1, 0); - return -EFAULT; - } - - if (store_arch) { - cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1); - } - for (i = 0; i < 16; ++i) { - sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i)); - } - for (i = 0; i < 16; ++i) { - sa->grs[i] = cpu_to_be64(cpu->env.regs[i]); - } - sa->psw.addr = cpu_to_be64(cpu->env.psw.addr); - sa->psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(&cpu->env)); - sa->prefix = cpu_to_be32(cpu->env.psa); - sa->fpc = cpu_to_be32(cpu->env.fpc); - sa->todpr = cpu_to_be32(cpu->env.todpr); - sa->cputm = cpu_to_be64(cpu->env.cputm); - sa->ckc = cpu_to_be64(cpu->env.ckc >> 8); - for (i = 0; i < 16; ++i) { - sa->ars[i] = cpu_to_be32(cpu->env.aregs[i]); - } - for (i = 0; i < 16; ++i) { - sa->crs[i] = cpu_to_be64(cpu->env.cregs[i]); - } - - cpu_physical_memory_unmap(sa, len, 1, len); - - return 0; -} - -typedef struct SigpAdtlSaveArea { - uint64_t vregs[32][2]; /* 0x0000 */ - uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ - uint64_t gscb[4]; /* 0x0400 */ - uint8_t pad_0x0420[0x1000 - 0x0420]; /* 0x0420 */ -} SigpAdtlSaveArea; -QEMU_BUILD_BUG_ON(sizeof(SigpAdtlSaveArea) != 4096); - -#define ADTL_GS_MIN_SIZE 2048 /* minimal size of adtl save area for GS */ -int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len) -{ - SigpAdtlSaveArea *sa; - hwaddr save = len; - int i; - - sa = cpu_physical_memory_map(addr, &save, true); - if (!sa) { - return -EFAULT; - } - if (save != len) { - cpu_physical_memory_unmap(sa, len, 1, 0); - return -EFAULT; - } - - if (s390_has_feat(S390_FEAT_VECTOR)) { - for (i = 0; i < 32; i++) { - sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]); - sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]); - } - } - if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) { - for (i = 0; i < 4; i++) { - sa->gscb[i] = cpu_to_be64(cpu->env.gscb[i]); - } - } - - cpu_physical_memory_unmap(sa, len, 1, len); - return 0; -} diff --git a/target/s390x/interrupt.c b/target/s390x/interrupt.c index 4ae6e2d..1dca835 100644 --- a/target/s390x/interrupt.c +++ b/target/s390x/interrupt.c @@ -11,7 +11,6 @@ #include "cpu.h" #include "kvm/kvm_s390x.h" #include "s390x-internal.h" -#include "exec/exec-all.h" #include "system/kvm.h" #include "system/tcg.h" #include "hw/s390x/ioinst.h" diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index a944f16..2320dd4 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -12,11 +12,13 @@ #include "qemu/osdep.h" #include "cpu.h" +#include 
"exec/target_page.h" #include "s390x-internal.h" #include "hw/s390x/ioinst.h" #include "trace.h" #include "hw/s390x/s390-pci-bus.h" #include "target/s390x/kvm/pv.h" +#include "hw/s390x/ap-bridge.h" /* All I/O instructions but chsc use the s format */ static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, @@ -573,13 +575,19 @@ out: static int chsc_sei_nt0_get_event(void *res) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_get_event(res); + } + return 1; } static int chsc_sei_nt0_have_event(void) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_have_event(); + } + return 0; } diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 4d56e65..916dac1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -41,7 +41,7 @@ #include "system/runstate.h" #include "system/device_tree.h" #include "gdbstub/enums.h" -#include "exec/ram_addr.h" +#include "system/ram_addr.h" #include "trace.h" #include "hw/s390x/s390-pci-inst.h" #include "hw/s390x/s390-pci-bus.h" @@ -298,12 +298,6 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) return; } - if (!hpage_1m_allowed()) { - error_setg(errp, "This QEMU machine does not support huge page " - "mappings"); - return; - } - if (pagesize != 1 * MiB) { error_setg(errp, "Memory backing with 2G pages was specified, " "but KVM does not support this memory backing"); @@ -404,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) return cpu->cpu_index; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus; @@ -469,7 +468,7 @@ static int can_sync_regs(CPUState *cs, int regs) #define KVM_SYNC_REQUIRED_REGS (KVM_SYNC_GPRS | KVM_SYNC_ACRS | \ KVM_SYNC_CRS | KVM_SYNC_PREFIX) -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu = {}; @@ -890,7 +889,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) return 0; } -static struct kvm_hw_breakpoint *find_hw_breakpoint(target_ulong addr, +static struct kvm_hw_breakpoint *find_hw_breakpoint(vaddr addr, int len, int type) { int n; @@ -905,7 +904,7 @@ static struct kvm_hw_breakpoint *find_hw_breakpoint(target_ulong addr, return NULL; } -static int insert_hw_breakpoint(target_ulong addr, int len, int type) +static int insert_hw_breakpoint(vaddr addr, int len, int type) { int size; diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c index b191a4a..2bc916a 100644 --- a/target/s390x/kvm/pv.c +++ b/target/s390x/kvm/pv.c @@ -30,7 +30,7 @@ static struct kvm_s390_pv_info_vm info_vm; static struct kvm_s390_pv_info_dump info_dump; static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, - int *pvrc) + struct S390PVResponse *pv_resp) { struct kvm_pv_cmd pv_cmd = { .cmd = cmd, @@ -47,8 +47,10 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, rc); } - if (pvrc) { - *pvrc = pv_cmd.rc; + if (pv_resp) { + pv_resp->cmd = cmd; + pv_resp->rc = pv_cmd.rc; + pv_resp->rrc = pv_cmd.rrc; } return rc; } @@ -57,16 +59,15 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, * This macro lets us pass the command as a string to the function so * we can print it on an error. 
*/ -#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) -#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc) -#define s390_pv_cmd_exit(cmd, data) \ -{ \ - int rc; \ - \ - rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \ - if (rc) { \ - exit(1); \ - } \ +#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) +#define s390_pv_cmd_pv_resp(cmd, data, pv_resp) \ + __s390_pv_cmd(cmd, #cmd, data, pv_resp) + +static void s390_pv_cmd_exit(uint32_t cmd, void *data) +{ + if (s390_pv_cmd(cmd, data)) { + exit(1); + } } int s390_pv_query_info(void) @@ -147,18 +148,20 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) return true; } -int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) +#define UV_RC_SSC_INVAL_HOSTKEY 0x0108 +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, + struct S390PVResponse *pv_resp, Error **errp) { - int ret, pvrc; + int ret; struct kvm_s390_pv_sec_parm args = { .origin = origin, .length = length, }; - ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc); + ret = s390_pv_cmd_pv_resp(KVM_PV_SET_SEC_PARMS, &args, pv_resp); if (ret) { error_setg(errp, "Failed to set secure execution parameters"); - if (pvrc == 0x108) { + if (pv_resp->rc == UV_RC_SSC_INVAL_HOSTKEY) { error_append_hint(errp, "Please check whether the image is " "correctly encrypted for this host\n"); } @@ -170,7 +173,8 @@ int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) /* * Called for each component in the SE type IPL parameter block 0. */ -int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) +int s390_pv_unpack(uint64_t addr, uint64_t size, + uint64_t tweak, struct S390PVResponse *pv_resp) { struct kvm_s390_pv_unp args = { .addr = addr, @@ -178,7 +182,7 @@ int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) .tweak = tweak, }; - return s390_pv_cmd(KVM_PV_UNPACK, &args); + return s390_pv_cmd_pv_resp(KVM_PV_UNPACK, &args, pv_resp); } void s390_pv_prep_reset(void) @@ -186,9 +190,9 @@ void s390_pv_prep_reset(void) s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); } -int s390_pv_verify(void) +int s390_pv_verify(struct S390PVResponse *pv_resp) { - return s390_pv_cmd(KVM_PV_VERIFY, NULL); + return s390_pv_cmd_pv_resp(KVM_PV_VERIFY, NULL, pv_resp); } void s390_pv_unshare(void) @@ -196,13 +200,29 @@ void s390_pv_unshare(void) s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); } -void s390_pv_inject_reset_error(CPUState *cs) +void s390_pv_inject_reset_error(CPUState *cs, + struct S390PVResponse pv_resp) { int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; CPUS390XState *env = &S390_CPU(cs)->env; + union { + struct { + uint16_t pv_cmd; + uint16_t pv_rrc; + uint16_t pv_rc; + uint16_t diag_rc; + }; + uint64_t regs; + } resp = { + .pv_cmd = pv_resp.cmd, + .pv_rrc = pv_resp.rrc, + .pv_rc = pv_resp.rc, + .diag_rc = DIAG_308_RC_INVAL_FOR_PV + }; + /* Report that we are unable to enter protected mode */ - env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; + env->regs[r1 + 1] = resp.regs; } uint64_t kvm_s390_pv_dmp_get_size_cpu(void) @@ -367,7 +387,7 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, { TYPE_USER_CREATABLE }, { NULL }) -static void s390_pv_guest_class_init(ObjectClass *oc, void *data) +static void s390_pv_guest_class_init(ObjectClass *oc, const void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h index 5e9c8bd..94e885e 100644 --- a/target/s390x/kvm/pv.h +++ b/target/s390x/kvm/pv.h @@ -16,6 
+16,12 @@ #include "system/kvm.h" #include "hw/s390x/s390-virtio-ccw.h" +struct S390PVResponse { + uint16_t cmd; + uint16_t rrc; + uint16_t rc; +}; + #ifdef CONFIG_KVM #include "cpu.h" @@ -42,12 +48,15 @@ int s390_pv_query_info(void); int s390_pv_vm_enable(void); void s390_pv_vm_disable(void); bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); -int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp); -int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, + struct S390PVResponse *pv_resp, Error **errp); +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak, + struct S390PVResponse *pv_resp); void s390_pv_prep_reset(void); -int s390_pv_verify(void); +int s390_pv_verify(struct S390PVResponse *pv_resp); void s390_pv_unshare(void); -void s390_pv_inject_reset_error(CPUState *cs); +void s390_pv_inject_reset_error(CPUState *cs, + struct S390PVResponse pv_resp); uint64_t kvm_s390_pv_dmp_get_size_cpu(void); uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); @@ -63,12 +72,15 @@ static inline int s390_pv_vm_enable(void) { return 0; } static inline void s390_pv_vm_disable(void) {} static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, + struct S390PVResponse *pv_resp, Error **errp) { return 0; } -static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak, + struct S390PVResponse *pv_resp) { return 0; } static inline void s390_pv_prep_reset(void) {} -static inline int s390_pv_verify(void) { return 0; } +static inline int s390_pv_verify(struct S390PVResponse *pv_resp) { return 0; } static inline void s390_pv_unshare(void) {} -static inline void s390_pv_inject_reset_error(CPUState *cs) {}; +static inline void s390_pv_inject_reset_error(CPUState *cs, + struct S390PVResponse pv_resp) {}; static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c index d8f4838..487c41b 100644 --- a/target/s390x/mmu_helper.c +++ b/target/s390x/mmu_helper.c @@ -17,14 +17,15 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "cpu.h" #include "s390x-internal.h" #include "kvm/kvm_s390x.h" #include "system/kvm.h" #include "system/tcg.h" -#include "exec/exec-all.h" +#include "system/memory.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "hw/hw.h" #include "hw/s390x/storage-keys.h" #include "hw/boards.h" @@ -522,6 +523,7 @@ int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, int len, bool is_write) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int currlen, nr_pages, i; target_ulong *pages; uint64_t tec; @@ -542,11 +544,13 @@ int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, if (ret) { trigger_access_exception(&cpu->env, ret, tec); } else if (hostbuf != NULL) { + AddressSpace *as = CPU(cpu)->as; + /* Copy data by stepping through the area page by page */ for (i = 0; 
i < nr_pages; i++) { currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE)); - cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK), - hostbuf, currlen, is_write); + address_space_rw(as, pages[i] | (laddr & ~TARGET_PAGE_MASK), + attrs, hostbuf, currlen, is_write); laddr += currlen; hostbuf += currlen; len -= currlen; diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h index a4ba622..9691366 100644 --- a/target/s390x/s390x-internal.h +++ b/target/s390x/s390x-internal.h @@ -238,7 +238,8 @@ uint32_t calc_cc(CPUS390XState *env, uint32_t cc_op, uint64_t src, uint64_t dst, /* cpu.c */ #ifndef CONFIG_USER_ONLY -unsigned int s390_cpu_halt(S390CPU *cpu); +unsigned int s390_count_running_cpus(void); +void s390_cpu_halt(S390CPU *cpu); void s390_cpu_unhalt(S390CPU *cpu); void s390_cpu_system_init(Object *obj); bool s390_cpu_system_realize(DeviceState *dev, Error **errp); @@ -246,16 +247,6 @@ void s390_cpu_finalize(Object *obj); void s390_cpu_system_class_init(CPUClass *cc); void s390_cpu_machine_reset_cb(void *opaque); bool s390_cpu_has_work(CPUState *cs); - -#else -static inline unsigned int s390_cpu_halt(S390CPU *cpu) -{ - return 0; -} - -static inline void s390_cpu_unhalt(S390CPU *cpu) -{ -} #endif /* CONFIG_USER_ONLY */ @@ -332,11 +323,8 @@ void s390x_cpu_timer(void *opaque); void s390_handle_wait(S390CPU *cpu); hwaddr s390_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); hwaddr s390_cpu_get_phys_addr_debug(CPUState *cpu, vaddr addr); -#define S390_STORE_STATUS_DEF_ADDR offsetof(LowCore, floating_pt_save_area) -int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch); -int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len); LowCore *cpu_map_lowcore(CPUS390XState *env); -void cpu_unmap_lowcore(LowCore *lowcore); +void cpu_unmap_lowcore(CPUS390XState *env, LowCore *lowcore); #endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c index 6a4d9c5..f5d7bc0 100644 --- a/target/s390x/sigp.c +++ b/target/s390x/sigp.c @@ -13,13 +13,14 @@ #include "s390x-internal.h" #include "hw/boards.h" #include "system/hw_accel.h" +#include "system/memory.h" #include "system/runstate.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "system/tcg.h" #include "trace.h" #include "qapi/qapi-types-machine.h" +#include "target/s390x/kvm/pv.h" QemuMutex qemu_sigp_mutex; @@ -127,6 +128,78 @@ static void sigp_stop(CPUState *cs, run_on_cpu_data arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } +typedef struct SigpSaveArea { + uint64_t fprs[16]; /* 0x0000 */ + uint64_t grs[16]; /* 0x0080 */ + PSW psw; /* 0x0100 */ + uint8_t pad_0x0110[0x0118 - 0x0110]; /* 0x0110 */ + uint32_t prefix; /* 0x0118 */ + uint32_t fpc; /* 0x011c */ + uint8_t pad_0x0120[0x0124 - 0x0120]; /* 0x0120 */ + uint32_t todpr; /* 0x0124 */ + uint64_t cputm; /* 0x0128 */ + uint64_t ckc; /* 0x0130 */ + uint8_t pad_0x0138[0x0140 - 0x0138]; /* 0x0138 */ + uint32_t ars[16]; /* 0x0140 */ + uint64_t crs[16]; /* 0x0180 */ +} SigpSaveArea; +QEMU_BUILD_BUG_ON(sizeof(SigpSaveArea) != 512); + +#define S390_STORE_STATUS_DEF_ADDR offsetof(LowCore, floating_pt_save_area) +static int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) +{ + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = CPU(cpu)->as; + SigpSaveArea *sa; + hwaddr len = sizeof(*sa); + int i; + + /* For PVMs, storing will occur when this cpu enters SIE again */ + if (s390_is_pv()) { + return 0; + } + + sa =
address_space_map(as, addr, &len, true, attrs); + if (!sa) { + return -EFAULT; + } + if (len != sizeof(*sa)) { + address_space_unmap(as, sa, len, true, 0); + return -EFAULT; + } + + if (store_arch) { + static const uint8_t ar_id = 1; + + address_space_stb(as, offsetof(LowCore, ar_access_id), + ar_id, attrs, NULL); + + } + for (i = 0; i < 16; ++i) { + sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i)); + } + for (i = 0; i < 16; ++i) { + sa->grs[i] = cpu_to_be64(cpu->env.regs[i]); + } + sa->psw.addr = cpu_to_be64(cpu->env.psw.addr); + sa->psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(&cpu->env)); + sa->prefix = cpu_to_be32(cpu->env.psa); + sa->fpc = cpu_to_be32(cpu->env.fpc); + sa->todpr = cpu_to_be32(cpu->env.todpr); + sa->cputm = cpu_to_be64(cpu->env.cputm); + sa->ckc = cpu_to_be64(cpu->env.ckc >> 8); + for (i = 0; i < 16; ++i) { + sa->ars[i] = cpu_to_be32(cpu->env.aregs[i]); + } + for (i = 0; i < 16; ++i) { + sa->crs[i] = cpu_to_be64(cpu->env.cregs[i]); + } + + address_space_unmap(as, sa, len, true, len); + + return 0; +} + static void sigp_stop_and_store_status(CPUState *cs, run_on_cpu_data arg) { S390CPU *cpu = S390_CPU(cs); @@ -173,6 +246,49 @@ static void sigp_store_status_at_address(CPUState *cs, run_on_cpu_data arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } +typedef struct SigpAdtlSaveArea { + uint64_t vregs[32][2]; /* 0x0000 */ + uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ + uint64_t gscb[4]; /* 0x0400 */ + uint8_t pad_0x0420[0x1000 - 0x0420]; /* 0x0420 */ +} SigpAdtlSaveArea; +QEMU_BUILD_BUG_ON(sizeof(SigpAdtlSaveArea) != 4096); + +#define ADTL_GS_MIN_SIZE 2048 /* minimal size of adtl save area for GS */ +static int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len) +{ + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = CPU(cpu)->as; + SigpAdtlSaveArea *sa; + hwaddr save = len; + int i; + + sa = address_space_map(as, addr, &save, true, attrs); + if (!sa) { + return -EFAULT; + } + if (save != len) { + address_space_unmap(as, sa, len, true, 0); + return -EFAULT; + } + + if (s390_has_feat(S390_FEAT_VECTOR)) { + for (i = 0; i < 32; i++) { + sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]); + } + } + if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) { + for (i = 0; i < 4; i++) { + sa->gscb[i] = cpu_to_be64(cpu->env.gscb[i]); + } + } + + address_space_unmap(as, sa, len, true, len); + + return 0; +} + #define ADTL_SAVE_LC_MASK 0xfUL static void sigp_store_adtl_status(CPUState *cs, run_on_cpu_data arg) { diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c index b36f8cd..6595ac7 100644 --- a/target/s390x/tcg/cc_helper.c +++ b/target/s390x/tcg/cc_helper.c @@ -22,7 +22,6 @@ #include "cpu.h" #include "s390x-internal.h" #include "tcg_s390x.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c index 93aabd2..4447bb6 100644 --- a/target/s390x/tcg/crypto_helper.c +++ b/target/s390x/tcg/crypto_helper.c @@ -17,8 +17,7 @@ #include "s390x-internal.h" #include "tcg_s390x.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" static uint64_t R(uint64_t x, int c) { diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c index f969850..0ae4e26 100644 --- a/target/s390x/tcg/excp_helper.c +++ b/target/s390x/tcg/excp_helper.c @@ -23,12 +23,14 @@ #include "cpu.h" #include 
"exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" +#include "exec/watchpoint.h" #include "s390x-internal.h" #include "tcg_s390x.h" #ifndef CONFIG_USER_ONLY #include "qemu/timer.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "hw/s390x/ioinst.h" #include "hw/s390x/s390_flic.h" #include "hw/boards.h" @@ -283,7 +285,7 @@ static void do_program_interrupt(CPUS390XState *env) addr = be64_to_cpu(lowcore->program_new_psw.addr); lowcore->per_breaking_event_addr = cpu_to_be64(env->gbea); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -302,7 +304,7 @@ static void do_svc_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->svc_new_psw.mask); addr = be64_to_cpu(lowcore->svc_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); @@ -376,7 +378,7 @@ static void do_ext_interrupt(CPUS390XState *env) lowcore->external_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); lowcore->external_old_psw.addr = cpu_to_be64(env->psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -403,7 +405,7 @@ static void do_io_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->io_new_psw.mask); addr = be64_to_cpu(lowcore->io_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); g_free(io); s390_cpu_set_psw(env, mask, addr); @@ -417,16 +419,18 @@ QEMU_BUILD_BUG_ON(sizeof(MchkExtSaveArea) != 1024); static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = env_cpu(env)->as; hwaddr len = sizeof(MchkExtSaveArea); MchkExtSaveArea *sa; int i; - sa = cpu_physical_memory_map(mcesao, &len, true); + sa = address_space_map(as, mcesao, &len, true, attrs); if (!sa) { return -EFAULT; } if (len != sizeof(MchkExtSaveArea)) { - cpu_physical_memory_unmap(sa, len, 1, 0); + address_space_unmap(as, sa, len, true, 0); return -EFAULT; } @@ -435,7 +439,7 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao) sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]); } - cpu_physical_memory_unmap(sa, len, 1, len); + address_space_unmap(as, sa, len, true, len); return 0; } @@ -487,7 +491,7 @@ static void do_mchk_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->mcck_new_psw.mask); addr = be64_to_cpu(lowcore->mcck_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -558,7 +562,7 @@ try_deliver: /* we might still have pending interrupts, but not deliverable */ if (!env->pending_int && !qemu_s390_flic_has_any(flic)) { - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } /* WAIT PSW during interrupt injection or STOP interrupt */ diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c index 5041c13..1ba4371 100644 --- a/target/s390x/tcg/fpu_helper.c +++ b/target/s390x/tcg/fpu_helper.c @@ -22,7 +22,6 @@ #include "cpu.h" #include "s390x-internal.h" #include "tcg_s390x.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c index 2af970f..fbda396 100644 --- a/target/s390x/tcg/int_helper.c +++ b/target/s390x/tcg/int_helper.c @@ -22,10 +22,9 @@ #include "cpu.h" #include "s390x-internal.h" #include 
"tcg_s390x.h" -#include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" /* #define DEBUG_HELPER */ #ifdef DEBUG_HELPER diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 8187b91..f1acb16 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -25,11 +25,14 @@ #include "tcg_s390x.h" #include "exec/helper-proto.h" #include "exec/cpu-common.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" #include "exec/page-protection.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "accel/tcg/cpu-ops.h" +#include "accel/tcg/helper-retaddr.h" #include "qemu/int128.h" #include "qemu/atomic128.h" @@ -123,8 +126,8 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, /* An access covers at most 4096 bytes and therefore at most two pages. */ typedef struct S390Access { - target_ulong vaddr1; - target_ulong vaddr2; + vaddr vaddr1; + vaddr vaddr2; void *haddr1; void *haddr2; uint16_t size1; @@ -145,7 +148,7 @@ typedef struct S390Access { * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec. * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr. */ -static inline int s390_probe_access(CPUArchState *env, target_ulong addr, +static inline int s390_probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t ra) @@ -255,7 +258,7 @@ static void access_memset(CPUS390XState *env, S390Access *desta, static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, int offset, uintptr_t ra) { - target_ulong vaddr = access->vaddr1; + vaddr vaddr = access->vaddr1; void *haddr = access->haddr1; if (unlikely(offset >= access->size1)) { @@ -275,7 +278,7 @@ static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, static void access_set_byte(CPUS390XState *env, S390Access *access, int offset, uint8_t byte, uintptr_t ra) { - target_ulong vaddr = access->vaddr1; + vaddr vaddr = access->vaddr1; void *haddr = access->haddr1; if (unlikely(offset >= access->size1)) { diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index 31266ae..6d9d601 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -26,9 +26,9 @@ #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "qemu/timer.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "exec/target_page.h" #include "qapi/error.h" #include "tcg_s390x.h" #include "s390-tod.h" @@ -570,7 +570,7 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) lowcore->subchannel_nr = cpu_to_be16(io->nr); lowcore->io_int_parm = cpu_to_be32(io->parm); lowcore->io_int_word = cpu_to_be32(io->word); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); } g_free(io); @@ -700,7 +700,7 @@ void HELPER(stfl)(CPUS390XState *env) lowcore = cpu_map_lowcore(env); prepare_stfl(); memcpy(&lowcore->stfl_fac_list, stfl_bytes, sizeof(lowcore->stfl_fac_list)); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); } #endif diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 00073c5..c7e8574 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -31,7 +31,6 @@ #include "qemu/osdep.h" #include "cpu.h" 
#include "s390x-internal.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "qemu/log.h" @@ -1250,11 +1249,7 @@ static DisasJumpType op_addc32(DisasContext *s, DisasOps *o) static DisasJumpType op_addc64(DisasContext *s, DisasOps *o) { compute_carry(s); - - TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero); - tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); - + tcg_gen_addcio_i64(o->out, cc_src, o->in1, o->in2, cc_src); return DISAS_NEXT; } diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c index 1bbaa82..744f800 100644 --- a/target/s390x/tcg/vec_fpu_helper.c +++ b/target/s390x/tcg/vec_fpu_helper.c @@ -15,7 +15,6 @@ #include "vec.h" #include "tcg_s390x.h" #include "tcg/tcg-gvec-desc.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c index dafc4c3..46ec4a9 100644 --- a/target/s390x/tcg/vec_helper.c +++ b/target/s390x/tcg/vec_helper.c @@ -16,8 +16,7 @@ #include "tcg/tcg.h" #include "tcg/tcg-gvec-desc.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3, uint32_t desc) diff --git a/target/sh4/cpu-param.h b/target/sh4/cpu-param.h index 2b6e11d..f328715 100644 --- a/target/sh4/cpu-param.h +++ b/target/sh4/cpu-param.h @@ -16,4 +16,6 @@ # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif +#define TARGET_INSN_START_EXTRA_WORDS 1 + #endif diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index ce84bdf..21ccb86 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -24,9 +24,9 @@ #include "qemu/qemu-print.h" #include "cpu.h" #include "migration/vmstate.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "fpu/softfloat-helpers.h" +#include "accel/tcg/cpu-ops.h" #include "tcg/tcg.h" static void superh_cpu_set_pc(CPUState *cs, vaddr value) @@ -43,6 +43,29 @@ static vaddr superh_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState superh_get_tb_cpu_state(CPUState *cs) +{ + CPUSH4State *env = cpu_env(cs); + uint32_t flags; + + flags = env->flags + | (env->fpscr & TB_FLAG_FPSCR_MASK) + | (env->sr & TB_FLAG_SR_MASK) + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */ +#ifdef CONFIG_USER_ONLY + flags |= TB_FLAG_UNALIGN * !cs->prctl_unalign_sigbus; +#endif + + return (TCGTBCPUState){ + .pc = env->pc, + .flags = flags, +#ifdef CONFIG_USER_ONLY + /* For a gUSA region, notice the end of the region. */ + .cs_base = flags & TB_FLAG_GUSA_MASK ? 
env->gregs[0] : 0, +#endif + }; +} + static void superh_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -85,7 +108,7 @@ static bool superh_io_recompile_replay_branch(CPUState *cs, static bool superh_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ @@ -177,7 +200,7 @@ static void sh7750r_cpu_initfn(Object *obj) env->features = SH_FEATURE_BCR3_AND_BCR4; } -static void sh7750r_class_init(ObjectClass *oc, void *data) +static void sh7750r_class_init(ObjectClass *oc, const void *data) { SuperHCPUClass *scc = SUPERH_CPU_CLASS(oc); @@ -194,7 +217,7 @@ static void sh7751r_cpu_initfn(Object *obj) env->features = SH_FEATURE_BCR3_AND_BCR4; } -static void sh7751r_class_init(ObjectClass *oc, void *data) +static void sh7751r_class_init(ObjectClass *oc, const void *data) { SuperHCPUClass *scc = SUPERH_CPU_CLASS(oc); @@ -211,7 +234,7 @@ static void sh7785_cpu_initfn(Object *obj) env->features = SH_FEATURE_SH4A; } -static void sh7785_class_init(ObjectClass *oc, void *data) +static void sh7785_class_init(ObjectClass *oc, const void *data) { SuperHCPUClass *scc = SUPERH_CPU_CLASS(oc); @@ -259,25 +282,31 @@ static const struct SysemuCPUOps sh4_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps superh_tcg_ops = { + /* MTTCG not yet supported: require strict ordering */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = false, + .initialize = sh4_translate_init, .translate_code = sh4_translate_code, + .get_tb_cpu_state = superh_get_tb_cpu_state, .synchronize_from_tb = superh_cpu_synchronize_from_tb, .restore_state_to_opc = superh_restore_state_to_opc, + .mmu_index = sh4_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = superh_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = superh_cpu_exec_interrupt, .cpu_exec_halt = superh_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = superh_cpu_do_interrupt, .do_unaligned_access = superh_cpu_do_unaligned_access, .io_recompile_replay_branch = superh_io_recompile_replay_branch, #endif /* !CONFIG_USER_ONLY */ }; -static void superh_cpu_class_init(ObjectClass *oc, void *data) +static void superh_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -291,7 +320,6 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data) &scc->parent_phases); cc->class_by_name = superh_cpu_class_by_name; - cc->mmu_index = sh4_cpu_mmu_index; cc->dump_state = superh_cpu_dump_state; cc->set_pc = superh_cpu_set_pc; cc->get_pc = superh_cpu_get_pc; diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index d536d5d..c41ab70 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -21,7 +21,9 @@ #define SH4_CPU_H #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" /* CPU Subtypes */ @@ -125,8 +127,6 @@ typedef struct tlb_t { #define UTLB_SIZE 64 #define ITLB_SIZE 4 -#define TARGET_INSN_START_EXTRA_WORDS 1 - enum sh_features { SH_FEATURE_SH4A = 1, SH_FEATURE_BCR3_AND_BCR4 = 2, @@ -286,8 +286,6 @@ void cpu_load_tlb(CPUSH4State * env); /* MMU modes definitions */ #define MMU_USER_IDX 1 -#include "exec/cpu-all.h" - /* MMU control register */ #define MMUCR 0x1F000010 #define MMUCR_AT (1<<0) @@ -382,19 +380,4 @@ static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | 
(1u << SR_T)); } -static inline void cpu_get_tb_cpu_state(CPUSH4State *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - /* For a gUSA region, notice the end of the region. */ - *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0; - *flags = env->flags - | (env->fpscr & TB_FLAG_FPSCR_MASK) - | (env->sr & TB_FLAG_SR_MASK) - | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */ -#ifdef CONFIG_USER_ONLY - *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus; -#endif -} - #endif /* SH4_CPU_H */ diff --git a/target/sh4/helper.c b/target/sh4/helper.c index 7567e6c..1744ef0 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -21,8 +21,8 @@ #include "cpu.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "exec/log.h" #if !defined(CONFIG_USER_ONLY) @@ -58,7 +58,7 @@ int cpu_sh4_is_cached(CPUSH4State *env, target_ulong addr) void superh_cpu_do_interrupt(CPUState *cs) { CPUSH4State *env = cpu_env(cs); - int do_irq = cs->interrupt_request & CPU_INTERRUPT_HARD; + int do_irq = cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); int do_exp, irq_vector = cs->exception_index; /* prioritize exceptions over interrupts */ diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 99394b7..557b1bf 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -19,8 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "fpu/softfloat.h" #ifndef CONFIG_USER_ONLY diff --git a/target/sh4/translate.c b/target/sh4/translate.c index bcdd558..70fd13a 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -19,12 +19,12 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/translation-block.h" #include "exec/translator.h" +#include "exec/target_page.h" #include "exec/log.h" #include "qemu/qemu-print.h" @@ -54,7 +54,7 @@ typedef struct DisasContext { #define UNALIGN(C) (ctx->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN) #else #define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD))) -#define UNALIGN(C) 0 +#define UNALIGN(C) MO_ALIGN #endif /* Target-specific values for ctx->base.is_jmp. 
*/ @@ -694,14 +694,8 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x300e: /* addc Rm,Rn */ - { - TCGv t0, t1; - t0 = tcg_constant_tl(0); - t1 = tcg_temp_new(); - tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0); - tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, - REG(B11_8), t0, t1, cpu_sr_t); - } + tcg_gen_addcio_i32(REG(B11_8), cpu_sr_t, + REG(B11_8), REG(B7_4), cpu_sr_t); return; case 0x300f: /* addv Rm,Rn */ { @@ -1792,7 +1786,6 @@ static void _decode_opc(DisasContext * ctx) gen_helper_raise_fpu_disable(tcg_env); } ctx->base.is_jmp = DISAS_NORETURN; - return; } static void decode_opc(DisasContext * ctx) @@ -1940,16 +1933,16 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) NEXT_INSN; switch (ctx->opcode & 0xf00f) { case 0x300c: /* add Rm,Rn */ - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; goto do_reg_op; case 0x2009: /* and Rm,Rn */ - op_opc = INDEX_op_and_i32; + op_opc = INDEX_op_and; goto do_reg_op; case 0x200a: /* xor Rm,Rn */ - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; goto do_reg_op; case 0x200b: /* or Rm,Rn */ - op_opc = INDEX_op_or_i32; + op_opc = INDEX_op_or; do_reg_op: /* The operation register should be as expected, and the other input cannot depend on the load. */ @@ -1976,7 +1969,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) goto fail; } op_dst = B11_8; - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; op_arg = tcg_constant_i32(-1); break; @@ -1984,7 +1977,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (op_dst != B11_8 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; op_arg = tcg_constant_i32(B7_0s); break; @@ -1995,7 +1988,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; /* placeholder */ + op_opc = INDEX_op_setcond; /* placeholder */ op_src = (ld_dst == B11_8 ? 
B7_4 : B11_8); op_arg = REG(op_src); @@ -2030,7 +2023,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; + op_opc = INDEX_op_setcond; op_arg = tcg_constant_i32(0); NEXT_INSN; @@ -2087,7 +2080,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) ctx->memidx, ld_mop); break; - case INDEX_op_add_i32: + case INDEX_op_add: if (op_dst != st_src) { goto fail; } @@ -2105,7 +2098,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_and_i32: + case INDEX_op_and: if (op_dst != st_src) { goto fail; } @@ -2119,7 +2112,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_or_i32: + case INDEX_op_or: if (op_dst != st_src) { goto fail; } @@ -2133,7 +2126,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_xor_i32: + case INDEX_op_xor: if (op_dst != st_src) { goto fail; } @@ -2147,7 +2140,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_setcond_i32: + case INDEX_op_setcond: if (st_src == ld_dst) { goto fail; } diff --git a/target/sparc/cpu-param.h b/target/sparc/cpu-param.h index 6952ee2..45eea9d 100644 --- a/target/sparc/cpu-param.h +++ b/target/sparc/cpu-param.h @@ -21,27 +21,6 @@ # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -/* - * From Oracle SPARC Architecture 2015: - * - * Compatibility notes: The PSO memory model described in SPARC V8 and - * SPARC V9 compatibility architecture specifications was never implemented - * in a SPARC V9 implementation and is not included in the Oracle SPARC - * Architecture specification. - * - * The RMO memory model described in the SPARC V9 specification was - * implemented in some non-Sun SPARC V9 implementations, but is not - * directly supported in Oracle SPARC Architecture 2015 implementations. - * - * Therefore always use TSO in QEMU. - * - * D.5 Specification of Partial Store Order (PSO) - * ... [loads] are followed by an implied MEMBAR #LoadLoad | #LoadStore. - * - * D.6 Specification of Total Store Order (TSO) - * ... PSO with the additional requirement that all [stores] are followed - * by an implied MEMBAR #StoreStore. 
- */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST) +#define TARGET_INSN_START_EXTRA_WORDS 1 #endif diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 5716120..c9773f1 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "qemu/module.h" #include "qemu/qemu-print.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/translation-block.h" #include "hw/qdev-properties.h" #include "qapi/visitor.h" @@ -579,7 +579,7 @@ static void print_features(uint32_t features, const char *prefix) } } -void sparc_cpu_list(void) +static void sparc_cpu_list(void) { unsigned int i; @@ -716,13 +716,11 @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs, cpu->env.npc = tb->cs_base; } -void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) +static TCGTBCPUState sparc_get_tb_cpu_state(CPUState *cs) { - uint32_t flags; - *pc = env->pc; - *cs_base = env->npc; - flags = cpu_mmu_index(env_cpu(env), false); + CPUSPARCState *env = cpu_env(cs); + uint32_t flags = cpu_mmu_index(cs, false); + #ifndef CONFIG_USER_ONLY if (cpu_supervisor_mode(env)) { flags |= TB_FLAG_SUPER; @@ -751,7 +749,12 @@ void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc, } #endif /* !CONFIG_USER_ONLY */ #endif /* TARGET_SPARC64 */ - *pflags = flags; + + return (TCGTBCPUState){ + .pc = env->pc, + .flags = flags, + .cs_base = env->npc, + }; } static void sparc_restore_state_to_opc(CPUState *cs, @@ -780,7 +783,7 @@ static void sparc_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool sparc_cpu_has_work(CPUState *cs) { - return (cs->interrupt_request & CPU_INTERRUPT_HARD) && + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_interrupts_enabled(cpu_env(cs)); } #endif /* !CONFIG_USER_ONLY */ @@ -999,16 +1002,56 @@ static const struct SysemuCPUOps sparc_sysemu_ops = { #ifdef CONFIG_TCG #include "accel/tcg/cpu-ops.h" +#ifndef CONFIG_USER_ONLY +static vaddr sparc_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ +#ifdef TARGET_SPARC64 + return cpu_env(cs)->pstate & PS_AM ? (uint32_t)result : result; +#else + return (uint32_t)result; +#endif +} +#endif + static const TCGCPUOps sparc_tcg_ops = { + /* + * From Oracle SPARC Architecture 2015: + * + * Compatibility notes: The PSO memory model described in SPARC V8 and + * SPARC V9 compatibility architecture specifications was never + * implemented in a SPARC V9 implementation and is not included in the + * Oracle SPARC Architecture specification. + * + * The RMO memory model described in the SPARC V9 specification was + * implemented in some non-Sun SPARC V9 implementations, but is not + * directly supported in Oracle SPARC Architecture 2015 implementations. + * + * Therefore always use TSO in QEMU. + * + * D.5 Specification of Partial Store Order (PSO) + * ... [loads] are followed by an implied MEMBAR #LoadLoad | #LoadStore. + * + * D.6 Specification of Total Store Order (TSO) + * ... PSO with the additional requirement that all [stores] are followed + * by an implied MEMBAR #StoreStore. 
+ */ + .guest_default_memory_order = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST, + .mttcg_supported = true, + .initialize = sparc_tcg_init, .translate_code = sparc_translate_code, + .get_tb_cpu_state = sparc_get_tb_cpu_state, .synchronize_from_tb = sparc_cpu_synchronize_from_tb, .restore_state_to_opc = sparc_restore_state_to_opc, + .mmu_index = sparc_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = sparc_cpu_tlb_fill, + .pointer_wrap = sparc_pointer_wrap, .cpu_exec_interrupt = sparc_cpu_exec_interrupt, .cpu_exec_halt = sparc_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = sparc_cpu_do_interrupt, .do_transaction_failed = sparc_cpu_do_transaction_failed, .do_unaligned_access = sparc_cpu_do_unaligned_access, @@ -1016,7 +1059,7 @@ static const TCGCPUOps sparc_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void sparc_cpu_class_init(ObjectClass *oc, void *data) +static void sparc_cpu_class_init(ObjectClass *oc, const void *data) { SPARCCPUClass *scc = SPARC_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -1031,8 +1074,8 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data) &scc->parent_phases); cc->class_by_name = sparc_cpu_class_by_name; + cc->list_cpus = sparc_cpu_list; cc->parse_features = sparc_cpu_parse_features; - cc->mmu_index = sparc_cpu_mmu_index; cc->dump_state = sparc_cpu_dump_state; #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) cc->memory_rw_debug = sparc_cpu_memory_rw_debug; @@ -1047,6 +1090,7 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data) cc->disas_set_info = cpu_sparc_disas_set_info; #if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) + cc->gdb_core_xml_file = "sparc64-core.xml"; cc->gdb_num_core_regs = 86; #else cc->gdb_num_core_regs = 72; @@ -1065,7 +1109,7 @@ static const TypeInfo sparc_cpu_type_info = { .class_init = sparc_cpu_class_init, }; -static void sparc_cpu_cpudef_class_init(ObjectClass *oc, void *data) +static void sparc_cpu_cpudef_class_init(ObjectClass *oc, const void *data) { SPARCCPUClass *scc = SPARC_CPU_CLASS(oc); scc->cpu_def = data; @@ -1078,7 +1122,7 @@ static void sparc_register_cpudef_type(const struct sparc_def_t *def) .name = typename, .parent = TYPE_SPARC_CPU, .class_init = sparc_cpu_cpudef_class_init, - .class_data = (void *)def, + .class_data = def, }; type_register_static(&ti); diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 68f8c21..31cb3d9 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -3,7 +3,9 @@ #include "qemu/bswap.h" #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" #if !defined(TARGET_SPARC64) @@ -221,7 +223,6 @@ typedef struct trap_state { uint32_t tt; } trap_state; #endif -#define TARGET_INSN_START_EXTRA_WORDS 1 typedef struct sparc_def_t { const char *name; @@ -594,7 +595,6 @@ G_NORETURN void cpu_raise_exception_ra(CPUSPARCState *, int, uintptr_t); /* cpu_init.c */ void cpu_sparc_set_id(CPUSPARCState *env, unsigned int cpu); -void sparc_cpu_list(void); /* mmu_helper.c */ bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, @@ -665,8 +665,6 @@ hwaddr cpu_get_phys_page_nofault(CPUSPARCState *env, target_ulong addr, #define CPU_RESOLVING_TYPE TYPE_SPARC_CPU -#define cpu_list sparc_cpu_list - /* MMU modes definitions */ #if defined (TARGET_SPARC64) #define MMU_USER_IDX 0 @@ -727,8 +725,6 @@ static inline int cpu_pil_allowed(CPUSPARCState *env1, int pil) #endif } -#include "exec/cpu-all.h" - #ifdef TARGET_SPARC64 /* sun4u.c */
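
One way to read the memory-order constant moved above: TSO is everything in TCG_MO_ALL except the store-to-load ordering. A build-time check makes that explicit (illustrative, not part of the patch):

#include "qemu/osdep.h"
#include "tcg/tcg-mo.h"

/* TSO orders ld-ld, ld-st and st-st; only st-ld may be reordered. */
#define SPARC_TSO_MO (TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST)
QEMU_BUILD_BUG_ON((SPARC_TSO_MO | TCG_MO_ST_LD) != TCG_MO_ALL);
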
void cpu_tick_set_count(CPUTimer *timer, uint64_t count); @@ -745,9 +741,6 @@ trap_state* cpu_tsptr(CPUSPARCState* env); #define TB_FLAG_FSR_QNE (1 << 8) #define TB_FLAG_ASI_SHIFT 24 -void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags); - static inline bool tb_fpu_enabled(int tb_flags) { #if defined(CONFIG_USER_ONLY) diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index c25097d..29fd166 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" @@ -446,7 +445,6 @@ static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) case float_relation_greater: return 2; case float_relation_unordered: - env->fsr |= FSR_NVA; return 3; } g_assert_not_reached(); diff --git a/target/sparc/helper.c b/target/sparc/helper.c index 7846ddd..9163b9d 100644 --- a/target/sparc/helper.c +++ b/target/sparc/helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/timer.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 9e39d23..242ec42 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -88,9 +88,10 @@ CALL 01 i:s30 { [ - STBAR 10 00000 101000 01111 0 0000000000000 + STBAR_v9 10 00000 101000 01111 0 0000000000000 MEMBAR 10 00000 101000 01111 1 000000 cmask:3 mmask:4 + RDY_v9 10 rd:5 101000 00000 0 0000000000000 RDCCR 10 rd:5 101000 00010 0 0000000000000 RDASI 10 rd:5 101000 00011 0 0000000000000 RDTICK 10 rd:5 101000 00100 0 0000000000000 @@ -107,8 +108,26 @@ CALL 01 i:s30 RDSTICK_CMPR 10 rd:5 101000 11001 0 0000000000000 RDSTRAND_STATUS 10 rd:5 101000 11010 0 0000000000000 ] - # Before v8, all rs1 accepted; otherwise rs1==0. - RDY 10 rd:5 101000 rs1:5 0 0000000000000 + + # The v8 manual, section B.30 STBAR instruction, says + # bits [12:0] are ignored, but bit 13 must be 0. + # However, section B.28 Read State Register Instruction has a + # comment that RDASR with rs1 = 15, rd = 0 is STBAR. Here, + # bit 13 is also ignored and rd != 0 is merely reserved. + # + # Solaris 8 executes v9 MEMBAR instruction 0x8143e008 during boot. + # This confirms that bit 13 is ignored, as 0x8143c000 is STBAR. + STBAR_v8 10 ----- 101000 01111 - ------------- + + # For v7, bits [18:0] are ignored. + # For v8, bits [18:14], aka rs1, are repurposed and rs1 = 0 is RDY, + # and other values are RDASR. However, the v8 manual explicitly + # says that rs1 in 1..14 yield undefined results and do not cause + # an illegal instruction trap, and rs1 in 16..31 are available for + # implementation specific usage. + # Implement not causing an illegal instruction trap for v8 by + # continuing to interpret unused values per v7, i.e. as RDY. 
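
The Solaris 8 observation in the decode comment above can be checked by hand before reading the RDY_v7 pattern that follows. A standalone sketch, with hypothetical field extractors written for this example:

#include <assert.h>
#include <stdint.h>

/* RDASR format: op=10 | rd[29:25] | op3=0x28 [24:19] | rs1[18:14] | i[13] */
static unsigned rs1_of(uint32_t insn) { return (insn >> 14) & 0x1f; }
static unsigned i13_of(uint32_t insn) { return (insn >> 13) & 1; }

int main(void)
{
    /* rs1 = 15, bit 13 clear: STBAR. */
    assert(rs1_of(0x8143c000) == 15 && i13_of(0x8143c000) == 0);
    /* Same rs1, bit 13 set: the v9 MEMBAR that Solaris 8 executes. */
    assert(rs1_of(0x8143e008) == 15 && i13_of(0x8143e008) == 1);
    return 0;
}
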
+ RDY_v7 10 rd:5 101000 ----- - ------------- } { @@ -139,14 +158,16 @@ CALL 01 i:s30 } { - RDPSR 10 rd:5 101001 00000 0 0000000000000 - RDHPR_hpstate 10 rd:5 101001 00000 0 0000000000000 + [ + RDHPR_hpstate 10 rd:5 101001 00000 0 0000000000000 + RDHPR_htstate 10 rd:5 101001 00001 0 0000000000000 + RDHPR_hintp 10 rd:5 101001 00011 0 0000000000000 + RDHPR_htba 10 rd:5 101001 00101 0 0000000000000 + RDHPR_hver 10 rd:5 101001 00110 0 0000000000000 + RDHPR_hstick_cmpr 10 rd:5 101001 11111 0 0000000000000 + ] + RDPSR 10 rd:5 101001 ----- - ------------- } -RDHPR_htstate 10 rd:5 101001 00001 0 0000000000000 -RDHPR_hintp 10 rd:5 101001 00011 0 0000000000000 -RDHPR_htba 10 rd:5 101001 00101 0 0000000000000 -RDHPR_hver 10 rd:5 101001 00110 0 0000000000000 -RDHPR_hstick_cmpr 10 rd:5 101001 11111 0 0000000000000 { WRPSR 10 00000 110001 ..... . ............. @n_r_ri @@ -159,26 +180,28 @@ RESTORED 10 00001 110001 00000 0 0000000000000 # UA2005 INVALW { - RDWIM 10 rd:5 101010 00000 0 0000000000000 - RDPR_tpc 10 rd:5 101010 00000 0 0000000000000 + [ + RDPR_tpc 10 rd:5 101010 00000 0 0000000000000 + RDPR_tnpc 10 rd:5 101010 00001 0 0000000000000 + RDPR_tstate 10 rd:5 101010 00010 0 0000000000000 + RDPR_tt 10 rd:5 101010 00011 0 0000000000000 + RDPR_tick 10 rd:5 101010 00100 0 0000000000000 + RDPR_tba 10 rd:5 101010 00101 0 0000000000000 + RDPR_pstate 10 rd:5 101010 00110 0 0000000000000 + RDPR_tl 10 rd:5 101010 00111 0 0000000000000 + RDPR_pil 10 rd:5 101010 01000 0 0000000000000 + RDPR_cwp 10 rd:5 101010 01001 0 0000000000000 + RDPR_cansave 10 rd:5 101010 01010 0 0000000000000 + RDPR_canrestore 10 rd:5 101010 01011 0 0000000000000 + RDPR_cleanwin 10 rd:5 101010 01100 0 0000000000000 + RDPR_otherwin 10 rd:5 101010 01101 0 0000000000000 + RDPR_wstate 10 rd:5 101010 01110 0 0000000000000 + RDPR_gl 10 rd:5 101010 10000 0 0000000000000 + RDPR_strand_status 10 rd:5 101010 11010 0 0000000000000 + RDPR_ver 10 rd:5 101010 11111 0 0000000000000 + ] + RDWIM 10 rd:5 101010 ----- - ------------- } -RDPR_tnpc 10 rd:5 101010 00001 0 0000000000000 -RDPR_tstate 10 rd:5 101010 00010 0 0000000000000 -RDPR_tt 10 rd:5 101010 00011 0 0000000000000 -RDPR_tick 10 rd:5 101010 00100 0 0000000000000 -RDPR_tba 10 rd:5 101010 00101 0 0000000000000 -RDPR_pstate 10 rd:5 101010 00110 0 0000000000000 -RDPR_tl 10 rd:5 101010 00111 0 0000000000000 -RDPR_pil 10 rd:5 101010 01000 0 0000000000000 -RDPR_cwp 10 rd:5 101010 01001 0 0000000000000 -RDPR_cansave 10 rd:5 101010 01010 0 0000000000000 -RDPR_canrestore 10 rd:5 101010 01011 0 0000000000000 -RDPR_cleanwin 10 rd:5 101010 01100 0 0000000000000 -RDPR_otherwin 10 rd:5 101010 01101 0 0000000000000 -RDPR_wstate 10 rd:5 101010 01110 0 0000000000000 -RDPR_gl 10 rd:5 101010 10000 0 0000000000000 -RDPR_strand_status 10 rd:5 101010 11010 0 0000000000000 -RDPR_ver 10 rd:5 101010 11111 0 0000000000000 { WRWIM 10 00000 110010 ..... . ............. @n_r_ri @@ -203,7 +226,7 @@ WRPR_strand_status 10 11010 110010 ..... . ............. 
@n_r_ri { FLUSHW 10 00000 101011 00000 0 0000000000000 - RDTBR 10 rd:5 101011 00000 0 0000000000000 + RDTBR 10 rd:5 101011 ----- - ------------- } { diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index f026606..fdcaa0a 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -21,7 +21,7 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "trace.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/log.h" #include "system/runstate.h" @@ -65,6 +65,7 @@ static const char *excp_name_str(int32_t exception_index) return excp_names[exception_index]; } +#if !defined(CONFIG_USER_ONLY) void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -96,6 +97,7 @@ void cpu_check_irqs(CPUSPARCState *env) cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } } +#endif void sparc_cpu_do_interrupt(CPUState *cs) { diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index bd14c7a..23adda4 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -62,6 +62,7 @@ static const char * const excp_names[0x80] = { }; #endif +#if !defined(CONFIG_USER_ONLY) void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -89,7 +90,7 @@ void cpu_check_irqs(CPUSPARCState *env) * the next bit is (2 << psrpil). */ if (pil < (2 << env->psrpil)) { - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { trace_sparc64_cpu_check_irqs_reset_irq(env->interrupt_index); env->interrupt_index = 0; cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); @@ -120,13 +121,14 @@ void cpu_check_irqs(CPUSPARCState *env) break; } } - } else if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + } else if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { trace_sparc64_cpu_check_irqs_disabled(pil, env->pil_in, env->softint, env->interrupt_index); env->interrupt_index = 0; cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } } +#endif void sparc_cpu_do_interrupt(CPUState *cs) { diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index 45882e2..2c63eb9 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -23,10 +23,11 @@ #include "cpu.h" #include "tcg/tcg.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "exec/cputlb.h" #include "exec/page-protection.h" -#include "exec/cpu_ldst.h" +#include "exec/target_page.h" +#include "accel/tcg/cpu-ldst.h" +#include "system/memory.h" #ifdef CONFIG_USER_ONLY #include "user/page-protection.h" #endif diff --git a/target/sparc/machine.c b/target/sparc/machine.c index 222e570..4dd75af 100644 --- a/target/sparc/machine.c +++ b/target/sparc/machine.c @@ -1,6 +1,5 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/timer.h" #include "migration/cpu.h" diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c index 3821cd9..217580a 100644 --- a/target/sparc/mmu_helper.c +++ b/target/sparc/mmu_helper.c @@ -21,7 +21,11 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/cputlb.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" +#include "system/memory.h" #include "qemu/qemu-print.h" #include "trace.h" diff --git a/target/sparc/translate.c b/target/sparc/translate.c index bfe6364..810e249 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "tcg/tcg-op.h" 
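
On the cpu_test_interrupt() conversions in the two interrupt helpers above: the point of the wrapper is that the read of interrupt_request becomes an atomic load rather than a plain field access. A sketch of the assumed shape (the real helper lives in the common CPU headers):

static inline bool demo_test_interrupt(CPUState *cs, int mask)
{
    /* Atomic read: safe against cpu_interrupt() from another thread. */
    return qatomic_read(&cs->interrupt_request) & mask;
}
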
#include "tcg/tcg-op-gvec.h" #include "exec/helper-gen.h" @@ -395,8 +395,7 @@ static void gen_op_addcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) TCGv z = tcg_constant_tl(0); if (cin) { - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + tcg_gen_addcio_tl(cpu_cc_N, cpu_cc_C, src1, src2, cin); } else { tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } @@ -2488,7 +2487,7 @@ static int extract_qfpreg(DisasContext *dc, int x) #define TRANS(NAME, AVAIL, FUNC, ...) \ static bool trans_##NAME(DisasContext *dc, arg_##NAME *a) \ - { return avail_##AVAIL(dc) && FUNC(dc, __VA_ARGS__); } + { return avail_##AVAIL(dc) && FUNC(dc, ## __VA_ARGS__); } #define avail_ALL(C) true #ifdef TARGET_SPARC64 @@ -2527,6 +2526,32 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_VIS4(C) false #endif +/* + * We decoded bit 13 as imm, and bits [12:0] as rs2_or_imm. + * For v9, if !imm, then the unused bits [12:5] must be zero. + * For v7 and v8, the unused bits are ignored; clear them here. + */ +static bool check_rs2(DisasContext *dc, int *rs2) +{ + if (unlikely(*rs2 & ~0x1f)) { + if (avail_64(dc)) { + return false; + } + *rs2 &= 0x1f; + } + return true; +} + +static bool check_r_r_ri(DisasContext *dc, arg_r_r_ri *a) +{ + return a->imm || check_rs2(dc, &a->rs2_or_imm); +} + +static bool check_r_r_ri_cc(DisasContext *dc, arg_r_r_ri_cc *a) +{ + return a->imm || check_rs2(dc, &a->rs2_or_imm); +} + /* Default case for non jump instructions. */ static bool advance_pc(DisasContext *dc) { @@ -2824,12 +2849,15 @@ static bool trans_Tcc_i_v9(DisasContext *dc, arg_Tcc_i_v9 *a) return do_tcc(dc, a->cond, a->cc, a->rs1, true, a->i); } -static bool trans_STBAR(DisasContext *dc, arg_STBAR *a) +static bool do_stbar(DisasContext *dc) { tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); return advance_pc(dc); } +TRANS(STBAR_v8, 32, do_stbar) +TRANS(STBAR_v9, 64, do_stbar) + static bool trans_MEMBAR(DisasContext *dc, arg_MEMBAR *a) { if (avail_32(dc)) { @@ -2861,18 +2889,8 @@ static TCGv do_rdy(DisasContext *dc, TCGv dst) return cpu_y; } -static bool trans_RDY(DisasContext *dc, arg_RDY *a) -{ - /* - * TODO: Need a feature bit for sparcv8. In the meantime, treat all - * 32-bit cpus like sparcv7, which ignores the rs1 field. - * This matches after all other ASR, so Leon3 Asr17 is handled first. - */ - if (avail_64(dc) && a->rs1 != 0) { - return false; - } - return do_rd_special(dc, true, a->rd, do_rdy); -} +TRANS(RDY_v7, 32, do_rd_special, true, a->rd, do_rdy) +TRANS(RDY_v9, 64, do_rd_special, true, a->rd, do_rdy) static TCGv do_rd_leon3_config(DisasContext *dc, TCGv dst) { @@ -3257,8 +3275,7 @@ static bool do_wr_special(DisasContext *dc, arg_r_r_ri *a, bool priv, { TCGv src; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && (a->rs2_or_imm & ~0x1f)) { + if (!check_r_r_ri(dc, a)) { return false; } if (!priv) { @@ -3701,8 +3718,7 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, { TCGv dst, src1; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri_cc(dc, a)) { return false; } @@ -3786,11 +3802,11 @@ static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) { /* OR with %g0 is the canonical alias for MOV. 
*/ if (!a->cc && a->rs1 == 0) { + if (!check_r_r_ri_cc(dc, a)) { + return false; + } if (a->imm || a->rs2_or_imm == 0) { gen_store_gpr(dc, a->rd, tcg_constant_tl(a->rs2_or_imm)); - } else if (a->rs2_or_imm & ~0x1f) { - /* For simplicity, we under-decoded the rs2 form. */ - return false; } else { gen_store_gpr(dc, a->rd, cpu_regs[a->rs2_or_imm]); } @@ -3807,8 +3823,7 @@ static bool trans_UDIV(DisasContext *dc, arg_r_r_ri *a) if (!avail_DIV(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -3859,8 +3874,7 @@ static bool trans_UDIVX(DisasContext *dc, arg_r_r_ri *a) if (!avail_64(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -3897,8 +3911,7 @@ static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) if (!avail_64(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -4194,8 +4207,7 @@ TRANS(SRA_i, ALL, do_shift_i, a, false, false) static TCGv gen_rs2_or_imm(DisasContext *dc, bool imm, int rs2_or_imm) { - /* For simplicity, we under-decoded the rs2 form. */ - if (!imm && rs2_or_imm & ~0x1f) { + if (!imm && !check_rs2(dc, &rs2_or_imm)) { return NULL; } if (imm || rs2_or_imm == 0) { @@ -4258,8 +4270,7 @@ static bool do_add_special(DisasContext *dc, arg_r_r_ri *a, { TCGv src1, sum; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -4377,8 +4388,7 @@ static TCGv gen_ldst_addr(DisasContext *dc, int rs1, bool imm, int rs2_or_imm) { TCGv addr, tmp = NULL; - /* For simplicity, we under-decoded the rs2 form. 
*/ - if (!imm && rs2_or_imm & ~0x1f) { + if (!imm && !check_rs2(dc, &rs2_or_imm)) { return NULL; } diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c index 0c4b09f..9ad9d01 100644 --- a/target/sparc/win_helper.c +++ b/target/sparc/win_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "trace.h" diff --git a/target/tricore/cpu-param.h b/target/tricore/cpu-param.h index 790242e..eb33a67 100644 --- a/target/tricore/cpu-param.h +++ b/target/tricore/cpu-param.h @@ -12,4 +12,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +#define TARGET_INSN_START_EXTRA_WORDS 0 + #endif diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 16acc4e..4f035b6 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -20,10 +20,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/translation-block.h" #include "qemu/error-report.h" #include "tcg/debug-assert.h" +#include "accel/tcg/cpu-ops.h" static inline void set_feature(CPUTriCoreState *env, int feature) { @@ -45,6 +45,16 @@ static vaddr tricore_cpu_get_pc(CPUState *cs) return cpu_env(cs)->PC; } +static TCGTBCPUState tricore_get_tb_cpu_state(CPUState *cs) +{ + CPUTriCoreState *env = cpu_env(cs); + + return (TCGTBCPUState){ + .pc = env->PC, + .flags = FIELD_DP32(0, TB_FLAGS, PRIV, extract32(env->PSW, 10, 2)), + }; +} + static void tricore_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -169,19 +179,24 @@ static const struct SysemuCPUOps tricore_sysemu_ops = { .get_phys_page_debug = tricore_cpu_get_phys_page_debug, }; -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps tricore_tcg_ops = { + /* MTTCG not yet supported: require strict ordering */ + .guest_default_memory_order = TCG_MO_ALL, + .mttcg_supported = false, .initialize = tricore_tcg_init, .translate_code = tricore_translate_code, + .get_tb_cpu_state = tricore_get_tb_cpu_state, .synchronize_from_tb = tricore_cpu_synchronize_from_tb, .restore_state_to_opc = tricore_restore_state_to_opc, + .mmu_index = tricore_cpu_mmu_index, .tlb_fill = tricore_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = tricore_cpu_exec_interrupt, .cpu_exec_halt = tricore_cpu_has_work, + .cpu_exec_reset = cpu_reset, }; -static void tricore_cpu_class_init(ObjectClass *c, void *data) +static void tricore_cpu_class_init(ObjectClass *c, const void *data) { TriCoreCPUClass *mcc = TRICORE_CPU_CLASS(c); CPUClass *cc = CPU_CLASS(c); @@ -194,7 +209,6 @@ static void tricore_cpu_class_init(ObjectClass *c, void *data) resettable_class_set_parent_phases(rc, NULL, tricore_cpu_reset_hold, NULL, &mcc->parent_phases); cc->class_by_name = tricore_cpu_class_by_name; - cc->mmu_index = tricore_cpu_mmu_index; cc->gdb_read_register = tricore_cpu_gdb_read_register; cc->gdb_write_register = tricore_cpu_gdb_write_register; diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h index cf9dbc6..82085fb 100644 --- a/target/tricore/cpu.h +++ b/target/tricore/cpu.h @@ -22,6 +22,7 @@ #include "cpu-qom.h" #include "hw/registerfields.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" #include "tricore-defs.h" @@ -250,8 +251,6 @@ void fpu_set_state(CPUTriCoreState *env); #define MMU_USER_IDX 2 -#include "exec/cpu-all.h" - FIELD(TB_FLAGS, PRIV, 0, 2) void cpu_state_reset(CPUTriCoreState *s); @@ -259,18 +258,6 @@ void tricore_tcg_init(void); 
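
The tcg_gen_addcio_*() conversions above (s390x op_addc64, sh4 addc, sparc gen_op_addcc_int) and the tricore one just below all collapse the same two-step carry chain. As a TCG fragment, assuming i32 values a, b and a 0/1 carry cin, the new call is equivalent to the removed sequence:

/* tcg_gen_addcio_i32(out, cout, a, b, cin):
 *   out  = a + b + cin
 *   cout = carry out of that addition
 * The two-step pattern it replaces: */
TCGv_i32 zero = tcg_constant_i32(0);
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_gen_add2_i32(tmp, cout, a, zero, cin, zero);   /* tmp:cout = a + cin */
tcg_gen_add2_i32(out, cout, tmp, cout, b, zero);   /* add b, fold in carry */
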
void tricore_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); -static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - uint32_t new_flags = 0; - *pc = env->PC; - *cs_base = 0; - - new_flags |= FIELD_DP32(new_flags, TB_FLAGS, PRIV, - extract32(env->PSW, 10, 2)); - *flags = new_flags; -} - #define CPU_RESOLVING_TYPE TYPE_TRICORE_CPU /* helpers.c */ diff --git a/target/tricore/helper.c b/target/tricore/helper.c index a64412e..e4c53d4 100644 --- a/target/tricore/helper.c +++ b/target/tricore/helper.c @@ -20,7 +20,9 @@ #include "hw/registerfields.h" #include "cpu.h" #include "exec/cputlb.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/page-protection.h" +#include "exec/target_page.h" #include "fpu/softfloat-helpers.h" #include "qemu/qemu-print.h" diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c index a0d5a0d..9910c13 100644 --- a/target/tricore/op_helper.c +++ b/target/tricore/op_helper.c @@ -18,8 +18,7 @@ #include "cpu.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include <zlib.h> /* for crc32 */ diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 6819b77..3d0e7a1 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -20,9 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "qemu/qemu-print.h" #include "exec/helper-proto.h" @@ -31,6 +30,7 @@ #include "tricore-opcodes.h" #include "exec/translator.h" #include "exec/translation-block.h" +#include "exec/target_page.h" #include "exec/log.h" #define HELPER_H "helper.h" @@ -1345,15 +1345,11 @@ static inline void gen_addi_CC(TCGv ret, TCGv r1, int32_t con) static inline void gen_addc_CC(TCGv ret, TCGv r1, TCGv r2) { - TCGv carry = tcg_temp_new_i32(); - TCGv t0 = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); TCGv result = tcg_temp_new_i32(); - tcg_gen_movi_tl(t0, 0); - tcg_gen_setcondi_tl(TCG_COND_NE, carry, cpu_PSW_C, 0); /* Addition, carry and set C/V/SV bits */ - tcg_gen_add2_i32(result, cpu_PSW_C, r1, t0, carry, t0); - tcg_gen_add2_i32(result, cpu_PSW_C, result, cpu_PSW_C, r2, t0); + tcg_gen_addcio_i32(result, cpu_PSW_C, r1, r2, cpu_PSW_C); /* calc V bit */ tcg_gen_xor_tl(cpu_PSW_V, result, r1); tcg_gen_xor_tl(t0, r1, r2); @@ -3980,7 +3976,7 @@ static void decode_bit_andacc(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl); break; case OPC2_32_BIT_AND_NOR_T: - if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl); } else { @@ -4113,7 +4109,7 @@ static void decode_bit_orand(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl); break; case OPC2_32_BIT_OR_NOR_T: - if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl); } else { diff --git a/target/xtensa/core-dc232b/gdb-config.c.inc b/target/xtensa/core-dc232b/gdb-config.c.inc index d871686..8c88cae 100644 --- a/target/xtensa/core-dc232b/gdb-config.c.inc +++ b/target/xtensa/core-dc232b/gdb-config.c.inc @@ -15,9 +15,8 @@ GNU General Public License 
for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ XTREG(0, 0, 32, 4, 4, 0x0020, 0x0006, -2, 9, 0x0100, pc, 0, 0, 0, 0, 0, 0) diff --git a/target/xtensa/core-dc232b/xtensa-modules.c.inc b/target/xtensa/core-dc232b/xtensa-modules.c.inc index 164df3b..bb9ebd2 100644 --- a/target/xtensa/core-dc232b/xtensa-modules.c.inc +++ b/target/xtensa/core-dc232b/xtensa-modules.c.inc @@ -14,9 +14,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" #include "xtensa-isa.h" diff --git a/target/xtensa/core-fsf/xtensa-modules.c.inc b/target/xtensa/core-fsf/xtensa-modules.c.inc index c32683f..531f5e2 100644 --- a/target/xtensa/core-fsf/xtensa-modules.c.inc +++ b/target/xtensa/core-fsf/xtensa-modules.c.inc @@ -14,9 +14,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" #include "xtensa-isa.h" diff --git a/target/xtensa/cpu-param.h b/target/xtensa/cpu-param.h index 5e4848a..7a0c22c 100644 --- a/target/xtensa/cpu-param.h +++ b/target/xtensa/cpu-param.h @@ -16,7 +16,6 @@ #define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -/* Xtensa processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) +#define TARGET_INSN_START_EXTRA_WORDS 0 #endif diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c index 7663b62..ea9b6df 100644 --- a/target/xtensa/cpu.c +++ b/target/xtensa/cpu.c @@ -35,8 +35,9 @@ #include "qemu/module.h" #include "migration/vmstate.h" #include "hw/qdev-clock.h" +#include "accel/tcg/cpu-ops.h" #ifndef CONFIG_USER_ONLY -#include "exec/memory.h" +#include "system/memory.h" #endif @@ -54,6 +55,80 @@ static vaddr xtensa_cpu_get_pc(CPUState *cs) return cpu->env.pc; } +static TCGTBCPUState xtensa_get_tb_cpu_state(CPUState *cs) +{ + CPUXtensaState *env = cpu_env(cs); + uint32_t flags = 0; + target_ulong cs_base = 0; + + flags |= xtensa_get_ring(env); + if (env->sregs[PS] & PS_EXCM) { + flags |= XTENSA_TBFLAG_EXCM; + } else if (xtensa_option_enabled(env->config, XTENSA_OPTION_LOOP)) { + target_ulong lend_dist = + env->sregs[LEND] - (env->pc & -(1u << TARGET_PAGE_BITS)); + + /* + * 0 in the csbase_lend field means that there may not be a loopback + * for any instruction that starts inside this page. Any other value + * means that an instruction that ends at this offset from the page + * start may loop back and will need loopback code to be generated. + * + * lend_dist is 0 when LEND points to the start of the page, but + * no instruction that starts inside this page may end at offset 0, + * so it's still correct. + * + * When an instruction ends at a page boundary it may only start in + * the previous page. lend_dist will be encoded as TARGET_PAGE_SIZE + * for the TB that contains this instruction. 
+ */ + if (lend_dist < (1u << TARGET_PAGE_BITS) + env->config->max_insn_size) { + target_ulong lbeg_off = env->sregs[LEND] - env->sregs[LBEG]; + + cs_base = lend_dist; + if (lbeg_off < 256) { + cs_base |= lbeg_off << XTENSA_CSBASE_LBEG_OFF_SHIFT; + } + } + } + if (xtensa_option_enabled(env->config, XTENSA_OPTION_EXTENDED_L32R) && + (env->sregs[LITBASE] & 1)) { + flags |= XTENSA_TBFLAG_LITBASE; + } + if (xtensa_option_enabled(env->config, XTENSA_OPTION_DEBUG)) { + if (xtensa_get_cintlevel(env) < env->config->debug_level) { + flags |= XTENSA_TBFLAG_DEBUG; + } + if (xtensa_get_cintlevel(env) < env->sregs[ICOUNTLEVEL]) { + flags |= XTENSA_TBFLAG_ICOUNT; + } + } + if (xtensa_option_enabled(env->config, XTENSA_OPTION_COPROCESSOR)) { + flags |= env->sregs[CPENABLE] << XTENSA_TBFLAG_CPENABLE_SHIFT; + } + if (xtensa_option_enabled(env->config, XTENSA_OPTION_WINDOWED_REGISTER) && + (env->sregs[PS] & (PS_WOE | PS_EXCM)) == PS_WOE) { + uint32_t windowstart = xtensa_replicate_windowstart(env) >> + (env->sregs[WINDOW_BASE] + 1); + uint32_t w = ctz32(windowstart | 0x8); + + flags |= (w << XTENSA_TBFLAG_WINDOW_SHIFT) | XTENSA_TBFLAG_CWOE; + flags |= extract32(env->sregs[PS], PS_CALLINC_SHIFT, + PS_CALLINC_LEN) << XTENSA_TBFLAG_CALLINC_SHIFT; + } else { + flags |= 3 << XTENSA_TBFLAG_WINDOW_SHIFT; + } + if (env->yield_needed) { + flags |= XTENSA_TBFLAG_YIELD; + } + + return (TCGTBCPUState){ + .pc = env->pc, + .flags = flags, + .cs_base = cs_base, + }; +} + static void xtensa_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data) @@ -229,18 +304,24 @@ static const struct SysemuCPUOps xtensa_sysemu_ops = { }; #endif -#include "accel/tcg/cpu-ops.h" - static const TCGCPUOps xtensa_tcg_ops = { + /* Xtensa processors have a weak memory model */ + .guest_default_memory_order = 0, + .mttcg_supported = true, + .initialize = xtensa_translate_init, .translate_code = xtensa_translate_code, .debug_excp_handler = xtensa_breakpoint_handler, + .get_tb_cpu_state = xtensa_get_tb_cpu_state, .restore_state_to_opc = xtensa_restore_state_to_opc, + .mmu_index = xtensa_cpu_mmu_index, #ifndef CONFIG_USER_ONLY .tlb_fill = xtensa_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = xtensa_cpu_exec_interrupt, .cpu_exec_halt = xtensa_cpu_has_work, + .cpu_exec_reset = cpu_reset, .do_interrupt = xtensa_cpu_do_interrupt, .do_transaction_failed = xtensa_cpu_do_transaction_failed, .do_unaligned_access = xtensa_cpu_do_unaligned_access, @@ -248,7 +329,7 @@ static const TCGCPUOps xtensa_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void xtensa_cpu_class_init(ObjectClass *oc, void *data) +static void xtensa_cpu_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); @@ -262,7 +343,6 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data) &xcc->parent_phases); cc->class_by_name = xtensa_cpu_class_by_name; - cc->mmu_index = xtensa_cpu_mmu_index; cc->dump_state = xtensa_cpu_dump_state; cc->set_pc = xtensa_cpu_set_pc; cc->get_pc = xtensa_cpu_get_pc; diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h index 8d70bfc..74122eb 100644 --- a/target/xtensa/cpu.h +++ b/target/xtensa/cpu.h @@ -30,7 +30,9 @@ #include "cpu-qom.h" #include "qemu/cpu-float.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "hw/clock.h" #include "xtensa-isa.h" @@ -731,76 +733,6 @@ static inline uint32_t xtensa_replicate_windowstart(CPUXtensaState *env) #define XTENSA_CSBASE_LBEG_OFF_MASK 
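The loopback encoding that xtensa_get_tb_cpu_state() builds above can be tried in isolation. A sketch assuming a 4 KiB page (TARGET_PAGE_BITS = 12), max_insn_size = 3, and the LBEG offset field at bit 16 per XTENSA_CSBASE_LBEG_OFF_SHIFT; the values and the encode_csbase helper are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: 4 KiB pages and the LBEG offset field at bit 16,
 * as in XTENSA_CSBASE_LBEG_OFF_SHIFT. encode_csbase is hypothetical. */
#define PAGE_BITS      12
#define LBEG_OFF_SHIFT 16

static uint32_t encode_csbase(uint32_t pc, uint32_t lend, uint32_t lbeg,
                              uint32_t max_insn_size)
{
    uint32_t cs_base = 0;
    /* Distance from the start of the page containing pc to LEND. */
    uint32_t lend_dist = lend - (pc & -(1u << PAGE_BITS));

    /* Encode only if an insn starting in this page may end at LEND. */
    if (lend_dist < (1u << PAGE_BITS) + max_insn_size) {
        uint32_t lbeg_off = lend - lbeg;

        cs_base = lend_dist;
        if (lbeg_off < 256) {
            cs_base |= lbeg_off << LBEG_OFF_SHIFT;
        }
    }
    return cs_base;
}

int main(void)
{
    /* Loop of 0x20 bytes ending 0x40 bytes into the page at 0x1000. */
    printf("0x%08x\n", encode_csbase(0x1000, 0x1040, 0x1020, 3));
    /* prints 0x00200040 */
    return 0;
}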
0x00ff0000 #define XTENSA_CSBASE_LBEG_OFF_SHIFT 16 -#include "exec/cpu-all.h" - -static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - *pc = env->pc; - *cs_base = 0; - *flags = 0; - *flags |= xtensa_get_ring(env); - if (env->sregs[PS] & PS_EXCM) { - *flags |= XTENSA_TBFLAG_EXCM; - } else if (xtensa_option_enabled(env->config, XTENSA_OPTION_LOOP)) { - target_ulong lend_dist = - env->sregs[LEND] - (env->pc & -(1u << TARGET_PAGE_BITS)); - - /* - * 0 in the csbase_lend field means that there may not be a loopback - * for any instruction that starts inside this page. Any other value - * means that an instruction that ends at this offset from the page - * start may loop back and will need loopback code to be generated. - * - * lend_dist is 0 when LEND points to the start of the page, but - * no instruction that starts inside this page may end at offset 0, - * so it's still correct. - * - * When an instruction ends at a page boundary it may only start in - * the previous page. lend_dist will be encoded as TARGET_PAGE_SIZE - * for the TB that contains this instruction. - */ - if (lend_dist < (1u << TARGET_PAGE_BITS) + env->config->max_insn_size) { - target_ulong lbeg_off = env->sregs[LEND] - env->sregs[LBEG]; - - *cs_base = lend_dist; - if (lbeg_off < 256) { - *cs_base |= lbeg_off << XTENSA_CSBASE_LBEG_OFF_SHIFT; - } - } - } - if (xtensa_option_enabled(env->config, XTENSA_OPTION_EXTENDED_L32R) && - (env->sregs[LITBASE] & 1)) { - *flags |= XTENSA_TBFLAG_LITBASE; - } - if (xtensa_option_enabled(env->config, XTENSA_OPTION_DEBUG)) { - if (xtensa_get_cintlevel(env) < env->config->debug_level) { - *flags |= XTENSA_TBFLAG_DEBUG; - } - if (xtensa_get_cintlevel(env) < env->sregs[ICOUNTLEVEL]) { - *flags |= XTENSA_TBFLAG_ICOUNT; - } - } - if (xtensa_option_enabled(env->config, XTENSA_OPTION_COPROCESSOR)) { - *flags |= env->sregs[CPENABLE] << XTENSA_TBFLAG_CPENABLE_SHIFT; - } - if (xtensa_option_enabled(env->config, XTENSA_OPTION_WINDOWED_REGISTER) && - (env->sregs[PS] & (PS_WOE | PS_EXCM)) == PS_WOE) { - uint32_t windowstart = xtensa_replicate_windowstart(env) >> - (env->sregs[WINDOW_BASE] + 1); - uint32_t w = ctz32(windowstart | 0x8); - - *flags |= (w << XTENSA_TBFLAG_WINDOW_SHIFT) | XTENSA_TBFLAG_CWOE; - *flags |= extract32(env->sregs[PS], PS_CALLINC_SHIFT, - PS_CALLINC_LEN) << XTENSA_TBFLAG_CALLINC_SHIFT; - } else { - *flags |= 3 << XTENSA_TBFLAG_WINDOW_SHIFT; - } - if (env->yield_needed) { - *flags |= XTENSA_TBFLAG_YIELD; - } -} - XtensaCPU *xtensa_cpu_create_with_clock(const char *cpu_type, Clock *cpu_refclk); diff --git a/target/xtensa/dbg_helper.c b/target/xtensa/dbg_helper.c index 5546c82..3b91f7c 100644 --- a/target/xtensa/dbg_helper.c +++ b/target/xtensa/dbg_helper.c @@ -30,8 +30,8 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" -#include "exec/address-spaces.h" +#include "exec/watchpoint.h" +#include "system/address-spaces.h" void HELPER(wsr_ibreakenable)(CPUXtensaState *env, uint32_t v) { diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index ca629f0..b611c9b 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -32,7 +32,6 @@ #include "exec/helper-proto.h" #include "qemu/host-utils.h" #include "qemu/atomic.h" -#include "exec/exec-all.h" void HELPER(exception)(CPUXtensaState *env, uint32_t excp) { diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c index 53fc7cf..5358060 100644 --- a/target/xtensa/fpu_helper.c +++ 
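The windowed-register branch above derives w with ctz32(windowstart | 0x8): the WINDOWSTART image has already been replicated and shifted right by WINDOW_BASE + 1, so counting its trailing zeros measures how far away the next in-use window is, and or-ing in 0x8 caps the result at 3. A standalone sketch under that reading (window_dist is a hypothetical helper using the GCC/Clang builtin in place of QEMU's ctz32()):

#include <stdint.h>
#include <stdio.h>

/* w = trailing zero count of the rotated WINDOWSTART; the |0x8 term
 * guarantees a set bit so the count never exceeds 3. */
static uint32_t window_dist(uint32_t windowstart_rotated)
{
    return (uint32_t)__builtin_ctz(windowstart_rotated | 0x8);
}

int main(void)
{
    printf("%u\n", window_dist(0x4)); /* next live window 2 away -> 2 */
    printf("%u\n", window_dist(0x0)); /* nothing set -> capped at 3 */
    return 0;
}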
b/target/xtensa/fpu_helper.c @@ -30,7 +30,6 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" #include "fpu/softfloat.h" enum { diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c index 4824b97..2d93b45 100644 --- a/target/xtensa/helper.c +++ b/target/xtensa/helper.c @@ -29,6 +29,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/cputlb.h" +#include "exec/target_page.h" #include "gdbstub/helpers.h" #include "exec/helper-proto.h" #include "qemu/error-report.h" @@ -169,7 +170,7 @@ static void xtensa_finalize_config(XtensaConfig *config) } } -static void xtensa_core_class_init(ObjectClass *oc, void *data) +static void xtensa_core_class_init(ObjectClass *oc, const void *data) { CPUClass *cc = CPU_CLASS(oc); XtensaCPUClass *xcc = XTENSA_CPU_CLASS(oc); @@ -191,7 +192,7 @@ void xtensa_register_core(XtensaConfigList *node) TypeInfo type = { .parent = TYPE_XTENSA_CPU, .class_init = xtensa_core_class_init, - .class_data = (void *)node->config, + .class_data = node->config, }; xtensa_finalize_config(node->config); diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c index 63be741..71330fc 100644 --- a/target/xtensa/mmu_helper.c +++ b/target/xtensa/mmu_helper.c @@ -33,8 +33,11 @@ #include "exec/helper-proto.h" #include "qemu/host-utils.h" #include "exec/cputlb.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-mmu-index.h" +#include "accel/tcg/probe.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "system/memory.h" #define XTENSA_MPU_SEGMENT_MASK 0x0000001f #define XTENSA_MPU_ACC_RIGHTS_MASK 0x00000f00 diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c index 028d4e0..fc47eba 100644 --- a/target/xtensa/op_helper.c +++ b/target/xtensa/op_helper.c @@ -30,7 +30,7 @@ #include "exec/helper-proto.h" #include "exec/page-protection.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" +#include "system/memory.h" #include "qemu/atomic.h" #include "qemu/timer.h" diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 4f02cef..34ae2f4 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -31,18 +31,18 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "qemu/log.h" #include "qemu/qemu-print.h" -#include "semihosting/semihost.h" #include "exec/translator.h" #include "exec/translation-block.h" - +#include "exec/target_page.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" - #include "exec/log.h" +#ifndef CONFIG_USER_ONLY +#include "semihosting/semihost.h" +#endif #define HELPER_H "helper.h" #include "exec/helper-info.c.inc" @@ -1394,11 +1394,11 @@ static void translate_bbi(DisasContext *dc, const OpcodeArg arg[], const uint32_t par[]) { TCGv_i32 tmp = tcg_temp_new_i32(); -#if TARGET_BIG_ENDIAN - tcg_gen_andi_i32(tmp, arg[0].in, 0x80000000u >> arg[1].imm); -#else - tcg_gen_andi_i32(tmp, arg[0].in, 0x00000001u << arg[1].imm); -#endif + if (TARGET_BIG_ENDIAN) { + tcg_gen_andi_i32(tmp, arg[0].in, 0x80000000u >> arg[1].imm); + } else { + tcg_gen_andi_i32(tmp, arg[0].in, 0x00000001u << arg[1].imm); + } gen_brcondi(dc, par[0], tmp, 0, arg[2].imm); } @@ -2241,17 +2241,15 @@ static uint32_t test_exceptions_simcall(DisasContext *dc, const OpcodeArg arg[], const uint32_t par[]) { - bool is_semi = semihosting_enabled(dc->cring != 0); -#ifdef CONFIG_USER_ONLY - bool ill = true; -#else - /* Between RE.2 and RE.3 simcall opcode's become nop for the hardware. 
*/ - bool ill = dc->config->hw_version <= 250002 && !is_semi; -#endif - if (ill || !is_semi) { - qemu_log_mask(LOG_GUEST_ERROR, "SIMCALL but semihosting is disabled\n"); +#ifndef CONFIG_USER_ONLY + if (semihosting_enabled(dc->cring != 0)) { + return 0; } - return ill ? XTENSA_OP_ILL : 0; +#endif + qemu_log_mask(LOG_GUEST_ERROR, "SIMCALL but semihosting is disabled\n"); + + /* Between RE.2 and RE.3 simcall opcodes become a nop for the hardware. */ + return dc->config->hw_version <= 250002 ? XTENSA_OP_ILL : 0; } static void translate_simcall(DisasContext *dc, const OpcodeArg arg[], const uint32_t par[]) diff --git a/target/xtensa/win_helper.c b/target/xtensa/win_helper.c index ec9ff44..4b25f8f 100644 --- a/target/xtensa/win_helper.c +++ b/target/xtensa/win_helper.c @@ -30,7 +30,6 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "qemu/host-utils.h" -#include "exec/exec-all.h" static void copy_window_from_phys(CPUXtensaState *env, uint32_t window, uint32_t phys, uint32_t n) diff --git a/target/xtensa/xtensa-semi.c b/target/xtensa/xtensa-semi.c index 2ded8e5..636f421 100644 --- a/target/xtensa/xtensa-semi.c +++ b/target/xtensa/xtensa-semi.c @@ -29,6 +29,7 @@ #include "cpu.h" #include "chardev/char-fe.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" #include "semihosting/semihost.h" #include "semihosting/uaccess.h" #include "qapi/error.h"
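The reworked test_exceptions_simcall() above returns 0 when semihosting services the SIMCALL; otherwise it logs the error and flags the opcode illegal only on cores up to hardware version 250002 (RE.2), since later cores execute it as a nop. A compact C model of that decision (simcall_exceptions and OP_ILL are hypothetical stand-ins; the 250002 threshold is taken from the diff above):

#include <stdbool.h>
#include <stdio.h>

/* OP_ILL models XTENSA_OP_ILL; logging is omitted from this sketch. */
enum { OP_OK = 0, OP_ILL = 1 };

static int simcall_exceptions(bool semihosting, unsigned hw_version)
{
    if (semihosting) {
        return OP_OK;  /* the semihosting layer services the call */
    }
    /* From RE.3 on the opcode is a hardware nop; older cores trap. */
    return hw_version <= 250002 ? OP_ILL : OP_OK;
}

int main(void)
{
    printf("%d\n", simcall_exceptions(true, 250002));   /* 0 */
    printf("%d\n", simcall_exceptions(false, 250002));  /* 1 */
    printf("%d\n", simcall_exceptions(false, 260000));  /* 0 */
    return 0;
}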