Diffstat (limited to 'target/arm')
78 files changed, 14886 insertions, 13594 deletions
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 06cdf4b..1dd7984 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -21,8 +21,9 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "elf.h"
-#include "sysemu/dump.h"
+#include "system/dump.h"
 #include "cpu-features.h"
+#include "internals.h"
 
 /* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
 struct aarch64_user_regs {
@@ -142,7 +143,6 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
     return 0;
 }
 
-#ifdef TARGET_AARCH64
 static off_t sve_zreg_offset(uint32_t vq, int n)
 {
     off_t off = sizeof(struct aarch64_user_sve_header);
@@ -230,7 +230,6 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
 
     return 0;
 }
-#endif
 
 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
                              int cpuid, DumpState *s)
@@ -272,11 +271,9 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
         return ret;
     }
 
-#ifdef TARGET_AARCH64
     if (cpu_isar_feature(aa64_sve, cpu)) {
         ret = aarch64_write_elf64_sve(f, env, cpuid, s);
     }
-#endif
 
     return ret;
 }
@@ -450,11 +447,9 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
     if (class == ELFCLASS64) {
         note_size = AARCH64_PRSTATUS_NOTE_SIZE;
         note_size += AARCH64_PRFPREG_NOTE_SIZE;
-#ifdef TARGET_AARCH64
         if (cpu_isar_feature(aa64_sve, cpu)) {
             note_size += AARCH64_SVE_NOTE_SIZE(&cpu->env);
         }
-#endif
     } else {
         note_size = ARM_PRSTATUS_NOTE_SIZE;
         if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 2b2055c..20c70c7 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,7 +15,7 @@
 #include "arm-powerctl.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
-#include "sysemu/tcg.h"
+#include "system/tcg.h"
 #include "target/arm/multiprocessing.h"
 
 #ifndef DEBUG_ARM_POWERCTL
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index 3cc8cc7..cefd235 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -26,10 +26,11 @@
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "qapi/qobject-input-visitor.h"
-#include "qapi/qapi-commands-machine-target.h"
-#include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-misc-arm.h"
+#include "qobject/qdict.h"
 #include "qom/qom-qobject.h"
+#include "cpu.h"
 
 static GICCapability *gic_cap_new(int version)
 {
@@ -46,7 +47,7 @@ static inline void gic_cap_kvm_probe(GICCapability *v2, GICCapability *v3)
 #ifdef CONFIG_KVM
     int fdarray[3];
 
-    if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, NULL)) {
+    if (!kvm_arm_create_scratch_host_vcpu(fdarray, NULL)) {
         return;
     }
 
@@ -94,7 +95,7 @@ static const char *cpu_model_advertised_features[] = {
     "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
     "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
     "kvm-no-adjvtime", "kvm-steal-time",
-    "pauth", "pauth-impdef", "pauth-qarma3",
+    "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5",
     NULL
 };
 
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index cc7c543..c1a7ae3 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -23,6 +23,7 @@
 
 #include "hw/registerfields.h"
 #include "target/arm/kvm-consts.h"
+#include "cpu.h"
 
 /*
  * ARMCPRegInfo type field bits:
@@ -126,6 +127,14 @@ enum {
      * equivalent EL1 register when FEAT_NV2 is enabled.
     */
    ARM_CP_NV2_REDIRECT = 1 << 20,
+    /*
+     * Flag: this is a TLBI insn which (when FEAT_XS is present) also has
+     * an NXS variant at the same encoding except that crn is 1 greater,
+     * so when registering this cpreg automatically also register one
+     * for the TLBI NXS variant. (For QEMU the NXS variant behaves
+     * identically to the normal one, other than FGT trapping handling.)
+     */
+    ARM_CP_ADD_TLBI_NXS = 1 << 21,
 };
 
 /*
@@ -320,20 +329,23 @@ typedef enum CPAccessResult {
      * Access fails due to a configurable trap or enable which would
      * result in a categorized exception syndrome giving information about
      * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
-     * 0xc or 0x18).
+     * 0xc or 0x18). These traps are always to a specified target EL,
+     * never to the usual target EL.
      */
-    CP_ACCESS_TRAP = (1 << 2),
-    CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP | 2,
-    CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP | 3,
+    CP_ACCESS_TRAP_BIT = (1 << 2),
+    CP_ACCESS_TRAP_EL1 = CP_ACCESS_TRAP_BIT | 1,
+    CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP_BIT | 2,
+    CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP_BIT | 3,
 
     /*
-     * Access fails and results in an exception syndrome 0x0 ("uncategorized").
+     * Access fails with UNDEFINED, i.e. an exception syndrome 0x0
+     * ("uncategorized"), which is what an undefined insn produces.
      * Note that this is not a catch-all case -- the set of cases which may
      * result in this failure is specifically defined by the architecture.
      * This trap is always to the usual target EL, never directly to a
      * specified target EL.
      */
-    CP_ACCESS_TRAP_UNCATEGORIZED = (2 << 2),
+    CP_ACCESS_UNDEFINED = (2 << 2),
 } CPAccessResult;
 
 /* Indexes into fgt_read[] */
@@ -621,6 +633,7 @@ FIELD(HDFGWTR_EL2, NBRBCTL, 60, 1)
 FIELD(HDFGWTR_EL2, NBRBDATA, 61, 1)
 FIELD(HDFGWTR_EL2, NPMSNEVFR_EL1, 62, 1)
 
+FIELD(FGT, NXS, 13, 1) /* Honour HCR_EL2.FGTnXS to suppress FGT */
 /* Which fine-grained trap bit register to check, if any */
 FIELD(FGT, TYPE, 10, 3)
 FIELD(FGT, REV, 9, 1) /* Is bit sense reversed? */
@@ -639,6 +652,17 @@ FIELD(FGT, BITPOS, 0, 6) /* Bit position within the uint64_t */
 #define DO_REV_BIT(REG, BITNAME) \
     FGT_##BITNAME = FGT_##REG | FGT_REV | R_##REG##_EL2_##BITNAME##_SHIFT
 
+/*
+ * The FGT bits for TLBI maintenance instructions accessible at EL1 always
+ * affect the "normal" TLBI insns; they affect the corresponding TLBI insns
+ * with the nXS qualifier only if HCRX_EL2.FGTnXS is 0. We define e.g.
+ * FGT_TLBIVAE1 to use for the normal insn, and FGT_TLBIVAE1NXS to use
+ * for the nXS qualified insn.
+ */
+#define DO_TLBINXS_BIT(REG, BITNAME) \
+    FGT_##BITNAME = FGT_##REG | R_##REG##_EL2_##BITNAME##_SHIFT, \
+    FGT_##BITNAME##NXS = FGT_##BITNAME | R_FGT_NXS_MASK
+
 typedef enum FGTBit {
     /*
      * These bits tell us which register arrays to use:
@@ -772,36 +796,36 @@ typedef enum FGTBit {
     DO_BIT(HFGITR, ATS1E0W),
     DO_BIT(HFGITR, ATS1E1RP),
     DO_BIT(HFGITR, ATS1E1WP),
-    DO_BIT(HFGITR, TLBIVMALLE1OS),
-    DO_BIT(HFGITR, TLBIVAE1OS),
-    DO_BIT(HFGITR, TLBIASIDE1OS),
-    DO_BIT(HFGITR, TLBIVAAE1OS),
-    DO_BIT(HFGITR, TLBIVALE1OS),
-    DO_BIT(HFGITR, TLBIVAALE1OS),
-    DO_BIT(HFGITR, TLBIRVAE1OS),
-    DO_BIT(HFGITR, TLBIRVAAE1OS),
-    DO_BIT(HFGITR, TLBIRVALE1OS),
-    DO_BIT(HFGITR, TLBIRVAALE1OS),
-    DO_BIT(HFGITR, TLBIVMALLE1IS),
-    DO_BIT(HFGITR, TLBIVAE1IS),
-    DO_BIT(HFGITR, TLBIASIDE1IS),
-    DO_BIT(HFGITR, TLBIVAAE1IS),
-    DO_BIT(HFGITR, TLBIVALE1IS),
-    DO_BIT(HFGITR, TLBIVAALE1IS),
-    DO_BIT(HFGITR, TLBIRVAE1IS),
-    DO_BIT(HFGITR, TLBIRVAAE1IS),
-    DO_BIT(HFGITR, TLBIRVALE1IS),
-    DO_BIT(HFGITR, TLBIRVAALE1IS),
-    DO_BIT(HFGITR, TLBIRVAE1),
-    DO_BIT(HFGITR, TLBIRVAAE1),
-    DO_BIT(HFGITR, TLBIRVALE1),
-    DO_BIT(HFGITR, TLBIRVAALE1),
-    DO_BIT(HFGITR, TLBIVMALLE1),
-    DO_BIT(HFGITR, TLBIVAE1),
-    DO_BIT(HFGITR, TLBIASIDE1),
-    DO_BIT(HFGITR, TLBIVAAE1),
-    DO_BIT(HFGITR, TLBIVALE1),
-    DO_BIT(HFGITR, TLBIVAALE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIASIDE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAAE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVALE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAALE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVALE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1OS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIASIDE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAAE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVALE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAALE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVALE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1IS),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVALE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIASIDE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAAE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVALE1),
+    DO_TLBINXS_BIT(HFGITR, TLBIVAALE1),
     DO_BIT(HFGITR, CFPRCTX),
     DO_BIT(HFGITR, DVPRCTX),
     DO_BIT(HFGITR, CPPRCTX),
@@ -1134,4 +1158,32 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
     return ri->opc1 == 4 || ri->opc1 == 5;
 }
 
+/* Macros for accessing a specified CP register bank */
+#define A32_BANKED_REG_GET(_env, _regname, _secure) \
+    ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
+
+#define A32_BANKED_REG_SET(_env, _regname, _secure, _val) \
+    do { \
+        if (_secure) { \
+            (_env)->cp15._regname##_s = (_val); \
+        } else { \
+            (_env)->cp15._regname##_ns = (_val); \
+        } \
+    } while (0)
+
+/*
+ * Macros for automatically accessing a specific CP register bank depending on
+ * the current secure state of the system. These macros are not intended for
+ * supporting instruction translation reads/writes as these are dependent
+ * solely on the SCR.NS bit and not the mode.
+ */ +#define A32_BANKED_CURRENT_REG_GET(_env, _regname) \ + A32_BANKED_REG_GET((_env), _regname, \ + (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3))) + +#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val) \ + A32_BANKED_REG_SET((_env), _regname, \ + (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \ + (_val)) + #endif /* TARGET_ARM_CPREGS_H */ diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index c59ca10..5d8adfb 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -21,6 +21,9 @@ #define TARGET_ARM_FEATURES_H #include "hw/registerfields.h" +#include "qemu/host-utils.h" +#include "cpu.h" +#include "cpu-sysregs.h" /* * Naming convention for isar_feature functions: @@ -43,103 +46,103 @@ */ static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) != 0; } static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) > 1; } static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) { /* (M-profile) low-overhead loops and branch future */ - return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; + return FIELD_EX32_IDREG(id, ID_ISAR0, CMPBRANCH) >= 3; } static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR1, JAZELLE) != 0; } static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) != 0; } static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) > 1; } static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA1) != 0; } static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA2) != 0; } static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, CRC32) != 0; } static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, RDM) != 0; } static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, VCMA) != 0; } static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, JSCVT) != 0; } static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, DP) != 0; } static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, FHM) != 0; } static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SB) != 0; } static inline bool 
isar_feature_aa32_predinv(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SPECRES) != 0; } static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, BF16) != 0; } static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, I8MM) != 0; } static inline bool isar_feature_aa32_ras(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, RAS) != 0; } static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0; + return FIELD_EX32_IDREG(id, ID_PFR1, MPROGMOD) != 0; } static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) @@ -148,7 +151,7 @@ static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) * Return true if M-profile state handling insns * (VSCCLRM, CLRM, FPCTX access insns) are implemented */ - return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3; + return FIELD_EX32_IDREG(id, ID_PFR1, SECURITY) >= 3; } static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) @@ -281,88 +284,88 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4; + return FIELD_EX32_IDREG(id, ID_MMFR0, VMSA) >= 4; } static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) != 0; } static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) >= 2; } static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 4 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 5 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 6 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, HPDS) != 0; } static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, AC2) != 0; } static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, CCIDX) != 0; } static 
inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, XNX) != 0; } static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 1; } static inline bool isar_feature_aa32_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 2; } static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, DIT) != 0; } static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR2, SSBS) != 0; } static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 5; } static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 8; } static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id) @@ -375,102 +378,107 @@ static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id) */ static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) != 0; } static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) > 1; } static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA1) != 0; } static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) != 0; } static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) > 1; } static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, CRC32) != 0; } static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) != 0; } static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RDM) != 0; } static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA3) != 0; } static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM3) != 0; } static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM4) != 
0; } static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, DP) != 0; } static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, FHM) != 0; } static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) != 0; } static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) >= 2; } static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RNDR) != 0; } static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) == 2; } static inline bool isar_feature_aa64_tlbios(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) != 0; } static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, JSCVT) != 0; } static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FCMA) != 0; +} + +static inline bool isar_feature_aa64_xs(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, XS) != 0; } /* @@ -494,9 +502,9 @@ isar_feature_pauth_feature(const ARMISARegisters *id) * Architecturally, only one of {APA,API,APA3} may be active (non-zero) * and the other two must be zero. Thus we may avoid conditionals. */ - return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) | - FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) | - FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3)); + return (FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) | + FIELD_EX64_IDREG(id, ID_AA64ISAR1, API) | + FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3)); } static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) @@ -514,7 +522,7 @@ static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA5 algorithm. * QEMU will always enable or disable both APA and GPA. */ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) != 0; } static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) @@ -523,134 +531,144 @@ static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA3 algorithm. * QEMU will always enable or disable both APA3 and GPA3. 
*/ - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3) != 0; } static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SB) != 0; } static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SPECRES) != 0; } static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FRINTTS) != 0; } static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) != 0; } static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) >= 2; } static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) != 0; +} + +static inline bool isar_feature_aa64_ebf16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) > 1; } static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) != 0; } static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) >= 2; } static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, I8MM) != 0; } static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, WFXT) >= 2; } static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, BC) != 0; } static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, MOPS); +} + +static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES); } static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically. */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) != 0xf; } static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically wrt FP16. 
*/ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) == 1; } static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL0) >= 2; } static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL1) >= 2; } static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL2) >= 2; } static inline bool isar_feature_aa64_ras(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) != 0; } static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) >= 2; } static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SVE) != 0; } static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SEL2) != 0; } static inline bool isar_feature_aa64_rme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) != 0; } static inline bool isar_feature_aa64_dit(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, DIT) != 0; } static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) { - int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2); + int key = FIELD_EX64_IDREG(id, ID_AA64PFR0, CSV2); if (key >= 2) { return true; /* FEAT_CSV2_2 */ } if (key == 1) { - key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC); + key = FIELD_EX64_IDREG(id, ID_AA64PFR1, CSV2_FRAC); return key >= 2; /* FEAT_CSV2_1p2 */ } return false; @@ -658,310 +676,320 @@ static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SSBS) != 0; } static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, BT) != 0; } static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) != 0; } static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 2; } static inline bool isar_feature_aa64_mte3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 3; } static inline bool isar_feature_aa64_sme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SME) != 0; } static inline bool isar_feature_aa64_nmi(const ARMISARegisters *id) { - return 
FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, NMI) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, NMI) != 0; } static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 1; } static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id)); } static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 2; } static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id)); } static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 0; } static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 1; } static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN64) >= 0; } static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id)); } static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id)); } static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN64_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id)); } static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, FGT) != 0; } static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 0; } static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 1; } static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, VH) != 0; } static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, LO) != 0; } static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, 
ID_AA64MMFR1, PAN) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) != 0; } static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 2; } static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 3; } static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HCX) != 0; +} + +static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, AFP) != 0; } static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, TIDCP1) != 0; +} + +static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, CMOW) != 0; } static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) != 0; } static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) >= 2; } static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, XNX) != 0; } static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, UAO) != 0; } static inline bool isar_feature_aa64_st(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, ST) != 0; } static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, AT) != 0; } static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, FWB) != 0; } static inline bool isar_feature_aa64_ids(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, IDS) != 0; } static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 1; } static inline bool isar_feature_aa64_evt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 2; } static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, CCIDX) != 0; } static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, VARANGE) != 0; } static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, 
ID_AA64MMFR2, E0PD) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, E0PD) != 0; } static inline bool isar_feature_aa64_nv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) != 0; } static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) >= 2; } static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 4 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 5 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 6 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, DEBUGVER) >= 8; } static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64DFR0, DOUBLELOCK) >= 0; } static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) != 0; } static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) != 0; } static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) >= 2; } static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BITPERM) != 0; } static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BFLOAT16) != 0; } static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SHA3) != 0; } static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SM4) != 0; } static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, I8MM) != 0; } static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0; + return FIELD_EX64_IDREG(id, 
ID_AA64ZFR0, F32MM) != 0; } static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F64MM) != 0; } static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F64F64); } static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf; + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, I16I64) == 0xf; } static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, FA64); } /* @@ -1022,6 +1050,55 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id) return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id); } +typedef enum { + CCSIDR_FORMAT_LEGACY, + CCSIDR_FORMAT_CCIDX, +} CCSIDRFormat; + +static inline uint64_t make_ccsidr(CCSIDRFormat format, unsigned assoc, + unsigned linesize, unsigned cachesize, + uint8_t flags) +{ + unsigned lg_linesize = ctz32(linesize); + unsigned sets; + uint64_t ccsidr = 0; + + assert(assoc != 0); + assert(is_power_of_2(linesize)); + assert(lg_linesize >= 4 && lg_linesize <= 7 + 4); + + /* sets * associativity * linesize == cachesize. */ + sets = cachesize / (assoc * linesize); + assert(cachesize % (assoc * linesize) == 0); + + if (format == CCSIDR_FORMAT_LEGACY) { + /* + * The 32-bit CCSIDR format is: + * [27:13] number of sets - 1 + * [12:3] associativity - 1 + * [2:0] log2(linesize) - 4 + * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc + */ + ccsidr = deposit32(ccsidr, 28, 4, flags); + ccsidr = deposit32(ccsidr, 13, 15, sets - 1); + ccsidr = deposit32(ccsidr, 3, 10, assoc - 1); + ccsidr = deposit32(ccsidr, 0, 3, lg_linesize - 4); + } else { + /* + * The 64-bit CCSIDR_EL1 format is: + * [55:32] number of sets - 1 + * [23:3] associativity - 1 + * [2:0] log2(linesize) - 4 + * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc + */ + ccsidr = deposit64(ccsidr, 32, 24, sets - 1); + ccsidr = deposit64(ccsidr, 3, 21, assoc - 1); + ccsidr = deposit64(ccsidr, 0, 3, lg_linesize - 4); + } + + return ccsidr; +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index 2d5f3aa..8b46c7c 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -2,28 +2,24 @@ * ARM cpu parameters for qemu. * * Copyright (c) 2003 Fabrice Bellard - * SPDX-License-Identifier: LGPL-2.0+ + * SPDX-License-Identifier: LGPL-2.0-or-later */ #ifndef ARM_CPU_PARAM_H #define ARM_CPU_PARAM_H #ifdef TARGET_AARCH64 -# define TARGET_LONG_BITS 64 # define TARGET_PHYS_ADDR_SPACE_BITS 52 # define TARGET_VIRT_ADDR_SPACE_BITS 52 #else -# define TARGET_LONG_BITS 32 # define TARGET_PHYS_ADDR_SPACE_BITS 40 # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif #ifdef CONFIG_USER_ONLY -# ifdef TARGET_AARCH64 -# define TARGET_TAGGED_ADDRESSES +# if defined(TARGET_AARCH64) && defined(CONFIG_LINUX) /* Allow user-only to vary page size from 4k */ # define TARGET_PAGE_BITS_VARY -# define TARGET_PAGE_BITS_MIN 12 # else # define TARGET_PAGE_BITS 12 # endif @@ -33,10 +29,14 @@ * have to support 1K tiny pages. 
  */
 # define TARGET_PAGE_BITS_VARY
-# define TARGET_PAGE_BITS_MIN 10
+# define TARGET_PAGE_BITS_LEGACY 10
 #endif /* !CONFIG_USER_ONLY */
 
-/* ARM processors have a weak memory model */
-#define TCG_GUEST_DEFAULT_MO (0)
+/*
+ * ARM-specific extra insn start words:
+ * 1: Conditional execution bits
+ * 2: Partial exception syndrome for data aborts
+ */
+#define TARGET_INSN_START_EXTRA_WORDS 2
 
 #endif
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index b497667..2fcb0e1 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -28,11 +28,6 @@ OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU)
 
 #define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU
 
-#define TYPE_AARCH64_CPU "aarch64-cpu"
-typedef struct AArch64CPUClass AArch64CPUClass;
-DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
-                       TYPE_AARCH64_CPU)
-
 #define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
 #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
 
diff --git a/target/arm/cpu-sysregs.h b/target/arm/cpu-sysregs.h
new file mode 100644
index 0000000..7877a3b
--- /dev/null
+++ b/target/arm/cpu-sysregs.h
@@ -0,0 +1,42 @@
+/*
+ * Definitions for Arm ID system registers
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ARM_CPU_SYSREGS_H
+#define ARM_CPU_SYSREGS_H
+
+/*
+ * Following is similar to the coprocessor regs encodings, but with an argument
+ * ordering that matches the ARM ARM. We also reuse the various CP_REG_ defines
+ * that actually are the same as the equivalent KVM_REG_ values.
+ */
+#define ENCODE_ID_REG(op0, op1, crn, crm, op2) \
+    (((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
+     ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
+     ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
+     ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
+     ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) NAME##_IDX,
+
+typedef enum ARMIDRegisterIdx {
+#include "cpu-sysregs.h.inc"
+    NUM_ID_IDX,
+} ARMIDRegisterIdx;
+
+#undef DEF
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
+    SYS_##NAME = ENCODE_ID_REG(OP0, OP1, CRN, CRM, OP2),
+
+typedef enum ARMSysRegs {
+#include "cpu-sysregs.h.inc"
+} ARMSysRegs;
+
+#undef DEF
+
+extern const uint32_t id_register_sysreg[NUM_ID_IDX];
+
+int get_sysreg_idx(ARMSysRegs sysreg);
+
+#endif /* ARM_CPU_SYSREGS_H */
diff --git a/target/arm/cpu-sysregs.h.inc b/target/arm/cpu-sysregs.h.inc
new file mode 100644
index 0000000..cb99286
--- /dev/null
+++ b/target/arm/cpu-sysregs.h.inc
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+DEF(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0)
+DEF(ID_AA64PFR1_EL1, 3, 0, 0, 4, 1)
+DEF(ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5)
+DEF(ID_AA64DFR0_EL1, 3, 0, 0, 5, 0)
+DEF(ID_AA64DFR1_EL1, 3, 0, 0, 5, 1)
+DEF(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0)
+DEF(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1)
+DEF(ID_AA64ISAR2_EL1, 3, 0, 0, 6, 2)
+DEF(ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0)
+DEF(ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1)
+DEF(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2)
+DEF(ID_AA64MMFR3_EL1, 3, 0, 0, 7, 3)
+DEF(ID_PFR0_EL1, 3, 0, 0, 1, 0)
+DEF(ID_PFR1_EL1, 3, 0, 0, 1, 1)
+DEF(ID_DFR0_EL1, 3, 0, 0, 1, 2)
+DEF(ID_MMFR0_EL1, 3, 0, 0, 1, 4)
+DEF(ID_MMFR1_EL1, 3, 0, 0, 1, 5)
+DEF(ID_MMFR2_EL1, 3, 0, 0, 1, 6)
+DEF(ID_MMFR3_EL1, 3, 0, 0, 1, 7)
+DEF(ID_ISAR0_EL1, 3, 0, 0, 2, 0)
+DEF(ID_ISAR1_EL1, 3, 0, 0, 2, 1)
+DEF(ID_ISAR2_EL1, 3, 0, 0, 2, 2)
+DEF(ID_ISAR3_EL1, 3, 0, 0, 2, 3)
+DEF(ID_ISAR4_EL1, 3, 0, 0, 2, 4)
+DEF(ID_ISAR5_EL1, 3, 0, 0, 2, 5)
+DEF(ID_MMFR4_EL1, 3, 0, 0, 2, 6)
+DEF(ID_ISAR6_EL1, 3, 0, 0, 2, 7)
+DEF(MVFR0_EL1, 3, 0, 0, 3, 0)
+DEF(MVFR1_EL1, 3, 0, 0, 3, 1)
+DEF(MVFR2_EL1, 3, 0, 0, 3, 2)
+DEF(ID_PFR2_EL1, 3, 0, 0, 3, 4) +DEF(ID_DFR1_EL1, 3, 0, 0, 3, 5) +DEF(ID_MMFR5_EL1, 3, 0, 0, 3, 6) +DEF(ID_AA64ZFR0_EL1, 3, 0, 0, 4, 4) +DEF(CTR_EL0, 3, 3, 0, 0, 1) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 35fa281..a59a5b5 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -23,16 +23,18 @@ #include "qemu/timer.h" #include "qemu/log.h" #include "exec/page-vary.h" +#include "exec/tswap.h" #include "target/arm/idau.h" #include "qemu/module.h" #include "qapi/error.h" #include "cpu.h" #ifdef CONFIG_TCG -#include "hw/core/tcg-cpu-ops.h" +#include "exec/translation-block.h" +#include "accel/tcg/cpu-ops.h" #endif /* CONFIG_TCG */ #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "hw/qdev-properties.h" #if !defined(CONFIG_USER_ONLY) #include "hw/loader.h" @@ -41,9 +43,9 @@ #include "hw/intc/armv7m_nvic.h" #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ -#include "sysemu/tcg.h" -#include "sysemu/qtest.h" -#include "sysemu/hw_accel.h" +#include "system/tcg.h" +#include "system/qtest.h" +#include "system/hw_accel.h" #include "kvm_arm.h" #include "disas/capstone.h" #include "fpu/softfloat.h" @@ -120,8 +122,15 @@ void arm_restore_state_to_opc(CPUState *cs, env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; } } + +int arm_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return arm_env_mmu_index(cpu_env(cs)); +} + #endif /* CONFIG_TCG */ +#ifndef CONFIG_USER_ONLY /* * With SCTLR_ELx.NMI == 0, IRQ with Superpriority is masked identically with * IRQ without Superpriority. Moreover, if the GIC is configured so that @@ -140,11 +149,7 @@ static bool arm_cpu_has_work(CPUState *cs) | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR | CPU_INTERRUPT_EXITTB); } - -static int arm_cpu_mmu_index(CPUState *cs, bool ifetch) -{ - return arm_env_mmu_index(cpu_env(cs)); -} +#endif /* !CONFIG_USER_ONLY */ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void *opaque) @@ -545,18 +550,21 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) env->sau.ctrl = 0; } - set_flush_to_zero(1, &env->vfp.standard_fp_status); - set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); - set_default_nan_mode(1, &env->vfp.standard_fp_status); - set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.fp_status); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.standard_fp_status); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.fp_status_f16); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.standard_fp_status_f16); + set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]); + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); + set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); + set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]); + 
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]); + #ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_arm_reset_vcpu(cpu); @@ -826,7 +834,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { - CPUClass *cc = CPU_GET_CLASS(cs); CPUARMState *env = cpu_env(cs); uint32_t cur_el = arm_current_el(env); bool secure = arm_is_secure(env); @@ -926,7 +933,7 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) found: cs->exception_index = excp_idx; env->exception.target_el = target_el; - cc->tcg_ops->do_interrupt(cs); + cs->cc->tcg_ops->do_interrupt(cs); return true; } @@ -1092,37 +1099,6 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level) } } -static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) -{ -#ifdef CONFIG_KVM - ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - uint32_t linestate_bit; - int irq_id; - - switch (irq) { - case ARM_CPU_IRQ: - irq_id = KVM_ARM_IRQ_CPU_IRQ; - linestate_bit = CPU_INTERRUPT_HARD; - break; - case ARM_CPU_FIQ: - irq_id = KVM_ARM_IRQ_CPU_FIQ; - linestate_bit = CPU_INTERRUPT_FIQ; - break; - default: - g_assert_not_reached(); - } - - if (level) { - env->irq_line_state |= linestate_bit; - } else { - env->irq_line_state &= ~linestate_bit; - } - kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level); -#endif -} - static bool arm_cpu_virtio_is_big_endian(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); @@ -1133,7 +1109,7 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs) } #ifdef CONFIG_TCG -static bool arm_cpu_exec_halt(CPUState *cs) +bool arm_cpu_exec_halt(CPUState *cs) { bool leave_halt = cpu_has_work(cs); @@ -1167,7 +1143,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) { ARMCPU *ac = ARM_CPU(cpu); CPUARMState *env = &ac->env; - bool sctlr_b; + bool sctlr_b = arm_sctlr_b(env); if (is_a64(env)) { info->cap_arch = CS_ARCH_ARM64; @@ -1194,13 +1170,9 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) info->cap_mode = cap_mode; } - sctlr_b = arm_sctlr_b(env); + info->endian = BFD_ENDIAN_LITTLE; if (bswap_code(sctlr_b)) { -#if TARGET_BIG_ENDIAN - info->endian = BFD_ENDIAN_LITTLE; -#else - info->endian = BFD_ENDIAN_BIG; -#endif + info->endian = target_big_endian() ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG; } info->flags &= ~INSN_ARM_BE32; #ifndef CONFIG_USER_ONLY @@ -1210,8 +1182,6 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) #endif } -#ifdef TARGET_AARCH64 - static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) { ARMCPU *cpu = ARM_CPU(cs); @@ -1369,15 +1339,6 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -#else - -static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) -{ - g_assert_not_reached(); -} - -#endif - static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) { ARMCPU *cpu = ARM_CPU(cs); @@ -1539,39 +1500,40 @@ static void arm_cpu_initfn(Object *obj) * 0 means "unset, use the default value". That default might vary depending * on the CPU type, and is set in the realize fn. 
*/ -static Property arm_cpu_gt_cntfrq_property = +#ifndef CONFIG_USER_ONLY +static const Property arm_cpu_gt_cntfrq_property = DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0); -static Property arm_cpu_reset_cbar_property = +static const Property arm_cpu_reset_cbar_property = DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); -static Property arm_cpu_reset_hivecs_property = +static const Property arm_cpu_reset_hivecs_property = DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false); -#ifndef CONFIG_USER_ONLY -static Property arm_cpu_has_el2_property = +static const Property arm_cpu_has_el2_property = DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true); -static Property arm_cpu_has_el3_property = +static const Property arm_cpu_has_el3_property = DEFINE_PROP_BOOL("has_el3", ARMCPU, has_el3, true); #endif -static Property arm_cpu_cfgend_property = +static const Property arm_cpu_cfgend_property = DEFINE_PROP_BOOL("cfgend", ARMCPU, cfgend, false); -static Property arm_cpu_has_vfp_property = +static const Property arm_cpu_has_vfp_property = DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true); -static Property arm_cpu_has_vfp_d32_property = +static const Property arm_cpu_has_vfp_d32_property = DEFINE_PROP_BOOL("vfp-d32", ARMCPU, has_vfp_d32, true); -static Property arm_cpu_has_neon_property = +static const Property arm_cpu_has_neon_property = DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true); -static Property arm_cpu_has_dsp_property = +static const Property arm_cpu_has_dsp_property = DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true); -static Property arm_cpu_has_mpu_property = +#ifndef CONFIG_USER_ONLY +static const Property arm_cpu_has_mpu_property = DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true); /* This is like DEFINE_PROP_UINT32 but it doesn't set the default value, @@ -1579,10 +1541,11 @@ static Property arm_cpu_has_mpu_property = * the right value for that particular CPU type, and we don't want * to override that with an incorrect constant value. */ -static Property arm_cpu_pmsav7_dregion_property = +static const Property arm_cpu_pmsav7_dregion_property = DEFINE_PROP_UNSIGNED_NODEFAULT("pmsav7-dregion", ARMCPU, pmsav7_dregion, qdev_prop_uint32, uint32_t); +#endif static bool arm_get_pmu(Object *obj, Error **errp) { @@ -1607,6 +1570,35 @@ static void arm_set_pmu(Object *obj, bool value, Error **errp) cpu->has_pmu = value; } +static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); +} + +static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + /* + * At this time, this property is only allowed if KVM is enabled. This + * restriction allows us to avoid fixing up functionality that assumes a + * uniform execution state like do_interrupt. 
+ */ + if (value == false) { + if (!kvm_enabled() || !kvm_arm_aarch32_supported()) { + error_setg(errp, "'aarch64' feature cannot be disabled " + "unless KVM is enabled and 32-bit EL1 " + "is supported"); + return; + } + unset_feature(&cpu->env, ARM_FEATURE_AARCH64); + } else { + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + } +} + unsigned int gt_cntfrq_period_ns(ARMCPU *cpu) { /* @@ -1723,7 +1715,7 @@ static void arm_cpu_propagate_feature_implications(ARMCPU *cpu) } } -void arm_cpu_post_init(Object *obj) +static void arm_cpu_post_init(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1734,6 +1726,14 @@ void arm_cpu_post_init(Object *obj) */ arm_cpu_propagate_feature_implications(cpu); + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + object_property_add_bool(obj, "aarch64", aarch64_cpu_get_aarch64, + aarch64_cpu_set_aarch64); + object_property_set_description(obj, "aarch64", + "Set on/off to enable/disable aarch64 " + "execution state "); + } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) || arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property); @@ -1749,7 +1749,6 @@ void arm_cpu_post_init(Object *obj) OBJ_PROP_FLAG_READWRITE); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) { /* Add the has_el3 state CPU property only if EL3 is allowed. This will * prevent "has_el3" from existing on CPUs which cannot support EL3. @@ -1821,6 +1820,7 @@ void arm_cpu_post_init(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property); } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property); if (arm_feature(&cpu->env, ARM_FEATURE_V7)) { @@ -1857,8 +1857,6 @@ void arm_cpu_post_init(Object *obj) &cpu->psci_conduit, OBJ_PROP_FLAG_READWRITE); - qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); - if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property); } @@ -1867,7 +1865,6 @@ void arm_cpu_post_init(Object *obj) kvm_arm_add_vcpu_properties(cpu); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && cpu_isar_feature(aa64_mte, cpu)) { object_property_add_link(obj, "tag-memory", @@ -1885,6 +1882,7 @@ void arm_cpu_post_init(Object *obj) } } #endif + qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); } static void arm_cpu_finalizefn(Object *obj) @@ -1916,7 +1914,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp) { Error *local_err = NULL; -#ifdef TARGET_AARCH64 if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { arm_cpu_sve_finalize(cpu, &local_err); if (local_err != NULL) { @@ -1952,7 +1949,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp) return; } } -#endif if (kvm_enabled()) { kvm_arm_steal_time_finalize(cpu, &local_err); @@ -1967,6 +1963,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); ARMCPU *cpu = ARM_CPU(dev); + ARMISARegisters *isar = &cpu->isar; ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; Error *local_err = NULL; @@ -2069,6 +2066,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) arm_gt_stimer_cb, cpu); cpu->gt_timer[GTIMER_HYPVIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale, arm_gt_hvtimer_cb, cpu); + cpu->gt_timer[GTIMER_S_EL2_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale, + arm_gt_sel2timer_cb, cpu); + cpu->gt_timer[GTIMER_S_EL2_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, 
scale, + arm_gt_sel2vtimer_cb, cpu); } #endif @@ -2120,21 +2121,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, JSCVT, 0); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, FP, 0xf); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); - cpu->isar.id_isar6 = u; + SET_IDREG(isar, ID_ISAR6, u); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, FPSP, 0); @@ -2168,7 +2164,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_NEON); - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0); @@ -2176,32 +2172,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0); t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0); - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0); t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0); t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0); - cpu->isar.id_aa64isar1 = t; + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, ADVSIMD, 0xf); - u = cpu->isar.id_isar5; + u = GET_IDREG(isar, ID_ISAR5); u = FIELD_DP32(u, ID_ISAR5, AES, 0); u = FIELD_DP32(u, ID_ISAR5, SHA1, 0); u = FIELD_DP32(u, ID_ISAR5, SHA2, 0); u = FIELD_DP32(u, ID_ISAR5, RDM, 0); u = FIELD_DP32(u, ID_ISAR5, VCMA, 0); - cpu->isar.id_isar5 = u; + SET_IDREG(isar, ID_ISAR5, u); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, DP, 0); u = FIELD_DP32(u, ID_ISAR6, FHM, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); u = FIELD_DP32(u, ID_ISAR6, I8MM, 0); - cpu->isar.id_isar6 = u; + SET_IDREG(isar, ID_ISAR6, u); if (!arm_feature(env, ARM_FEATURE_M)) { u = cpu->isar.mvfr1; @@ -2218,16 +2212,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_neon && !cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar0; - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0); - cpu->isar.id_aa64isar0 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR0, FHM, 0); - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, FRINTTS, 0); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, SIMDREG, 0); @@ -2244,19 +2233,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_THUMB_DSP); - u = cpu->isar.id_isar1; - u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1); - cpu->isar.id_isar1 = u; + FIELD_DP32_IDREG(isar, ID_ISAR1, EXTEND, 1); - u = cpu->isar.id_isar2; + u = GET_IDREG(isar, ID_ISAR2); u = FIELD_DP32(u, ID_ISAR2, MULTU, 1); u = FIELD_DP32(u, ID_ISAR2, MULTS, 1); - cpu->isar.id_isar2 = u; + SET_IDREG(isar, ID_ISAR2, u); - u = cpu->isar.id_isar3; + u = GET_IDREG(isar, ID_ISAR3); u = FIELD_DP32(u, ID_ISAR3, SIMD, 1); u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0); - cpu->isar.id_isar3 = u; + SET_IDREG(isar, ID_ISAR3, 
u); } @@ -2331,14 +2318,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the security extension feature bits in the processor * feature registers as well. */ - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0); - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL3, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, SECURITY, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPSDBG, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL3, 0); /* Disable the realm management extension, which requires EL3. */ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, RME, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, RME, 0); } if (!cpu->has_el2) { @@ -2361,9 +2346,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu); #endif } else { - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMUVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, PERFMON, 0); cpu->pmceid0 = 0; cpu->pmceid1 = 0; } @@ -2373,10 +2357,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the hypervisor feature bits in the processor feature * registers if we don't have EL2. */ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL2, 0); - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, - ID_PFR1, VIRTUALIZATION, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL2, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, VIRTUALIZATION, 0); } if (cpu_isar_feature(aa64_mte, cpu)) { @@ -2390,13 +2372,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY /* - * If we do not have tag-memory provided by the machine, - * reduce MTE support to instructions enabled at EL0. + * If we run with TCG and do not have tag-memory provided by + * the machine, then reduce MTE support to instructions enabled at EL0. * This matches Cortex-A710 BROADCASTMTE input being LOW. */ - if (cpu->tag_memory == NULL) { - cpu->isar.id_aa64pfr1 = - FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); + if (tcg_enabled() && cpu->tag_memory == NULL) { + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 1); + } + + /* + * If MTE is supported by the host, however it should not be + * enabled on the guest (i.e mte=off), clear guest's MTE bits." + */ + if (kvm_enabled() && !cpu->kvm_mte) { + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 0); } #endif } @@ -2416,32 +2405,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * try to access the non-existent system registers for them. 
*/ /* FEAT_SPE (Statistical Profiling Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMSVER, 0); /* FEAT_TRBE (Trace Buffer Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEBUFFER, 0); /* FEAT_TRF (Self-hosted Trace Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEFILT, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, TRACEFILT, 0); /* Trace Macrocell system register access */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPTRC, 0); /* Memory mapped trace */ - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, MMAPTRC, 0); /* FEAT_AMU (Activity Monitors Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0); - cpu->isar.id_pfr0 = - FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, AMU, 0); + FIELD_DP32_IDREG(isar, ID_PFR0, AMU, 0); /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, MPAM, 0); } /* MPU can be configured out of a PMSA CPU either by setting has-mpu @@ -2518,6 +2497,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) register_cp_regs_for_features(cpu); arm_cpu_register_gdb_regs_for_features(cpu); + arm_cpu_register_gdb_commands(cpu); init_cpreg_list(cpu); @@ -2616,7 +2596,7 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) return oc; } -static Property arm_cpu_properties[] = { +static const Property arm_cpu_properties[] = { DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0), DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, ARM64_AFFINITY_INVALID), @@ -2624,7 +2604,8 @@ static Property arm_cpu_properties[] = { DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), /* True to default to the backward-compat old CNTFRQ rather than 1Ghz */ DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false), - DEFINE_PROP_END_OF_LIST() + DEFINE_PROP_BOOL("backcompat-pauth-default-use-qarma5", ARMCPU, + backcompat_pauth_default_use_qarma5, false), }; static const gchar *arm_gdb_arch_name(CPUState *cs) @@ -2632,16 +2613,58 @@ static const gchar *arm_gdb_arch_name(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; + if (arm_gdbstub_is_aarch64(cpu)) { + return "aarch64"; + } if (arm_feature(env, ARM_FEATURE_IWMMXT)) { return "iwmmxt"; } return "arm"; } -#ifndef CONFIG_USER_ONLY +static const char *arm_gdb_get_core_xml_file(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + if (arm_gdbstub_is_aarch64(cpu)) { + return "aarch64-core.xml"; + } + if (arm_feature(env, ARM_FEATURE_M)) { + return "arm-m-profile.xml"; + } + return "arm-core.xml"; +} + +#ifdef CONFIG_USER_ONLY +/** + * aarch64_untagged_addr: + * + * Remove any address tag from @x. This is explicitly related to the + * linux syscall TIF_TAGGED_ADDR setting, not TBI in general. 
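+ * (Worked example, for illustration only: with tagged_addr_enable set, a
+ * user-space pointer such as 0x5a00000012345678 (tag 0x5a in bits 63..56,
+ * bit 55 clear) is folded to 0x0000000012345678 by the sextract64() below,
+ * while an address with bit 55 set keeps its top byte unchanged.)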
+ * + * There should be a better place to put this, but we need this in + * include/exec/cpu_ldst.h, and not some place linux-user specific. + * + * Note that arm-*-user will never set tagged_addr_enable. + */ +static vaddr aarch64_untagged_addr(CPUState *cs, vaddr x) +{ + CPUARMState *env = cpu_env(cs); + if (env->tagged_addr_enable) { + /* + * TBI is enabled for userspace but not kernelspace addresses. + * Only clear the tag if bit 55 is clear. + */ + x &= sextract64(x, 0, 56); + } + return x; +} +#else #include "hw/core/sysemu-cpu-ops.h" static const struct SysemuCPUOps arm_sysemu_ops = { + .has_work = arm_cpu_has_work, .get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug, .asidx_from_attrs = arm_asidx_from_attrs, .write_elf32_note = arm_cpu_write_elf32_note, @@ -2652,19 +2675,52 @@ static const struct SysemuCPUOps arm_sysemu_ops = { #endif #ifdef CONFIG_TCG +#ifndef CONFIG_USER_ONLY +static vaddr aprofile_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + /* + * The Stage2 and Phys indexes are only used for ptw on arm32, + * and all pte's are aligned, so we never produce a wrap for these. + * Double check that we're not truncating a 40-bit physical address. + */ + assert((unsigned)mmu_idx < (ARMMMUIdx_Stage2_S & ARM_MMU_IDX_COREIDX_MASK)); + + if (!is_a64(cpu_env(cs))) { + return (uint32_t)result; + } + + /* + * TODO: For FEAT_CPA2, decide how to we want to resolve + * Unpredictable_CPACHECK in AddressIncrement. + */ + return result; +} +#endif /* !CONFIG_USER_ONLY */ + static const TCGCPUOps arm_tcg_ops = { + .mttcg_supported = true, + /* ARM processors have a weak memory model */ + .guest_default_memory_order = 0, + .initialize = arm_translate_init, + .translate_code = arm_translate_code, + .get_tb_cpu_state = arm_get_tb_cpu_state, .synchronize_from_tb = arm_cpu_synchronize_from_tb, .debug_excp_handler = arm_debug_excp_handler, .restore_state_to_opc = arm_restore_state_to_opc, + .mmu_index = arm_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = arm_cpu_record_sigsegv, .record_sigbus = arm_cpu_record_sigbus, + .untagged_addr = aarch64_untagged_addr, #else - .tlb_fill = arm_cpu_tlb_fill, + .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = aprofile_pointer_wrap, .cpu_exec_interrupt = arm_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, + .cpu_exec_reset = cpu_reset, .do_interrupt = arm_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, @@ -2675,7 +2731,7 @@ static const TCGCPUOps arm_tcg_ops = { }; #endif /* CONFIG_TCG */ -static void arm_cpu_class_init(ObjectClass *oc, void *data) +static void arm_cpu_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(acc); @@ -2691,8 +2747,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) &acc->parent_phases); cc->class_by_name = arm_cpu_class_by_name; - cc->has_work = arm_cpu_has_work; - cc->mmu_index = arm_cpu_mmu_index; cc->dump_state = arm_cpu_dump_state; cc->set_pc = arm_cpu_set_pc; cc->get_pc = arm_cpu_get_pc; @@ -2702,6 +2756,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->sysemu_ops = &arm_sysemu_ops; #endif cc->gdb_arch_name = arm_gdb_arch_name; + cc->gdb_get_core_xml_file = arm_gdb_get_core_xml_file; cc->gdb_stop_before_watchpoint = true; cc->disas_set_info = arm_disas_set_info; @@ -2718,13 +2773,15 @@ static void arm_cpu_instance_init(Object *obj) arm_cpu_post_init(obj); } -static void 
cpu_register_class_init(ObjectClass *oc, void *data) +static void cpu_register_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(acc); acc->info = data; - cc->gdb_core_xml_file = "arm-core.xml"; + if (acc->info->deprecation_note) { + cc->deprecation_note = acc->info->deprecation_note; + } } void arm_cpu_register(const ARMCPUInfo *info) @@ -2733,11 +2790,11 @@ void arm_cpu_register(const ARMCPUInfo *info) .parent = TYPE_ARM_CPU, .instance_init = arm_cpu_instance_init, .class_init = info->class_init ?: cpu_register_class_init, - .class_data = (void *)info, + .class_data = info, }; type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); - type_register(&type_info); + type_register_static(&type_info); g_free((void *)type_info.name); } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 3841359..0338153 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -24,16 +24,15 @@ #include "qemu/cpu-float.h" #include "hw/registerfields.h" #include "cpu-qom.h" +#include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-interrupt.h" #include "exec/gdbstub.h" #include "exec/page-protection.h" #include "qapi/qapi-types-common.h" #include "target/arm/multiprocessing.h" #include "target/arm/gtimer.h" - -#ifdef TARGET_AARCH64 -#define KVM_HAVE_MCE_INJECTION 1 -#endif +#include "target/arm/cpu-sysregs.h" #define EXCP_UDEF 1 /* undefined instruction */ #define EXCP_SWI 2 /* software interrupt */ @@ -62,6 +61,7 @@ #define EXCP_NMI 26 #define EXCP_VINMI 27 #define EXCP_VFNMI 28 +#define EXCP_MON_TRAP 29 /* AArch32 trap to Monitor mode */ /* NB: add new EXCP_ defines to the array in arm_log_exception() too */ #define ARMV7M_EXCP_RESET 1 @@ -99,12 +99,6 @@ #define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) #endif -/* ARM-specific extra insn start words: - * 1: Conditional execution bits - * 2: Partial exception syndrome for data aborts - */ -#define TARGET_INSN_START_EXTRA_WORDS 2 - /* The 2nd extra word holding syndrome info for data aborts does not use * the upper 6 bits nor the lower 13 bits. We mask and shift it down to * help the sleb128 encoder do a better job. @@ -170,17 +164,12 @@ typedef struct ARMGenericTimer { * Align the data for use with TCG host vector operations. */ -#ifdef TARGET_AARCH64 -# define ARM_MAX_VQ 16 -#else -# define ARM_MAX_VQ 1 -#endif +#define ARM_MAX_VQ 16 typedef struct ARMVectorReg { uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16); } ARMVectorReg; -#ifdef TARGET_AARCH64 /* In AArch32 mode, predicate registers do not exist at all. */ typedef struct ARMPredicateReg { uint64_t p[DIV_ROUND_UP(2 * ARM_MAX_VQ, 8)] QEMU_ALIGNED(16); @@ -190,18 +179,72 @@ typedef struct ARMPredicateReg { typedef struct ARMPACKey { uint64_t lo, hi; } ARMPACKey; -#endif /* See the commentary above the TBFLAG field definitions. */ typedef struct CPUARMTBFlags { uint32_t flags; - target_ulong flags2; + uint64_t flags2; } CPUARMTBFlags; typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; typedef struct NVICState NVICState; +/* + * Enum for indexing vfp.fp_status[]. 
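+ * (Illustrative note: these values index the vfp.fp_status[] array declared
+ * further down, so a helper for an A64 operation would typically pick its
+ * status with something like
+ *   float_status *fpst = &env->vfp.fp_status[FPST_A64];
+ * choosing the _F16 or _AH variants as described for each entry below.)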
+ * + * FPST_A32: is the "normal" fp status for AArch32 insns + * FPST_A64: is the "normal" fp status for AArch64 insns + * FPST_A32_F16: used for AArch32 half-precision calculations + * FPST_A64_F16: used for AArch64 half-precision calculations + * FPST_STD: the ARM "Standard FPSCR Value" + * FPST_STD_F16: used for half-precision + * calculations with the ARM "Standard FPSCR Value" + * FPST_AH: used for the A64 insns which change behaviour + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, + * and the reciprocal and square root estimate/step insns) + * FPST_AH_F16: used for the A64 insns which change behaviour + * when FPCR.AH == 1 (bfloat16 conversions and multiplies, + * and the reciprocal and square root estimate/step insns); + * for half-precision + * + * Half-precision operations are governed by a separate + * flush-to-zero control bit in FPSCR:FZ16. We pass a separate + * status structure to control this. + * + * The "Standard FPSCR", ie default-NaN, flush-to-zero, + * round-to-nearest and is used by any operations (generally + * Neon) which the architecture defines as controlled by the + * standard FPSCR value rather than the FPSCR. + * + * The "standard FPSCR but for fp16 ops" is needed because + * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than + * using a fixed value for it. + * + * FPST_AH is needed because some insns have different + * behaviour when FPCR.AH == 1: they don't update cumulative + * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and + * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, + * which means we need an FPST_AH_F16 as well. + * + * To avoid having to transfer exception bits around, we simply + * say that the FPSCR cumulative exception flags are the logical + * OR of the flags in the four fp statuses. This relies on the + * only thing which needs to read the exception flags being + * an explicit FPSCR read. + */ +typedef enum ARMFPStatusFlavour { + FPST_A32, + FPST_A64, + FPST_A32_F16, + FPST_A64_F16, + FPST_AH, + FPST_AH_F16, + FPST_STD, + FPST_STD_F16, +} ARMFPStatusFlavour; +#define FPST_COUNT 8 + typedef struct CPUArchState { /* Regs for current mode. */ uint32_t regs[16]; @@ -606,55 +649,31 @@ typedef struct CPUArchState { struct { ARMVectorReg zregs[32]; -#ifdef TARGET_AARCH64 /* Store FFR as pregs[16] to make it easier to treat as any other. */ #define FFR_PRED_NUM 16 ARMPredicateReg pregs[17]; /* Scratch space for aa64 sve predicate temporary. */ ARMPredicateReg preg_tmp; -#endif /* We store these fpcsr fields separately for convenience. */ uint32_t qc[4] QEMU_ALIGNED(16); int vec_len; int vec_stride; + /* + * Floating point status and control registers. Some bits are + * stored separately in other fields or in the float_status below. + */ + uint64_t fpsr; + uint64_t fpcr; + uint32_t xregs[16]; /* Scratch space for aa32 neon expansion. */ uint32_t scratch[8]; - /* There are a number of distinct float control structures: - * - * fp_status: is the "normal" fp status. - * fp_status_fp16: used for half-precision calculations - * standard_fp_status : the ARM "Standard FPSCR Value" - * standard_fp_status_fp16 : used for half-precision - * calculations with the ARM "Standard FPSCR Value" - * - * Half-precision operations are governed by a separate - * flush-to-zero control bit in FPSCR:FZ16. We pass a separate - * status structure to control this. 
- * - * The "Standard FPSCR", ie default-NaN, flush-to-zero, - * round-to-nearest and is used by any operations (generally - * Neon) which the architecture defines as controlled by the - * standard FPSCR value rather than the FPSCR. - * - * The "standard FPSCR but for fp16 ops" is needed because - * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than - * using a fixed value for it. - * - * To avoid having to transfer exception bits around, we simply - * say that the FPSCR cumulative exception flags are the logical - * OR of the flags in the four fp statuses. This relies on the - * only thing which needs to read the exception flags being - * an explicit FPSCR read. - */ - float_status fp_status; - float_status fp_status_f16; - float_status standard_fp_status; - float_status standard_fp_status_f16; + /* There are a number of distinct float control structures. */ + float_status fp_status[FPST_COUNT]; uint64_t zcr_el[4]; /* ZCR_EL[1-3] */ uint64_t smcr_el[4]; /* SMCR_EL[1-3] */ @@ -679,7 +698,6 @@ typedef struct CPUArchState { uint32_t cregs[16]; } iwmmxt; -#ifdef TARGET_AARCH64 struct { ARMPACKey apia; ARMPACKey apib; @@ -711,7 +729,6 @@ typedef struct CPUArchState { * to keep the offsets into the rest of the structure smaller. */ ARMVectorReg zarray[ARM_MAX_VQ * 16]; -#endif struct CPUBreakpoint *cpu_breakpoint[16]; struct CPUWatchpoint *cpu_watchpoint[16]; @@ -767,12 +784,9 @@ typedef struct CPUArchState { #else /* CONFIG_USER_ONLY */ /* For usermode syscall translation. */ bool eabi; -#endif /* CONFIG_USER_ONLY */ - -#ifdef TARGET_TAGGED_ADDRESSES /* Linux syscall tagged address support */ bool tagged_addr_enable; -#endif +#endif /* CONFIG_USER_ONLY */ } CPUARMState; static inline void set_feature(CPUARMState *env, int feature) @@ -821,6 +835,53 @@ typedef struct { uint32_t map, init, supported; } ARMVQMap; +/* REG is ID_XXX */ +#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP64(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_DP32_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP32(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_EX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_EX32_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX32(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_SEX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_SEX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define SET_IDREG(ISAR, REG, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX] = VALUE; \ + }) + +#define GET_IDREG(ISAR, REG) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX]; \ + }) + /** * ARMCPU: * @env: #CPUARMState @@ -915,6 +976,8 @@ struct ArchCPU { /* CPU has memory protection unit */ bool has_mpu; + /* CPU has MTE enabled in KVM mode */ + bool kvm_mte; /* PMSAv7 MPU number of supported regions */ uint32_t pmsav7_dregion; /* PMSAv8 MPU number of supported hyp regions */ @@ -937,7 +1000,6 @@ struct ArchCPU { */ uint32_t kvm_target; -#ifdef CONFIG_KVM /* KVM init features for this CPU */ uint32_t kvm_init_features[7]; @@ -950,7 +1012,6 @@ struct 
ArchCPU { /* KVM steal time */ OnOffAuto kvm_steal_time; -#endif /* CONFIG_KVM */ /* Uniprocessor system with MP extensions */ bool mp_is_up; @@ -963,6 +1024,9 @@ struct ArchCPU { /* QOM property to indicate we should use the back-compat CNTFRQ default */ bool backcompat_cntfrq; + /* QOM property to indicate we should use the back-compat QARMA5 default */ + bool backcompat_pauth_default_use_qarma5; + /* Specify the number of cores in this CPU cluster. Used for the L2CTLR * register. */ @@ -986,44 +1050,14 @@ struct ArchCPU { * field by reading the value from the KVM vCPU. */ struct ARMISARegisters { - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint32_t id_mmfr0; - uint32_t id_mmfr1; - uint32_t id_mmfr2; - uint32_t id_mmfr3; - uint32_t id_mmfr4; - uint32_t id_mmfr5; - uint32_t id_pfr0; - uint32_t id_pfr1; - uint32_t id_pfr2; uint32_t mvfr0; uint32_t mvfr1; uint32_t mvfr2; - uint32_t id_dfr0; - uint32_t id_dfr1; uint32_t dbgdidr; uint32_t dbgdevid; uint32_t dbgdevid1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; - uint64_t id_aa64isar2; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; - uint64_t id_aa64mmfr0; - uint64_t id_aa64mmfr1; - uint64_t id_aa64mmfr2; - uint64_t id_aa64mmfr3; - uint64_t id_aa64dfr0; - uint64_t id_aa64dfr1; - uint64_t id_aa64zfr0; - uint64_t id_aa64smfr0; uint64_t reset_pmcr_el0; + uint64_t idregs[NUM_ID_IDX]; } isar; uint64_t midr; uint32_t revidr; @@ -1053,6 +1087,7 @@ struct ArchCPU { bool prop_pauth; bool prop_pauth_impdef; bool prop_pauth_qarma3; + bool prop_pauth_qarma5; bool prop_lpa2; /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ @@ -1101,8 +1136,9 @@ struct ArchCPU { typedef struct ARMCPUInfo { const char *name; + const char *deprecation_note; void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); + void (*class_init)(ObjectClass *oc, const void *data); } ARMCPUInfo; /** @@ -1120,22 +1156,18 @@ struct ARMCPUClass { ResettablePhases parent_phases; }; -struct AArch64CPUClass { - ARMCPUClass parent_class; -}; - /* Callback functions for the generic timer's timers. 
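 * (The two secure EL2 callbacks added below are hooked up in realize earlier
 * in this patch, e.g.
 *   cpu->gt_timer[GTIMER_S_EL2_PHYS] =
 *       timer_new(QEMU_CLOCK_VIRTUAL, scale, arm_gt_sel2timer_cb, cpu);
 * mirroring the existing physical, virtual and hyp timers.)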
*/ void arm_gt_ptimer_cb(void *opaque); void arm_gt_vtimer_cb(void *opaque); void arm_gt_htimer_cb(void *opaque); void arm_gt_stimer_cb(void *opaque); void arm_gt_hvtimer_cb(void *opaque); +void arm_gt_sel2timer_cb(void *opaque); +void arm_gt_sel2vtimer_cb(void *opaque); unsigned int gt_cntfrq_period_ns(ARMCPU *cpu); void gt_rme_post_el_change(ARMCPU *cpu, void *opaque); -void arm_cpu_post_init(Object *obj); - #define ARM_AFF0_SHIFT 0 #define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT) #define ARM_AFF1_SHIFT 8 @@ -1193,7 +1225,6 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, */ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el); -#ifdef TARGET_AARCH64 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); @@ -1225,13 +1256,6 @@ static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) #endif } -#else -static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } -static inline void aarch64_sve_change_el(CPUARMState *env, int o, - int n, bool a) -{ } -#endif - void aarch64_sync_32_to_64(CPUARMState *env); void aarch64_sync_64_to_32(CPUARMState *env); @@ -1358,6 +1382,7 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */ #define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */ #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */ +#define SCTLR_CMOW (1ULL << 32) /* FEAT_CMOW */ #define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */ #define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */ #define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */ @@ -1680,61 +1705,103 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) uint32_t vfp_get_fpscr(CPUARMState *env); void vfp_set_fpscr(CPUARMState *env, uint32_t val); -/* FPCR, Floating Point Control Register - * FPSR, Floating Poiht Status Register +/* + * FPCR, Floating Point Control Register + * FPSR, Floating Point Status Register * - * For A64 the FPSCR is split into two logically distinct registers, - * FPCR and FPSR. However since they still use non-overlapping bits - * we store the underlying state in fpscr and just mask on read/write. + * For A64 floating point control and status bits are stored in + * two logically distinct registers, FPCR and FPSR. We store these + * in QEMU in vfp.fpcr and vfp.fpsr. + * For A32 there was only one register, FPSCR. The bits are arranged + * such that FPSCR bits map to FPCR or FPSR bits in the same bit positions, + * so we can use appropriate masking to handle FPSCR reads and writes. + * Note that the FPCR has some bits which are not visible in the + * AArch32 view (for FEAT_AFP). Writing the FPSCR leaves these unchanged. 
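+ * (Sketch of the masking described above, using the FPSCR_FPCR_MASK and
+ * FPSCR_FPSR_MASK definitions below: an AArch32 FPSCR read is roughly
+ *   (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) | (vfp_get_fpsr(env) & FPSCR_FPSR_MASK)
+ * and an FPSCR write splits its value with the same two masks before
+ * updating FPCR and FPSR; the actual helpers are outside this header.)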
*/ -#define FPSR_MASK 0xf800009f -#define FPCR_MASK 0x07ff9f00 +/* FPCR bits */ +#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */ +#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */ +#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */ #define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */ #define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */ #define FPCR_OFE (1 << 10) /* Overflow exception trap enable */ #define FPCR_UFE (1 << 11) /* Underflow exception trap enable */ #define FPCR_IXE (1 << 12) /* Inexact exception trap enable */ +#define FPCR_EBF (1 << 13) /* Extended BFloat16 behaviors */ #define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */ +#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */ #define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */ +#define FPCR_STRIDE_MASK (3 << 20) /* Stride */ #define FPCR_RMODE_MASK (3 << 22) /* Rounding mode */ #define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */ #define FPCR_DN (1 << 25) /* Default NaN enable bit */ #define FPCR_AHP (1 << 26) /* Alternative half-precision */ -#define FPCR_QC (1 << 27) /* Cumulative saturation bit */ -#define FPCR_V (1 << 28) /* FP overflow flag */ -#define FPCR_C (1 << 29) /* FP carry flag */ -#define FPCR_Z (1 << 30) /* FP zero flag */ -#define FPCR_N (1 << 31) /* FP negative flag */ #define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */ #define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT) #define FPCR_LTPSIZE_LENGTH 3 -#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V) -#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC) +/* Cumulative exception trap enable bits */ +#define FPCR_EEXC_MASK (FPCR_IOE | FPCR_DZE | FPCR_OFE | FPCR_UFE | FPCR_IXE | FPCR_IDE) + +/* FPSR bits */ +#define FPSR_IOC (1 << 0) /* Invalid Operation cumulative exception */ +#define FPSR_DZC (1 << 1) /* Divide by Zero cumulative exception */ +#define FPSR_OFC (1 << 2) /* Overflow cumulative exception */ +#define FPSR_UFC (1 << 3) /* Underflow cumulative exception */ +#define FPSR_IXC (1 << 4) /* Inexact cumulative exception */ +#define FPSR_IDC (1 << 7) /* Input Denormal cumulative exception */ +#define FPSR_QC (1 << 27) /* Cumulative saturation bit */ +#define FPSR_V (1 << 28) /* FP overflow flag */ +#define FPSR_C (1 << 29) /* FP carry flag */ +#define FPSR_Z (1 << 30) /* FP zero flag */ +#define FPSR_N (1 << 31) /* FP negative flag */ + +/* Cumulative exception status bits */ +#define FPSR_CEXC_MASK (FPSR_IOC | FPSR_DZC | FPSR_OFC | FPSR_UFC | FPSR_IXC | FPSR_IDC) + +#define FPSR_NZCV_MASK (FPSR_N | FPSR_Z | FPSR_C | FPSR_V) +#define FPSR_NZCVQC_MASK (FPSR_NZCV_MASK | FPSR_QC) + +/* A32 FPSCR bits which architecturally map to FPSR bits */ +#define FPSCR_FPSR_MASK (FPSR_NZCVQC_MASK | FPSR_CEXC_MASK) +/* A32 FPSCR bits which architecturally map to FPCR bits */ +#define FPSCR_FPCR_MASK (FPCR_EEXC_MASK | FPCR_LEN_MASK | FPCR_FZ16 | \ + FPCR_STRIDE_MASK | FPCR_RMODE_MASK | \ + FPCR_FZ | FPCR_DN | FPCR_AHP) +/* These masks don't overlap: each bit lives in only one place */ +QEMU_BUILD_BUG_ON(FPSCR_FPSR_MASK & FPSCR_FPCR_MASK); -static inline uint32_t vfp_get_fpsr(CPUARMState *env) -{ - return vfp_get_fpscr(env) & FPSR_MASK; -} +/** + * vfp_get_fpsr: read the AArch64 FPSR + * @env: CPU context + * + * Return the current AArch64 FPSR value + */ +uint32_t vfp_get_fpsr(CPUARMState *env); -static inline void vfp_set_fpsr(CPUARMState *env, uint32_t val) -{ - uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPSR_MASK) | (val & FPSR_MASK); 
- vfp_set_fpscr(env, new_fpscr); -} +/** + * vfp_get_fpcr: read the AArch64 FPCR + * @env: CPU context + * + * Return the current AArch64 FPCR value + */ +uint32_t vfp_get_fpcr(CPUARMState *env); -static inline uint32_t vfp_get_fpcr(CPUARMState *env) -{ - return vfp_get_fpscr(env) & FPCR_MASK; -} +/** + * vfp_set_fpsr: write the AArch64 FPSR + * @env: CPU context + * @value: new value + */ +void vfp_set_fpsr(CPUARMState *env, uint32_t value); -static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val) -{ - uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPCR_MASK) | (val & FPCR_MASK); - vfp_set_fpscr(env, new_fpscr); -} +/** + * vfp_set_fpcr: write the AArch64 FPCR + * @env: CPU context + * @value: new value + */ +void vfp_set_fpcr(CPUARMState *env, uint32_t value); enum arm_cpu_mode { ARM_CPU_MODE_USR = 0x10, @@ -2299,6 +2366,8 @@ FIELD(DBGDEVID, DOUBLELOCK, 20, 4) FIELD(DBGDEVID, AUXREGS, 24, 4) FIELD(DBGDEVID, CIDMASK, 28, 4) +FIELD(DBGDEVID1, PCSROFFSET, 0, 4) + FIELD(MVFR0, SIMDREG, 0, 4) FIELD(MVFR0, FPSP, 4, 4) FIELD(MVFR0, FPDP, 8, 4) @@ -2510,6 +2579,11 @@ static inline bool arm_is_secure_below_el3(CPUARMState *env) return false; } +static inline bool arm_is_el3_or_mon(CPUARMState *env) +{ + return false; +} + static inline ARMSecuritySpace arm_security_space(CPUARMState *env) { return ARMSS_NonSecure; @@ -2542,81 +2616,15 @@ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space); uint64_t arm_hcr_el2_eff(CPUARMState *env); uint64_t arm_hcrx_el2_eff(CPUARMState *env); -/* Return true if the specified exception level is running in AArch64 state. */ -static inline bool arm_el_is_aa64(CPUARMState *env, int el) -{ - /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want, - * and if we're not in EL0 then the state of EL0 isn't well defined.) - */ - assert(el >= 1 && el <= 3); - bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64); - - /* The highest exception level is always at the maximum supported - * register width, and then lower levels have a register width controlled - * by bits in the SCR or HCR registers. - */ - if (el == 3) { - return aa64; - } - - if (arm_feature(env, ARM_FEATURE_EL3) && - ((env->cp15.scr_el3 & SCR_NS) || !(env->cp15.scr_el3 & SCR_EEL2))) { - aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW); - } - - if (el == 2) { - return aa64; - } - - if (arm_is_el2_enabled(env)) { - aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW); - } - - return aa64; -} - -/* Function for determining whether guest cp register reads and writes should +/* + * Function for determining whether guest cp register reads and writes should * access the secure or non-secure bank of a cp register. When EL3 is * operating in AArch32 state, the NS-bit determines whether the secure * instance of a cp register should be used. When EL3 is AArch64 (or if * it doesn't exist at all) then there is no register banking, and all * accesses are to the non-secure version. */ -static inline bool access_secure_reg(CPUARMState *env) -{ - bool ret = (arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3) && - !(env->cp15.scr_el3 & SCR_NS)); - - return ret; -} - -/* Macros for accessing a specified CP register bank */ -#define A32_BANKED_REG_GET(_env, _regname, _secure) \ - ((_secure) ? 
(_env)->cp15._regname##_s : (_env)->cp15._regname##_ns) - -#define A32_BANKED_REG_SET(_env, _regname, _secure, _val) \ - do { \ - if (_secure) { \ - (_env)->cp15._regname##_s = (_val); \ - } else { \ - (_env)->cp15._regname##_ns = (_val); \ - } \ - } while (0) - -/* Macros for automatically accessing a specific CP register bank depending on - * the current secure state of the system. These macros are not intended for - * supporting instruction translation reads/writes as these are dependent - * solely on the SCR.NS bit and not the mode. - */ -#define A32_BANKED_CURRENT_REG_GET(_env, _regname) \ - A32_BANKED_REG_GET((_env), _regname, \ - (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3))) - -#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val) \ - A32_BANKED_REG_SET((_env), _regname, \ - (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \ - (_val)) +bool access_secure_reg(CPUARMState *env); uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint32_t cur_el, bool secure); @@ -2639,39 +2647,6 @@ static inline bool arm_v7m_is_handler_mode(CPUARMState *env) return env->v7m.exception != 0; } -/* Return the current Exception Level (as per ARMv8; note that this differs - * from the ARMv7 Privilege Level). - */ -static inline int arm_current_el(CPUARMState *env) -{ - if (arm_feature(env, ARM_FEATURE_M)) { - return arm_v7m_is_handler_mode(env) || - !(env->v7m.control[env->v7m.secure] & 1); - } - - if (is_a64(env)) { - return extract32(env->pstate, 2, 2); - } - - switch (env->uncached_cpsr & 0x1f) { - case ARM_CPU_MODE_USR: - return 0; - case ARM_CPU_MODE_HYP: - return 2; - case ARM_CPU_MODE_MON: - return 3; - default: - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - /* If EL3 is 32-bit then all secure privileged modes run in - * EL3 - */ - return 3; - } - - return 1; - } -} - /** * write_list_to_cpustate * @cpu: ARMCPU @@ -2725,14 +2700,19 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * + NonSecure EL1 & 0 stage 2 * + NonSecure EL2 * + NonSecure EL2 & 0 (ARMv8.1-VHE) - * + Secure EL1 & 0 - * + Secure EL3 + * + Secure EL1 & 0 stage 1 + * + Secure EL1 & 0 stage 2 (FEAT_SEL2) + * + Secure EL2 (FEAT_SEL2) + * + Secure EL2 & 0 (FEAT_SEL2) + * + Realm EL1 & 0 stage 1 (FEAT_RME) + * + Realm EL1 & 0 stage 2 (FEAT_RME) + * + Realm EL2 (FEAT_RME) + * + EL3 * If EL3 is 32-bit: * + NonSecure PL1 & 0 stage 1 * + NonSecure PL1 & 0 stage 2 * + NonSecure PL2 - * + Secure PL0 - * + Secure PL1 + * + Secure PL1 & 0 * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) * * For QEMU, an mmu_idx is not quite the same as a translation regime because: @@ -2758,29 +2738,34 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * table over and over. * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access * Never (PAN) bit within PSTATE. - * 7. we fold together the secure and non-secure regimes for A-profile, + * 7. we fold together most secure and non-secure regimes for A-profile, * because there are no banked system registers for aarch64, so the * process of switching between secure and non-secure is * already heavyweight. + * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, + * because both are in use simultaneously for Secure EL2. 
* * This gives us the following list of cases: * - * EL0 EL1&0 stage 1+2 (aka NS PL0) - * EL1 EL1&0 stage 1+2 (aka NS PL1) - * EL1 EL1&0 stage 1+2 +PAN + * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) + * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) + * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN) * EL0 EL2&0 * EL2 EL2&0 * EL2 EL2&0 +PAN * EL2 (aka NS PL2) - * EL3 (aka S PL1) - * Physical (NS & S) - * Stage2 (NS & S) + * EL3 (aka AArch32 S PL1 PL1&0) + * AArch32 S PL0 PL1&0 (we call this EL30_0) + * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN) + * Stage2 Secure + * Stage2 NonSecure + * plus one TLB per Physical address space: S, NS, Realm, Root * - * for a total of 12 different mmu_idx. + * for a total of 16 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish EL0 and EL1 (and - * EL2 if we ever model a Cortex-R52). + * EL2 for cores like the Cortex-R52). * * M profile CPUs are rather different as they do not have a true MMU. * They have the following different MMU indexes: @@ -2840,6 +2825,8 @@ typedef enum ARMMMUIdx { ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A, /* * Used for second stage of an S12 page table walk, or for descriptor @@ -2847,14 +2834,14 @@ typedef enum ARMMMUIdx { * are in use simultaneously for SecureEL2: the security state for * the S2 ptw is selected by the NS bit from the S1 ptw. */ - ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, /* TLBs with 1-1 mapping to the physical address spaces. */ - ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A, /* * These are not allocated TLBs and are used only for AT system @@ -2893,6 +2880,8 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(E20_2), TO_CORE_BIT(E20_2_PAN), TO_CORE_BIT(E3), + TO_CORE_BIT(E30_0), + TO_CORE_BIT(E30_3_PAN), TO_CORE_BIT(Stage2), TO_CORE_BIT(Stage2_S), @@ -2958,60 +2947,15 @@ static inline bool arm_sctlr_b(CPUARMState *env) uint64_t arm_sctlr(CPUARMState *env, int el); -static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env, - bool sctlr_b) -{ -#ifdef CONFIG_USER_ONLY - /* - * In system mode, BE32 is modelled in line with the - * architecture (as word-invariant big-endianness), where loads - * and stores are done little endian but from addresses which - * are adjusted by XORing with the appropriate constant. So the - * endianness to use for the raw data access is not affected by - * SCTLR.B. - * In user mode, however, we model BE32 as byte-invariant - * big-endianness (because user-only code cannot tell the - * difference), and so we need to use a data access endianness - * that depends on SCTLR.B. - */ - if (sctlr_b) { - return true; - } -#endif - /* In 32bit endianness is determined by looking at CPSR's E bit */ - return env->uncached_cpsr & CPSR_E; -} - -static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr) -{ - return sctlr & (el ? SCTLR_EE : SCTLR_E0E); -} - -/* Return true if the processor is in big-endian mode. 
*/ -static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) -{ - if (!is_a64(env)) { - return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env)); - } else { - int cur_el = arm_current_el(env); - uint64_t sctlr = arm_sctlr(env, cur_el); - return arm_cpu_data_is_big_endian_a64(cur_el, sctlr); - } -} - -#include "exec/cpu-all.h" - /* * We have more than 32-bits worth of state per TB, so we split the data * between tb->flags and tb->cs_base, which is otherwise unused for ARM. * We collect these two parts in CPUARMTBFlags where they are named * flags and flags2 respectively. * - * The flags that are shared between all execution modes, TBFLAG_ANY, - * are stored in flags. The flags that are specific to a given mode - * are stores in flags2. Since cs_base is sized on the configured - * address size, flags2 always has 64-bits for A64, and a minimum of - * 32-bits for A32 and M32. + * The flags that are shared between all execution modes, TBFLAG_ANY, are stored + * in flags. The flags that are specific to a given mode are stored in flags2. + * flags2 always has 64-bits, even though only 32-bits are used for A32 and M32. * * The bits for 32-bit A-profile and M-profile partially overlap: * @@ -3121,6 +3065,8 @@ FIELD(TBFLAG_A64, NV2, 34, 1) FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) /* Set if FEAT_NV2 RAM accesses are big-endian */ FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) +FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ +FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ /* * Helpers for using the above. Note that only the A64 accessors use @@ -3182,16 +3128,6 @@ static inline bool bswap_code(bool sctlr_b) #endif } -#ifdef CONFIG_USER_ONLY -static inline bool arm_cpu_bswap_data(CPUARMState *env) -{ - return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env); -} -#endif - -void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags); - enum { QEMU_PSCI_CONDUIT_DISABLED = 0, QEMU_PSCI_CONDUIT_SMC = 1, @@ -3289,34 +3225,4 @@ extern const uint64_t pred_esz_masks[5]; #define LOG2_TAG_GRANULE 4 #define TAG_GRANULE (1 << LOG2_TAG_GRANULE) -#ifdef CONFIG_USER_ONLY -#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1)) -#endif - -#ifdef TARGET_TAGGED_ADDRESSES -/** - * cpu_untagged_addr: - * @cs: CPU context - * @x: tagged address - * - * Remove any address tag from @x. This is explicitly related to the - * linux syscall TIF_TAGGED_ADDR setting, not TBI in general. - * - * There should be a better place to put this, but we need this in - * include/exec/cpu_ldst.h, and not some place linux-user specific. - */ -static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x) -{ - ARMCPU *cpu = ARM_CPU(cs); - if (cpu->env.tagged_addr_enable) { - /* - * TBI is enabled for userspace but not kernelspace addresses. - * Only clear the tag if bit 55 is clear. 
- */ - x &= sextract64(x, 0, 56); - } - return x; -} -#endif - #endif diff --git a/target/arm/cpu32-stubs.c b/target/arm/cpu32-stubs.c new file mode 100644 index 0000000..81be44d --- /dev/null +++ b/target/arm/cpu32-stubs.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "target/arm/cpu.h" +#include "target/arm/internals.h" +#include <glib.h> + +void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 262a1d6..1f34067 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -23,10 +23,11 @@ #include "cpu.h" #include "cpregs.h" #include "qemu/module.h" -#include "sysemu/kvm.h" -#include "sysemu/hvf.h" -#include "sysemu/qtest.h" -#include "sysemu/tcg.h" +#include "qemu/units.h" +#include "system/kvm.h" +#include "system/hvf.h" +#include "system/qtest.h" +#include "system/tcg.h" #include "kvm_arm.h" #include "hvf_arm.h" #include "qapi/visitor.h" @@ -35,6 +36,28 @@ #include "cpu-features.h" #include "cpregs.h" +/* convert between <register>_IDX and SYS_<register> */ +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + [NAME##_IDX] = SYS_##NAME, + +const uint32_t id_register_sysreg[NUM_ID_IDX] = { +#include "cpu-sysregs.h.inc" +}; + +#undef DEF +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + case SYS_##NAME: return NAME##_IDX; + +int get_sysreg_idx(ARMSysRegs sysreg) +{ + switch (sysreg) { +#include "cpu-sysregs.h.inc" + } + g_assert_not_reached(); +} + +#undef DEF + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { /* @@ -113,7 +136,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * SVE is disabled and so are all vector lengths. Good. * Disable all SVE extensions as well. 
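 * (Note on the accessors used below, for illustration: GET_IDREG(isar, REG)
 * and SET_IDREG(isar, REG, val) read and write the corresponding idregs[]
 * slot (ID_AA64ZFR0_EL1_IDX here), so e.g.
 *   SET_IDREG(&cpu->isar, ID_AA64ZFR0, 0);
 * is the new spelling of the old cpu->isar.id_aa64zfr0 = 0; assignment.)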
*/ - cpu->isar.id_aa64zfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64ZFR0, 0); return; } @@ -287,16 +310,13 @@ static bool cpu_arm_get_sve(Object *obj, Error **errp) static void cpu_arm_set_sve(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; if (value && kvm_enabled() && !kvm_arm_sve_supported()) { error_setg(errp, "'sve' feature not supported by KVM on this host"); return; } - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, value); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, SVE, value); } void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) @@ -308,7 +328,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) if (vq_map == 0) { if (!cpu_isar_feature(aa64_sme, cpu)) { - cpu->isar.id_aa64smfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64SMFR0, 0); return; } @@ -347,11 +367,8 @@ static bool cpu_arm_get_sme(Object *obj, Error **errp) static void cpu_arm_set_sme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr1; - t = FIELD_DP64(t, ID_AA64PFR1, SME, value); - cpu->isar.id_aa64pfr1 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR1, SME, value); } static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) @@ -364,11 +381,8 @@ static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64smfr0; - t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value); - cpu->isar.id_aa64smfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64SMFR0, FA64, value); } #ifdef CONFIG_USER_ONLY @@ -479,6 +493,7 @@ void aarch64_add_sme_properties(Object *obj) void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu); + ARMISARegisters *isar = &cpu->isar; uint64_t isar1, isar2; /* @@ -489,13 +504,13 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) * * Begin by disabling all fields. 
*/ - isar1 = cpu->isar.id_aa64isar1; + isar1 = GET_IDREG(isar, ID_AA64ISAR1); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0); - isar2 = cpu->isar.id_aa64isar2; + isar2 = GET_IDREG(isar, ID_AA64ISAR2); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0); @@ -519,39 +534,56 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) } if (cpu->prop_pauth) { - if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) { + if ((cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) || + (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma5) || + (cpu->prop_pauth_qarma3 && cpu->prop_pauth_qarma5)) { error_setg(errp, - "cannot enable both pauth-impdef and pauth-qarma3"); + "cannot enable pauth-impdef, pauth-qarma3 and " + "pauth-qarma5 at the same time"); return; } - if (cpu->prop_pauth_impdef) { - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); + bool use_default = !cpu->prop_pauth_qarma5 && + !cpu->prop_pauth_qarma3 && + !cpu->prop_pauth_impdef; + + if (cpu->prop_pauth_qarma5 || + (use_default && + cpu->backcompat_pauth_default_use_qarma5)) { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); } else if (cpu->prop_pauth_qarma3) { isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); + } else if (cpu->prop_pauth_impdef || + (use_default && + !cpu->backcompat_pauth_default_use_qarma5)) { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); } else { - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); + g_assert_not_reached(); } - } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) { - error_setg(errp, "cannot enable pauth-impdef or " - "pauth-qarma3 without pauth"); + } else if (cpu->prop_pauth_impdef || + cpu->prop_pauth_qarma3 || + cpu->prop_pauth_qarma5) { + error_setg(errp, "cannot enable pauth-impdef, pauth-qarma3 or " + "pauth-qarma5 without pauth"); error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); } } - cpu->isar.id_aa64isar1 = isar1; - cpu->isar.id_aa64isar2 = isar2; + SET_IDREG(isar, ID_AA64ISAR1, isar1); + SET_IDREG(isar, ID_AA64ISAR2, isar2); } -static Property arm_cpu_pauth_property = +static const Property arm_cpu_pauth_property = DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true); -static Property arm_cpu_pauth_impdef_property = +static const Property arm_cpu_pauth_impdef_property = DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false); -static Property arm_cpu_pauth_qarma3_property = +static const Property arm_cpu_pauth_qarma3_property = DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false); +static Property arm_cpu_pauth_qarma5_property = + DEFINE_PROP_BOOL("pauth-qarma5", ARMCPU, prop_pauth_qarma5, false); void aarch64_add_pauth_properties(Object *obj) { @@ -572,6 +604,7 @@ void aarch64_add_pauth_properties(Object *obj) } else { qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property); qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property); + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma5_property); } } @@ -587,17 +620,18 @@ void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) return; } - t = cpu->isar.id_aa64mmfr0; + t = 
GET_IDREG(&cpu->isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(&cpu->isar, ID_AA64MMFR0, t); } static void aarch64_a57_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a57"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -618,33 +652,36 @@ static void aarch64_a57_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41013000; cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ - cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ + /* 32KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); + /* 48KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 3, 64, 48 * KiB, 2); + /* 2048KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 2 * MiB, 7); cpu->dcz_blocksize = 4; /* 64 bytes */ cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; @@ -656,6 +693,7 @@ static void aarch64_a57_initfn(Object *obj) static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a53"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -676,33 +714,36 @@ static void aarch64_a53_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 
0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001122); /* 40 bit physical addr */ cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x00110f13; cpu->isar.dbgdevid1 = 0x1; cpu->isar.reset_pmcr_el0 = 0x41033000; cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ - cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */ + /* 32KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); + /* 32KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 1, 64, 32 * KiB, 2); + /* 1024KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 1 * MiB, 7); cpu->dcz_blocksize = 4; /* 64 bytes */ cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; @@ -756,104 +797,12 @@ static const ARMCPUInfo aarch64_cpus[] = { #endif }; -static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - - return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); -} - -static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - - /* At this time, this property is only allowed if KVM is enabled. This - * restriction allows us to avoid fixing up functionality that assumes a - * uniform execution state like do_interrupt. 
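A few hunks above, the hard-coded ccsidr[] constants for the A57 and A53 are replaced by make_ccsidr(CCSIDR_FORMAT_LEGACY, ...) calls taking associativity, line size, cache size and the WT/WB/RA/WA flag nibble. The stand-alone sketch below only illustrates the legacy (pre-CCIDX) CCSIDR bit layout those calls encode; legacy_ccsidr() is an invented name, not the QEMU helper, whose exact behaviour is inferred from the call sites in this patch.

#include <stdint.h>
#include <stdio.h>

/* Stand-alone encoder for the legacy (32-bit) CCSIDR layout; not the QEMU helper. */
static uint32_t legacy_ccsidr(unsigned assoc, unsigned linesize_bytes,
                              unsigned cachesize_bytes, unsigned flags)
{
    unsigned num_sets = cachesize_bytes / (assoc * linesize_bytes);
    unsigned log2_line = 31 - __builtin_clz(linesize_bytes);

    return ((uint32_t)flags << 28)        /* WT/WB/RA/WA flag bits */
         | ((num_sets - 1) << 13)         /* NumSets - 1 */
         | ((assoc - 1) << 3)             /* Associativity - 1 */
         | (log2_line - 4);               /* log2(line size in bytes) - 4 */
}

int main(void)
{
    /* 32KB, 4-way, 64-byte lines, flags 7: same geometry as the A53 L1 dcache above. */
    printf("0x%08x\n", legacy_ccsidr(4, 64, 32 * 1024, 7));   /* prints 0x700fe01a */
    return 0;
}

Running it reproduces 0x700fe01a, the raw constant the A53 hunk used to carry for its 32KB L1 dcache.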
- */ - if (value == false) { - if (!kvm_enabled() || !kvm_arm_aarch32_supported()) { - error_setg(errp, "'aarch64' feature cannot be disabled " - "unless KVM is enabled and 32-bit EL1 " - "is supported"); - return; - } - unset_feature(&cpu->env, ARM_FEATURE_AARCH64); - } else { - set_feature(&cpu->env, ARM_FEATURE_AARCH64); - } -} - -static void aarch64_cpu_finalizefn(Object *obj) -{ -} - -static const gchar *aarch64_gdb_arch_name(CPUState *cs) -{ - return "aarch64"; -} - -static void aarch64_cpu_class_init(ObjectClass *oc, void *data) -{ - CPUClass *cc = CPU_CLASS(oc); - - cc->gdb_read_register = aarch64_cpu_gdb_read_register; - cc->gdb_write_register = aarch64_cpu_gdb_write_register; - cc->gdb_core_xml_file = "aarch64-core.xml"; - cc->gdb_arch_name = aarch64_gdb_arch_name; - - object_class_property_add_bool(oc, "aarch64", aarch64_cpu_get_aarch64, - aarch64_cpu_set_aarch64); - object_class_property_set_description(oc, "aarch64", - "Set on/off to enable/disable aarch64 " - "execution state "); -} - -static void aarch64_cpu_instance_init(Object *obj) -{ - ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj); - - acc->info->initfn(obj); - arm_cpu_post_init(obj); -} - -static void cpu_register_class_init(ObjectClass *oc, void *data) -{ - ARMCPUClass *acc = ARM_CPU_CLASS(oc); - - acc->info = data; -} - -void aarch64_cpu_register(const ARMCPUInfo *info) -{ - TypeInfo type_info = { - .parent = TYPE_AARCH64_CPU, - .instance_init = aarch64_cpu_instance_init, - .class_init = info->class_init ?: cpu_register_class_init, - .class_data = (void *)info, - }; - - type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); - type_register(&type_info); - g_free((void *)type_info.name); -} - -static const TypeInfo aarch64_cpu_type_info = { - .name = TYPE_AARCH64_CPU, - .parent = TYPE_ARM_CPU, - .instance_finalize = aarch64_cpu_finalizefn, - .abstract = true, - .class_init = aarch64_cpu_class_init, -}; - static void aarch64_cpu_register_types(void) { size_t i; - type_register_static(&aarch64_cpu_type_info); - for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { - aarch64_cpu_register(&aarch64_cpus[i]); + arm_cpu_register(&aarch64_cpus[i]); } } diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index 7d856ac..69fb1d0 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -11,9 +11,11 @@ #include "internals.h" #include "cpu-features.h" #include "cpregs.h" -#include "exec/exec-all.h" -#include "exec/helper-proto.h" -#include "sysemu/tcg.h" +#include "exec/watchpoint.h" +#include "system/tcg.h" + +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" #ifdef CONFIG_TCG /* Return the Exception Level targeted by debug exceptions. 
*/ @@ -378,7 +380,7 @@ bool arm_debug_check_breakpoint(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - target_ulong pc; + vaddr pc; int n; /* @@ -875,12 +877,13 @@ static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri, (env->cp15.mdcr_el3 & MDCR_TDCC); if (el < 1 && mdscr_el1_tdcc) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) { return CP_ACCESS_TRAP_EL2; } - if (el < 3 && ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) { + if (!arm_is_el3_or_mon(env) && + ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) { return CP_ACCESS_TRAP_EL3; } return CP_ACCESS_OK; @@ -1036,7 +1039,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { { .name = "DBGVCR", .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, .access = PL1_RW, .accessfn = access_tda, - .type = ARM_CP_NOP }, + .type = ARM_CP_CONST, .resetvalue = 0 }, /* * Dummy MDCCINT_EL1, since we don't implement the Debug Communications * Channel but Linux may try to access this register. The 32-bit @@ -1045,7 +1048,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_RW, .accessfn = access_tdcc, - .type = ARM_CP_NOP }, + .type = ARM_CP_CONST, .resetvalue = 0 }, /* * Dummy DBGCLAIM registers. * "The architecture does not define any functionality for the CLAIM tag bits.", @@ -1074,7 +1077,8 @@ static const ARMCPRegInfo debug_aa32_el1_reginfo[] = { { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0, .access = PL2_RW, .accessfn = access_dbgvcr32, - .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP }, + .type = ARM_CP_CONST | ARM_CP_EL3_NO_EL2_KEEP, + .resetvalue = 0 }, }; static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index a3bb73c..ce4497a 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -21,7 +21,8 @@ #include "cpu.h" #include "exec/gdbstub.h" #include "gdbstub/helpers.h" -#include "sysemu/tcg.h" +#include "gdbstub/commands.h" +#include "system/tcg.h" #include "internals.h" #include "cpu-features.h" #include "cpregs.h" @@ -43,6 +44,12 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; +#ifdef TARGET_AARCH64 + if (arm_gdbstub_is_aarch64(cpu)) { + return aarch64_cpu_gdb_read_register(cs, mem_buf, n); + } +#endif + if (n < 16) { /* Core integer register. */ return gdb_get_reg32(mem_buf, env->regs[n]); @@ -65,6 +72,12 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) CPUARMState *env = &cpu->env; uint32_t tmp; +#ifdef TARGET_AARCH64 + if (arm_gdbstub_is_aarch64(cpu)) { + return aarch64_cpu_gdb_write_register(cs, mem_buf, n); + } +#endif + tmp = ldl_p(mem_buf); /* @@ -474,6 +487,35 @@ static GDBFeature *arm_gen_dynamic_m_secextreg_feature(CPUState *cs, #endif #endif /* CONFIG_TCG */ +void arm_cpu_register_gdb_commands(ARMCPU *cpu) +{ + g_autoptr(GPtrArray) query_table = g_ptr_array_new(); + g_autoptr(GPtrArray) set_table = g_ptr_array_new(); + g_autoptr(GString) qsupported_features = g_string_new(NULL); + + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + #ifdef TARGET_AARCH64 + aarch64_cpu_register_gdb_commands(cpu, qsupported_features, query_table, + set_table); + #endif + } + + /* Set arch-specific handlers for 'q' commands. 
*/ + if (query_table->len) { + gdb_extend_query_table(query_table); + } + + /* Set arch-specific handlers for 'Q' commands. */ + if (set_table->len) { + gdb_extend_set_table(set_table); + } + + /* Set arch-specific qSupported feature. */ + if (qsupported_features->len) { + gdb_extend_qsupported_features(qsupported_features->str); + } +} + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) { CPUState *cs = CPU(cpu); @@ -507,6 +549,16 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) gdb_find_static_feature("aarch64-pauth.xml"), 0); } + +#ifdef CONFIG_USER_ONLY + /* Memory Tagging Extension (MTE) 'tag_ctl' pseudo-register. */ + if (cpu_isar_feature(aa64_mte, cpu)) { + gdb_register_coprocessor(cs, aarch64_gdb_get_tag_ctl_reg, + aarch64_gdb_set_tag_ctl_reg, + gdb_find_static_feature("aarch64-mte.xml"), + 0); + } +#endif #endif } else { if (arm_feature(env, ARM_FEATURE_NEON)) { diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c index caa31ff..64ee9b3 100644 --- a/target/arm/gdbstub64.c +++ b/target/arm/gdbstub64.c @@ -21,6 +21,16 @@ #include "cpu.h" #include "internals.h" #include "gdbstub/helpers.h" +#include "gdbstub/commands.h" +#include "tcg/mte_helper.h" +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX) +#include <sys/prctl.h> +#include "mte_user_helper.h" +#endif +#ifdef CONFIG_TCG +#include "accel/tcg/cpu-mmu-index.h" +#include "exec/target_page.h" +#endif int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { @@ -381,3 +391,234 @@ GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg) return &cpu->dyn_svereg_feature.desc; } + +#ifdef CONFIG_USER_ONLY +int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint64_t tcf0; + + assert(reg == 0); + + tcf0 = extract64(env->cp15.sctlr_el[1], 38, 2); + + return gdb_get_reg64(buf, tcf0); +} + +int aarch64_gdb_set_tag_ctl_reg(CPUState *cs, uint8_t *buf, int reg) +{ +#if defined(CONFIG_LINUX) + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + uint8_t tcf; + + assert(reg == 0); + + tcf = *buf << PR_MTE_TCF_SHIFT; + + if (!tcf) { + return 0; + } + + /* + * 'tag_ctl' register is actually a "pseudo-register" provided by GDB to + * expose options regarding the type of MTE fault that can be controlled at + * runtime. + */ + arm_set_mte_tcf0(env, tcf); + + return 1; +#else + return 0; +#endif +} +#endif /* CONFIG_USER_ONLY */ + +#ifdef CONFIG_TCG +static void handle_q_memtag(GArray *params, void *user_ctx) +{ + ARMCPU *cpu = ARM_CPU(user_ctx); + CPUARMState *env = &cpu->env; + uint32_t mmu_index; + + uint64_t addr = gdb_get_cmd_param(params, 0)->val_ull; + uint64_t len = gdb_get_cmd_param(params, 1)->val_ul; + int type = gdb_get_cmd_param(params, 2)->val_ul; + + uint8_t *tags; + uint8_t addr_tag; + + g_autoptr(GString) str_buf = g_string_new(NULL); + + /* + * GDB does not query multiple tags for a memory range on remote targets, so + * that's not supported either by gdbstub. + */ + if (len != 1) { + gdb_put_packet("E02"); + } + + /* GDB never queries a tag different from an allocation tag (type 1). */ + if (type != 1) { + gdb_put_packet("E03"); + } + + /* Find out the current translation regime for probe. */ + mmu_index = cpu_mmu_index(env_cpu(env), false); + /* Note that tags are packed here (2 tags packed in one byte). */ + tags = allocation_tag_mem_probe(env, mmu_index, addr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, true, 0); + if (!tags) { + /* Address is not in a tagged region. 
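For context on the error replies being built here: these handlers back the GDB remote-protocol memory-tagging packets. Roughly, the debugger sends qMemTags:<start>,<length>:<type> to read tags and QMemTags:<start>,<length>:<type>:<hex tags> to write them; a successful read is answered with 'm' followed by the hex-encoded tag (for example "m0a"), a successful write with "OK", and the E02/E03/E04 strings above are the error replies for unsupported lengths, unsupported tag types and untagged addresses respectively. The packet shapes are paraphrased from the GDB documentation rather than from this patch, so treat the exact syntax as an assumption.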
*/ + gdb_put_packet("E04"); + return; + } + + /* Unpack tag from byte. */ + addr_tag = load_tag1(addr, tags); + g_string_printf(str_buf, "m%.2x", addr_tag); + + gdb_put_packet(str_buf->str); +} + +static void handle_q_isaddresstagged(GArray *params, void *user_ctx) +{ + ARMCPU *cpu = ARM_CPU(user_ctx); + CPUARMState *env = &cpu->env; + uint32_t mmu_index; + + uint64_t addr = gdb_get_cmd_param(params, 0)->val_ull; + + uint8_t *tags; + const char *reply; + + /* Find out the current translation regime for probe. */ + mmu_index = cpu_mmu_index(env_cpu(env), false); + tags = allocation_tag_mem_probe(env, mmu_index, addr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, true, 0); + reply = tags ? "01" : "00"; + + gdb_put_packet(reply); +} + +static void handle_Q_memtag(GArray *params, void *user_ctx) +{ + ARMCPU *cpu = ARM_CPU(user_ctx); + CPUARMState *env = &cpu->env; + uint32_t mmu_index; + + uint64_t start_addr = gdb_get_cmd_param(params, 0)->val_ull; + uint64_t len = gdb_get_cmd_param(params, 1)->val_ul; + int type = gdb_get_cmd_param(params, 2)->val_ul; + char const *new_tags_str = gdb_get_cmd_param(params, 3)->data; + + uint64_t end_addr; + + int num_new_tags; + uint8_t *tags; + + g_autoptr(GByteArray) new_tags = g_byte_array_new(); + + /* + * Only the allocation tag (i.e. type 1) can be set at the stub side. + */ + if (type != 1) { + gdb_put_packet("E02"); + return; + } + + end_addr = start_addr + (len - 1); /* 'len' is always >= 1 */ + /* Check if request's memory range does not cross page boundaries. */ + if ((start_addr ^ end_addr) & TARGET_PAGE_MASK) { + gdb_put_packet("E03"); + return; + } + + /* + * Get all tags in the page starting from the tag of the start address. + * Note that there are two tags packed into a single byte here. + */ + /* Find out the current translation regime for probe. */ + mmu_index = cpu_mmu_index(env_cpu(env), false); + tags = allocation_tag_mem_probe(env, mmu_index, start_addr, MMU_DATA_STORE, + 1, MMU_DATA_STORE, true, 0); + if (!tags) { + /* Address is not in a tagged region. */ + gdb_put_packet("E04"); + return; + } + + /* Convert tags provided by GDB, 2 hex digits per tag. */ + num_new_tags = strlen(new_tags_str) / 2; + gdb_hextomem(new_tags, new_tags_str, num_new_tags); + + uint64_t address = start_addr; + int new_tag_index = 0; + while (address <= end_addr) { + uint8_t new_tag; + int packed_index; + + /* + * Find packed tag index from unpacked tag index. There are two tags + * in one packed index (one tag per nibble). 
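The loop below indexes into a byte array in which each byte carries two allocation tags. As a stand-alone illustration of that packing (the 16-byte TAG_GRANULE and the low-nibble-first order are assumptions for the sketch; the real load_tag1()/store_tag1() helpers live in the MTE helper code):

#include <stdint.h>
#include <stdio.h>

#define TAG_GRANULE 16   /* one 4-bit allocation tag per 16 bytes of memory */

/* Pick the nibble for the granule containing 'addr' out of a packed tag byte. */
static uint8_t sketch_load_tag1(uint64_t addr, const uint8_t *packed)
{
    int shift = (addr & TAG_GRANULE) ? 4 : 0;   /* odd granule -> high nibble (assumed order) */
    return (*packed >> shift) & 0xf;
}

static void sketch_store_tag1(uint64_t addr, uint8_t *packed, uint8_t tag)
{
    int shift = (addr & TAG_GRANULE) ? 4 : 0;
    *packed = (*packed & ~(0xf << shift)) | ((tag & 0xf) << shift);
}

int main(void)
{
    uint8_t mem = 0;                          /* one byte covers two 16-byte granules */
    sketch_store_tag1(0x1000, &mem, 0x3);     /* even granule: low nibble */
    sketch_store_tag1(0x1010, &mem, 0xa);     /* next granule: high nibble, same byte */
    printf("packed=0x%02x tag@0x1010=0x%x\n", mem, sketch_load_tag1(0x1010, &mem));
    return 0;
}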
+ */ + packed_index = new_tag_index / 2; + + new_tag = new_tags->data[new_tag_index % num_new_tags]; + store_tag1(address, tags + packed_index, new_tag); + + address += TAG_GRANULE; + new_tag_index++; + } + + gdb_put_packet("OK"); +} + +enum Command { + qMemTags, + qIsAddressTagged, + QMemTags, + NUM_CMDS +}; + +static const GdbCmdParseEntry cmd_handler_table[NUM_CMDS] = { + [qMemTags] = { + .handler = handle_q_memtag, + .cmd_startswith = true, + .cmd = "MemTags:", + .schema = "L,l:l0", + .need_cpu_context = true + }, + [qIsAddressTagged] = { + .handler = handle_q_isaddresstagged, + .cmd_startswith = true, + .cmd = "IsAddressTagged:", + .schema = "L0", + .need_cpu_context = true + }, + [QMemTags] = { + .handler = handle_Q_memtag, + .cmd_startswith = true, + .cmd = "MemTags:", + .schema = "L,l:l:s0", + .need_cpu_context = true + }, +}; +#endif /* CONFIG_TCG */ + +void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *qsupported, + GPtrArray *qtable, GPtrArray *stable) +{ + /* MTE */ +#ifdef CONFIG_TCG + if (cpu_isar_feature(aa64_mte, cpu)) { + g_string_append(qsupported, ";memory-tagging+"); + + g_ptr_array_add(qtable, (gpointer) &cmd_handler_table[qMemTags]); + g_ptr_array_add(qtable, (gpointer) &cmd_handler_table[qIsAddressTagged]); + g_ptr_array_add(stable, (gpointer) &cmd_handler_table[QMemTags]); + } +#endif +} diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h index b992941..d49c63c 100644 --- a/target/arm/gtimer.h +++ b/target/arm/gtimer.h @@ -10,12 +10,14 @@ #define TARGET_ARM_GTIMER_H enum { - GTIMER_PHYS = 0, - GTIMER_VIRT = 1, - GTIMER_HYP = 2, - GTIMER_SEC = 3, - GTIMER_HYPVIRT = 4, -#define NUM_GTIMERS 5 + GTIMER_PHYS = 0, /* CNTP_* ; EL1 physical timer */ + GTIMER_VIRT = 1, /* CNTV_* ; EL1 virtual timer */ + GTIMER_HYP = 2, /* CNTHP_* ; EL2 physical timer */ + GTIMER_SEC = 3, /* CNTPS_* ; EL3 physical timer */ + GTIMER_HYPVIRT = 4, /* CNTHV_* ; EL2 virtual timer ; only if FEAT_VHE */ + GTIMER_S_EL2_PHYS = 5, /* CNTHPS_* ; only if FEAT_SEL2 */ + GTIMER_S_EL2_VIRT = 6, /* CNTHVS_* ; only if FEAT_SEL2 */ +#define NUM_GTIMERS 7 }; #endif diff --git a/target/arm/helper.c b/target/arm/helper.c index ce31957..c311d2d 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -12,26 +12,32 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/helper-proto.h" +#include "exec/page-protection.h" +#include "exec/mmap-lock.h" #include "qemu/main-loop.h" #include "qemu/timer.h" #include "qemu/bitops.h" -#include "qemu/crc32c.h" #include "qemu/qemu-print.h" -#include "exec/exec-all.h" -#include <zlib.h> /* For crc32 */ +#include "exec/cputlb.h" +#include "exec/translation-block.h" #include "hw/irq.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/kvm.h" -#include "sysemu/tcg.h" +#include "system/cpu-timers.h" +#include "exec/icount.h" +#include "system/kvm.h" +#include "system/tcg.h" #include "qapi/error.h" #include "qemu/guest-random.h" #ifdef CONFIG_TCG +#include "accel/tcg/probe.h" +#include "accel/tcg/getpc.h" #include "semihosting/common-semi.h" #endif #include "cpregs.h" #include "target/arm/gtimer.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ static void switch_mode(CPUARMState *env, int mode); @@ -219,7 +225,7 @@ static void count_cpreg(gpointer key, gpointer opaque) } } -static gint cpreg_key_compare(gconstpointer a, gconstpointer b) +static gint cpreg_key_compare(gconstpointer a, gconstpointer b, gpointer d) { uint64_t aidx = 
cpreg_to_kvm_id((uintptr_t)a); uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b); @@ -243,7 +249,7 @@ void init_cpreg_list(ARMCPU *cpu) int arraylen; keys = g_hash_table_get_keys(cpu->cp_regs); - keys = g_list_sort(keys, cpreg_key_compare); + keys = g_list_sort_with_data(keys, cpreg_key_compare, NULL); cpu->cpreg_array_len = 0; @@ -285,7 +291,7 @@ static CPAccessResult access_el3_aa32ns(CPUARMState *env, { if (!is_a64(env) && arm_current_el(env) == 3 && arm_is_secure_below_el3(env)) { - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } return CP_ACCESS_OK; } @@ -310,7 +316,7 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env, return CP_ACCESS_TRAP_EL3; } /* This will be EL1 NS and EL2 NS, which just UNDEF */ - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } /* @@ -365,40 +371,6 @@ static CPAccessResult access_tacr(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } -/* Check for traps from EL1 due to HCR_EL2.TTLB. */ -static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TTLB)) { - return CP_ACCESS_TRAP_EL2; - } - return CP_ACCESS_OK; -} - -/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */ -static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1 && - (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) { - return CP_ACCESS_TRAP_EL2; - } - return CP_ACCESS_OK; -} - -#ifdef TARGET_AARCH64 -/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */ -static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1 && - (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) { - return CP_ACCESS_TRAP_EL2; - } - return CP_ACCESS_OK; -} -#endif - static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = env_archcpu(env); @@ -438,12 +410,15 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } -static int alle1_tlbmask(CPUARMState *env) +int alle1_tlbmask(CPUARMState *env) { /* * Note that the 'ALL' scope must invalidate both stage 1 and * stage 2 translations, whereas most other scopes only invalidate * stage 1 translations. + * + * For AArch32 this is only used for TLBIALLNSNH and VTTBR + * writes, so only needs to apply to NS PL1&0, not S PL1&0. 
*/ return (ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -452,174 +427,6 @@ static int alle1_tlbmask(CPUARMState *env) ARMMMUIdxBit_Stage2_S); } - -/* IS variants of TLB operations must affect all cores */ -static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_all_cpus_synced(cs); -} - -static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); -} - -static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); -} - -/* - * Non-IS variants of TLB operations are upgraded to - * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to - * force broadcast of these operations. - */ -static bool tlb_force_broadcast(CPUARMState *env) -{ - return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB); -} - -static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate all (TLBIALL) */ - CPUState *cs = env_cpu(env); - - if (tlb_force_broadcast(env)) { - tlb_flush_all_cpus_synced(cs); - } else { - tlb_flush(cs); - } -} - -static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ - CPUState *cs = env_cpu(env); - - value &= TARGET_PAGE_MASK; - if (tlb_force_broadcast(env)) { - tlb_flush_page_all_cpus_synced(cs, value); - } else { - tlb_flush_page(cs, value); - } -} - -static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by ASID (TLBIASID) */ - CPUState *cs = env_cpu(env); - - if (tlb_force_broadcast(env)) { - tlb_flush_all_cpus_synced(cs); - } else { - tlb_flush(cs); - } -} - -static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ - CPUState *cs = env_cpu(env); - - value &= TARGET_PAGE_MASK; - if (tlb_force_broadcast(env)) { - tlb_flush_page_all_cpus_synced(cs, value); - } else { - tlb_flush_page(cs, value); - } -} - -static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_by_mmuidx(cs, alle1_tlbmask(env)); -} - -static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env)); -} - - -static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2); -} - -static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2); -} - -static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2); -} - -static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = 
env_cpu(env); - uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E2); -} - -static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - static const ARMCPRegInfo cp_reginfo[] = { /* * Define the secure and non-secure FCSE identifier CP registers @@ -729,22 +536,6 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { */ { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* - * MMU TLB control. Note that the wildcarding means we cover not just - * the unified TLB ops but also the dside/iside/inner-shareable variants. - */ - { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, - .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write, - .type = ARM_CP_NO_RAW }, - { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY, - .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write, - .type = ARM_CP_NO_RAW }, - { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY, - .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write, - .type = ARM_CP_NO_RAW }, - { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY, - .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write, - .type = ARM_CP_NO_RAW }, { .name = "PRRR", .cp = 15, .crn = 10, .crm = 2, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NOP }, { .name = "NMRR", .cp = 15, .crn = 10, .crm = 2, @@ -1096,7 +887,7 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } if (el < 2 && (mdcr_el2 & MDCR_TPM)) { return CP_ACCESS_TRAP_EL2; @@ -2113,7 +1904,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .accessfn = pmreg_access, .fgt = FGT_PMCNTEN, - .writefn = pmcntenclr_write, + .writefn = pmcntenclr_write, .raw_writefn = raw_write, .type = ARM_CP_ALIAS | ARM_CP_IO }, { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, @@ -2121,7 +1912,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .fgt = FGT_PMCNTEN, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenclr_write }, + .writefn = pmcntenclr_write, .raw_writefn = raw_write }, { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, .access = PL0_RW, .type = ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), @@ -2238,16 +2029,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write, }, + .writefn = pmintenclr_write, .raw_writefn = raw_write }, 
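The PMCNTENCLR/PMINTENCLR entries in this hunk drop ARM_CP_NO_RAW and gain .raw_writefn = raw_write, so migration and debugger accesses can store the enable mask verbatim while guest writes keep their write-one-to-clear behaviour. A minimal model of that split, using invented names rather than the real cpreg plumbing:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Invented mini-model of a write-one-to-clear register with a raw backdoor. */
typedef struct {
    uint64_t pminten;                    /* backing state, like the .fieldoffset member */
} MiniState;

/* Guest-visible write: every 1 bit written clears the matching enable. */
static void sketch_pmintenclr_write(MiniState *s, uint64_t value)
{
    s->pminten &= ~value;
}

/* Raw write, as used for migration and gdb: store the incoming state verbatim. */
static void sketch_raw_write(MiniState *s, uint64_t value)
{
    s->pminten = value;
}

int main(void)
{
    MiniState s = { .pminten = 0xff };
    sketch_pmintenclr_write(&s, 0x0f);   /* guest clears the low interrupt enables */
    printf("after guest write: 0x%" PRIx64 "\n", s.pminten);   /* 0xf0 */
    sketch_raw_write(&s, 0xff);          /* inbound migration restores the full mask */
    printf("after raw write:   0x%" PRIx64 "\n", s.pminten);   /* 0xff */
    return 0;
}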
{ .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write }, + .writefn = pmintenclr_write, .raw_writefn = raw_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, @@ -2328,55 +2119,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0, .fgt = FGT_ISR_EL1, .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, - /* 32 bit ITLB invalidates */ - { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiall_write }, - { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimva_write }, - { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiasid_write }, - /* 32 bit DTLB invalidates */ - { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiall_write }, - { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimva_write }, - { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiasid_write }, - /* 32 bit TLB invalidates */ - { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiall_write }, - { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimva_write }, - { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbiasid_write }, - { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimvaa_write }, -}; - -static const ARMCPRegInfo v7mp_cp_reginfo[] = { - /* 32 bit TLB invalidates, Inner Shareable */ - { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbiall_is_write }, - { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbimva_is_write }, - { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbiasid_is_write }, - { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbimvaa_is_write }, }; static const ARMCPRegInfo pmovsset_cp_reginfo[] = { @@ -2423,7 +2165,7 @@ static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if 
(arm_current_el(env) == 0 && (env->teecr & 1)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } return teecr_access(env, ri, isread); } @@ -2503,14 +2245,14 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri, cntkctl = env->cp15.c14_cntkctl; } if (!extract32(cntkctl, 0, 2)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } break; case 1: if (!isread && ri->state == ARM_CP_STATE_AA32 && arm_is_secure_below_el3(env)) { /* Accesses from 32-bit Secure EL1 UNDEF (*not* trap to EL3!) */ - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } break; case 2: @@ -2519,7 +2261,7 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri, } if (!isread && el < arm_highest_el(env)) { - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } return CP_ACCESS_OK; @@ -2542,7 +2284,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx, /* CNT[PV]CT: not visible from PL0 if EL0[PV]CTEN is zero */ if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } /* fall through */ case 1: @@ -2583,7 +2325,7 @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx, * EL0 if EL0[PV]TEN is zero. */ if (!extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } /* fall through */ @@ -2649,7 +2391,10 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, switch (arm_current_el(env)) { case 1: if (!arm_is_secure(env)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_UNDEFINED; + } + if (arm_is_el2_enabled(env)) { + return CP_ACCESS_UNDEFINED; } if (!(env->cp15.scr_el3 & SCR_ST)) { return CP_ACCESS_TRAP_EL3; @@ -2657,7 +2402,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, return CP_ACCESS_OK; case 0: case 2: - return CP_ACCESS_TRAP; + return CP_ACCESS_UNDEFINED; case 3: return CP_ACCESS_OK; default: @@ -2665,6 +2410,45 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, } } +static CPAccessResult gt_sel2timer_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* + * The AArch64 register view of the secure EL2 timers are mostly + * accessible from EL3 and EL2 although can also be trapped to EL2 + * from EL1 depending on nested virt config. + */ + switch (arm_current_el(env)) { + case 0: /* UNDEFINED */ + return CP_ACCESS_UNDEFINED; + case 1: + if (!arm_is_secure(env)) { + /* UNDEFINED */ + return CP_ACCESS_UNDEFINED; + } else if (arm_hcr_el2_eff(env) & HCR_NV) { + /* Aarch64.SystemAccessTrap(EL2, 0x18) */ + return CP_ACCESS_TRAP_EL2; + } + /* UNDEFINED */ + return CP_ACCESS_UNDEFINED; + case 2: + if (!arm_is_secure(env)) { + /* UNDEFINED */ + return CP_ACCESS_UNDEFINED; + } + return CP_ACCESS_OK; + case 3: + if (env->cp15.scr_el3 & SCR_EEL2) { + return CP_ACCESS_OK; + } else { + return CP_ACCESS_UNDEFINED; + } + default: + g_assert_not_reached(); + } +} + uint64_t gt_get_countervalue(CPUARMState *env) { ARMCPU *cpu = env_archcpu(env); @@ -2716,12 +2500,80 @@ static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env) return 0; } -static uint64_t gt_phys_cnt_offset(CPUARMState *env) +static uint64_t gt_indirect_access_timer_offset(CPUARMState *env, int timeridx) { - if (arm_current_el(env) >= 2) { + /* + * Return the timer offset to use for indirect accesses to the timer. + * This is the Offset value as defined in D12.2.4.1 "Operation of the + * CompareValue views of the timers". 
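The offset returned here feeds the CompareValue check in gt_recalc_timer(): the timer output is asserted once (counter - offset) has counted up to CVAL, in unsigned 64-bit arithmetic. A stand-alone restatement of that relation with example numbers (the helper name is invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The timer output is asserted once (counter - offset) has counted up to CVAL. */
static bool sketch_istatus(uint64_t counter, uint64_t offset, uint64_t cval)
{
    /* Unsigned 64-bit arithmetic so wrap-around behaves architecturally. */
    return counter - offset >= cval;
}

int main(void)
{
    uint64_t cntvoff = 1000;   /* e.g. CNTVOFF_EL2 applied to the EL1 virtual timer */
    printf("%d\n", sketch_istatus(1500, cntvoff, 600));   /* 500 < 600  -> 0 */
    printf("%d\n", sketch_istatus(1700, cntvoff, 600));   /* 700 >= 600 -> 1 */
    return 0;
}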
+ * + * The condition here is not always the same as the condition for + * whether to apply an offset register when doing a direct read of + * the counter sysreg; those conditions are described in the + * access pseudocode for each counter register. + */ + switch (timeridx) { + case GTIMER_PHYS: + return gt_phys_raw_cnt_offset(env); + case GTIMER_VIRT: + return env->cp15.cntvoff_el2; + case GTIMER_HYP: + case GTIMER_SEC: + case GTIMER_HYPVIRT: + case GTIMER_S_EL2_PHYS: + case GTIMER_S_EL2_VIRT: return 0; + default: + g_assert_not_reached(); + } +} + +uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx) +{ + /* + * Return the timer offset to use for direct accesses to the + * counter registers CNTPCT and CNTVCT, and for direct accesses + * to the CNT*_TVAL registers. + * + * This isn't exactly the same as the indirect-access offset, + * because here we also care about what EL the register access + * is being made from. + * + * This corresponds to the access pseudocode for the registers. + */ + uint64_t hcr; + + switch (timeridx) { + case GTIMER_PHYS: + if (arm_current_el(env) >= 2) { + return 0; + } + return gt_phys_raw_cnt_offset(env); + case GTIMER_VIRT: + switch (arm_current_el(env)) { + case 2: + hcr = arm_hcr_el2_eff(env); + if (hcr & HCR_E2H) { + return 0; + } + break; + case 0: + hcr = arm_hcr_el2_eff(env); + if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { + return 0; + } + break; + } + return env->cp15.cntvoff_el2; + case GTIMER_HYP: + case GTIMER_SEC: + case GTIMER_HYPVIRT: + case GTIMER_S_EL2_PHYS: + case GTIMER_S_EL2_VIRT: + return 0; + default: + g_assert_not_reached(); } - return gt_phys_raw_cnt_offset(env); } static void gt_recalc_timer(ARMCPU *cpu, int timeridx) @@ -2733,8 +2585,7 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) * Timer enabled: calculate and set current ISTATUS, irq, and * reset timer to when ISTATUS next has to change */ - uint64_t offset = timeridx == GTIMER_VIRT ? 
- cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env); + uint64_t offset = gt_indirect_access_timer_offset(&cpu->env, timeridx); uint64_t count = gt_get_countervalue(&cpu->env); /* Note that this must be unsigned 64 bit arithmetic: */ int istatus = count - offset >= gt->cval; @@ -2797,34 +2648,14 @@ static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri, static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { - return gt_get_countervalue(env) - gt_phys_cnt_offset(env); -} - -uint64_t gt_virt_cnt_offset(CPUARMState *env) -{ - uint64_t hcr; - - switch (arm_current_el(env)) { - case 2: - hcr = arm_hcr_el2_eff(env); - if (hcr & HCR_E2H) { - return 0; - } - break; - case 0: - hcr = arm_hcr_el2_eff(env); - if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { - return 0; - } - break; - } - - return env->cp15.cntvoff_el2; + uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_PHYS); + return gt_get_countervalue(env) - offset; } static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { - return gt_get_countervalue(env) - gt_virt_cnt_offset(env); + uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT); + return gt_get_countervalue(env) - offset; } static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2836,47 +2667,38 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, gt_recalc_timer(env_archcpu(env), timeridx); } -static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri, - int timeridx) +static uint64_t do_tval_read(CPUARMState *env, int timeridx, uint64_t offset) { - uint64_t offset = 0; - - switch (timeridx) { - case GTIMER_VIRT: - case GTIMER_HYPVIRT: - offset = gt_virt_cnt_offset(env); - break; - case GTIMER_PHYS: - offset = gt_phys_cnt_offset(env); - break; - } - return (uint32_t)(env->cp15.c14_timer[timeridx].cval - (gt_get_countervalue(env) - offset)); } -static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, - int timeridx, - uint64_t value) +static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri, + int timeridx) { - uint64_t offset = 0; + uint64_t offset = gt_direct_access_timer_offset(env, timeridx); - switch (timeridx) { - case GTIMER_VIRT: - case GTIMER_HYPVIRT: - offset = gt_virt_cnt_offset(env); - break; - case GTIMER_PHYS: - offset = gt_phys_cnt_offset(env); - break; - } + return do_tval_read(env, timeridx, offset); +} +static void do_tval_write(CPUARMState *env, int timeridx, uint64_t value, + uint64_t offset) +{ trace_arm_gt_tval_write(timeridx, value); env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset + sextract64(value, 0, 32); gt_recalc_timer(env_archcpu(env), timeridx); } +static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, + int timeridx, + uint64_t value) +{ + uint64_t offset = gt_direct_access_timer_offset(env, timeridx); + + do_tval_write(env, timeridx, value, offset); +} + static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, int timeridx, uint64_t value) @@ -3006,13 +2828,21 @@ static void gt_virt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, static uint64_t gt_virt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri) { - return gt_tval_read(env, ri, GTIMER_VIRT); + /* + * This is CNTV_TVAL_EL02; unlike the underlying CNTV_TVAL_EL0 + * we always apply CNTVOFF_EL2. Special case that here rather + * than going into the generic gt_tval_read() and then having + * to re-detect that it's this register. 
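For the CNTV_TVAL_EL02 accessors below, the offset passed to do_tval_read()/do_tval_write() is always CNTVOFF_EL2. The arithmetic those helpers share is small enough to restate on its own; this sketch uses invented names and simply mirrors the CVAL/TVAL conversion:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* TVAL is a signed 32-bit downcounter view of CVAL relative to (counter - offset). */
static uint32_t sketch_tval_read(uint64_t cval, uint64_t counter, uint64_t offset)
{
    return (uint32_t)(cval - (counter - offset));
}

static uint64_t sketch_tval_write(int32_t tval, uint64_t counter, uint64_t offset)
{
    /* Writing TVAL rebases CVAL so the timer fires 'tval' ticks from now. */
    return (counter - offset) + (int64_t)tval;
}

int main(void)
{
    uint64_t counter = 5000, cntvoff = 1000;
    uint64_t cval = sketch_tval_write(300, counter, cntvoff);
    printf("cval=%" PRIu64 " tval=%" PRIu32 "\n",
           cval, sketch_tval_read(cval, counter, cntvoff));   /* cval=4300 tval=300 */
    return 0;
}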
+ * Note that the accessfn/perms mean we know we're at EL2 or EL3 here. + */ + return do_tval_read(env, GTIMER_VIRT, env->cp15.cntvoff_el2); } static void gt_virt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - gt_tval_write(env, ri, GTIMER_VIRT, value); + /* Similarly for writes to CNTV_TVAL_EL02 */ + do_tval_write(env, GTIMER_VIRT, value, env->cp15.cntvoff_el2); } static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3172,6 +3002,62 @@ static void gt_sec_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, gt_ctl_write(env, ri, GTIMER_SEC, value); } +static void gt_sec_pel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + gt_timer_reset(env, ri, GTIMER_S_EL2_PHYS); +} + +static void gt_sec_pel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_cval_write(env, ri, GTIMER_S_EL2_PHYS, value); +} + +static uint64_t gt_sec_pel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_tval_read(env, ri, GTIMER_S_EL2_PHYS); +} + +static void gt_sec_pel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_tval_write(env, ri, GTIMER_S_EL2_PHYS, value); +} + +static void gt_sec_pel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_ctl_write(env, ri, GTIMER_S_EL2_PHYS, value); +} + +static void gt_sec_vel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + gt_timer_reset(env, ri, GTIMER_S_EL2_VIRT); +} + +static void gt_sec_vel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_cval_write(env, ri, GTIMER_S_EL2_VIRT, value); +} + +static uint64_t gt_sec_vel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_tval_read(env, ri, GTIMER_S_EL2_VIRT); +} + +static void gt_sec_vel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_tval_write(env, ri, GTIMER_S_EL2_VIRT, value); +} + +static void gt_sec_vel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + gt_ctl_write(env, ri, GTIMER_S_EL2_VIRT, value); +} + static void gt_hv_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri) { gt_timer_reset(env, ri, GTIMER_HYPVIRT); @@ -3228,6 +3114,20 @@ void arm_gt_stimer_cb(void *opaque) gt_recalc_timer(cpu, GTIMER_SEC); } +void arm_gt_sel2timer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + gt_recalc_timer(cpu, GTIMER_S_EL2_PHYS); +} + +void arm_gt_sel2vtimer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + gt_recalc_timer(cpu, GTIMER_S_EL2_VIRT); +} + void arm_gt_hvtimer_cb(void *opaque) { ARMCPU *cpu = opaque; @@ -3568,7 +3468,7 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, } return CP_ACCESS_TRAP_EL3; } - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } } return CP_ACCESS_OK; @@ -3599,11 +3499,12 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, GetPhysAddrResult res = {}; /* - * I_MXTJT: Granule protection checks are not performed on the final address - * of a successful translation. + * I_MXTJT: Granule protection checks are not performed on the final + * address of a successful translation. This is a translation not a + * memory reference, so "memop = none = 0". 
*/ - ret = get_phys_addr_with_space_nogpc(env, value, access_type, mmu_idx, ss, - &res, &fi); + ret = get_phys_addr_with_space_nogpc(env, value, access_type, 0, + mmu_idx, ss, &res, &fi); /* * ATS operations only do S1 or S1+S2 translations, so we never @@ -3775,7 +3676,11 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_E3; + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_E30_3_PAN; + } else { + mmu_idx = ARMMMUIdx_E3; + } break; case 2: g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ @@ -3795,7 +3700,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_E10_0; + mmu_idx = ARMMMUIdx_E30_0; break; case 2: g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ @@ -3860,7 +3765,7 @@ static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, * scr_write() ensures that the NSE bit is not set otherwise. */ if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { - return CP_ACCESS_TRAP; + return CP_ACCESS_UNDEFINED; } return CP_ACCESS_OK; } @@ -3870,7 +3775,7 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, { if (arm_current_el(env) == 3 && !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { - return CP_ACCESS_TRAP; + return CP_ACCESS_UNDEFINED; } return at_e012_access(env, ri, isread); } @@ -4758,7 +4663,7 @@ static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } return CP_ACCESS_OK; } @@ -4848,9 +4753,9 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env, /* Cache invalidate/clean to Point of Coherency or Persistence... */ switch (arm_current_el(env)) { case 0: - /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */ + /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set. */ if (!(arm_sctlr(env, 0) & SCTLR_UCI)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } /* fall through */ case 1: @@ -4868,9 +4773,9 @@ static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags) /* Cache invalidate/clean to Point of Unification... */ switch (arm_current_el(env)) { case 0: - /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */ + /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set. 
*/ if (!(arm_sctlr(env, 0) & SCTLR_UCI)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } /* fall through */ case 1: @@ -4895,489 +4800,6 @@ static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri, return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU); } -/* - * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions - * Page D4-1736 (DDI0487A.b) - */ - -static int vae1_tlbmask(CPUARMState *env) -{ - uint64_t hcr = arm_hcr_el2_eff(env); - uint16_t mask; - - if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { - mask = ARMMMUIdxBit_E20_2 | - ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; - } else { - mask = ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; - } - return mask; -} - -static int vae2_tlbmask(CPUARMState *env) -{ - uint64_t hcr = arm_hcr_el2_eff(env); - uint16_t mask; - - if (hcr & HCR_E2H) { - mask = ARMMMUIdxBit_E20_2 | - ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; - } else { - mask = ARMMMUIdxBit_E2; - } - return mask; -} - -/* Return 56 if TBI is enabled, 64 otherwise. */ -static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, - uint64_t addr) -{ - uint64_t tcr = regime_tcr(env, mmu_idx); - int tbi = aa64_va_parameter_tbi(tcr, mmu_idx); - int select = extract64(addr, 55, 1); - - return (tbi >> select) & 1 ? 56 : 64; -} - -static int vae1_tlbbits(CPUARMState *env, uint64_t addr) -{ - uint64_t hcr = arm_hcr_el2_eff(env); - ARMMMUIdx mmu_idx; - - /* Only the regime of the mmu_idx below is significant. */ - if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { - mmu_idx = ARMMMUIdx_E20_0; - } else { - mmu_idx = ARMMMUIdx_E10_0; - } - - return tlbbits_for_regime(env, mmu_idx, addr); -} - -static int vae2_tlbbits(CPUARMState *env, uint64_t addr) -{ - uint64_t hcr = arm_hcr_el2_eff(env); - ARMMMUIdx mmu_idx; - - /* - * Only the regime of the mmu_idx below is significant. - * Regime EL2&0 has two ranges with separate TBI configuration, while EL2 - * only has one. 
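The tlbbits helpers being removed from this file implement the "56 if TBI is enabled, 64 otherwise" rule: when Top Byte Ignore applies to the half of the address space the VA falls in, only VA[55:0] identify the page to flush. A stand-alone sketch of that selection (the real code derives the TBI bits from TCR for the regime via aa64_va_parameter_tbi(); taking them as plain parameters here is a simplification):

#include <stdint.h>
#include <stdio.h>

/* How many low VA bits identify the page to flush, given the two TBI enables. */
static int sketch_significant_va_bits(uint64_t va, int tbi0, int tbi1)
{
    int select = (va >> 55) & 1;          /* which half of the VA space */
    int tbi = select ? tbi1 : tbi0;
    return tbi ? 56 : 64;
}

int main(void)
{
    printf("%d\n", sketch_significant_va_bits(0x0000aabb00001000ull, 1, 0));   /* 56 */
    printf("%d\n", sketch_significant_va_bits(0xffffaabb00001000ull, 1, 0));   /* 64 */
    return 0;
}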
- */ - if (hcr & HCR_E2H) { - mmu_idx = ARMMMUIdx_E20_2; - } else { - mmu_idx = ARMMMUIdx_E2; - } - - return tlbbits_for_regime(env, mmu_idx, addr); -} - -static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = vae1_tlbmask(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); -} - -static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = vae1_tlbmask(env); - - if (tlb_force_broadcast(env)) { - tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); - } else { - tlb_flush_by_mmuidx(cs, mask); - } -} - -static int e2_tlbmask(CPUARMState *env) -{ - return (ARMMMUIdxBit_E20_0 | - ARMMMUIdxBit_E20_2 | - ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2); -} - -static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = alle1_tlbmask(env); - - tlb_flush_by_mmuidx(cs, mask); -} - -static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = e2_tlbmask(env); - - tlb_flush_by_mmuidx(cs, mask); -} - -static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); -} - -static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = alle1_tlbmask(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); -} - -static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = e2_tlbmask(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); -} - -static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); -} - -static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA, EL2 - * Currently handles both VAE2 and VALE2, since we don't support - * flush-last-level-only. - */ - CPUState *cs = env_cpu(env); - int mask = vae2_tlbmask(env); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = vae2_tlbbits(env, pageaddr); - - tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); -} - -static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA, EL3 - * Currently handles both VAE3 and VALE3, since we don't support - * flush-last-level-only. - */ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); -} - -static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = vae1_tlbmask(env); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = vae1_tlbbits(env, pageaddr); - - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); -} - -static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA, EL1&0 (AArch64 version). 
- * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. - */ - CPUState *cs = env_cpu(env); - int mask = vae1_tlbmask(env); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = vae1_tlbbits(env, pageaddr); - - if (tlb_force_broadcast(env)) { - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); - } else { - tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); - } -} - -static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = vae2_tlbmask(env); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = vae2_tlbbits(env, pageaddr); - - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); -} - -static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); - - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E3, bits); -} - -static int ipas2e1_tlbmask(CPUARMState *env, int64_t value) -{ - /* - * The MSB of value is the NS field, which only applies if SEL2 - * is implemented and SCR_EL3.NS is not set (i.e. in secure mode). - */ - return (value >= 0 - && cpu_isar_feature(aa64_sel2, env_archcpu(env)) - && arm_is_secure_below_el3(env) - ? ARMMMUIdxBit_Stage2_S - : ARMMMUIdxBit_Stage2); -} - -static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = ipas2e1_tlbmask(env, value); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - if (tlb_force_broadcast(env)) { - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); - } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, mask); - } -} - -static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - int mask = ipas2e1_tlbmask(env, value); - uint64_t pageaddr = sextract64(value << 12, 0, 56); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); -} - -#ifdef TARGET_AARCH64 -typedef struct { - uint64_t base; - uint64_t length; -} TLBIRange; - -static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg) -{ - /* - * Note that the TLBI range TG field encoding differs from both - * TG0 and TG1 encodings. - */ - switch (tg) { - case 1: - return Gran4K; - case 2: - return Gran16K; - case 3: - return Gran64K; - default: - return GranInvalid; - } -} - -static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx, - uint64_t value) -{ - unsigned int page_size_granule, page_shift, num, scale, exponent; - /* Extract one bit to represent the va selector in use. */ - uint64_t select = sextract64(value, 36, 1); - ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false); - TLBIRange ret = { }; - ARMGranuleSize gran; - - page_size_granule = extract64(value, 46, 2); - gran = tlbi_range_tg_to_gran_size(page_size_granule); - - /* The granule encoded in value must match the granule in use. 
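The range encoding decoded here packs TG into bits [47:46], SCALE into [45:44], NUM into [43:39] and BaseADDR into [36:0], with the resulting length being (NUM + 1) << (5 * SCALE + 1 + page_shift). A small worked example of just the length computation (helper name invented):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Length of a TLBI range operation: (NUM + 1) << (5 * SCALE + 1 + page_shift). */
static uint64_t sketch_tlbi_range_length(unsigned num, unsigned scale,
                                         unsigned page_shift)
{
    unsigned exponent = 5 * scale + 1;
    return (uint64_t)(num + 1) << (exponent + page_shift);
}

int main(void)
{
    /* 4K granule (page_shift 12), SCALE=1, NUM=3: (3+1) << (6+12) = 1 MiB. */
    printf("%" PRIu64 "\n", sketch_tlbi_range_length(3, 1, 12));   /* 1048576 */
    return 0;
}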
*/ - if (gran != param.gran) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n", - page_size_granule); - return ret; - } - - page_shift = arm_granule_bits(gran); - num = extract64(value, 39, 5); - scale = extract64(value, 44, 2); - exponent = (5 * scale) + 1; - - ret.length = (num + 1) << (exponent + page_shift); - - if (param.select) { - ret.base = sextract64(value, 0, 37); - } else { - ret.base = extract64(value, 0, 37); - } - if (param.ds) { - /* - * With DS=1, BaseADDR is always shifted 16 so that it is able - * to address all 52 va bits. The input address is perforce - * aligned on a 64k boundary regardless of translation granule. - */ - page_shift = 16; - } - ret.base <<= page_shift; - - return ret; -} - -static void do_rvae_write(CPUARMState *env, uint64_t value, - int idxmap, bool synced) -{ - ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap); - TLBIRange range; - int bits; - - range = tlbi_aa64_get_range(env, one_idx, value); - bits = tlbbits_for_regime(env, one_idx, range.base); - - if (synced) { - tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env), - range.base, - range.length, - idxmap, - bits); - } else { - tlb_flush_range_by_mmuidx(env_cpu(env), range.base, - range.length, idxmap, bits); - } -} - -static void tlbi_aa64_rvae1_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, EL1&0. - * Currently handles all of RVAE1, RVAAE1, RVAALE1 and RVALE1, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. - */ - - do_rvae_write(env, value, vae1_tlbmask(env), - tlb_force_broadcast(env)); -} - -static void tlbi_aa64_rvae1is_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, Inner/Outer Shareable EL1&0. - * Currently handles all of RVAE1IS, RVAE1OS, RVAAE1IS, RVAAE1OS, - * RVAALE1IS, RVAALE1OS, RVALE1IS and RVALE1OS, since we don't support - * flush-for-specific-ASID-only, flush-last-level-only or inner/outer - * shareable specific flushes. - */ - - do_rvae_write(env, value, vae1_tlbmask(env), true); -} - -static void tlbi_aa64_rvae2_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, EL2. - * Currently handles all of RVAE2 and RVALE2, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. - */ - - do_rvae_write(env, value, vae2_tlbmask(env), - tlb_force_broadcast(env)); - - -} - -static void tlbi_aa64_rvae2is_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, Inner/Outer Shareable, EL2. - * Currently handles all of RVAE2IS, RVAE2OS, RVALE2IS and RVALE2OS, - * since we don't support flush-for-specific-ASID-only, - * flush-last-level-only or inner/outer shareable specific flushes. - */ - - do_rvae_write(env, value, vae2_tlbmask(env), true); - -} - -static void tlbi_aa64_rvae3_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, EL3. - * Currently handles all of RVAE3 and RVALE3, - * since we don't support flush-for-specific-ASID-only or - * flush-last-level-only. - */ - - do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); -} - -static void tlbi_aa64_rvae3is_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * Invalidate by VA range, EL3, Inner/Outer Shareable. 
- * Currently handles all of RVAE3IS, RVAE3OS, RVALE3IS and RVALE3OS, - * since we don't support flush-for-specific-ASID-only, - * flush-last-level-only or inner/outer specific flushes. - */ - - do_rvae_write(env, value, ARMMMUIdxBit_E3, true); -} - -static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - do_rvae_write(env, value, ipas2e1_tlbmask(env, value), - tlb_force_broadcast(env)); -} - -static void tlbi_aa64_ripas2e1is_write(CPUARMState *env, - const ARMCPRegInfo *ri, - uint64_t value) -{ - do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true); -} -#endif - static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -5393,7 +4815,7 @@ static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, } } else { if (!(env->cp15.sctlr_el[1] & SCTLR_DZE)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } if (hcr & HCR_TDZ) { return CP_ACCESS_TRAP_EL2; @@ -5426,7 +4848,7 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, * Access to SP_EL0 is undefined if it's being used as * the stack pointer. */ - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } return CP_ACCESS_OK; } @@ -5568,7 +4990,7 @@ static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri, mmap_lock(); - tb_invalidate_phys_range(start_address, end_address); + tb_invalidate_phys_range(env_cpu(env), start_address, end_address); mmap_unlock(); } @@ -5590,7 +5012,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, { .name = "FPCR", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, - .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END, + .access = PL0_RW, .type = ARM_CP_FPU, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, { .name = "FPSR", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, @@ -5672,99 +5094,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, .fgt = FGT_DCCISW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, - /* TLBI operations */ - { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVMALLE1IS, - .writefn = tlbi_aa64_vmalle1is_write }, - { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAE1IS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIASIDE1IS, - .writefn = tlbi_aa64_vmalle1is_write }, - { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAAE1IS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVALE1IS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, 
.opc2 = 7, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAALE1IS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVMALLE1, - .writefn = tlbi_aa64_vmalle1_write }, - { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAE1, - .writefn = tlbi_aa64_vae1_write }, - { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIASIDE1, - .writefn = tlbi_aa64_vmalle1_write }, - { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAAE1, - .writefn = tlbi_aa64_vae1_write }, - { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVALE1, - .writefn = tlbi_aa64_vae1_write }, - { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAALE1, - .writefn = tlbi_aa64_vae1_write }, - { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, - { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, - { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1is_write }, - { .name = "TLBI_VMALLS12E1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1is_write }, - { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, - { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, - { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1_write }, - { .name = "TLBI_VMALLS12E1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1is_write }, #ifndef CONFIG_USER_ONLY /* 64 bit address translation operations */ { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, @@ -5820,42 +5149,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.par_el[1]), .writefn = par_write }, #endif - /* TLB invalidate last level of translation table walk */ - { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - 
.type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbimva_is_write }, - { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, - .writefn = tlbimvaa_is_write }, - { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimva_write }, - { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, - .writefn = tlbimvaa_write }, - { .name = "TLBIMVALH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbimva_hyp_write }, - { .name = "TLBIMVALHIS", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbimva_hyp_is_write }, - { .name = "TLBIIPAS2", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_hyp_write }, - { .name = "TLBIIPAS2IS", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2is_hyp_write }, - { .name = "TLBIIPAS2L", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_hyp_write }, - { .name = "TLBIIPAS2LIS", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2is_hyp_write }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab }, @@ -6038,6 +5331,11 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) /* Clear RES0 bits. */ value &= valid_mask; + /* RW is RAO/WI if EL1 is AArch64 only */ + if (!cpu_isar_feature(aa64_aa32_el1, cpu)) { + value |= HCR_RW; + } + /* * These bits change the MMU setup: * HCR_VM enables stage 2 translation @@ -6095,6 +5393,12 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, do_hcr_write(env, value, MAKE_64BIT_MASK(32, 32)); } +static void hcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* hcr_write will set the RES1 bits on an AArch64-only CPU */ + hcr_write(env, ri, 0); +} + /* * Return the effective value of HCR_EL2, at the given security state. * Bits that are not included here: @@ -6216,6 +5520,14 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, if (cpu_isar_feature(aa64_nmi, cpu)) { valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI; } + /* FEAT_CMOW adds CMOW */ + if (cpu_isar_feature(aa64_cmow, cpu)) { + valid_mask |= HCRX_CMOW; + } + /* FEAT_XS adds FGTnXS, FnXS */ + if (cpu_isar_feature(aa64_xs, cpu)) { + valid_mask |= HCRX_FGTNXS | HCRX_FNXS; + } /* Clear RES0 bits. 
*/ env->cp15.hcrx_el2 = value & valid_mask; @@ -6322,6 +5634,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), .nv2_redirect_offset = 0x78, + .resetfn = hcr_reset, .writefn = hcr_write, .raw_writefn = raw_write }, { .name = "HCR", .state = ARM_CP_STATE_AA32, .type = ARM_CP_ALIAS | ARM_CP_IO, @@ -6437,50 +5750,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, - { .name = "TLBIALLNSNH", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiall_nsnh_write }, - { .name = "TLBIALLNSNHIS", - .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiall_nsnh_is_write }, - { .name = "TLBIALLH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiall_hyp_write }, - { .name = "TLBIALLHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiall_hyp_is_write }, - { .name = "TLBIMVAH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbimva_hyp_write }, - { .name = "TLBIMVAHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbimva_hyp_is_write }, - { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_alle2_write }, - { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2_write }, - { .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2_write }, - { .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_alle2is_write }, - { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2is_write }, - { .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2is_write }, #ifndef CONFIG_USER_ONLY /* * Unlike the other EL2-related AT operations, these must @@ -6581,7 +5850,7 @@ static CPAccessResult sel2_access(CPUARMState *env, const ARMCPRegInfo *ri, if (arm_current_el(env) == 3 || arm_is_secure_below_el3(env)) { return CP_ACCESS_OK; } - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } static const ARMCPRegInfo el2_sec_cp_reginfo[] = { @@ -6595,6 +5864,56 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = { .access = PL2_RW, .accessfn = sel2_access, .nv2_redirect_offset = 0x48, .fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) }, +#ifndef CONFIG_USER_ONLY + /* Secure EL2 Physical Timer */ + { .name = 
"CNTHPS_TVAL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .readfn = gt_sec_pel2_tval_read, + .writefn = gt_sec_pel2_tval_write, + .resetfn = gt_sec_pel2_timer_reset, + }, + { .name = "CNTHPS_CTL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 1, + .type = ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].ctl), + .resetvalue = 0, + .writefn = gt_sec_pel2_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTHPS_CVAL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 2, + .type = ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].cval), + .writefn = gt_sec_pel2_cval_write, .raw_writefn = raw_write, + }, + /* Secure EL2 Virtual Timer */ + { .name = "CNTHVS_TVAL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .readfn = gt_sec_vel2_tval_read, + .writefn = gt_sec_vel2_tval_write, + .resetfn = gt_sec_vel2_timer_reset, + }, + { .name = "CNTHVS_CTL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 1, + .type = ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].ctl), + .resetvalue = 0, + .writefn = gt_sec_vel2_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTHVS_CVAL_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 2, + .type = ARM_CP_IO, .access = PL2_RW, + .accessfn = gt_sel2timer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].cval), + .writefn = gt_sec_vel2_cval_write, .raw_writefn = raw_write, + }, +#endif }; static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6617,7 +5936,7 @@ static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri, if (isread) { return CP_ACCESS_OK; } - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } static const ARMCPRegInfo el3_cp_reginfo[] = { @@ -6693,30 +6012,6 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 1, .opc2 = 1, .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "TLBI_ALLE3IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle3is_write }, - { .name = "TLBI_VAE3IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3is_write }, - { .name = "TLBI_VALE3IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3is_write }, - { .name = "TLBI_ALLE3", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 0, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle3_write }, - { .name = "TLBI_VAE3", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3_write }, - { .name = "TLBI_VALE3", .state = 
ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3_write }, }; #ifndef CONFIG_USER_ONLY @@ -6729,7 +6024,7 @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } return CP_ACCESS_OK; } @@ -6827,7 +6122,7 @@ static CPAccessResult el2_e2h_e12_access(CPUARMState *env, } /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */ if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { - return CP_ACCESS_TRAP_UNCATEGORIZED; + return CP_ACCESS_UNDEFINED; } if (ri->orig_accessfn) { return ri->orig_accessfn(env, ri->opaque, isread); @@ -7004,7 +6299,7 @@ static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, } } else { if (!(env->cp15.sctlr_el[1] & SCTLR_UCT)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } if (hcr & HCR_TID2) { return CP_ACCESS_TRAP_EL2; @@ -7034,7 +6329,7 @@ static CPAccessResult access_terr(CPUARMState *env, const ARMCPRegInfo *ri, if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TERR)) { return CP_ACCESS_TRAP_EL2; } - if (el < 3 && (env->cp15.scr_el3 & SCR_TERR)) { + if (!arm_is_el3_or_mon(env) && (env->cp15.scr_el3 & SCR_TERR)) { return CP_ACCESS_TRAP_EL3; } return CP_ACCESS_OK; @@ -7232,7 +6527,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm) if (el <= 1 && !el_is_in_host(env, el)) { len = MIN(len, 0xf & (uint32_t)cr[1]); } - if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) { + if (el <= 2 && arm_is_el2_enabled(env)) { len = MIN(len, 0xf & (uint32_t)cr[2]); } if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -7294,7 +6589,6 @@ static const ARMCPRegInfo zcr_reginfo[] = { .writefn = zcr_write, .raw_writefn = raw_write }, }; -#ifdef TARGET_AARCH64 static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7303,7 +6597,7 @@ static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri, if (el == 0) { uint64_t sctlr = arm_sctlr(env, el); if (!(sctlr & SCTLR_EnTP2)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } } /* TODO: FEAT_FGT */ @@ -7344,7 +6638,7 @@ static void arm_reset_sve_state(CPUARMState *env) memset(env->vfp.zregs, 0, sizeof(env->vfp.zregs)); /* Recall that FFR is stored as pregs[16]. 
*/ memset(env->vfp.pregs, 0, sizeof(env->vfp.pregs)); - vfp_set_fpcr(env, 0x0800009f); + vfp_set_fpsr(env, 0x0800009f); } void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask) @@ -7459,14 +6753,6 @@ static const ARMCPRegInfo sme_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, }; -static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush(cs); -} - static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -7484,14 +6770,6 @@ static void gpccr_reset(CPUARMState *env, const ARMCPRegInfo *ri) env_archcpu(env)->reset_l0gptsz); } -static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - - tlb_flush_all_cpus_synced(cs); -} - static const ARMCPRegInfo rme_reginfo[] = { { .name = "GPCCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 6, @@ -7503,28 +6781,6 @@ static const ARMCPRegInfo rme_reginfo[] = { { .name = "MFAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 5, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mfar_el3) }, - { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_paall_write }, - { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_paallos_write }, - /* - * QEMU does not have a way to invalidate by physical address, thus - * invalidating a range of physical addresses is accomplished by - * flushing all tlb entries in the outer shareable domain, - * just like PAALLOS. - */ - { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_paallos_write }, - { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_paallos_write }, { .name = "DC_CIPAPA", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 1, .access = PL3_W, .type = ARM_CP_NOP }, @@ -7566,7 +6822,6 @@ static const ARMCPRegInfo nmi_reginfo[] = { .writefn = aa64_allint_write, .readfn = aa64_allint_read, .resetfn = arm_cp_reset_ignore }, }; -#endif /* TARGET_AARCH64 */ static void define_pmu_regs(ARMCPU *cpu) { @@ -7677,7 +6932,7 @@ static void define_pmu_regs(ARMCPU *cpu) static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr1 = cpu->isar.id_pfr1; + uint64_t pfr1 = GET_IDREG(&cpu->isar, ID_PFR1); if (env->gicv3state) { pfr1 |= 1 << 28; @@ -7688,7 +6943,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr0 = cpu->isar.id_aa64pfr0; + uint64_t pfr0 = GET_IDREG(&cpu->isar, ID_AA64PFR0); if (env->gicv3state) { pfr0 |= 1 << 24; @@ -7719,8 +6974,8 @@ static CPAccessResult access_lor_other(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (arm_is_secure_below_el3(env)) { - /* Access denied in secure mode. 
*/ - return CP_ACCESS_TRAP; + /* UNDEF if SCR_EL3.NS == 0 */ + return CP_ACCESS_UNDEFINED; } return access_lor_ns(env, ri, isread); } @@ -7758,7 +7013,6 @@ static const ARMCPRegInfo lor_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, }; -#ifdef TARGET_AARCH64 static CPAccessResult access_pauth(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7830,210 +7084,6 @@ static const ARMCPRegInfo pauth_reginfo[] = { .fieldoffset = offsetof(CPUARMState, keys.apib.hi) }, }; -static const ARMCPRegInfo tlbirange_reginfo[] = { - { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAE1IS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAAE1IS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVALE1IS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAALE1IS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAE1OS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAAE1OS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVALE1OS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAALE1OS, - .writefn = tlbi_aa64_rvae1is_write }, - { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAE1, - .writefn = tlbi_aa64_rvae1_write }, - { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAAE1, - .writefn = tlbi_aa64_rvae1_write }, - { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVALE1, - .writefn = tlbi_aa64_rvae1_write }, - { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIRVAALE1, - .writefn = tlbi_aa64_rvae1_write }, - { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, 
.opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ripas2e1is_write }, - { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ripas2e1is_write }, - { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2is_write }, - { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2is_write }, - { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ripas2e1_write }, - { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ripas2e1_write }, - { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2is_write }, - { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2is_write }, - { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2_write }, - { .name = "TLBI_RVALE2", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_rvae2_write }, - { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3is_write }, - { .name = "TLBI_RVALE3IS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3is_write }, - { .name = "TLBI_RVAE3OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3is_write }, - { .name = "TLBI_RVALE3OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3is_write }, - { .name = "TLBI_RVAE3", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3_write }, - { .name = "TLBI_RVALE3", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_rvae3_write }, -}; - -static const ARMCPRegInfo tlbios_reginfo[] = { - { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVMALLE1OS, - .writefn = tlbi_aa64_vmalle1is_write }, - { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, 
.opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1, - .fgt = FGT_TLBIVAE1OS, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIASIDE1OS, - .writefn = tlbi_aa64_vmalle1is_write }, - { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAAE1OS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVALE1OS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, - .fgt = FGT_TLBIVAALE1OS, - .writefn = tlbi_aa64_vae1is_write }, - { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_alle2is_write }, - { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2is_write }, - { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1is_write }, - { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = tlbi_aa64_vae2is_write }, - { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle1is_write }, - { .name = "TLBI_IPAS2E1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NOP }, - { .name = "TLBI_RIPAS2E1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 3, - .access = PL2_W, .type = ARM_CP_NOP }, - { .name = "TLBI_IPAS2LE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NOP }, - { .name = "TLBI_RIPAS2LE1OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 7, - .access = PL2_W, .type = ARM_CP_NOP }, - { .name = "TLBI_ALLE3OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_alle3is_write }, - { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3is_write }, - { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5, - .access = PL3_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_vae3is_write }, -}; - static uint64_t rndr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) { Error *err = NULL; @@ -8345,7 +7395,7 @@ static CPAccessResult 
access_scxtnum(CPUARMState *env, const ARMCPRegInfo *ri, if (hcr & HCR_TGE) { return CP_ACCESS_TRAP_EL2; } - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } } else if (el < 2 && (env->cp15.sctlr_el[2] & SCTLR_TSCXT)) { return CP_ACCESS_TRAP_EL2; @@ -8455,8 +7505,6 @@ static const ARMCPRegInfo nv2_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) }, }; -#endif /* TARGET_AARCH64 */ - static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -8465,7 +7513,7 @@ static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, if (el == 0) { uint64_t sctlr = arm_sctlr(env, el); if (!(sctlr & SCTLR_EnRCTX)) { - return CP_ACCESS_TRAP; + return CP_ACCESS_TRAP_EL1; } } else if (el == 1) { uint64_t hcr = arm_hcr_el2_eff(env); @@ -8702,6 +7750,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) { /* Register all the coprocessor registers based on feature bits */ CPUARMState *env = &cpu->env; + ARMISARegisters *isar = &cpu->isar; + if (arm_feature(env, ARM_FEATURE_M)) { /* M profile has no coprocessor registers */ return; @@ -8716,6 +7766,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, not_v8_cp_reginfo); } +#ifndef CONFIG_USER_ONLY + define_tlb_insn_regs(cpu); +#endif + if (arm_feature(env, ARM_FEATURE_V6)) { /* The ID registers all have impdef reset values */ ARMCPRegInfo v6_idregs[] = { @@ -8723,7 +7777,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_pfr0 }, + .resetvalue = GET_IDREG(isar, ID_PFR0)}, /* * ID_PFR1 is not a plain ARM_CP_CONST because we don't know * the value of the GIC field until after we define these regs. 
@@ -8734,7 +7788,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .accessfn = access_aa32_tid3, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_pfr1, + .resetvalue = GET_IDREG(isar, ID_PFR1), #else .type = ARM_CP_NO_RAW, .accessfn = access_aa32_tid3, @@ -8746,7 +7800,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_dfr0 }, + .resetvalue = GET_IDREG(isar, ID_DFR0)}, { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8756,62 +7810,62 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_MMFR0)}, { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_MMFR1)}, { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_MMFR2)}, { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_MMFR3)}, { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar0 }, + .resetvalue = GET_IDREG(isar, ID_ISAR0)}, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar1 }, + .resetvalue = GET_IDREG(isar, ID_ISAR1)}, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar2 }, + .resetvalue = GET_IDREG(isar, ID_ISAR2)}, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar3 }, + .resetvalue = GET_IDREG(isar, ID_ISAR3) }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar4 }, + .resetvalue = GET_IDREG(isar, ID_ISAR4) }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar5 }, + .resetvalue = GET_IDREG(isar, ID_ISAR5) }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr4 }, + .resetvalue = GET_IDREG(isar, ID_MMFR4)}, { .name = "ID_ISAR6", .state = 
ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar6 }, + .resetvalue = GET_IDREG(isar, ID_ISAR6) }, }; define_arm_cp_regs(cpu, v6_idregs); define_arm_cp_regs(cpu, v6_cp_reginfo); @@ -8821,10 +7875,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_V6K)) { define_arm_cp_regs(cpu, v6k_cp_reginfo); } - if (arm_feature(env, ARM_FEATURE_V7MP) && - !arm_feature(env, ARM_FEATURE_PMSA)) { - define_arm_cp_regs(cpu, v7mp_cp_reginfo); - } if (arm_feature(env, ARM_FEATURE_V7VE)) { define_arm_cp_regs(cpu, pmovsset_cp_reginfo); } @@ -8866,7 +7916,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_aa64pfr0 + .resetvalue = GET_IDREG(isar, ID_AA64PFR0) #else .type = ARM_CP_NO_RAW, .accessfn = access_aa64_tid3, @@ -8878,7 +7928,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64pfr1}, + .resetvalue = GET_IDREG(isar, ID_AA64PFR1)}, { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -8893,12 +7943,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64zfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ZFR0)}, { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64smfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64SMFR0)}, { .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -8913,12 +7963,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR0) }, { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR1) }, { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -8953,17 +8003,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR0)}, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar1 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR1)}, { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - 
.resetvalue = cpu->isar.id_aa64isar2 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR2)}, { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8993,22 +8043,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR0)}, { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR1) }, { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR2) }, { .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR3) }, { .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, @@ -9080,17 +8130,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_pfr2 }, + .resetvalue = GET_IDREG(isar, ID_PFR2)}, { .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_dfr1 }, + .resetvalue = GET_IDREG(isar, ID_DFR1)}, { .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_mmfr5 }, + .resetvalue = GET_IDREG(isar, ID_MMFR5)}, { .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, @@ -9899,7 +8949,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo); } -#ifdef TARGET_AARCH64 if (cpu_isar_feature(aa64_sme, cpu)) { define_arm_cp_regs(cpu, sme_reginfo); } @@ -9909,12 +8958,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_rndr, cpu)) { define_arm_cp_regs(cpu, rndr_reginfo); } - if (cpu_isar_feature(aa64_tlbirange, cpu)) { - define_arm_cp_regs(cpu, tlbirange_reginfo); - } - if (cpu_isar_feature(aa64_tlbios, cpu)) { - define_arm_cp_regs(cpu, tlbios_reginfo); - } /* Data Cache clean instructions up to PoP */ if (cpu_isar_feature(aa64_dcpop, cpu)) { define_one_arm_cp_reg(cpu, dcpop_reg); @@ -9966,7 +9009,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_nmi, cpu)) { define_arm_cp_regs(cpu, nmi_reginfo); } -#endif if (cpu_isar_feature(any_predinv, cpu)) { define_arm_cp_regs(cpu, predinv_reginfo); @@ -10327,6 +9369,31 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, if (r->state != state && r->state != ARM_CP_STATE_BOTH) { continue; } + if ((r->type & ARM_CP_ADD_TLBI_NXS) && + 
cpu_isar_feature(aa64_xs, cpu)) { + /* + * This is a TLBI insn which has an NXS variant. The + * NXS variant is at the same encoding except that + * crn is +1, and has the same behaviour except for + * fine-grained trapping. Add the NXS insn here and + * then fall through to add the normal register. + * add_cpreg_to_hashtable() copies the cpreg struct + * and name that it is passed, so it's OK to use + * a local struct here. + */ + ARMCPRegInfo nxs_ri = *r; + g_autofree char *name = g_strdup_printf("%sNXS", r->name); + + assert(state == ARM_CP_STATE_AA64); + assert(nxs_ri.crn < 0xf); + nxs_ri.crn++; + if (nxs_ri.fgt) { + nxs_ri.fgt |= R_FGT_NXS_MASK; + } + add_cpreg_to_hashtable(cpu, &nxs_ri, opaque, state, + ARM_CP_SECSTATE_NS, + crm, opc1, opc2, name); + } if (state == ARM_CP_STATE_AA32) { /* * Under AArch32 CP registers can be common @@ -10765,7 +9832,7 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint64_t hcr_el2; if (arm_feature(env, ARM_FEATURE_EL3)) { - rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW); + rw = arm_scr_rw_eff(env); } else { /* * Either EL2 is the highest EL (and so the EL2 register width @@ -10840,6 +9907,7 @@ void arm_log_exception(CPUState *cs) [EXCP_NMI] = "NMI", [EXCP_VINMI] = "Virtual IRQ NMI", [EXCP_VFNMI] = "Virtual FIQ NMI", + [EXCP_MON_TRAP] = "Monitor Trap", }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { @@ -11406,6 +10474,16 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) mask = CPSR_A | CPSR_I | CPSR_F; offset = 0; break; + case EXCP_MON_TRAP: + new_mode = ARM_CPU_MODE_MON; + addr = 0x04; + mask = CPSR_A | CPSR_I | CPSR_F; + if (env->thumb) { + offset = 2; + } else { + offset = 4; + } + break; default: cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); return; /* Never happens. Keep compiler happy. */ @@ -11539,7 +10617,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; unsigned int new_el = env->exception.target_el; - target_ulong addr = env->cp15.vbar_el[new_el]; + vaddr addr = env->cp15.vbar_el[new_el]; unsigned int new_mode = aarch64_pstate_mode(new_el, true); unsigned int old_mode; unsigned int cur_el = arm_current_el(env); @@ -11563,7 +10641,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) switch (new_el) { case 3: - is_aa64 = (env->cp15.scr_el3 & SCR_RW) != 0; + is_aa64 = arm_scr_rw_eff(env); break; case 2: hcr = arm_hcr_el2_eff(env); @@ -11861,10 +10939,20 @@ void arm_cpu_do_interrupt(CPUState *cs) uint64_t arm_sctlr(CPUARMState *env, int el) { - /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ + /* Only EL0 needs to be adjusted for EL1&0 or EL2&0 or EL3&0 */ if (el == 0) { ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + el = 2; + break; + case ARMMMUIdx_E30_0: + el = 3; + break; + default: + el = 1; + break; + } } return env->cp15.sctlr_el[el]; } @@ -12128,289 +11216,6 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, }; } -/* - * Note that signed overflow is undefined in C. The following routines are - * careful to use unsigned types where modulo arithmetic is required. - * Failure to do so _will_ break on newer gcc. - */ - -/* Signed saturating arithmetic. */ - -/* Perform 16-bit signed saturating addition. 
*/ -static inline uint16_t add16_sat(uint16_t a, uint16_t b) -{ - uint16_t res; - - res = a + b; - if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { - if (a & 0x8000) { - res = 0x8000; - } else { - res = 0x7fff; - } - } - return res; -} - -/* Perform 8-bit signed saturating addition. */ -static inline uint8_t add8_sat(uint8_t a, uint8_t b) -{ - uint8_t res; - - res = a + b; - if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { - if (a & 0x80) { - res = 0x80; - } else { - res = 0x7f; - } - } - return res; -} - -/* Perform 16-bit signed saturating subtraction. */ -static inline uint16_t sub16_sat(uint16_t a, uint16_t b) -{ - uint16_t res; - - res = a - b; - if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { - if (a & 0x8000) { - res = 0x8000; - } else { - res = 0x7fff; - } - } - return res; -} - -/* Perform 8-bit signed saturating subtraction. */ -static inline uint8_t sub8_sat(uint8_t a, uint8_t b) -{ - uint8_t res; - - res = a - b; - if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { - if (a & 0x80) { - res = 0x80; - } else { - res = 0x7f; - } - } - return res; -} - -#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); -#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); -#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); -#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); -#define PFX q - -#include "op_addsub.h" - -/* Unsigned saturating arithmetic. */ -static inline uint16_t add16_usat(uint16_t a, uint16_t b) -{ - uint16_t res; - res = a + b; - if (res < a) { - res = 0xffff; - } - return res; -} - -static inline uint16_t sub16_usat(uint16_t a, uint16_t b) -{ - if (a > b) { - return a - b; - } else { - return 0; - } -} - -static inline uint8_t add8_usat(uint8_t a, uint8_t b) -{ - uint8_t res; - res = a + b; - if (res < a) { - res = 0xff; - } - return res; -} - -static inline uint8_t sub8_usat(uint8_t a, uint8_t b) -{ - if (a > b) { - return a - b; - } else { - return 0; - } -} - -#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); -#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); -#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); -#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); -#define PFX uq - -#include "op_addsub.h" - -/* Signed modulo arithmetic. */ -#define SARITH16(a, b, n, op) do { \ - int32_t sum; \ - sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ - RESULT(sum, n, 16); \ - if (sum >= 0) \ - ge |= 3 << (n * 2); \ - } while (0) - -#define SARITH8(a, b, n, op) do { \ - int32_t sum; \ - sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ - RESULT(sum, n, 8); \ - if (sum >= 0) \ - ge |= 1 << n; \ - } while (0) - - -#define ADD16(a, b, n) SARITH16(a, b, n, +) -#define SUB16(a, b, n) SARITH16(a, b, n, -) -#define ADD8(a, b, n) SARITH8(a, b, n, +) -#define SUB8(a, b, n) SARITH8(a, b, n, -) -#define PFX s -#define ARITH_GE - -#include "op_addsub.h" - -/* Unsigned modulo arithmetic. 
*/ -#define ADD16(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ - RESULT(sum, n, 16); \ - if ((sum >> 16) == 1) \ - ge |= 3 << (n * 2); \ - } while (0) - -#define ADD8(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ - RESULT(sum, n, 8); \ - if ((sum >> 8) == 1) \ - ge |= 1 << n; \ - } while (0) - -#define SUB16(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ - RESULT(sum, n, 16); \ - if ((sum >> 16) == 0) \ - ge |= 3 << (n * 2); \ - } while (0) - -#define SUB8(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ - RESULT(sum, n, 8); \ - if ((sum >> 8) == 0) \ - ge |= 1 << n; \ - } while (0) - -#define PFX u -#define ARITH_GE - -#include "op_addsub.h" - -/* Halved signed arithmetic. */ -#define ADD16(a, b, n) \ - RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) -#define SUB16(a, b, n) \ - RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) -#define ADD8(a, b, n) \ - RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) -#define SUB8(a, b, n) \ - RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) -#define PFX sh - -#include "op_addsub.h" - -/* Halved unsigned arithmetic. */ -#define ADD16(a, b, n) \ - RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) -#define SUB16(a, b, n) \ - RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) -#define ADD8(a, b, n) \ - RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) -#define SUB8(a, b, n) \ - RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) -#define PFX uh - -#include "op_addsub.h" - -static inline uint8_t do_usad(uint8_t a, uint8_t b) -{ - if (a > b) { - return a - b; - } else { - return b - a; - } -} - -/* Unsigned sum of absolute byte differences. */ -uint32_t HELPER(usad8)(uint32_t a, uint32_t b) -{ - uint32_t sum; - sum = do_usad(a, b); - sum += do_usad(a >> 8, b >> 8); - sum += do_usad(a >> 16, b >> 16); - sum += do_usad(a >> 24, b >> 24); - return sum; -} - -/* For ARMv6 SEL instruction. */ -uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) -{ - uint32_t mask; - - mask = 0; - if (flags & 1) { - mask |= 0xff; - } - if (flags & 2) { - mask |= 0xff00; - } - if (flags & 4) { - mask |= 0xff0000; - } - if (flags & 8) { - mask |= 0xff000000; - } - return (a & mask) | (b & ~mask); -} - -/* - * CRC helpers. - * The upper bytes of val (above the number specified by 'bytes') must have - * been zeroed out by the caller. - */ -uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) -{ - uint8_t buf[4]; - - stl_le_p(buf, val); - - /* zlib crc32 converts the accumulator and output to one's complement. */ - return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; -} - -uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) -{ - uint8_t buf[4]; - - stl_le_p(buf, val); - - /* Linux crc32c converts the output to one's complement. 
*/ - return crc32c(acc, buf, bytes) ^ 0xffffffff; -} /* * Return the exception level to which FP-disabled exceptions should @@ -12532,6 +11337,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E30_0: return 0; case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: @@ -12541,6 +11347,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) case ARMMMUIdx_E20_2_PAN: return 2; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_3_PAN: return 3; default: g_assert_not_reached(); @@ -12569,6 +11376,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) hcr = arm_hcr_el2_eff(env); if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { idx = ARMMMUIdx_E20_0; + } else if (arm_is_secure_below_el3(env) && + !arm_el_is_aa64(env, 3)) { + idx = ARMMMUIdx_E30_0; } else { idx = ARMMMUIdx_E10_0; } @@ -12593,6 +11403,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) } break; case 3: + if (!arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) { + return ARMMMUIdx_E30_3_PAN; + } return ARMMMUIdx_E3; default: g_assert_not_reached(); @@ -12606,116 +11419,6 @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env) return arm_mmu_idx_el(env, arm_current_el(env)); } -static bool mve_no_pred(CPUARMState *env) -{ - /* - * Return true if there is definitely no predication of MVE - * instructions by VPR or LTPSIZE. (Returning false even if there - * isn't any predication is OK; generated code will just be - * a little worse.) - * If the CPU does not implement MVE then this TB flag is always 0. - * - * NOTE: if you change this logic, the "recalculate s->mve_no_pred" - * logic in gen_update_fp_context() needs to be updated to match. - * - * We do not include the effect of the ECI bits here -- they are - * tracked in other TB flags. This simplifies the logic for - * "when did we emit code that changes the MVE_NO_PRED TB flag - * and thus need to end the TB?". - */ - if (cpu_isar_feature(aa32_mve, env_archcpu(env))) { - return false; - } - if (env->v7m.vpr) { - return false; - } - if (env->v7m.ltpsize < 4) { - return false; - } - return true; -} - -void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - CPUARMTBFlags flags; - - assert_hflags_rebuild_correctly(env); - flags = env->hflags; - - if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { - *pc = env->pc; - if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { - DP_TBFLAG_A64(flags, BTYPE, env->btype); - } - } else { - *pc = env->regs[15]; - - if (arm_feature(env, ARM_FEATURE_M)) { - if (arm_feature(env, ARM_FEATURE_M_SECURITY) && - FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) - != env->v7m.secure) { - DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1); - } - - if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) && - (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) || - (env->v7m.secure && - !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) { - /* - * ASPEN is set, but FPCA/SFPA indicate that there is no - * active FP context; we must create a new FP context before - * executing any FP insn. - */ - DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1); - } - - bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; - if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { - DP_TBFLAG_M32(flags, LSPACT, 1); - } - - if (mve_no_pred(env)) { - DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); - } - } else { - /* - * Note that XSCALE_CPAR shares bits with VECSTRIDE. - * Note that VECLEN+VECSTRIDE are RES0 for M-profile. 
- */ - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar); - } else { - DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); - DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); - } - if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { - DP_TBFLAG_A32(flags, VFPEN, 1); - } - } - - DP_TBFLAG_AM32(flags, THUMB, env->thumb); - DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits); - } - - /* - * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine - * states defined in the ARM ARM for software singlestep: - * SS_ACTIVE PSTATE.SS State - * 0 x Inactive (the TB flag for SS is always 0) - * 1 0 Active-pending - * 1 1 Active-not-pending - * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. - */ - if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { - DP_TBFLAG_ANY(flags, PSTATE__SS, 1); - } - - *pflags = flags.flags; - *cs_base = flags.flags2; -} - -#ifdef TARGET_AARCH64 /* * The manual says that when SVE is enabled and VQ is widened the * implementation is allowed to zero the previously inaccessible @@ -12830,7 +11533,6 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, aarch64_sve_narrow_vq(env, new_len + 1); } } -#endif #ifndef CONFIG_USER_ONLY ARMSecuritySpace arm_security_space(CPUARMState *env) diff --git a/target/arm/helper.h b/target/arm/helper.h index eca2043..f340a49 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1,1093 +1,6 @@ -DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) -DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) +/* SPDX-License-Identifier: GPL-2.0-or-later */ -DEF_HELPER_3(add_setq, i32, env, i32, i32) -DEF_HELPER_3(add_saturate, i32, env, i32, i32) -DEF_HELPER_3(sub_saturate, i32, env, i32, i32) -DEF_HELPER_3(add_usaturate, i32, env, i32, i32) -DEF_HELPER_3(sub_usaturate, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32) -DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32) - -#define PAS_OP(pfx) \ - DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \ - DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr) - -PAS_OP(s) -PAS_OP(u) -#undef PAS_OP - -#define PAS_OP(pfx) \ - DEF_HELPER_2(pfx ## add8, i32, i32, i32) \ - DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \ - DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \ - DEF_HELPER_2(pfx ## add16, i32, i32, i32) \ - DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \ - DEF_HELPER_2(pfx ## subaddx, i32, i32, i32) -PAS_OP(q) -PAS_OP(sh) -PAS_OP(uq) -PAS_OP(uh) -#undef PAS_OP - -DEF_HELPER_3(ssat, i32, env, i32, i32) -DEF_HELPER_3(usat, i32, env, i32, i32) -DEF_HELPER_3(ssat16, i32, env, i32, i32) -DEF_HELPER_3(usat16, i32, env, i32, i32) - -DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) - -DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, - i32, i32, i32, i32) -DEF_HELPER_2(exception_internal, noreturn, env, i32) -DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32) -DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32) -DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32) -DEF_HELPER_2(exception_swstep, noreturn, env, i32) -DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl) -DEF_HELPER_1(setend, void, env) -DEF_HELPER_2(wfi, void, env, i32) -DEF_HELPER_1(wfe, void, env) -DEF_HELPER_2(wfit, 
void, env, i64) -DEF_HELPER_1(yield, void, env) -DEF_HELPER_1(pre_hvc, void, env) -DEF_HELPER_2(pre_smc, void, env, i32) -DEF_HELPER_1(vesb, void, env) - -DEF_HELPER_3(cpsr_write, void, env, i32, i32) -DEF_HELPER_2(cpsr_write_eret, void, env, i32) -DEF_HELPER_1(cpsr_read, i32, env) - -DEF_HELPER_3(v7m_msr, void, env, i32, i32) -DEF_HELPER_2(v7m_mrs, i32, env, i32) - -DEF_HELPER_2(v7m_bxns, void, env, i32) -DEF_HELPER_2(v7m_blxns, void, env, i32) - -DEF_HELPER_3(v7m_tt, i32, env, i32, i32) - -DEF_HELPER_1(v7m_preserve_fp_state, void, env) - -DEF_HELPER_2(v7m_vlstm, void, env, i32) -DEF_HELPER_2(v7m_vlldm, void, env, i32) - -DEF_HELPER_2(v8m_stackcheck, void, env, i32) - -DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) - -DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) -DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32) -DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32) -DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32) -DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) -DEF_HELPER_2(get_cp_reg, i32, env, cptr) -DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) -DEF_HELPER_2(get_cp_reg64, i64, env, cptr) - -DEF_HELPER_2(get_r13_banked, i32, env, i32) -DEF_HELPER_3(set_r13_banked, void, env, i32, i32) - -DEF_HELPER_3(mrs_banked, i32, env, i32, i32) -DEF_HELPER_4(msr_banked, void, env, i32, i32, i32) - -DEF_HELPER_2(get_user_reg, i32, env, i32) -DEF_HELPER_3(set_user_reg, void, env, i32, i32) - -DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env) -DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int) -DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) -DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) -DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) - -DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) - -DEF_HELPER_1(vfp_get_fpscr, i32, env) -DEF_HELPER_2(vfp_set_fpscr, void, env, i32) - -DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) -DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr) -DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) -DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) -DEF_HELPER_2(vfp_sqrth, f16, f16, env) -DEF_HELPER_2(vfp_sqrts, f32, f32, env) -DEF_HELPER_2(vfp_sqrtd, f64, f64, env) -DEF_HELPER_3(vfp_cmph, void, f16, f16, env) -DEF_HELPER_3(vfp_cmps, void, f32, f32, env) -DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) -DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env) -DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) -DEF_HELPER_3(vfp_cmped, void, f64, f64, env) - -DEF_HELPER_2(vfp_fcvtds, f64, f32, env) 
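(The long runs of DEF_HELPER_* declarations removed throughout this helper.h hunk are collapsed into an include of "tcg/helper.h" at the end of the file's diff. Those lists are consumed by QEMU's X-macro machinery: the same list is expanded several times with different definitions of the DEF_HELPER macros to emit C prototypes, code-generator wrappers and helper-registration records. A minimal, self-contained sketch of that pattern follows; the list contents and the HELPER_LIST/AS_* names are invented for illustration and are not QEMU's real macro set.)

#include <stdio.h>

/* One entry per helper; in QEMU this role is played by the helper.h lists. */
#define HELPER_LIST(X) \
    X(usad8)           \
    X(rbit)            \
    X(crc32)

/* Expansion 1: an enum with one identifier per helper. */
enum {
#define AS_ENUM(name) HELPER_##name,
    HELPER_LIST(AS_ENUM)
#undef AS_ENUM
    HELPER_COUNT
};

/* Expansion 2: a table mapping those identifiers to printable names. */
static const char *const helper_names[HELPER_COUNT] = {
#define AS_NAME(name) [HELPER_##name] = #name,
    HELPER_LIST(AS_NAME)
#undef AS_NAME
};

int main(void)
{
    for (int i = 0; i < HELPER_COUNT; i++) {
        printf("%d -> helper_%s\n", i, helper_names[i]);
    }
    return 0;
}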
-DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) -DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr) -DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, ptr) - -DEF_HELPER_2(vfp_uitoh, f16, i32, ptr) -DEF_HELPER_2(vfp_uitos, f32, i32, ptr) -DEF_HELPER_2(vfp_uitod, f64, i32, ptr) -DEF_HELPER_2(vfp_sitoh, f16, i32, ptr) -DEF_HELPER_2(vfp_sitos, f32, i32, ptr) -DEF_HELPER_2(vfp_sitod, f64, i32, ptr) - -DEF_HELPER_2(vfp_touih, i32, f16, ptr) -DEF_HELPER_2(vfp_touis, i32, f32, ptr) -DEF_HELPER_2(vfp_touid, i32, f64, ptr) -DEF_HELPER_2(vfp_touizh, i32, f16, ptr) -DEF_HELPER_2(vfp_touizs, i32, f32, ptr) -DEF_HELPER_2(vfp_touizd, i32, f64, ptr) -DEF_HELPER_2(vfp_tosih, s32, f16, ptr) -DEF_HELPER_2(vfp_tosis, s32, f32, ptr) -DEF_HELPER_2(vfp_tosid, s32, f64, ptr) -DEF_HELPER_2(vfp_tosizh, s32, f16, ptr) -DEF_HELPER_2(vfp_tosizs, s32, f32, ptr) -DEF_HELPER_2(vfp_tosizd, s32, f64, ptr) - -DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr) -DEF_HELPER_3(vfp_touqh, i64, f16, i32, ptr) -DEF_HELPER_3(vfp_tosqh, i64, f16, i32, ptr) -DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr) -DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr) -DEF_HELPER_3(vfp_touqs, i64, f32, i32, ptr) -DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_tosqd, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_touqd, i64, f64, i32, ptr) -DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_sqtos, f32, i64, i32, ptr) -DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_uqtos, f32, i64, i32, ptr) -DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_shtoh, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr) -DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr) - -DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, ptr) -DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, ptr) 
-DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, ptr) -DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, ptr) -DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, ptr) - -DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr) - -DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, ptr, i32) -DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) - -DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) -DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) -DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr) - -DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) -DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr) -DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) -DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) -DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64) - -DEF_HELPER_3(shl_cc, i32, env, i32, i32) -DEF_HELPER_3(shr_cc, i32, env, i32, i32) -DEF_HELPER_3(sar_cc, i32, env, i32, i32) -DEF_HELPER_3(ror_cc, i32, env, i32, i32) - -DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr) -DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr) -DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) - -DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env) -DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr) - -DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) - -/* neon_helper.c */ -DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) -DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) -DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) -DEF_HELPER_2(neon_pmin_s16, i32, i32, i32) -DEF_HELPER_2(neon_pmax_u8, i32, i32, i32) -DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) -DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) -DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) - -DEF_HELPER_2(neon_shl_u16, i32, i32, i32) -DEF_HELPER_2(neon_shl_s16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s16, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) -DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) -DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) -DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) -DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32) 
-DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_2(neon_add_u8, i32, i32, i32) -DEF_HELPER_2(neon_add_u16, i32, i32, i32) -DEF_HELPER_2(neon_sub_u8, i32, i32, i32) -DEF_HELPER_2(neon_sub_u16, i32, i32, i32) -DEF_HELPER_2(neon_mul_u8, i32, i32, i32) -DEF_HELPER_2(neon_mul_u16, i32, i32, i32) - -DEF_HELPER_2(neon_tst_u8, i32, i32, i32) -DEF_HELPER_2(neon_tst_u16, i32, i32, i32) -DEF_HELPER_2(neon_tst_u32, i32, i32, i32) - -DEF_HELPER_1(neon_clz_u8, i32, i32) -DEF_HELPER_1(neon_clz_u16, i32, i32) -DEF_HELPER_1(neon_cls_s8, i32, i32) -DEF_HELPER_1(neon_cls_s16, i32, i32) -DEF_HELPER_1(neon_cls_s32, i32, i32) -DEF_HELPER_1(neon_cnt_u8, i32, i32) -DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32) - -DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) -DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32) -DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32) -DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) -DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32) 
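(The neon_qdmulh_s16/neon_qrdmulh_s16 declarations and their qrdmlah/qrdmlsh variants just above are NEON's signed saturating doubling multiply-high family, SQDMULH/SQRDMULH/SQRDMLAH/SQRDMLSH. For the rounding 16-bit form the per-lane arithmetic is sat16((2*a*b + 0x8000) >> 16); the real helpers additionally latch a sticky saturation (QC) bit via env. A rough standalone sketch of one lane, not the QEMU helper itself:)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* SQRDMULH on one 16-bit lane: saturating rounding doubling multiply,
 * returning the high half; *sat reports whether saturation occurred. */
static int16_t sqrdmulh16(int16_t a, int16_t b, bool *sat)
{
    int64_t r = ((int64_t)a * b * 2 + 0x8000) >> 16;

    if (r > INT16_MAX) {
        *sat = true;
        return INT16_MAX;
    }
    if (r < INT16_MIN) {
        *sat = true;
        return INT16_MIN;
    }
    return (int16_t)r;
}

int main(void)
{
    bool sat = false;

    /* -32768 * -32768 is the classic overflow case: result saturates. */
    printf("%d\n", sqrdmulh16(INT16_MIN, INT16_MIN, &sat)); /* 32767 */
    printf("%d (saturated: %d)\n", sqrdmulh16(12345, -23456, &sat), sat);
    return 0;
}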
-DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32) - -DEF_HELPER_1(neon_narrow_u8, i32, i64) -DEF_HELPER_1(neon_narrow_u16, i32, i64) -DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64) -DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64) -DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64) -DEF_HELPER_1(neon_narrow_high_u8, i32, i64) -DEF_HELPER_1(neon_narrow_high_u16, i32, i64) -DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) -DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64) -DEF_HELPER_1(neon_widen_u8, i64, i32) -DEF_HELPER_1(neon_widen_s8, i64, i32) -DEF_HELPER_1(neon_widen_u16, i64, i32) -DEF_HELPER_1(neon_widen_s16, i64, i32) - -DEF_HELPER_2(neon_addl_u16, i64, i64, i64) -DEF_HELPER_2(neon_addl_u32, i64, i64, i64) -DEF_HELPER_2(neon_paddl_u16, i64, i64, i64) -DEF_HELPER_2(neon_paddl_u32, i64, i64, i64) -DEF_HELPER_2(neon_subl_u16, i64, i64, i64) -DEF_HELPER_2(neon_subl_u32, i64, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) -DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) -DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s32, i64, i32, i32) -DEF_HELPER_2(neon_abdl_u64, i64, i32, i32) -DEF_HELPER_2(neon_abdl_s64, i64, i32, i32) -DEF_HELPER_2(neon_mull_u8, i64, i32, i32) -DEF_HELPER_2(neon_mull_s8, i64, i32, i32) -DEF_HELPER_2(neon_mull_u16, i64, i32, i32) -DEF_HELPER_2(neon_mull_s16, i64, i32, i32) - -DEF_HELPER_1(neon_negl_u16, i64, i64) -DEF_HELPER_1(neon_negl_u32, i64, i64) - -DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) -DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) -DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) - -DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) -DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) -DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) -DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) -DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) -DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr) -DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr) - -/* iwmmxt_helper.c */ -DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) -DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) - -#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ -DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ - -DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) -DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) - -DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) 
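(The neon_narrow_sat_* and neon_unarrow_sat* helpers listed above implement NEON's saturating narrowing, VQMOVN/VQMOVUN: each source lane is clamped to the range of the half-width destination lane, and any clamping sets the QC flag. A rough standalone sketch of the unsigned 16-to-8-bit case, treating the 64-bit input as four lanes; the QC side effect carried through env is omitted here.)

#include <stdint.h>
#include <stdio.h>

/* Narrow four unsigned 16-bit lanes to four unsigned 8-bit lanes with
 * saturation: any lane above 0xff becomes 0xff. */
static uint32_t narrow_sat_u8(uint64_t x)
{
    uint32_t out = 0;

    for (int i = 0; i < 4; i++) {
        uint16_t lane = (uint16_t)(x >> (i * 16));
        uint8_t n = lane > 0xff ? 0xff : (uint8_t)lane;  /* saturate */

        out |= (uint32_t)n << (i * 8);
    }
    return out;
}

int main(void)
{
    /* lanes 0x0012, 0x0100, 0xffff, 0x007f -> bytes 0x12, 0xff, 0xff, 0x7f */
    printf("%08x\n", narrow_sat_u8(0x007fffff01000012ull)); /* 7fffff12 */
    return 0;
}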
-DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) - -DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) - -DEF_IWMMXT_HELPER_SIZE_ENV(mins) -DEF_IWMMXT_HELPER_SIZE_ENV(minu) -DEF_IWMMXT_HELPER_SIZE_ENV(maxs) -DEF_IWMMXT_HELPER_SIZE_ENV(maxu) - -DEF_IWMMXT_HELPER_SIZE_ENV(subn) -DEF_IWMMXT_HELPER_SIZE_ENV(addn) -DEF_IWMMXT_HELPER_SIZE_ENV(subu) -DEF_IWMMXT_HELPER_SIZE_ENV(addu) -DEF_IWMMXT_HELPER_SIZE_ENV(subs) -DEF_IWMMXT_HELPER_SIZE_ENV(adds) - -DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) -DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) - -DEF_HELPER_1(iwmmxt_bcstb, i64, i32) -DEF_HELPER_1(iwmmxt_bcstw, i64, i32) -DEF_HELPER_1(iwmmxt_bcstl, i64, i32) - -DEF_HELPER_1(iwmmxt_addcb, i64, i64) -DEF_HELPER_1(iwmmxt_addcw, i64, i64) -DEF_HELPER_1(iwmmxt_addcl, i64, i64) - -DEF_HELPER_1(iwmmxt_msbb, i32, i64) -DEF_HELPER_1(iwmmxt_msbw, i32, i64) -DEF_HELPER_1(iwmmxt_msbl, i32, i64) - -DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) - -DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) - -DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, 
ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) -DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) - -DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, 
TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - 
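(The gvec_vcvt_rm_* and gvec_vrint_rm_* helpers above perform float-to-integer conversion and round-to-integral with an explicitly supplied rounding mode, the VCVT{A,N,P,M}/VRINT{A,N,P,M} family, rather than the mode currently programmed in FPSCR. Plain C can demonstrate why the mode has to be a parameter; this is illustration only, as QEMU routes the mode through softfloat's float_status rather than fesetround.)

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
    double vals[] = { 2.5, -2.5 };

    fesetround(FE_TONEAREST);      /* ties-to-even, as in VCVTN/VRINTN */
    for (int i = 0; i < 2; i++) {
        printf("nearest-even(%5.1f) = %ld\n", vals[i], lrint(vals[i]));
    }

    fesetround(FE_UPWARD);         /* toward +inf, as in VCVTP/VRINTP */
    for (int i = 0; i < 2; i++) {
        printf("toward +inf (%5.1f) = %ld\n", vals[i], lrint(vals[i]));
    }
    return 0;
}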
-DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, 
ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_h, 
TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) - -DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, 
i32) -DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
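(gvec_saba_* and gvec_uaba_* above are the vectorised absolute-difference-and-accumulate operations, SABA/UABA: each destination lane accumulates |a - b| with ordinary wrap-around and no saturation. A minimal per-lane sketch for unsigned bytes; the real gvec helpers operate on whole vectors through the ptr/desc arguments.)

#include <stdint.h>
#include <stdio.h>

/* UABA-style step over n byte lanes: d[i] += |a[i] - b[i]|,
 * each lane wrapping modulo 256. */
static void uaba_u8(uint8_t *d, const uint8_t *a, const uint8_t *b, int n)
{
    for (int i = 0; i < n; i++) {
        uint8_t diff = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];

        d[i] = (uint8_t)(d[i] + diff);   /* wraps, no saturation */
    }
}

int main(void)
{
    uint8_t a[4] = { 10, 200, 0, 255 };
    uint8_t b[4] = { 60, 190, 5, 0 };
    uint8_t d[4] = { 0, 0, 250, 100 };

    uaba_u8(d, a, b, 4);
    printf("%u %u %u %u\n", d[0], d[1], d[2], d[3]); /* 50 10 255 99 */
    return 0;
}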
-DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, 
ptr, i32) - -DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) - -DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +#include "tcg/helper.h" #ifdef TARGET_AARCH64 #include "tcg/helper-a64.h" diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c new file mode 100644 index 0000000..ff13726 --- 
/dev/null +++ b/target/arm/hvf-stub.c @@ -0,0 +1,20 @@ +/* + * QEMU Hypervisor.framework (HVF) stubs for ARM + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hvf_arm.h" + +uint32_t hvf_arm_get_default_ipa_bit_size(void) +{ + g_assert_not_reached(); +} + +uint32_t hvf_arm_get_max_ipa_bit_size(void) +{ + g_assert_not_reached(); +} diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index ef9bc42..7b6d291 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -11,26 +11,30 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "qemu/log.h" -#include "sysemu/runstate.h" -#include "sysemu/hvf.h" -#include "sysemu/hvf_int.h" -#include "sysemu/hw_accel.h" +#include "system/runstate.h" +#include "system/hvf.h" +#include "system/hvf_int.h" +#include "system/hw_accel.h" #include "hvf_arm.h" #include "cpregs.h" +#include "cpu-sysregs.h" #include <mach/mach_time.h> -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" +#include "hw/boards.h" #include "hw/irq.h" #include "qemu/main-loop.h" -#include "sysemu/cpus.h" +#include "system/cpus.h" #include "arm-powerctl.h" #include "target/arm/cpu.h" #include "target/arm/internals.h" #include "target/arm/multiprocessing.h" #include "target/arm/gtimer.h" -#include "trace/trace-target_arm_hvf.h" +#include "trace.h" #include "migration/vmstate.h" #include "gdbstub/enums.h" @@ -183,6 +187,7 @@ void hvf_arm_init_debug(void) #define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4) #define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4) #define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1) +#define SYSREG_CNTP_CTL_EL0 SYSREG(3, 3, 14, 2, 1) #define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0) #define SYSREG_PMUSERENR_EL0 SYSREG(3, 3, 9, 14, 0) #define SYSREG_PMCNTENSET_EL0 SYSREG(3, 3, 9, 12, 1) @@ -297,6 +302,8 @@ void hvf_arm_init_debug(void) static void hvf_wfi(CPUState *cpu); +static uint32_t chosen_ipa_bit_size; + typedef struct HVFVTimer { /* Vtimer value during migration and paused state */ uint64_t vtimer_val; @@ -839,6 +846,19 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt) return val; } +static void clamp_id_aa64mmfr0_parange_to_ipa_size(ARMISARegisters *isar) +{ + uint32_t ipa_size = chosen_ipa_bit_size ? + chosen_ipa_bit_size : hvf_arm_get_max_ipa_bit_size(); + uint64_t id_aa64mmfr0; + + /* Clamp down the PARange to the IPA size the kernel supports. 
*/ + uint8_t index = round_down_to_parange_index(ipa_size); + id_aa64mmfr0 = GET_IDREG(isar, ID_AA64MMFR0); + id_aa64mmfr0 = (id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index; + SET_IDREG(isar, ID_AA64MMFR0, id_aa64mmfr0); +} + static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { ARMISARegisters host_isar = {}; @@ -846,16 +866,16 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) int reg; uint64_t *val; } regs[] = { - { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 }, - { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 }, - { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 }, - { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 }, - { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 }, - { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 }, + { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.idregs[ID_AA64PFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.idregs[ID_AA64PFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.idregs[ID_AA64DFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.idregs[ID_AA64DFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.idregs[ID_AA64ISAR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.idregs[ID_AA64ISAR1_EL1_IDX] }, /* Add ID_AA64ISAR2_EL1 here when HVF supports it */ - { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, - { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, - { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, + { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.idregs[ID_AA64MMFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.idregs[ID_AA64MMFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.idregs[ID_AA64MMFR2_EL1_IDX] }, /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ }; hv_vcpu_t fd; @@ -863,7 +883,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) hv_vcpu_exit_t *exit; int i; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; ahcf->features = (1ULL << ARM_FEATURE_V8) | (1ULL << ARM_FEATURE_NEON) | (1ULL << ARM_FEATURE_AARCH64) | @@ -882,6 +902,21 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr); r |= hv_vcpu_destroy(fd); + clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar); + + /* + * Disable SME, which is not properly handled by QEMU hvf yet. 
+ * To allow this through we would need to: + * - make sure that the SME state is correctly handled in the + * get_registers/put_registers functions + * - get the SME-specific CPU properties to work with accelerators + * other than TCG + * - fix any assumptions we made that SME implies SVE (since + * on the M4 there is SME but not SVE) + */ + SET_IDREG(&host_isar, ID_AA64PFR1, + GET_IDREG(&host_isar, ID_AA64PFR1) & ~R_ID_AA64PFR1_SME_MASK); + ahcf->isar = host_isar; /* @@ -897,13 +932,37 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ahcf->reset_sctlr |= 0x00800000; /* Make sure we don't advertise AArch32 support for EL0/EL1 */ - if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) { + if ((GET_IDREG(&host_isar, ID_AA64PFR0) & 0xff) != 0x11) { return false; } return r == HV_SUCCESS; } +uint32_t hvf_arm_get_default_ipa_bit_size(void) +{ + uint32_t default_ipa_size; + hv_return_t ret = hv_vm_config_get_default_ipa_size(&default_ipa_size); + assert_hvf_ok(ret); + + return default_ipa_size; +} + +uint32_t hvf_arm_get_max_ipa_bit_size(void) +{ + uint32_t max_ipa_size; + hv_return_t ret = hv_vm_config_get_max_ipa_size(&max_ipa_size); + assert_hvf_ok(ret); + + /* + * We clamp any IPA size we want to back the VM with to a valid PARange + * value so the guest doesn't try and map memory outside of the valid range. + * This logic just clamps the passed in IPA bit size to the first valid + * PARange value <= to it. + */ + return round_down_to_parange_bit_size(max_ipa_size); +} + void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu) { if (!arm_host_cpu_features.dtb_compatible) { @@ -929,6 +988,25 @@ void hvf_arch_vcpu_destroy(CPUState *cpu) { } +hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range) +{ + hv_return_t ret; + hv_vm_config_t config = hv_vm_config_create(); + + ret = hv_vm_config_set_ipa_size(config, pa_range); + if (ret != HV_SUCCESS) { + goto cleanup; + } + chosen_ipa_bit_size = pa_range; + + ret = hv_vm_create(config); + +cleanup: + os_release(config); + + return ret; +} + int hvf_arch_init_vcpu(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); @@ -992,7 +1070,12 @@ int hvf_arch_init_vcpu(CPUState *cpu) /* We're limited to underlying hardware caps, override internal versions */ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, - &arm_cpu->isar.id_aa64mmfr0); + &arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); + assert_hvf_ok(ret); + + clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar); + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, + arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); assert_hvf_ok(ret); return 0; @@ -1005,13 +1088,13 @@ void hvf_kick_vcpu_thread(CPUState *cpu) } static void hvf_raise_exception(CPUState *cpu, uint32_t excp, - uint32_t syndrome) + uint32_t syndrome, int target_el) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; cpu->exception_index = excp; - env->exception.target_el = 1; + env->exception.target_el = target_el; env->exception.syndrome = syndrome; arm_cpu_do_interrupt(cpu); @@ -1199,57 +1282,61 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) return false; } -static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) +static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; - uint64_t val = 0; + + if (arm_feature(env, ARM_FEATURE_PMU)) { + switch (reg) { + case SYSREG_PMCR_EL0: + *val = env->cp15.c9_pmcr; + return 0; + case SYSREG_PMCCNTR_EL0: + 
pmu_op_start(env); + *val = env->cp15.c15_ccnt; + pmu_op_finish(env); + return 0; + case SYSREG_PMCNTENCLR_EL0: + *val = env->cp15.c9_pmcnten; + return 0; + case SYSREG_PMOVSCLR_EL0: + *val = env->cp15.c9_pmovsr; + return 0; + case SYSREG_PMSELR_EL0: + *val = env->cp15.c9_pmselr; + return 0; + case SYSREG_PMINTENCLR_EL1: + *val = env->cp15.c9_pminten; + return 0; + case SYSREG_PMCCFILTR_EL0: + *val = env->cp15.pmccfiltr_el0; + return 0; + case SYSREG_PMCNTENSET_EL0: + *val = env->cp15.c9_pmcnten; + return 0; + case SYSREG_PMUSERENR_EL0: + *val = env->cp15.c9_pmuserenr; + return 0; + case SYSREG_PMCEID0_EL0: + case SYSREG_PMCEID1_EL0: + /* We can't really count anything yet, declare all events invalid */ + *val = 0; + return 0; + } + } switch (reg) { case SYSREG_CNTPCT_EL0: - val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / + *val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / gt_cntfrq_period_ns(arm_cpu); - break; - case SYSREG_PMCR_EL0: - val = env->cp15.c9_pmcr; - break; - case SYSREG_PMCCNTR_EL0: - pmu_op_start(env); - val = env->cp15.c15_ccnt; - pmu_op_finish(env); - break; - case SYSREG_PMCNTENCLR_EL0: - val = env->cp15.c9_pmcnten; - break; - case SYSREG_PMOVSCLR_EL0: - val = env->cp15.c9_pmovsr; - break; - case SYSREG_PMSELR_EL0: - val = env->cp15.c9_pmselr; - break; - case SYSREG_PMINTENCLR_EL1: - val = env->cp15.c9_pminten; - break; - case SYSREG_PMCCFILTR_EL0: - val = env->cp15.pmccfiltr_el0; - break; - case SYSREG_PMCNTENSET_EL0: - val = env->cp15.c9_pmcnten; - break; - case SYSREG_PMUSERENR_EL0: - val = env->cp15.c9_pmuserenr; - break; - case SYSREG_PMCEID0_EL0: - case SYSREG_PMCEID1_EL0: - /* We can't really count anything yet, declare all events invalid */ - val = 0; - break; + return 0; case SYSREG_OSLSR_EL1: - val = env->cp15.oslsr_el1; - break; + *val = env->cp15.oslsr_el1; + return 0; case SYSREG_OSDLR_EL1: /* Dummy register */ - break; + return 0; case SYSREG_ICC_AP0R0_EL1: case SYSREG_ICC_AP0R1_EL1: case SYSREG_ICC_AP0R2_EL1: @@ -1276,8 +1363,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_ICC_SRE_EL1: case SYSREG_ICC_CTLR_EL1: /* Call the TCG sysreg handler. This is only safe for GICv3 regs. 
*/ - if (!hvf_sysreg_read_cp(cpu, reg, &val)) { - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + if (hvf_sysreg_read_cp(cpu, reg, val)) { + return 0; } break; case SYSREG_DBGBVR0_EL1: @@ -1296,8 +1383,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_DBGBVR13_EL1: case SYSREG_DBGBVR14_EL1: case SYSREG_DBGBVR15_EL1: - val = env->cp15.dbgbvr[SYSREG_CRM(reg)]; - break; + *val = env->cp15.dbgbvr[SYSREG_CRM(reg)]; + return 0; case SYSREG_DBGBCR0_EL1: case SYSREG_DBGBCR1_EL1: case SYSREG_DBGBCR2_EL1: @@ -1314,8 +1401,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_DBGBCR13_EL1: case SYSREG_DBGBCR14_EL1: case SYSREG_DBGBCR15_EL1: - val = env->cp15.dbgbcr[SYSREG_CRM(reg)]; - break; + *val = env->cp15.dbgbcr[SYSREG_CRM(reg)]; + return 0; case SYSREG_DBGWVR0_EL1: case SYSREG_DBGWVR1_EL1: case SYSREG_DBGWVR2_EL1: @@ -1332,8 +1419,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_DBGWVR13_EL1: case SYSREG_DBGWVR14_EL1: case SYSREG_DBGWVR15_EL1: - val = env->cp15.dbgwvr[SYSREG_CRM(reg)]; - break; + *val = env->cp15.dbgwvr[SYSREG_CRM(reg)]; + return 0; case SYSREG_DBGWCR0_EL1: case SYSREG_DBGWCR1_EL1: case SYSREG_DBGWCR2_EL1: @@ -1350,35 +1437,25 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_DBGWCR13_EL1: case SYSREG_DBGWCR14_EL1: case SYSREG_DBGWCR15_EL1: - val = env->cp15.dbgwcr[SYSREG_CRM(reg)]; - break; + *val = env->cp15.dbgwcr[SYSREG_CRM(reg)]; + return 0; default: if (is_id_sysreg(reg)) { /* ID system registers read as RES0 */ - val = 0; - break; + *val = 0; + return 0; } - cpu_synchronize_state(cpu); - trace_hvf_unhandled_sysreg_read(env->pc, reg, - SYSREG_OP0(reg), - SYSREG_OP1(reg), - SYSREG_CRN(reg), - SYSREG_CRM(reg), - SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); - return 1; } - trace_hvf_sysreg_read(reg, - SYSREG_OP0(reg), - SYSREG_OP1(reg), - SYSREG_CRN(reg), - SYSREG_CRM(reg), - SYSREG_OP2(reg), - val); - hvf_set_reg(cpu, rt, val); - - return 0; + cpu_synchronize_state(cpu); + trace_hvf_unhandled_sysreg_read(env->pc, reg, + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg)); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); + return 1; } static void pmu_update_irq(CPUARMState *env) @@ -1497,70 +1574,82 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) SYSREG_OP2(reg), val); - switch (reg) { - case SYSREG_PMCCNTR_EL0: - pmu_op_start(env); - env->cp15.c15_ccnt = val; - pmu_op_finish(env); - break; - case SYSREG_PMCR_EL0: - pmu_op_start(env); - - if (val & PMCRC) { - /* The counter has been reset */ - env->cp15.c15_ccnt = 0; - } + if (arm_feature(env, ARM_FEATURE_PMU)) { + switch (reg) { + case SYSREG_PMCCNTR_EL0: + pmu_op_start(env); + env->cp15.c15_ccnt = val; + pmu_op_finish(env); + return 0; + case SYSREG_PMCR_EL0: + pmu_op_start(env); + + if (val & PMCRC) { + /* The counter has been reset */ + env->cp15.c15_ccnt = 0; + } - if (val & PMCRP) { - unsigned int i; - for (i = 0; i < pmu_num_counters(env); i++) { - env->cp15.c14_pmevcntr[i] = 0; + if (val & PMCRP) { + unsigned int i; + for (i = 0; i < pmu_num_counters(env); i++) { + env->cp15.c14_pmevcntr[i] = 0; + } } - } - env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; - env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK); + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; + env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK); + + pmu_op_finish(env); + return 0; + case SYSREG_PMUSERENR_EL0: + 
env->cp15.c9_pmuserenr = val & 0xf; + return 0; + case SYSREG_PMCNTENSET_EL0: + env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env)); + return 0; + case SYSREG_PMCNTENCLR_EL0: + env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env)); + return 0; + case SYSREG_PMINTENCLR_EL1: + pmu_op_start(env); + env->cp15.c9_pminten |= val; + pmu_op_finish(env); + return 0; + case SYSREG_PMOVSCLR_EL0: + pmu_op_start(env); + env->cp15.c9_pmovsr &= ~val; + pmu_op_finish(env); + return 0; + case SYSREG_PMSWINC_EL0: + pmu_op_start(env); + pmswinc_write(env, val); + pmu_op_finish(env); + return 0; + case SYSREG_PMSELR_EL0: + env->cp15.c9_pmselr = val & 0x1f; + return 0; + case SYSREG_PMCCFILTR_EL0: + pmu_op_start(env); + env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0; + pmu_op_finish(env); + return 0; + } + } - pmu_op_finish(env); - break; - case SYSREG_PMUSERENR_EL0: - env->cp15.c9_pmuserenr = val & 0xf; - break; - case SYSREG_PMCNTENSET_EL0: - env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env)); - break; - case SYSREG_PMCNTENCLR_EL0: - env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env)); - break; - case SYSREG_PMINTENCLR_EL1: - pmu_op_start(env); - env->cp15.c9_pminten |= val; - pmu_op_finish(env); - break; - case SYSREG_PMOVSCLR_EL0: - pmu_op_start(env); - env->cp15.c9_pmovsr &= ~val; - pmu_op_finish(env); - break; - case SYSREG_PMSWINC_EL0: - pmu_op_start(env); - pmswinc_write(env, val); - pmu_op_finish(env); - break; - case SYSREG_PMSELR_EL0: - env->cp15.c9_pmselr = val & 0x1f; - break; - case SYSREG_PMCCFILTR_EL0: - pmu_op_start(env); - env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0; - pmu_op_finish(env); - break; + switch (reg) { case SYSREG_OSLAR_EL1: env->cp15.oslsr_el1 = val & 1; - break; + return 0; + case SYSREG_CNTP_CTL_EL0: + /* + * Guests should not rely on the physical counter, but macOS emits + * disable writes to it. Let it do so, but ignore the requests. + */ + qemu_log_mask(LOG_UNIMP, "Unsupported write to CNTP_CTL_EL0\n"); + return 0; case SYSREG_OSDLR_EL1: /* Dummy register */ - break; + return 0; case SYSREG_ICC_AP0R0_EL1: case SYSREG_ICC_AP0R1_EL1: case SYSREG_ICC_AP0R2_EL1: @@ -1587,13 +1676,13 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: /* Call the TCG sysreg handler. This is only safe for GICv3 regs. 
*/ - if (!hvf_sysreg_write_cp(cpu, reg, val)) { - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + if (hvf_sysreg_write_cp(cpu, reg, val)) { + return 0; } break; case SYSREG_MDSCR_EL1: env->cp15.mdscr_el1 = val; - break; + return 0; case SYSREG_DBGBVR0_EL1: case SYSREG_DBGBVR1_EL1: case SYSREG_DBGBVR2_EL1: @@ -1611,7 +1700,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_DBGBVR14_EL1: case SYSREG_DBGBVR15_EL1: env->cp15.dbgbvr[SYSREG_CRM(reg)] = val; - break; + return 0; case SYSREG_DBGBCR0_EL1: case SYSREG_DBGBCR1_EL1: case SYSREG_DBGBCR2_EL1: @@ -1629,7 +1718,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_DBGBCR14_EL1: case SYSREG_DBGBCR15_EL1: env->cp15.dbgbcr[SYSREG_CRM(reg)] = val; - break; + return 0; case SYSREG_DBGWVR0_EL1: case SYSREG_DBGWVR1_EL1: case SYSREG_DBGWVR2_EL1: @@ -1647,7 +1736,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_DBGWVR14_EL1: case SYSREG_DBGWVR15_EL1: env->cp15.dbgwvr[SYSREG_CRM(reg)] = val; - break; + return 0; case SYSREG_DBGWCR0_EL1: case SYSREG_DBGWCR1_EL1: case SYSREG_DBGWCR2_EL1: @@ -1665,20 +1754,18 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_DBGWCR14_EL1: case SYSREG_DBGWCR15_EL1: env->cp15.dbgwcr[SYSREG_CRM(reg)] = val; - break; - default: - cpu_synchronize_state(cpu); - trace_hvf_unhandled_sysreg_write(env->pc, reg, - SYSREG_OP0(reg), - SYSREG_OP1(reg), - SYSREG_CRN(reg), - SYSREG_CRM(reg), - SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); - return 1; + return 0; } - return 0; + cpu_synchronize_state(cpu); + trace_hvf_unhandled_sysreg_write(env->pc, reg, + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg)); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); + return 1; } static int hvf_inject_interrupts(CPUState *cpu) @@ -1828,7 +1915,17 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); bql_unlock(); - assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); + r = hv_vcpu_run(cpu->accel->fd); + bql_lock(); + switch (r) { + case HV_SUCCESS: + break; + case HV_ILLEGAL_GUEST_STATE: + trace_hvf_illegal_guest_state(); + /* fall through */ + default: + g_assert_not_reached(); + } /* handle VMEXIT */ uint64_t exit_reason = hvf_exit->reason; @@ -1836,7 +1933,6 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t ec = syn_get_ec(syndrome); ret = 0; - bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. 
*/ @@ -1871,7 +1967,7 @@ int hvf_vcpu_exec(CPUState *cpu) if (!hvf_find_sw_breakpoint(cpu, env->pc)) { /* Re-inject into the guest */ ret = 0; - hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0)); + hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0), 1); } break; } @@ -1902,6 +1998,7 @@ int hvf_vcpu_exec(CPUState *cpu) bool isv = syndrome & ARM_EL_ISV; bool iswrite = (syndrome >> 6) & 1; bool s1ptw = (syndrome >> 7) & 1; + bool sse = (syndrome >> 21) & 1; uint32_t sas = (syndrome >> 22) & 3; uint32_t len = 1 << sas; uint32_t srt = (syndrome >> 16) & 0x1f; @@ -1929,6 +2026,9 @@ int hvf_vcpu_exec(CPUState *cpu) address_space_read(&address_space_memory, hvf_exit->exception.physical_address, MEMTXATTRS_UNSPECIFIED, &val, len); + if (sse) { + val = sextract64(val, 0, len * 8); + } hvf_set_reg(cpu, srt, val); } @@ -1943,7 +2043,17 @@ int hvf_vcpu_exec(CPUState *cpu) int sysreg_ret = 0; if (isread) { - sysreg_ret = hvf_sysreg_read(cpu, reg, rt); + sysreg_ret = hvf_sysreg_read(cpu, reg, &val); + if (!sysreg_ret) { + trace_hvf_sysreg_read(reg, + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg), + val); + hvf_set_reg(cpu, rt, val); + } } else { val = hvf_get_reg(cpu, rt); sysreg_ret = hvf_sysreg_write(cpu, reg, val); @@ -1962,13 +2072,13 @@ int hvf_vcpu_exec(CPUState *cpu) cpu_synchronize_state(cpu); if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) { if (!hvf_handle_psci_call(cpu)) { - trace_hvf_unknown_hvc(env->xregs[0]); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */ env->xregs[0] = -1; } } else { - trace_hvf_unknown_hvc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; case EC_AA64_SMC: @@ -1983,7 +2093,7 @@ int hvf_vcpu_exec(CPUState *cpu) } } else { trace_hvf_unknown_smc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; default: @@ -2182,28 +2292,23 @@ static inline bool hvf_arm_hw_debug_active(CPUState *cpu) return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); } -static void hvf_arch_set_traps(void) +static void hvf_arch_set_traps(CPUState *cpu) { - CPUState *cpu; bool should_enable_traps = false; hv_return_t r = HV_SUCCESS; /* Check whether guest debugging is enabled for at least one vCPU; if it * is, enable exiting the guest on all vCPUs */ - CPU_FOREACH(cpu) { - should_enable_traps |= cpu->accel->guest_debug_enabled; - } - CPU_FOREACH(cpu) { - /* Set whether debug exceptions exit the guest */ - r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd, - should_enable_traps); - assert_hvf_ok(r); + should_enable_traps |= cpu->accel->guest_debug_enabled; + /* Set whether debug exceptions exit the guest */ + r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); - /* Set whether accesses to debug registers exit the guest */ - r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd, - should_enable_traps); - assert_hvf_ok(r); - } + /* Set whether accesses to debug registers exit the guest */ + r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); } void hvf_arch_update_guest_debug(CPUState *cpu) @@ -2244,7 +2349,7 @@ void hvf_arch_update_guest_debug(CPUState *cpu) deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0); } - hvf_arch_set_traps(); + 
hvf_arch_set_traps(cpu); } bool hvf_arch_supports_guest_debug(void) diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events index 4fbbe4b..b49746f 100644 --- a/target/arm/hvf/trace-events +++ b/target/arm/hvf/trace-events @@ -5,9 +5,10 @@ hvf_inject_irq(void) "injecting IRQ" hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64 hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")" -hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64 +hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64 hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64 hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]" -hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x" +hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpuid=0x%x" hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]" hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]" +hvf_illegal_guest_state(void) "HV_ILLEGAL_GUEST_STATE" diff --git a/target/arm/hvf/trace.h b/target/arm/hvf/trace.h new file mode 100644 index 0000000..04a19c1 --- /dev/null +++ b/target/arm/hvf/trace.h @@ -0,0 +1 @@ +#include "trace/trace-target_arm_hvf.h" diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index e848c1d..ea82f26 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -11,7 +11,7 @@ #ifndef QEMU_HVF_ARM_H #define QEMU_HVF_ARM_H -#include "cpu.h" +#include "target/arm/cpu-qom.h" /** * hvf_arm_init_debug() - initialize guest debug capabilities @@ -22,4 +22,7 @@ void hvf_arm_init_debug(void); void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); +uint32_t hvf_arm_get_default_ipa_bit_size(void); +uint32_t hvf_arm_get_max_ipa_bit_size(void); + #endif diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c index f120d55..bb59697 100644 --- a/target/arm/hyp_gdbstub.c +++ b/target/arm/hyp_gdbstub.c @@ -54,7 +54,7 @@ GArray *hw_breakpoints, *hw_watchpoints; * here so future PC comparisons will work properly. 
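A minimal sketch of how the two hvf_arm.h helpers declared above might be combined with hvf_arch_vm_create() when sizing the guest's IPA space; requested_pa_bits is a hypothetical input and the real call site is not shown in this hunk:

    /* Hypothetical: pick an IPA size, bounded by what HVF supports. */
    uint32_t pa_bits = requested_pa_bits ? requested_pa_bits
                                         : hvf_arm_get_default_ipa_bit_size();
    pa_bits = MIN(pa_bits, hvf_arm_get_max_ipa_bit_size());
    assert_hvf_ok(hvf_arch_vm_create(ms, pa_bits));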
*/ -int insert_hw_breakpoint(target_ulong addr) +int insert_hw_breakpoint(vaddr addr) { HWBreakpoint brk = { .bcr = 0x1, /* BCR E=1, enable */ @@ -80,7 +80,7 @@ int insert_hw_breakpoint(target_ulong addr) * Delete a breakpoint and shuffle any above down */ -int delete_hw_breakpoint(target_ulong pc) +int delete_hw_breakpoint(vaddr pc) { int i; for (i = 0; i < hw_breakpoints->len; i++) { @@ -125,7 +125,7 @@ int delete_hw_breakpoint(target_ulong pc) * need to ensure you mask the address as required and set BAS=0xff */ -int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) +int insert_hw_watchpoint(vaddr addr, vaddr len, int type) { HWWatchpoint wp = { .wcr = R_DBGWCR_E_MASK, /* E=1, enable */ @@ -158,7 +158,6 @@ int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) break; default: g_assert_not_reached(); - break; } if (len <= 8) { /* we align the address and set the bits in BAS */ @@ -183,7 +182,7 @@ int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) return 0; } -bool check_watchpoint_in_range(int i, target_ulong addr) +bool check_watchpoint_in_range(int i, vaddr addr) { HWWatchpoint *wp = get_hw_wp(i); uint64_t addr_top, addr_bottom = wp->wvr; @@ -215,7 +214,7 @@ bool check_watchpoint_in_range(int i, target_ulong addr) * Delete a breakpoint and shuffle any above down */ -int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) +int delete_hw_watchpoint(vaddr addr, vaddr len, int type) { int i; for (i = 0; i < cur_hw_wps; i++) { @@ -227,7 +226,7 @@ int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) return -ENOENT; } -bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) +bool find_hw_breakpoint(CPUState *cpu, vaddr pc) { int i; @@ -240,7 +239,7 @@ bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) return false; } -CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr) { int i; diff --git a/target/arm/internals.h b/target/arm/internals.h index 11b5da2..21a8d67 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -25,9 +25,13 @@ #ifndef TARGET_ARM_INTERNALS_H #define TARGET_ARM_INTERNALS_H +#include "exec/hwaddr.h" +#include "exec/vaddr.h" #include "exec/breakpoint.h" +#include "accel/tcg/tb-cpu-state.h" #include "hw/registerfields.h" #include "tcg/tcg-gvec-desc.h" +#include "system/memory.h" #include "syndrome.h" #include "cpu-features.h" @@ -350,20 +354,30 @@ static inline int r14_bank_number(int mode) } void arm_cpu_register(const ARMCPUInfo *info); -void aarch64_cpu_register(const ARMCPUInfo *info); void register_cp_regs_for_features(ARMCPU *cpu); void init_cpreg_list(ARMCPU *cpu); void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); +void arm_translate_code(CPUState *cs, TranslationBlock *tb, + int *max_insns, vaddr pc, void *host_pc); + +void arm_cpu_register_gdb_commands(ARMCPU *cpu); +void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *, + GPtrArray *, GPtrArray *); void arm_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data); #ifdef CONFIG_TCG +TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs); void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); + +/* Our implementation of TCGCPUOps::cpu_exec_halt */ +bool arm_cpu_exec_halt(CPUState *cs); +int arm_cpu_mmu_index(CPUState *cs, bool ifetch); #endif /* CONFIG_TCG */ typedef enum ARMFPRounding { @@ -383,6 +397,141 @@ static inline FloatRoundMode 
arm_rmode_to_sf(ARMFPRounding rmode) return arm_rmode_to_sf_map[rmode]; } +/* Return the effective value of SCR_EL3.RW */ +static inline bool arm_scr_rw_eff(CPUARMState *env) +{ + /* + * SCR_EL3.RW has an effective value of 1 if: + * - we are NS and EL2 is implemented but doesn't support AArch32 + * - we are S and EL2 is enabled (in which case it must be AArch64) + */ + ARMCPU *cpu = env_archcpu(env); + + if (env->cp15.scr_el3 & SCR_RW) { + return true; + } + if (env->cp15.scr_el3 & SCR_NS) { + return arm_feature(env, ARM_FEATURE_EL2) && + !cpu_isar_feature(aa64_aa32_el2, cpu); + } else { + return env->cp15.scr_el3 & SCR_EEL2; + } +} + +/* Return true if the specified exception level is running in AArch64 state. */ +static inline bool arm_el_is_aa64(CPUARMState *env, int el) +{ + /* + * This isn't valid for EL0 (if we're in EL0, is_a64() is what you want, + * and if we're not in EL0 then the state of EL0 isn't well defined.) + */ + assert(el >= 1 && el <= 3); + bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64); + + /* + * The highest exception level is always at the maximum supported + * register width, and then lower levels have a register width controlled + * by bits in the SCR or HCR registers. + */ + if (el == 3) { + return aa64; + } + + if (arm_feature(env, ARM_FEATURE_EL3)) { + aa64 = aa64 && arm_scr_rw_eff(env); + } + + if (el == 2) { + return aa64; + } + + if (arm_is_el2_enabled(env)) { + aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW); + } + + return aa64; +} + +/* + * Return the current Exception Level (as per ARMv8; note that this differs + * from the ARMv7 Privilege Level). + */ +static inline int arm_current_el(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_M)) { + return arm_v7m_is_handler_mode(env) || + !(env->v7m.control[env->v7m.secure] & 1); + } + + if (is_a64(env)) { + return extract32(env->pstate, 2, 2); + } + + switch (env->uncached_cpsr & 0x1f) { + case ARM_CPU_MODE_USR: + return 0; + case ARM_CPU_MODE_HYP: + return 2; + case ARM_CPU_MODE_MON: + return 3; + default: + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + /* If EL3 is 32-bit then all secure privileged modes run in EL3 */ + return 3; + } + + return 1; + } +} + +static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env, + bool sctlr_b) +{ +#ifdef CONFIG_USER_ONLY + /* + * In system mode, BE32 is modelled in line with the + * architecture (as word-invariant big-endianness), where loads + * and stores are done little endian but from addresses which + * are adjusted by XORing with the appropriate constant. So the + * endianness to use for the raw data access is not affected by + * SCTLR.B. + * In user mode, however, we model BE32 as byte-invariant + * big-endianness (because user-only code cannot tell the + * difference), and so we need to use a data access endianness + * that depends on SCTLR.B. + */ + if (sctlr_b) { + return true; + } +#endif + /* In 32bit endianness is determined by looking at CPSR's E bit */ + return env->uncached_cpsr & CPSR_E; +} + +static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr) +{ + return sctlr & (el ? SCTLR_EE : SCTLR_E0E); +} + +/* Return true if the processor is in big-endian mode. 
*/ +static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) +{ + if (!is_a64(env)) { + return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env)); + } else { + int cur_el = arm_current_el(env); + uint64_t sctlr = arm_sctlr(env, cur_el); + return arm_cpu_data_is_big_endian_a64(cur_el, sctlr); + } +} + +#ifdef CONFIG_USER_ONLY +static inline bool arm_cpu_bswap_data(CPUARMState *env) +{ + return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env); +} +#endif + static inline void aarch64_save_sp(CPUARMState *env, int el) { if (env->pstate & PSTATE_SP) { @@ -429,6 +578,25 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm) */ unsigned int arm_pamax(ARMCPU *cpu); +/* + * round_down_to_parange_index + * @bit_size: uint8_t + * + * Rounds down the bit_size supplied to the first supported ARM physical + * address range and returns the index for this. The index is intended to + * be used to set ID_AA64MMFR0_EL1's PARANGE bits. + */ +uint8_t round_down_to_parange_index(uint8_t bit_size); + +/* + * round_down_to_parange_bit_size + * @bit_size: uint8_t + * + * Rounds down the bit_size supplied to the first supported ARM physical + * address range bit size and returns this. + */ +uint8_t round_down_to_parange_bit_size(uint8_t bit_size); + /* Return true if extended addresses are enabled. * This is always the case if our translation regime is 64 bit, * but depends on TTBCR.EAE for 32 bit. @@ -482,16 +650,12 @@ static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) { return false; } -static inline void arm_handle_psci_call(ARMCPU *cpu) -{ - g_assert_not_reached(); -} #else /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */ bool arm_is_psci_call(ARMCPU *cpu, int excp_type); +#endif /* Actually handle a PSCI call */ void arm_handle_psci_call(ARMCPU *cpu); -#endif /** * arm_clear_exclusive: clear the exclusive monitor @@ -561,8 +725,8 @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; struct ARMMMUFaultInfo { ARMFaultType type; ARMGPCF gpcf; - target_ulong s2addr; - target_ulong paddr; + hwaddr s2addr; + hwaddr paddr; ARMSecuritySpace paddr_space; int level; int domain; @@ -776,9 +940,9 @@ void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr, void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr, MMUAccessType access_type, uintptr_t ra); #else -bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr); +bool arm_cpu_tlb_fill_align(CPUState *cs, CPUTLBEntryFull *out, vaddr addr, + MMUAccessType access_type, int mmu_idx, + MemOp memop, int size, bool probe, uintptr_t ra); #endif static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx) @@ -845,7 +1009,16 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu) } } -/* Return true if this address translation regime has two ranges. */ +/* + * Return true if this address translation regime has two ranges. + * Note that this will not return the correct answer for AArch32 + * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is + * never called from a context where EL3 can be AArch32. (The + * correct return value for ARMMMUIdx_E3 would be different for + * that case, so we can't just make the function return the + * correct value anyway; we would need an extra "bool e3_is_aarch32" + * argument which all the current callsites would pass as 'false'.) 
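The round_down_to_parange_* helpers declared above are easiest to follow with the ID_AA64MMFR0.PARange encoding in mind (indexes 0..6 covering 32/36/40/42/44/48/52 bits). A minimal sketch of an implementation satisfying the documented contract, for illustration only (ARRAY_SIZE is QEMU's macro from qemu/osdep.h):

    /* Sketch only; assumes the architectural PARange encoding. */
    static const uint8_t pamax_map[] = { 32, 36, 40, 42, 44, 48, 52 };

    uint8_t round_down_to_parange_index(uint8_t bit_size)
    {
        int i;

        /* Largest supported range that does not exceed bit_size */
        for (i = ARRAY_SIZE(pamax_map) - 1; i > 0; i--) {
            if (pamax_map[i] <= bit_size) {
                return i;
            }
        }
        return 0;
    }

    uint8_t round_down_to_parange_bit_size(uint8_t bit_size)
    {
        return pamax_map[round_down_to_parange_index(bit_size)];
    }

So a host-reported 39-bit IPA limit, for example, rounds down to 36 bits (PARange index 1), which is what hvf_arm_get_max_ipa_bit_size() and clamp_id_aa64mmfr0_parange_to_ipa_size() in the hvf.c hunk above rely on.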
+ */ static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) { switch (mmu_idx) { @@ -870,6 +1043,7 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E30_3_PAN: return true; default: return false; @@ -893,10 +1067,11 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) case ARMMMUIdx_E2: return 2; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: return 3; case ARMMMUIdx_E10_0: case ARMMMUIdx_Stage1_E0: - return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E10_1: @@ -918,7 +1093,9 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { + case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E30_0: case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_MUser: case ARMMMUIdx_MSUser: @@ -927,10 +1104,6 @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) return true; default: return false; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - g_assert_not_reached(); } } @@ -998,7 +1171,7 @@ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) static inline int arm_num_brps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; } @@ -1012,7 +1185,7 @@ static inline int arm_num_brps(ARMCPU *cpu) static inline int arm_num_wrps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; } @@ -1026,7 +1199,7 @@ static inline int arm_num_wrps(ARMCPU *cpu) static inline int arm_num_ctx_cmps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, CTX_CMPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; } @@ -1387,6 +1560,7 @@ typedef struct GetPhysAddrResult { * @env: CPUARMState * @address: virtual address to get physical address for * @access_type: 0 for read, 1 for write, 2 for execute + * @memop: memory operation feeding this access, or 0 for none * @mmu_idx: MMU index indicating required translation regime * @result: set on translation success. * @fi: set to fault info if the translation fails @@ -1404,8 +1578,8 @@ typedef struct GetPhysAddrResult { * * for PSMAv5 based systems we don't bother to return a full FSR format * value. 
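A hypothetical call site, only to show where the new memop argument slots in (0 when no particular memory operation feeds the lookup); env, addr and mmu_idx stand in for the caller's state, and the real callers are in the page-table-walk and TLB-fill paths, which this hunk does not touch:

    GetPhysAddrResult res = {};
    ARMMMUFaultInfo fi = {};

    /* Hypothetical caller; a true return indicates the translation
     * faulted, in which case fi describes the fault. */
    if (get_phys_addr(env, addr, MMU_DATA_LOAD, 0, mmu_idx, &res, &fi)) {
        /* raise the fault described by fi */
    } else {
        /* res now describes the translated physical address */
    }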
*/ -bool get_phys_addr(CPUARMState *env, target_ulong address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, +bool get_phys_addr(CPUARMState *env, vaddr address, + MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) __attribute__((nonnull)); @@ -1415,6 +1589,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, * @env: CPUARMState * @address: virtual address to get physical address for * @access_type: 0 for read, 1 for write, 2 for execute + * @memop: memory operation feeding this access, or 0 for none * @mmu_idx: MMU index indicating required translation regime * @space: security space for the access * @result: set on translation success. @@ -1423,8 +1598,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, * Similar to get_phys_addr, but use the given security space and don't perform * a Granule Protection Check on the resulting address. */ -bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, - MMUAccessType access_type, +bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, + MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, ARMSecuritySpace space, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) @@ -1632,7 +1807,6 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env) return (1ULL << 31) | ((1ULL << pmu_num_counters(env)) - 1); } -#ifdef TARGET_AARCH64 GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg); int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg); @@ -1640,6 +1814,8 @@ int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg); int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg); +int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_tag_ctl_reg(CPUState *cs, uint8_t *buf, int reg); void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp); @@ -1648,7 +1824,12 @@ void aarch64_max_tcg_initfn(Object *obj); void aarch64_add_pauth_properties(Object *obj); void aarch64_add_sve_properties(Object *obj); void aarch64_add_sme_properties(Object *obj); -#endif + +/* Return true if the gdbstub is presenting an AArch64 CPU */ +static inline bool arm_gdbstub_is_aarch64(ARMCPU *cpu) +{ + return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); +} /* Read the CONTROL register as the MRS instruction would. 
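As a quick check of the pmu_counter_mask() arithmetic above: the mask always includes bit 31 for the cycle counter plus one bit per implemented event counter, for instance:

    /* With pmu_num_counters(env) == 4:
     *   (1ULL << 31) | ((1ULL << 4) - 1) == 0x8000000F
     * i.e. the cycle counter plus event counters 0-3. */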
*/ uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure); @@ -1688,6 +1869,9 @@ static inline uint64_t pauth_ptr_mask(ARMVAParameters param) /* Add the cpreg definitions for debug related system registers */ void define_debug_regs(ARMCPU *cpu); +/* Add the cpreg definitions for TLBI instructions */ +void define_tlb_insn_regs(ARMCPU *cpu); + /* Effective value of MDCR_EL2 */ static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env) { @@ -1719,8 +1903,6 @@ static inline bool arm_fgt_active(CPUARMState *env, int el) (!arm_feature(env, ARM_FEATURE_EL3) || (env->cp15.scr_el3 & SCR_FGTEN)); } -void assert_hflags_rebuild_correctly(CPUARMState *env); - /* * Although the ARM implementation of hardware assisted debugging * allows for different breakpoints per-core, the current GDB @@ -1762,20 +1944,42 @@ extern GArray *hw_breakpoints, *hw_watchpoints; #define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) #define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) -bool find_hw_breakpoint(CPUState *cpu, target_ulong pc); -int insert_hw_breakpoint(target_ulong pc); -int delete_hw_breakpoint(target_ulong pc); +bool find_hw_breakpoint(CPUState *cpu, vaddr pc); +int insert_hw_breakpoint(vaddr pc); +int delete_hw_breakpoint(vaddr pc); -bool check_watchpoint_in_range(int i, target_ulong addr); -CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); -int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); -int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); +bool check_watchpoint_in_range(int i, vaddr addr); +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr); +int insert_hw_watchpoint(vaddr addr, vaddr len, int type); +int delete_hw_watchpoint(vaddr addr, vaddr len, int type); /* Return the current value of the system counter in ticks */ uint64_t gt_get_countervalue(CPUARMState *env); /* * Return the currently applicable offset between the system counter - * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2). + * and the counter for the specified timer, as used for direct register + * accesses. */ -uint64_t gt_virt_cnt_offset(CPUARMState *env); +uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx); + +/* + * Return mask of ARMMMUIdxBit values corresponding to an "invalidate + * all EL1" scope; this covers stage 1 and stage 2. + */ +int alle1_tlbmask(CPUARMState *env); + +/* Set the float_status behaviour to match the Arm defaults */ +void arm_set_default_fp_behaviours(float_status *s); +/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */ +void arm_set_ah_fp_behaviours(float_status *s); +/* Read the float_status info and return the appropriate FPSR value */ +uint32_t vfp_get_fpsr_from_host(CPUARMState *env); +/* Clear the exception status flags from all float_status fields */ +void vfp_clear_float_status_exc_flags(CPUARMState *env); +/* + * Update float_status fields to handle the bits of the FPCR + * specified by mask changing to the values in val. + */ +void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask); + #endif diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index 965a486..34e57fa 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -22,3 +22,100 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) { g_assert_not_reached(); } + +/* + * It's safe to call these functions without KVM support. + * They should either do nothing or return "not supported". 
+ */ +bool kvm_arm_aarch32_supported(void) +{ + return false; +} + +bool kvm_arm_pmu_supported(void) +{ + return false; +} + +bool kvm_arm_sve_supported(void) +{ + return false; +} + +bool kvm_arm_mte_supported(void) +{ + return false; +} + +/* + * These functions should never actually be called without KVM support. + */ +void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_add_vcpu_properties(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) +{ + g_assert_not_reached(); +} + +int kvm_arm_vgic_probe(void) +{ + g_assert_not_reached(); +} + +void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) +{ + g_assert_not_reached(); +} + +void kvm_arm_pmu_init(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) +{ + g_assert_not_reached(); +} + +void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) +{ + g_assert_not_reached(); +} + +uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void kvm_arm_enable_mte(Object *cpuobj, Error **errp) +{ + g_assert_not_reached(); +} + +void kvm_arm_reset_vcpu(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level) +{ + g_assert_not_reached(); +} + +void kvm_arm_cpu_pre_save(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +bool kvm_arm_cpu_post_load(ARMCPU *cpu) +{ + g_assert_not_reached(); +} diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 70f79ed..426f8b1 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -20,17 +20,18 @@ #include "qemu/main-loop.h" #include "qom/object.h" #include "qapi/error.h" -#include "sysemu/sysemu.h" -#include "sysemu/runstate.h" -#include "sysemu/kvm.h" -#include "sysemu/kvm_int.h" +#include "system/system.h" +#include "system/runstate.h" +#include "system/kvm.h" +#include "system/kvm_int.h" #include "kvm_arm.h" #include "cpu.h" +#include "cpu-sysregs.h" #include "trace.h" #include "internals.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "gdbstub/enums.h" #include "hw/boards.h" #include "hw/irq.h" @@ -39,8 +40,10 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/ghes.h" #include "target/arm/gtimer.h" +#include "migration/blocker.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(DEVICE_CTRL), KVM_CAP_LAST_INFO }; @@ -98,8 +101,7 @@ static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); } -bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, - int *fdarray, +bool kvm_arm_create_scratch_host_vcpu(int *fdarray, struct kvm_vcpu_init *init) { int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; @@ -119,6 +121,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, if (vmfd < 0) { goto err; } + + /* + * The MTE capability must be enabled by the VMM before creating + * any VCPUs in order to allow the MTE bits of the ID_AA64PFR1 + * register to be probed correctly, as they are masked if MTE + * is not enabled. 
+ */ + if (kvm_arm_mte_supported()) { + KVMState kvm_state; + + kvm_state.fd = kvmfd; + kvm_state.vmfd = vmfd; + kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0); + } + cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); if (cpufd < 0) { goto err; @@ -133,40 +150,13 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, struct kvm_vcpu_init preferred; ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred); - if (!ret) { - init->target = preferred.target; - } - } - if (ret >= 0) { - ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); if (ret < 0) { goto err; } - } else if (cpus_to_try) { - /* Old kernel which doesn't know about the - * PREFERRED_TARGET ioctl: we know it will only support - * creating one kind of guest CPU which is its preferred - * CPU type. - */ - struct kvm_vcpu_init try; - - while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { - try.target = *cpus_to_try++; - memcpy(try.features, init->features, sizeof(init->features)); - ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try); - if (ret >= 0) { - break; - } - } - if (ret < 0) { - goto err; - } - init->target = try.target; - } else { - /* Treat a NULL cpus_to_try argument the same as an empty - * list, which means we will fail the call since this must - * be an old kernel which doesn't support PREFERRED_TARGET. - */ + init->target = preferred.target; + } + ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); + if (ret < 0) { goto err; } @@ -229,6 +219,28 @@ static bool kvm_arm_pauth_supported(void) kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); } + +static uint64_t idregs_sysreg_to_kvm_reg(ARMSysRegs sysreg) +{ + return ARM64_SYS_REG((sysreg & CP_REG_ARM64_SYSREG_OP0_MASK) >> CP_REG_ARM64_SYSREG_OP0_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP1_MASK) >> CP_REG_ARM64_SYSREG_OP1_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRN_MASK) >> CP_REG_ARM64_SYSREG_CRN_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRM_MASK) >> CP_REG_ARM64_SYSREG_CRM_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP2_MASK) >> CP_REG_ARM64_SYSREG_OP2_SHIFT); +} + +/* read a sysreg value and store it in the idregs */ +static int get_host_cpu_reg(int fd, ARMHostCPUFeatures *ahcf, ARMIDRegisterIdx index) +{ + uint64_t *reg; + int ret; + + reg = &ahcf->isar.idregs[index]; + ret = read_sys_reg64(fd, reg, + idregs_sysreg_to_kvm_reg(id_register_sysreg[index])); + return ret; +} + static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { /* Identify the feature bits corresponding to the host CPU, and @@ -242,17 +254,6 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) uint64_t features = 0; int err; - /* Old kernels may not know about the PREFERRED_TARGET ioctl: however - * we know these will only support creating one kind of guest CPU, - * which is its preferred CPU type. Fortunately these old kernels - * support only a very limited number of CPUs. 
- */ - static const uint32_t cpus_to_try[] = { - KVM_ARM_TARGET_AEM_V8, - KVM_ARM_TARGET_FOUNDATION_V8, - KVM_ARM_TARGET_CORTEX_A57, - QEMU_KVM_ARM_TARGET_NONE - }; /* * target = -1 informs kvm_arm_create_scratch_host_vcpu() * to use the preferred target @@ -280,17 +281,18 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (kvm_arm_pmu_supported()) { init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; pmu_supported = true; + features |= 1ULL << ARM_FEATURE_PMU; } - if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { + if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) { return false; } ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; + int fd = fdarray[2]; - err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, - ARM64_SYS_REG(3, 0, 0, 4, 0)); + err = get_host_cpu_reg(fd, ahcf, ID_AA64PFR0_EL1_IDX); if (unlikely(err < 0)) { /* * Before v4.15, the kernel only exposed a limited number of system @@ -308,31 +310,20 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * ??? Either of these sounds like too much effort just * to work around running a modern host kernel. */ - ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ + SET_IDREG(&ahcf->isar, ID_AA64PFR0, 0x00000011); /* EL1&0, AArch64 only */ err = 0; } else { - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, - ARM64_SYS_REG(3, 0, 0, 4, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, - ARM64_SYS_REG(3, 0, 0, 4, 5)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, - ARM64_SYS_REG(3, 0, 0, 5, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, - ARM64_SYS_REG(3, 0, 0, 5, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, - ARM64_SYS_REG(3, 0, 0, 6, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, - ARM64_SYS_REG(3, 0, 0, 6, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, - ARM64_SYS_REG(3, 0, 0, 6, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, - ARM64_SYS_REG(3, 0, 0, 7, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, - ARM64_SYS_REG(3, 0, 0, 7, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, - ARM64_SYS_REG(3, 0, 0, 7, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3, - ARM64_SYS_REG(3, 0, 0, 7, 3)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64SMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR3_EL1_IDX); /* * Note that if AArch32 support is not present in the host, @@ -341,49 +332,31 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * than skipping the reads and leaving 0, as we must avoid * considering the values in every case. 
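To make the conversion pattern above concrete, each get_host_cpu_reg() call fetches the same KVM one-reg as the read_sys_reg64() it replaces, with the encoding coming from the id_register_sysreg[] table instead of being spelled out per call:

    /*
     * Worked example of the mapping done by idregs_sysreg_to_kvm_reg():
     * ID_AA64PFR0_EL1 encodes as op0=3, op1=0, crn=0, crm=4, op2=0, so
     *
     *     get_host_cpu_reg(fd, ahcf, ID_AA64PFR0_EL1_IDX);
     *
     * reads the same register as the old explicit form
     *
     *     read_sys_reg64(fd, &ahcf->isar.idregs[ID_AA64PFR0_EL1_IDX],
     *                    ARM64_SYS_REG(3, 0, 0, 4, 0));
     */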
*/ - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, - ARM64_SYS_REG(3, 0, 0, 1, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, - ARM64_SYS_REG(3, 0, 0, 1, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, - ARM64_SYS_REG(3, 0, 0, 1, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, - ARM64_SYS_REG(3, 0, 0, 1, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, - ARM64_SYS_REG(3, 0, 0, 1, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, - ARM64_SYS_REG(3, 0, 0, 1, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, - ARM64_SYS_REG(3, 0, 0, 1, 7)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, - ARM64_SYS_REG(3, 0, 0, 2, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, - ARM64_SYS_REG(3, 0, 0, 2, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, - ARM64_SYS_REG(3, 0, 0, 2, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, - ARM64_SYS_REG(3, 0, 0, 2, 3)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, - ARM64_SYS_REG(3, 0, 0, 2, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, - ARM64_SYS_REG(3, 0, 0, 2, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, - ARM64_SYS_REG(3, 0, 0, 2, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, - ARM64_SYS_REG(3, 0, 0, 2, 7)); - - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, + err |= get_host_cpu_reg(fd, ahcf, ID_PFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR4_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR5_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR6_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR4_EL1_IDX); + + err |= read_sys_reg32(fd, &ahcf->isar.mvfr0, ARM64_SYS_REG(3, 0, 0, 3, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr1, ARM64_SYS_REG(3, 0, 0, 3, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr2, ARM64_SYS_REG(3, 0, 0, 3, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, - ARM64_SYS_REG(3, 0, 0, 3, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, - ARM64_SYS_REG(3, 0, 0, 3, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, - ARM64_SYS_REG(3, 0, 0, 3, 6)); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR5_EL1_IDX); /* * DBGDIDR is a bit complicated because the kernel doesn't @@ -395,14 +368,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. * We only do this if the CPU supports AArch32 at EL1. 
*/ - if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { - int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); - int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + if (FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL1) >= 2) { + int wrps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, WRPS); + int brps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, BRPS); int ctx_cmps = - FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); + FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, CTX_CMPS); int version = 6; /* ARMv8 debug architecture */ bool has_el3 = - !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); + !!FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL3); uint32_t dbgdidr = 0; dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); @@ -417,7 +390,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (pmu_supported) { /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, + err |= read_sys_reg64(fd, &ahcf->isar.reset_pmcr_el0, ARM64_SYS_REG(3, 3, 9, 12, 0)); } @@ -429,8 +402,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * enabled SVE support, which resulted in an error rather than RAZ. * So only read the register if we set KVM_ARM_VCPU_SVE above. */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, - ARM64_SYS_REG(3, 0, 0, 4, 4)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ZFR0_EL1_IDX); } } @@ -448,7 +420,6 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) features |= 1ULL << ARM_FEATURE_V8; features |= 1ULL << ARM_FEATURE_NEON; features |= 1ULL << ARM_FEATURE_AARCH64; - features |= 1ULL << ARM_FEATURE_PMU; features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; ahcf->features = features; @@ -675,19 +646,11 @@ static void kvm_arm_set_device_addr(KVMDevice *kd) { struct kvm_device_attr *attr = &kd->kdattr; int ret; + uint64_t addr = kd->kda.addr; - /* If the device control API is available and we have a device fd on the - * KVMDevice struct, let's use the newer API - */ - if (kd->dev_fd >= 0) { - uint64_t addr = kd->kda.addr; - - addr |= kd->kda_addr_ormask; - attr->addr = (uintptr_t)&addr; - ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); - } else { - ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); - } + addr |= kd->kda_addr_ormask; + attr->addr = (uintptr_t)&addr; + ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); if (ret < 0) { fprintf(stderr, "Failed to set device address: %s\n", @@ -968,13 +931,24 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu) } } -void kvm_arm_cpu_post_load(ARMCPU *cpu) +bool kvm_arm_cpu_post_load(ARMCPU *cpu) { + if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) { + return false; + } + /* Note that it's OK for the TCG side not to know about + * every register in the list; KVM is authoritative if + * we're using it. 
+ */ + write_list_to_cpustate(cpu); + /* KVM virtual time adjustment */ if (cpu->kvm_adjvtime) { cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); cpu->kvm_vtime_dirty = true; } + + return true; } void kvm_arm_reset_vcpu(ARMCPU *cpu) @@ -1793,6 +1767,11 @@ bool kvm_arm_sve_supported(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); } +bool kvm_arm_mte_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE); +} + QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) @@ -1821,7 +1800,7 @@ uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) probed = true; - if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) { + if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) { error_report("failed to create scratch VCPU with SVE enabled"); abort(); } @@ -1860,6 +1839,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; @@ -1868,8 +1852,7 @@ int kvm_arch_init_vcpu(CPUState *cs) CPUARMState *env = &cpu->env; uint64_t psciver; - if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || - !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { + if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) { error_report("KVM is not supported for this guest CPU type"); return -EINVAL; } @@ -1888,13 +1871,8 @@ int kvm_arch_init_vcpu(CPUState *cs) if (!arm_feature(env, ARM_FEATURE_AARCH64)) { cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; } - if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { - cpu->has_pmu = false; - } if (cpu->has_pmu) { cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; - } else { - env->features &= ~(1ULL << ARM_FEATURE_PMU); } if (cpu_isar_feature(aa64_sve, cpu)) { assert(kvm_arm_sve_supported()); @@ -2047,7 +2025,7 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level) +int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) { uint64_t val; uint32_t fpr; @@ -2231,7 +2209,7 @@ static int kvm_arch_get_sve(CPUState *cs) return 0; } -int kvm_arch_get_registers(CPUState *cs) +int kvm_arch_get_registers(CPUState *cs, Error **errp) { uint64_t val; unsigned int el; @@ -2378,7 +2356,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) */ if (code == BUS_MCEERR_AR) { kvm_cpu_synchronize_state(c); - if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { kvm_inject_arm_sea(c); } else { error_report("failed to record the error"); @@ -2422,3 +2400,69 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) } return 0; } + +void kvm_arm_enable_mte(Object *cpuobj, Error **errp) +{ + static bool tried_to_enable; + static bool succeeded_to_enable; + Error *mte_migration_blocker = NULL; + ARMCPU *cpu = ARM_CPU(cpuobj); + int ret; + + if (!tried_to_enable) { + /* + * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make + * sense), and we only want a single migration blocker as well. 
+ */ + tried_to_enable = true; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0); + if (ret) { + error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE"); + return; + } + + /* TODO: Add migration support with MTE enabled */ + error_setg(&mte_migration_blocker, + "Live migration disabled due to MTE enabled"); + if (migrate_add_blocker(&mte_migration_blocker, errp)) { + error_free(mte_migration_blocker); + return; + } + + succeeded_to_enable = true; + } + + if (succeeded_to_enable) { + cpu->kvm_mte = true; + } +} + +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level) +{ + ARMCPU *cpu = arm_cpu; + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + uint32_t linestate_bit; + int irq_id; + + switch (irq) { + case ARM_CPU_IRQ: + irq_id = KVM_ARM_IRQ_CPU_IRQ; + linestate_bit = CPU_INTERRUPT_HARD; + break; + case ARM_CPU_FIQ: + irq_id = KVM_ARM_IRQ_CPU_FIQ; + linestate_bit = CPU_INTERRUPT_FIQ; + break; + default: + g_assert_not_reached(); + } + + if (level) { + env->irq_line_state |= linestate_bit; + } else { + env->irq_line_state &= ~linestate_bit; + } + kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level); +} diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index cfaa0d9..7dc83ca 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -11,7 +11,8 @@ #ifndef QEMU_KVM_ARM_H #define QEMU_KVM_ARM_H -#include "sysemu/kvm.h" +#include "system/kvm.h" +#include "target/arm/cpu-qom.h" #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) @@ -22,17 +23,15 @@ * @devid: the KVM device ID * @group: device control API group for setting addresses * @attr: device control API address type - * @dev_fd: device control device file descriptor (or -1 if not supported) + * @dev_fd: device control device file descriptor * @addr_ormask: value to be OR'ed with resolved address * - * Remember the memory region @mr, and when it is mapped by the - * machine model, tell the kernel that base address using the - * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API. @devid - * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or - * the arm-vgic device in the device control API. - * The machine model may map - * and unmap the device multiple times; the kernel will only be told the final - * address at the point where machine init is complete. + * Remember the memory region @mr, and when it is mapped by the machine + * model, tell the kernel that base address using the device control API. + * @devid should be the ID of the device as defined by the arm-vgic device + * in the device control API. The machine model may map and unmap the device + * multiple times; the kernel will only be told the final address at the + * point where machine init is complete. */ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, uint64_t attr, int dev_fd, uint64_t addr_ormask); @@ -85,8 +84,10 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu); * @cpu: ARMCPU * * Called from cpu_post_load() to update KVM CPU state from the cpreg list. + * + * Returns: true on success, or false if write_list_to_kvmstate failed. 
*/ -void kvm_arm_cpu_post_load(ARMCPU *cpu); +bool kvm_arm_cpu_post_load(ARMCPU *cpu); /** * kvm_arm_reset_vcpu: @@ -96,13 +97,9 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); -#ifdef CONFIG_KVM +struct kvm_vcpu_init; /** * kvm_arm_create_scratch_host_vcpu: - * @cpus_to_try: array of QEMU_KVM_ARM_TARGET_* values (terminated with - * QEMU_KVM_ARM_TARGET_NONE) to try as fallback if the kernel does not - * know the PREFERRED_TARGET ioctl. Passing NULL is the same as passing - * an empty array. * @fdarray: filled in with kvmfd, vmfd, cpufd file descriptors in that order * @init: filled in with the necessary values for creating a host * vcpu. If NULL is provided, will not init the vCPU (though the cpufd @@ -115,8 +112,7 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu); * Returns: true on success (and fdarray and init are filled in), * false on failure (and fdarray and init are not valid). */ -bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, - int *fdarray, +bool kvm_arm_create_scratch_host_vcpu(int *fdarray, struct kvm_vcpu_init *init); /** @@ -189,6 +185,13 @@ bool kvm_arm_pmu_supported(void); bool kvm_arm_sve_supported(void); /** + * kvm_arm_mte_supported: + * + * Returns: true if KVM can enable MTE, and false otherwise. + */ +bool kvm_arm_mte_supported(void); + +/** * kvm_arm_get_max_vm_ipa_size: * @ms: Machine state handle * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case @@ -214,75 +217,8 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa); int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); -#else - -/* - * It's safe to call these functions without KVM support. - * They should either do nothing or return "not supported". - */ -static inline bool kvm_arm_aarch32_supported(void) -{ - return false; -} - -static inline bool kvm_arm_pmu_supported(void) -{ - return false; -} - -static inline bool kvm_arm_sve_supported(void) -{ - return false; -} - -/* - * These functions should never actually be called without KVM support. 
- */ -static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -static inline void kvm_arm_add_vcpu_properties(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) -{ - g_assert_not_reached(); -} - -static inline int kvm_arm_vgic_probe(void) -{ - g_assert_not_reached(); -} - -static inline void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) -{ - g_assert_not_reached(); -} +void kvm_arm_enable_mte(Object *cpuobj, Error **errp); -static inline void kvm_arm_pmu_init(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -static inline void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) -{ - g_assert_not_reached(); -} - -static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) -{ - g_assert_not_reached(); -} - -static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) -{ - g_assert_not_reached(); -} - -#endif +void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level); #endif diff --git a/target/arm/machine.c b/target/arm/machine.c index 0a722ca..e442d48 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -1,12 +1,13 @@ #include "qemu/osdep.h" #include "cpu.h" #include "qemu/error-report.h" -#include "sysemu/kvm.h" -#include "sysemu/tcg.h" +#include "system/kvm.h" +#include "system/tcg.h" #include "kvm_arm.h" #include "internals.h" #include "cpu-features.h" -#include "migration/cpu.h" +#include "migration/qemu-file-types.h" +#include "migration/vmstate.h" #include "target/arm/gtimer.h" static bool vfp_needed(void *opaque) @@ -18,6 +19,35 @@ static bool vfp_needed(void *opaque) : cpu_isar_feature(aa32_vfp_simd, cpu)); } +static bool vfp_fpcr_fpsr_needed(void *opaque) +{ + /* + * If either the FPCR or the FPSR include set bits that are not + * visible in the AArch32 FPSCR view of floating point control/status + * then we must send the FPCR and FPSR as two separate fields in the + * cpu/vfp/fpcr_fpsr subsection, and we will send a 0 for the old + * FPSCR field in cpu/vfp. + * + * If all the set bits are representable in an AArch32 FPSCR then we + * send that value as the cpu/vfp FPSCR field, and don't send the + * cpu/vfp/fpcr_fpsr subsection. + * + * On incoming migration, if the cpu/vfp FPSCR field is non-zero we + * use it, and if the fpcr_fpsr subsection is present we use that. + * (The subsection will never be present with a non-zero FPSCR field, + * and if FPSCR is zero and the subsection is not present that means + * that FPSCR/FPSR/FPCR are zero.) + * + * This preserves migration compatibility with older QEMU versions, + * in both directions. + */ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return (vfp_get_fpcr(env) & ~FPSCR_FPCR_MASK) || + (vfp_get_fpsr(env) & ~FPSCR_FPSR_MASK); +} + static int get_fpscr(QEMUFile *f, void *opaque, size_t size, const VMStateField *field) { @@ -25,7 +55,10 @@ static int get_fpscr(QEMUFile *f, void *opaque, size_t size, CPUARMState *env = &cpu->env; uint32_t val = qemu_get_be32(f); - vfp_set_fpscr(env, val); + if (val) { + /* 0 means we might have the data in the fpcr_fpsr subsection */ + vfp_set_fpscr(env, val); + } return 0; } @@ -34,8 +67,9 @@ static int put_fpscr(QEMUFile *f, void *opaque, size_t size, { ARMCPU *cpu = opaque; CPUARMState *env = &cpu->env; + uint32_t fpscr = vfp_fpcr_fpsr_needed(opaque) ? 
0 : vfp_get_fpscr(env); - qemu_put_be32(f, vfp_get_fpscr(env)); + qemu_put_be32(f, fpscr); return 0; } @@ -45,6 +79,86 @@ static const VMStateInfo vmstate_fpscr = { .put = put_fpscr, }; +static int get_fpcr(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val = qemu_get_be64(f); + + vfp_set_fpcr(env, val); + return 0; +} + +static int put_fpcr(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + qemu_put_be64(f, vfp_get_fpcr(env)); + return 0; +} + +static const VMStateInfo vmstate_fpcr = { + .name = "fpcr", + .get = get_fpcr, + .put = put_fpcr, +}; + +static int get_fpsr(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val = qemu_get_be64(f); + + vfp_set_fpsr(env, val); + return 0; +} + +static int put_fpsr(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + qemu_put_be64(f, vfp_get_fpsr(env)); + return 0; +} + +static const VMStateInfo vmstate_fpsr = { + .name = "fpsr", + .get = get_fpsr, + .put = put_fpsr, +}; + +static const VMStateDescription vmstate_vfp_fpcr_fpsr = { + .name = "cpu/vfp/fpcr_fpsr", + .version_id = 1, + .minimum_version_id = 1, + .needed = vfp_fpcr_fpsr_needed, + .fields = (const VMStateField[]) { + { + .name = "fpcr", + .version_id = 0, + .size = sizeof(uint64_t), + .info = &vmstate_fpcr, + .flags = VMS_SINGLE, + .offset = 0, + }, + { + .name = "fpsr", + .version_id = 0, + .size = sizeof(uint64_t), + .info = &vmstate_fpsr, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_vfp = { .name = "cpu/vfp", .version_id = 3, @@ -100,6 +214,10 @@ static const VMStateDescription vmstate_vfp = { .offset = 0, }, VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_vfp_fpcr_fpsr, + NULL } }; @@ -123,7 +241,6 @@ static const VMStateDescription vmstate_iwmmxt = { } }; -#ifdef TARGET_AARCH64 /* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build, * and ARMPredicateReg is actively empty. This triggers errors * in the expansion of the VMSTATE macros. @@ -203,7 +320,6 @@ static const VMStateDescription vmstate_za = { VMSTATE_END_OF_LIST() } }; -#endif /* AARCH64 */ static bool serror_needed(void *opaque) { @@ -785,6 +901,20 @@ static int cpu_pre_load(void *opaque) CPUARMState *env = &cpu->env; /* + * In an inbound migration where on the source FPSCR/FPSR/FPCR are 0, + * there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr() + * and vfp_set_fpsr() from get_fpcr() and get_fpsr(); also the get_fpscr() + * function will not call vfp_set_fpscr() because it will see a 0 in the + * inbound data. Ensure that in this case we have a correctly set up + * zero FPSCR/FPCR/FPSR. + * + * This is not strictly needed because FPSCR is zero out of reset, but + * it avoids the possibility of future confusing migration bugs if some + * future architecture change makes the reset value non-zero. + */ + vfp_set_fpscr(env, 0); + + /* * Pre-initialize irq_line_state to a value that's never valid as * real data, so cpu_post_load() can tell whether we've seen the * irq-line-state subsection in the incoming migration state. 
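The cpu/vfp/fpcr_fpsr subsection introduced above is only emitted when FPCR or FPSR contain set bits that the AArch32 FPSCR view cannot represent; otherwise the legacy 32-bit FPSCR field in cpu/vfp carries the whole state and older QEMU versions can still consume the stream. A minimal standalone sketch of that decision follows; the SKETCH_* mask values are assumptions standing in for QEMU's FPSCR_FPCR_MASK/FPSCR_FPSR_MASK (i.e. the architecturally AArch32-visible FPCR and FPSR bits), not the upstream definitions.

/*
 * Sketch: decide whether the separate fpcr_fpsr subsection is needed,
 * mirroring vfp_fpcr_fpsr_needed() and put_fpscr() above.
 */
#include <stdbool.h>
#include <stdint.h>

/* Assumed values: the FPCR/FPSR bits visible in the AArch32 FPSCR view. */
#define SKETCH_FPSCR_FPCR_MASK 0x07ff9f00u  /* AHP/DN/FZ, RMode, Stride, FZ16, Len, trap enables */
#define SKETCH_FPSCR_FPSR_MASK 0xf800009fu  /* NZCV, QC, IDC, cumulative exception flags */

static bool fpcr_fpsr_subsection_needed(uint64_t fpcr, uint64_t fpsr)
{
    /* Any set bit outside the FPSCR view forces the 64-bit fields. */
    return (fpcr & ~(uint64_t)SKETCH_FPSCR_FPCR_MASK) ||
           (fpsr & ~(uint64_t)SKETCH_FPSCR_FPSR_MASK);
}

static uint32_t legacy_fpscr_field(uint64_t fpcr, uint64_t fpsr)
{
    /* cpu/vfp sends 0 here when the subsection carries the real data. */
    if (fpcr_fpsr_subsection_needed(fpcr, fpsr)) {
        return 0;
    }
    return (uint32_t)((fpcr & SKETCH_FPSCR_FPCR_MASK) |
                      (fpsr & SKETCH_FPSCR_FPSR_MASK));
}

On the inbound side the same asymmetry applies: a zero FPSCR field with no fpcr_fpsr subsection simply means all three registers are zero, which is why cpu_pre_load() above clears FPSCR up front before any of the getters run.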
@@ -846,15 +976,9 @@ static int cpu_post_load(void *opaque, int version_id) } if (kvm_enabled()) { - if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) { + if (!kvm_arm_cpu_post_load(cpu)) { return -1; } - /* Note that it's OK for the TCG side not to know about - * every register in the list; KVM is authoritative if - * we're using it. - */ - write_list_to_cpustate(cpu); - kvm_arm_cpu_post_load(cpu); } else { if (!write_list_to_cpustate(cpu)) { return -1; @@ -970,10 +1094,8 @@ const VMStateDescription vmstate_arm_cpu = { &vmstate_pmsav7, &vmstate_pmsav8, &vmstate_m_security, -#ifdef TARGET_AARCH64 &vmstate_sve, &vmstate_za, -#endif &vmstate_serror, &vmstate_irq_line_state, &vmstate_wfxt_timer, diff --git a/target/arm/meson.build b/target/arm/meson.build index 2e10464..7aa81e3 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -1,41 +1,58 @@ arm_ss = ss.source_set() +arm_common_ss = ss.source_set() arm_ss.add(files( - 'cpu.c', - 'debug_helper.c', 'gdbstub.c', - 'helper.c', - 'vfp_helper.c', )) -arm_ss.add(zlib) - -arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'), if_false: files('kvm-stub.c')) -arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', - 'gdbstub64.c', -)) + 'gdbstub64.c')) arm_system_ss = ss.source_set() +arm_common_system_ss = ss.source_set() arm_system_ss.add(files( + 'arm-qmp-cmds.c', +)) +arm_system_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c')) +arm_system_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) + +arm_user_ss = ss.source_set() +arm_user_ss.add(files('cpu.c')) +arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files( + 'cpu32-stubs.c', +)) +arm_user_ss.add(files( + 'debug_helper.c', + 'helper.c', + 'vfp_fpscr.c', +)) + +arm_common_system_ss.add(files('cpu.c')) +arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( + 'cpu32-stubs.c')) +arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) +arm_common_system_ss.add(files( 'arch_dump.c', 'arm-powerctl.c', - 'arm-qmp-cmds.c', 'cortex-regs.c', + 'debug_helper.c', + 'helper.c', 'machine.c', 'ptw.c', + 'vfp_fpscr.c', )) -arm_user_ss = ss.source_set() - subdir('hvf') if 'CONFIG_TCG' in config_all_accel subdir('tcg') else - arm_ss.add(files('tcg-stubs.c')) + arm_common_system_ss.add(files('tcg-stubs.c')) endif target_arch += {'arm': arm_ss} target_system_arch += {'arm': arm_system_ss} target_user_arch += {'arm': arm_user_ss} +target_common_arch += {'arm': arm_common_ss} +target_common_system_arch += {'arm': arm_common_system_ss} diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 4476b32..561bf26 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -10,15 +10,14 @@ #include "qemu/log.h" #include "qemu/range.h" #include "qemu/main-loop.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" +#include "accel/tcg/probe.h" #include "cpu.h" #include "internals.h" #include "cpu-features.h" #include "idau.h" -#ifdef CONFIG_TCG -# include "tcg/oversized-guest.h" -#endif typedef struct S1Translate { /* @@ -74,17 +73,21 @@ typedef struct S1Translate { } S1Translate; static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, + vaddr address, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi); static bool 
get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, + vaddr address, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi); +static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, + int user_rw, int prot_rw, int xn, int pxn, + ARMSecuritySpace in_pa, ARMSecuritySpace out_pa); + /* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ static const uint8_t pamax_map[] = { [0] = 32, @@ -96,6 +99,21 @@ static const uint8_t pamax_map[] = { [6] = 52, }; +uint8_t round_down_to_parange_index(uint8_t bit_size) +{ + for (int i = ARRAY_SIZE(pamax_map) - 1; i >= 0; i--) { + if (pamax_map[i] <= bit_size) { + return i; + } + } + g_assert_not_reached(); +} + +uint8_t round_down_to_parange_bit_size(uint8_t bit_size) +{ + return pamax_map[round_down_to_parange_index(bit_size)]; +} + /* * The cpu-specific constant value of PAMax; also used by hw/arm/virt. * Note that machvirt_init calls this on a CPU that is inited but not realized! @@ -104,7 +122,7 @@ unsigned int arm_pamax(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { unsigned int parange = - FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); /* * id_aa64mmfr0 is a read-only register so values outside of the @@ -265,6 +283,8 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, case ARMMMUIdx_E20_2_PAN: case ARMMMUIdx_E2: case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: break; case ARMMMUIdx_Phys_S: @@ -312,7 +332,7 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, * physical address size is invalid. */ pps = FIELD_EX64(gpccr, GPCCR, PPS); - if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) { + if (pps > FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE)) { goto fault_walk; } pps = pamax_map[pps]; @@ -564,7 +584,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, }; GetPhysAddrResult s2 = { }; - if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, &s2, fi)) { + if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, 0, &s2, fi)) { goto fail; } @@ -717,7 +737,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, uint64_t new_val, S1Translate *ptw, ARMMMUFaultInfo *fi) { -#if defined(TARGET_AARCH64) && defined(CONFIG_TCG) +#if defined(CONFIG_ATOMIC64) && defined(CONFIG_TCG) uint64_t cur_val; void *host = ptw->out_host; @@ -819,7 +839,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, ptw->out_rw = true; } -#ifdef CONFIG_ATOMIC64 if (ptw->out_be) { old_val = cpu_to_be64(old_val); new_val = cpu_to_be64(new_val); @@ -831,36 +850,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); cur_val = le64_to_cpu(cur_val); } -#else - /* - * We can't support the full 64-bit atomic cmpxchg on the host. - * Because this is only used for FEAT_HAFDBS, which is only for AA64, - * we know that TCG_OVERSIZED_GUEST is set, which means that we are - * running in round-robin mode and could only race with dma i/o. 
- */ -#if !TCG_OVERSIZED_GUEST -# error "Unexpected configuration" -#endif - bool locked = bql_locked(); - if (!locked) { - bql_lock(); - } - if (ptw->out_be) { - cur_val = ldq_be_p(host); - if (cur_val == old_val) { - stq_be_p(host, new_val); - } - } else { - cur_val = ldq_le_p(host); - if (cur_val == old_val) { - stq_le_p(host, new_val); - } - } - if (!locked) { - bql_unlock(); - } -#endif - return cur_val; #else /* AArch32 does not have FEAT_HADFS; non-TCG guests only use debug-mode. */ @@ -1131,7 +1120,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, hwaddr phys_addr; uint32_t dacr; bool ns; - int user_prot; + ARMSecuritySpace out_space; /* Pagetable walk. */ /* Lookup l1 descriptor. */ @@ -1223,16 +1212,19 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, g_assert_not_reached(); } } + out_space = ptw->in_space; + if (ns) { + /* + * The NS bit will (as required by the architecture) have no effect if + * the CPU doesn't support TZ or this is a non-secure translation + * regime, because the output space will already be non-secure. + */ + out_space = ARMSS_NonSecure; + } if (domain_prot == 3) { result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; } else { - if (pxn && !regime_is_user(env, mmu_idx)) { - xn = 1; - } - if (xn && access_type == MMU_INST_FETCH) { - fi->type = ARMFault_Permission; - goto do_fault; - } + int user_rw, prot_rw; if (arm_feature(env, ARM_FEATURE_V6K) && (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { @@ -1242,37 +1234,23 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, fi->type = ARMFault_AccessFlag; goto do_fault; } - result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); - user_prot = simple_ap_to_rw_prot_is_user(ap >> 1, 1); + prot_rw = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); + user_rw = simple_ap_to_rw_prot_is_user(ap >> 1, 1); } else { - result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); - user_prot = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1); - } - if (result->f.prot && !xn) { - result->f.prot |= PAGE_EXEC; + prot_rw = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + user_rw = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1); } + + result->f.prot = get_S1prot(env, mmu_idx, false, user_rw, prot_rw, + xn, pxn, result->f.attrs.space, out_space); if (!(result->f.prot & (1 << access_type))) { /* Access permission fault. */ fi->type = ARMFault_Permission; goto do_fault; } - if (regime_is_pan(env, mmu_idx) && - !regime_is_user(env, mmu_idx) && - user_prot && - access_type != MMU_INST_FETCH) { - /* Privileged Access Never fault */ - fi->type = ARMFault_Permission; - goto do_fault; - } - } - if (ns) { - /* The NS bit will (as required by the architecture) have no effect if - * the CPU doesn't support TZ or this is a non-secure translation - * regime, because the attribute will already be non-secure. 
- */ - result->f.attrs.secure = false; - result->f.attrs.space = ARMSS_NonSecure; } + result->f.attrs.space = out_space; + result->f.attrs.secure = arm_space_is_secure(out_space); result->f.phys_addr = phys_addr; return false; do_fault: @@ -1340,25 +1318,24 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) * @env: CPUARMState * @mmu_idx: MMU index indicating required translation regime * @is_aa64: TRUE if AArch64 - * @ap: The 2-bit simple AP (AP[2:1]) + * @user_rw: Translated AP for user access + * @prot_rw: Translated AP for privileged access * @xn: XN (execute-never) bit * @pxn: PXN (privileged execute-never) bit * @in_pa: The original input pa space * @out_pa: The output pa space, modified by NSTable, NS, and NSE */ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, - int ap, int xn, int pxn, + int user_rw, int prot_rw, int xn, int pxn, ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) { ARMCPU *cpu = env_archcpu(env); bool is_user = regime_is_user(env, mmu_idx); - int prot_rw, user_rw; bool have_wxn; int wxn = 0; assert(!regime_is_stage2(mmu_idx)); - user_rw = simple_ap_to_rw_prot_is_user(ap, true); if (is_user) { prot_rw = user_rw; } else { @@ -1376,8 +1353,6 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, regime_is_pan(env, mmu_idx) && (regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) { prot_rw = 0; - } else { - prot_rw = simple_ap_to_rw_prot_is_user(ap, false); } } @@ -1669,12 +1644,13 @@ static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw) * @ptw: Current and next stage parameters for the walk. * @address: virtual address to get physical address for * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @memop: memory operation feeding this access, or 0 for none * @result: set on translation success, * @fi: set to fault info if the translation fails */ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint64_t address, - MMUAccessType access_type, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { ARMCPU *cpu = env_archcpu(env); @@ -1684,7 +1660,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint64_t ttbr; hwaddr descaddr, indexmask, indexmask_grainsize; uint32_t tableattrs; - target_ulong page_size; + uint64_t page_size; uint64_t attrs; int32_t stride; int addrsize, inputsize, outputsize; @@ -1727,7 +1703,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * ID_AA64MMFR0 is a read-only register so values outside of the * supported mappings can be considered an implementation error. */ - ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + ps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); ps = MIN(ps, param.ps); assert(ps < ARRAY_SIZE(pamax_map)); outputsize = pamax_map[ps]; @@ -1757,7 +1733,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * validation to do here. 
*/ if (inputsize < addrsize) { - target_ulong top_bits = sextract64(address, inputsize, + uint64_t top_bits = sextract64(address, inputsize, addrsize - inputsize); if (-top_bits != param.select) { /* The gap between the two regions is a Translation fault */ @@ -2013,8 +1989,21 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, xn = extract64(attrs, 53, 2); result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); } + + result->cacheattrs.is_s2_format = true; + result->cacheattrs.attrs = extract32(attrs, 2, 4); + /* + * Security state does not really affect HCR_EL2.FWB; + * we only need to filter FWB for aa32 or other FEAT. + */ + device = S2_attrs_are_device(arm_hcr_el2_eff(env), + result->cacheattrs.attrs); } else { int nse, ns = extract32(attrs, 5, 1); + uint8_t attrindx; + uint64_t mair; + int user_rw, prot_rw; + switch (out_space) { case ARMSS_Root: /* @@ -2080,12 +2069,58 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, xn = 0; ap &= ~1; } + + user_rw = simple_ap_to_rw_prot_is_user(ap, true); + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); /* * Note that we modified ptw->in_space earlier for NSTable, but * result->f.attrs retains a copy of the original security space. */ - result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, xn, pxn, - result->f.attrs.space, out_space); + result->f.prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw, + xn, pxn, result->f.attrs.space, out_space); + + /* Index into MAIR registers for cache attributes */ + attrindx = extract32(attrs, 2, 3); + mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + result->cacheattrs.is_s2_format = false; + result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); + + /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */ + if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) { + result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */ + } + device = S1_attrs_are_device(result->cacheattrs.attrs); + } + + /* + * Enable alignment checks on Device memory. + * + * Per R_XCHFJ, the correct ordering for alignment, permission, + * and stage 2 faults is: + * - Alignment fault caused by the memory type + * - Permission fault + * - A stage 2 fault on the memory access + * Perform the alignment check now, so that we recognize it in + * the correct order. Set TLB_CHECK_ALIGNED so that any subsequent + * softmmu tlb hit will also check the alignment; clear along the + * non-device path so that tlb_fill_flags is consistent in the + * event of restart_atomic_update. + * + * In v7, for a CPU without the Virtualization Extensions this + * access is UNPREDICTABLE; we choose to make it take the alignment + * fault as is required for a v7VE CPU. (QEMU doesn't emulate any + * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.) 
+ */ + if (device) { + unsigned a_bits = memop_atomicity_bits(memop); + if (address & ((1 << a_bits) - 1)) { + fi->type = ARMFault_Alignment; + goto do_fault; + } + result->f.tlb_fill_flags = TLB_CHECK_ALIGNED; + } else { + result->f.tlb_fill_flags = 0; } if (!(result->f.prot & (1 << access_type))) { @@ -2115,51 +2150,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.attrs.space = out_space; result->f.attrs.secure = arm_space_is_secure(out_space); - if (regime_is_stage2(mmu_idx)) { - result->cacheattrs.is_s2_format = true; - result->cacheattrs.attrs = extract32(attrs, 2, 4); - /* - * Security state does not really affect HCR_EL2.FWB; - * we only need to filter FWB for aa32 or other FEAT. - */ - device = S2_attrs_are_device(arm_hcr_el2_eff(env), - result->cacheattrs.attrs); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 2, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - result->cacheattrs.is_s2_format = false; - result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); - - /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */ - if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) { - result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */ - } - device = S1_attrs_are_device(result->cacheattrs.attrs); - } - - /* - * Enable alignment checks on Device memory. - * - * Per R_XCHFJ, this check is mis-ordered. The correct ordering - * for alignment, permission, and stage 2 faults should be: - * - Alignment fault caused by the memory type - * - Permission fault - * - A stage 2 fault on the memory access - * but due to the way the TCG softmmu TLB operates, we will have - * implicitly done the permission check and the stage2 lookup in - * finding the TLB entry, so the alignment check cannot be done sooner. - * - * In v7, for a CPU without the Virtualization Extensions this - * access is UNPREDICTABLE; we choose to make it take the alignment - * fault as is required for a v7VE CPU. (QEMU doesn't emulate any - * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.) - */ - if (device) { - result->f.tlb_fill_flags |= TLB_CHECK_ALIGNED; - } - /* * For FEAT_LPA2 and effective DS, the SH field in the attributes * was re-purposed for output address bits. The SH attribute in @@ -3202,7 +3192,7 @@ static ARMCacheAttrs combine_cacheattrs(uint64_t hcr, */ static bool get_phys_addr_disabled(CPUARMState *env, S1Translate *ptw, - target_ulong address, + vaddr address, MMUAccessType access_type, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) @@ -3285,8 +3275,8 @@ static bool get_phys_addr_disabled(CPUARMState *env, } static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, + vaddr address, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { @@ -3298,7 +3288,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, ARMSecuritySpace ipa_space; uint64_t hcr; - ret = get_phys_addr_nogpc(env, ptw, address, access_type, result, fi); + ret = get_phys_addr_nogpc(env, ptw, address, access_type, + memop, result, fi); /* If S1 fails, return early. 
*/ if (ret) { @@ -3324,7 +3315,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); - ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, result, fi); + ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, + memop, result, fi); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ @@ -3390,8 +3382,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, } static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, + vaddr address, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { @@ -3454,7 +3446,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, if (arm_feature(env, ARM_FEATURE_EL2) && !regime_translation_disabled(env, ARMMMUIdx_Stage2, ptw->in_space)) { return get_phys_addr_twostage(env, ptw, address, access_type, - result, fi); + memop, result, fi); } /* fall through */ @@ -3517,7 +3509,8 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, ptw, address, access_type, result, fi); + return get_phys_addr_lpae(env, ptw, address, access_type, + memop, result, fi); } else if (arm_feature(env, ARM_FEATURE_V7) || regime_sctlr(env, mmu_idx) & SCTLR_XP) { return get_phys_addr_v6(env, ptw, address, access_type, result, fi); @@ -3527,12 +3520,13 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, } static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, - target_ulong address, - MMUAccessType access_type, + vaddr address, + MMUAccessType access_type, MemOp memop, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { - if (get_phys_addr_nogpc(env, ptw, address, access_type, result, fi)) { + if (get_phys_addr_nogpc(env, ptw, address, access_type, + memop, result, fi)) { return true; } if (!granule_protection_check(env, result->f.phys_addr, @@ -3543,8 +3537,8 @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, return false; } -bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, - MMUAccessType access_type, +bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, + MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, ARMSecuritySpace space, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) @@ -3553,16 +3547,13 @@ bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, .in_mmu_idx = mmu_idx, .in_space = space, }; - return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi); + return get_phys_addr_nogpc(env, &ptw, address, access_type, + memop, result, fi); } -bool get_phys_addr(CPUARMState *env, target_ulong address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +static ARMSecuritySpace +arm_mmu_idx_to_security_space(CPUARMState *env, ARMMMUIdx mmu_idx) { - S1Translate ptw = { - .in_mmu_idx = mmu_idx, - }; ARMSecuritySpace ss; switch (mmu_idx) { @@ -3604,6 +3595,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, ss = ARMSS_Secure; break; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: if (arm_feature(env, ARM_FEATURE_AARCH64) && cpu_isar_feature(aa64_rme, env_archcpu(env))) { ss = ARMSS_Root; @@ -3621,27 +3614,33 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, g_assert_not_reached(); } - ptw.in_space = ss; - return get_phys_addr_gpc(env, &ptw, address, 
access_type, result, fi); + return ss; } -hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, - MemTxAttrs *attrs) +bool get_phys_addr(CPUARMState *env, vaddr address, + MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - ARMMMUIdx mmu_idx = arm_mmu_idx(env); - ARMSecuritySpace ss = arm_security_space(env); S1Translate ptw = { .in_mmu_idx = mmu_idx, - .in_space = ss, + .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), + }; + + return get_phys_addr_gpc(env, &ptw, address, access_type, + memop, result, fi); +} + +static hwaddr arm_cpu_get_phys_page(CPUARMState *env, vaddr addr, + MemTxAttrs *attrs, ARMMMUIdx mmu_idx) +{ + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), .in_debug = true, }; GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - bool ret; - - ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi); + bool ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, 0, &res, &fi); *attrs = res.f.attrs; if (ret) { @@ -3649,3 +3648,33 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, } return res.f.phys_addr; } + +hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, + MemTxAttrs *attrs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + + hwaddr res = arm_cpu_get_phys_page(env, addr, attrs, mmu_idx); + + if (res != -1) { + return res; + } + + /* + * Memory may be accessible for an "unprivileged load/store" variant. + * In this case, get_a64_user_mem_index function generates an op using an + * unprivileged mmu idx, so we need to try with it. + */ + switch (mmu_idx) { + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E10_0); + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E20_0); + default: + return -1; + } +} diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c index 152b172..5e5166c 100644 --- a/target/arm/tcg-stubs.c +++ b/target/arm/tcg-stubs.c @@ -21,7 +21,30 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, { g_assert_not_reached(); } -/* Temporarily while cpu_get_tb_cpu_state() is still in common code */ -void assert_hflags_rebuild_correctly(CPUARMState *env) + +/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */ +void define_tlb_insn_regs(ARMCPU *cpu) +{ +} + +/* With KVM, we never use float_status, so these can be no-ops */ +void arm_set_default_fp_behaviours(float_status *s) +{ +} + +void arm_set_ah_fp_behaviours(float_status *s) +{ +} + +uint32_t vfp_get_fpsr_from_host(CPUARMState *env) +{ + return 0; +} + +void vfp_clear_float_status_exc_flags(CPUARMState *env) +{ +} + +void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) { } diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 2b7a325..8c798cd 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -21,28 +21,40 @@ %rd 0:5 %esz_sd 22:1 !function=plus_2 +%esz_hs 22:1 !function=plus_1 %esz_hsd 22:2 !function=xor_2 %hl 11:1 21:1 %hlm 11:1 20:2 &r rn +&rrr rd rn rm &ri rd imm +&rr rd rn +&rr_sf rd rn sf &rri_sf rd rn imm sf +&rrr_sf rd rn rm sf &i imm &rr_e rd rn esz +&rri_e rd rn imm esz &rrr_e rd rn rm esz &rrx_e rd rn rm idx esz &rrrr_e rd rn rm ra esz &qrr_e q rd rn esz +&qrri_e q rd rn 
imm esz &qrrr_e q rd rn rm esz &qrrx_e q rd rn rm idx esz &qrrrr_e q rd rn rm ra esz @rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 +@rr_s ........ ... ..... ...... rn:5 rd:5 &rr_e esz=2 @rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 +@rr_e ........ esz:2 . ..... ...... rn:5 rd:5 &rr_e @rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd +@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd +@rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0 @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 +@rrr_s ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=2 @rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3 @rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd @rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd @@ -54,13 +66,23 @@ @rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3 @rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0 +@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2 @r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0 @rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0 @rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3 @rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3 +@qrr_b . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=0 +@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1 +@qrr_s . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=2 +@qrr_bh . q:1 ...... . esz:1 ...... ...... rn:5 rd:5 &qrr_e +@qrr_hs . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_hs +@qrr_sd . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_sd +@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e + @qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0 @qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1 +@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2 @qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd @qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e @qr2r_e . q:1 ...... esz:2 . ..... ...... rm:5 rd:5 &qrrr_e rn=%rd @@ -154,7 +176,7 @@ UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32 EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1 EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0 -# Branches +### Branches %imm26 0:s26 !function=times_4 @branch . ..... .......................... &i imm=%imm26 @@ -238,6 +260,9 @@ WFIT 1101 0101 0000 0011 0001 0000 001 rd:5 CLREX 1101 0101 0000 0011 0011 ---- 010 11111 DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111 +# For the DSB nXS variant, types always equals MBReqTypes_All and we ignore the +# domain bits. +DSB_nXS 1101 0101 0000 0011 0011 -- 10 001 11111 ISB 1101 0101 0000 0011 0011 ---- 110 11111 SB 1101 0101 0000 0011 0011 0000 111 11111 @@ -284,7 +309,7 @@ HLT 1101 0100 010 ................ 000 00 @i16 # DCPS2 1101 0100 101 ................ 000 10 @i16 # DCPS3 1101 0100 101 ................ 000 11 @i16 -# Loads and stores +### Loads and stores &stxr rn rt rt2 rs sz lasr &stlr rn rt sz lasr @@ -519,7 +544,7 @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm &ldapr_stlr_i rn rt imm sz sign ext -@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i +@ldapr_stlr_i .. ...... .. . imm:s9 .. rn:5 rt:5 &ldapr_stlr_i STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 LDAPR_i 00 011001 10 0 ......... 00 ..... ..... 
@ldapr_stlr_i sign=1 ext=0 sz=0 @@ -642,6 +667,138 @@ CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy +### Data Processing (register) + +# Data Processing (2-source) + +@rrr . .......... rm:5 ...... rn:5 rd:5 &rrr +@rrr_sf sf:1 .......... rm:5 ...... rn:5 rd:5 &rrr_sf + +UDIV . 00 11010110 ..... 00001 0 ..... ..... @rrr_sf +SDIV . 00 11010110 ..... 00001 1 ..... ..... @rrr_sf +LSLV . 00 11010110 ..... 00100 0 ..... ..... @rrr_sf +LSRV . 00 11010110 ..... 00100 1 ..... ..... @rrr_sf +ASRV . 00 11010110 ..... 00101 0 ..... ..... @rrr_sf +RORV . 00 11010110 ..... 00101 1 ..... ..... @rrr_sf + +CRC32 0 00 11010110 ..... 0100 00 ..... ..... @rrr_b +CRC32 0 00 11010110 ..... 0100 01 ..... ..... @rrr_h +CRC32 0 00 11010110 ..... 0100 10 ..... ..... @rrr_s +CRC32 1 00 11010110 ..... 0100 11 ..... ..... @rrr_d + +CRC32C 0 00 11010110 ..... 0101 00 ..... ..... @rrr_b +CRC32C 0 00 11010110 ..... 0101 01 ..... ..... @rrr_h +CRC32C 0 00 11010110 ..... 0101 10 ..... ..... @rrr_s +CRC32C 1 00 11010110 ..... 0101 11 ..... ..... @rrr_d + +SUBP 1 00 11010110 ..... 000000 ..... ..... @rrr +SUBPS 1 01 11010110 ..... 000000 ..... ..... @rrr +IRG 1 00 11010110 ..... 000100 ..... ..... @rrr +GMI 1 00 11010110 ..... 000101 ..... ..... @rrr + +PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr + +# Data Processing (1-source) + +@rr . .......... ..... ...... rn:5 rd:5 &rr +@rr_sf sf:1 .......... ..... ...... rn:5 rd:5 &rr_sf + +RBIT . 10 11010110 00000 000000 ..... ..... @rr_sf +REV16 . 10 11010110 00000 000001 ..... ..... @rr_sf +REV32 . 10 11010110 00000 000010 ..... ..... @rr_sf +REV64 1 10 11010110 00000 000011 ..... ..... @rr + +CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf +CLS . 10 11010110 00000 000101 ..... ..... @rr_sf + +&pacaut rd rn z +@pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut + +PACIA 1 10 11010110 00001 00.000 ..... ..... @pacaut +PACIB 1 10 11010110 00001 00.001 ..... ..... @pacaut +PACDA 1 10 11010110 00001 00.010 ..... ..... @pacaut +PACDB 1 10 11010110 00001 00.011 ..... ..... @pacaut + +AUTIA 1 10 11010110 00001 00.100 ..... ..... @pacaut +AUTIB 1 10 11010110 00001 00.101 ..... ..... @pacaut +AUTDA 1 10 11010110 00001 00.110 ..... ..... @pacaut +AUTDB 1 10 11010110 00001 00.111 ..... ..... @pacaut + +XPACI 1 10 11010110 00001 010000 11111 rd:5 +XPACD 1 10 11010110 00001 010001 11111 rd:5 + +# Logical (shifted reg) + +&logic_shift rd rn rm sf sa st n +@logic_shift sf:1 .. ..... st:2 n:1 rm:5 sa:6 rn:5 rd:5 &logic_shift + +AND_r . 00 01010 .. . ..... ...... ..... ..... @logic_shift +ORR_r . 01 01010 .. . ..... ...... ..... ..... @logic_shift +EOR_r . 10 01010 .. . ..... ...... ..... ..... @logic_shift +ANDS_r . 11 01010 .. . ..... ...... ..... ..... @logic_shift + +# Add/subtract (shifted reg) + +&addsub_shift rd rn rm sf sa st +@addsub_shift sf:1 .. ..... st:2 . rm:5 sa:6 rn:5 rd:5 &addsub_shift + +ADD_r . 00 01011 .. 0 ..... ...... ..... ..... @addsub_shift +SUB_r . 10 01011 .. 0 ..... ...... ..... ..... @addsub_shift +ADDS_r . 01 01011 .. 0 ..... ...... ..... ..... @addsub_shift +SUBS_r . 11 01011 .. 0 ..... ...... ..... ..... @addsub_shift + +# Add/subtract (extended reg) + +&addsub_ext rd rn rm sf sa st +@addsub_ext sf:1 .. ........ rm:5 st:3 sa:3 rn:5 rd:5 &addsub_ext + +ADD_ext . 00 01011001 ..... ... ... ..... ..... @addsub_ext +SUB_ext . 10 01011001 ..... ... ... ..... ..... @addsub_ext +ADDS_ext . 01 01011001 ..... ... ... ..... ..... @addsub_ext +SUBS_ext . 
11 01011001 ..... ... ... ..... ..... @addsub_ext + +# Add/subtract (carry) + +ADC . 00 11010000 ..... 000000 ..... ..... @rrr_sf +ADCS . 01 11010000 ..... 000000 ..... ..... @rrr_sf +SBC . 10 11010000 ..... 000000 ..... ..... @rrr_sf +SBCS . 11 11010000 ..... 000000 ..... ..... @rrr_sf + +# Rotate right into flags + +RMIF 1 01 11010000 imm:6 00001 rn:5 0 mask:4 + +# Evaluate into flags + +SETF8 0 01 11010000 00000 000010 rn:5 01101 +SETF16 0 01 11010000 00000 010010 rn:5 01101 + +# Conditional compare + +CCMP sf:1 op:1 1 11010010 y:5 cond:4 imm:1 0 rn:5 0 nzcv:4 + +# Conditional select + +CSEL sf:1 else_inv:1 011010100 rm:5 cond:4 0 else_inc:1 rn:5 rd:5 + +# Data Processing (3-source) + +&rrrr rd rn rm ra +@rrrr . .. ........ rm:5 . ra:5 rn:5 rd:5 &rrrr + +MADD_w 0 00 11011000 ..... 0 ..... ..... ..... @rrrr +MSUB_w 0 00 11011000 ..... 1 ..... ..... ..... @rrrr +MADD_x 1 00 11011000 ..... 0 ..... ..... ..... @rrrr +MSUB_x 1 00 11011000 ..... 1 ..... ..... ..... @rrrr + +SMADDL 1 00 11011001 ..... 0 ..... ..... ..... @rrrr +SMSUBL 1 00 11011001 ..... 1 ..... ..... ..... @rrrr +UMADDL 1 00 11011101 ..... 0 ..... ..... ..... @rrrr +UMSUBL 1 00 11011101 ..... 1 ..... ..... ..... @rrrr + +SMULH 1 00 11011010 ..... 0 11111 ..... ..... @rrr +UMULH 1 00 11011110 ..... 0 11111 ..... ..... @rrr + ### Cryptographic AES AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0 @@ -781,6 +938,16 @@ CMEQ_s 0111 1110 111 ..... 10001 1 ..... ..... @rrr_d SQDMULH_s 0101 1110 ..1 ..... 10110 1 ..... ..... @rrr_e SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e +SQRDMLAH_s 0111 1110 ..0 ..... 10000 1 ..... ..... @rrr_e +SQRDMLSH_s 0111 1110 ..0 ..... 10001 1 ..... ..... @rrr_e + +# Decode scalar x scalar as scalar x indexed, with index 0. +SQDMULL_si 0101 1110 011 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=1 +SQDMULL_si 0101 1110 101 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=2 +SQDMLAL_si 0101 1110 011 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=1 +SQDMLAL_si 0101 1110 101 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=2 +SQDMLSL_si 0101 1110 011 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=1 +SQDMLSL_si 0101 1110 101 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=2 ### Advanced SIMD scalar pairwise @@ -941,6 +1108,59 @@ MLS_v 0.10 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e SQDMULH_v 0.00 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e SQRDMULH_v 0.10 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e +SQRDMLAH_v 0.10 1110 ..0 ..... 10000 1 ..... ..... @qrrr_e +SQRDMLSH_v 0.10 1110 ..0 ..... 10001 1 ..... ..... @qrrr_e + +SDOT_v 0.00 1110 100 ..... 10010 1 ..... ..... @qrrr_s +UDOT_v 0.10 1110 100 ..... 10010 1 ..... ..... @qrrr_s +USDOT_v 0.00 1110 100 ..... 10011 1 ..... ..... @qrrr_s +BFDOT_v 0.10 1110 010 ..... 11111 1 ..... ..... @qrrr_s +BFMLAL_v 0.10 1110 110 ..... 11111 1 ..... ..... @qrrr_h +BFMMLA 0110 1110 010 ..... 11101 1 ..... ..... @rrr_q1e0 +SMMLA 0100 1110 100 ..... 10100 1 ..... ..... @rrr_q1e0 +UMMLA 0110 1110 100 ..... 10100 1 ..... ..... @rrr_q1e0 +USMMLA 0100 1110 100 ..... 10101 1 ..... ..... @rrr_q1e0 + +FCADD_90 0.10 1110 ..0 ..... 11100 1 ..... ..... @qrrr_e +FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e + +FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5 + +SMULL_v 0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e +UMULL_v 0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e +SMLAL_v 0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e +UMLAL_v 0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e +SMLSL_v 0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e +UMLSL_v 0.10 1110 ..1 ..... 10100 0 ..... ..... 
@qrrr_e + +SADDL_v 0.00 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e +UADDL_v 0.10 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e +SSUBL_v 0.00 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e +USUBL_v 0.10 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e +SABAL_v 0.00 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e +UABAL_v 0.10 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e +SABDL_v 0.00 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e +UABDL_v 0.10 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e + +SQDMULL_v 0.00 1110 011 ..... 11010 0 ..... ..... @qrrr_h +SQDMULL_v 0.00 1110 101 ..... 11010 0 ..... ..... @qrrr_s +SQDMLAL_v 0.00 1110 011 ..... 10010 0 ..... ..... @qrrr_h +SQDMLAL_v 0.00 1110 101 ..... 10010 0 ..... ..... @qrrr_s +SQDMLSL_v 0.00 1110 011 ..... 10110 0 ..... ..... @qrrr_h +SQDMLSL_v 0.00 1110 101 ..... 10110 0 ..... ..... @qrrr_s + +SADDW 0.00 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e +UADDW 0.10 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e +SSUBW 0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e +USUBW 0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e + +ADDHN 0.00 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e +RADDHN 0.10 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e +SUBHN 0.00 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e +RSUBHN 0.10 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e + +PMULL_p8 0.00 1110 001 ..... 11100 0 ..... ..... @qrrr_b +PMULL_p64 0.00 1110 111 ..... 11100 0 ..... ..... @qrrr_b ### Advanced SIMD scalar x indexed element @@ -966,6 +1186,21 @@ SQDMULH_si 0101 1111 10 .. .... 1100 . 0 ..... ..... @rrx_s SQRDMULH_si 0101 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h SQRDMULH_si 0101 1111 10 . ..... 1101 . 0 ..... ..... @rrx_s +SQRDMLAH_si 0111 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h +SQRDMLAH_si 0111 1111 10 .. .... 1101 . 0 ..... ..... @rrx_s + +SQRDMLSH_si 0111 1111 01 .. .... 1111 . 0 ..... ..... @rrx_h +SQRDMLSH_si 0111 1111 10 .. .... 1111 . 0 ..... ..... @rrx_s + +SQDMULL_si 0101 1111 01 .. .... 1011 . 0 ..... ..... @rrx_h +SQDMULL_si 0101 1111 10 . ..... 1011 . 0 ..... ..... @rrx_s + +SQDMLAL_si 0101 1111 01 .. .... 0011 . 0 ..... ..... @rrx_h +SQDMLAL_si 0101 1111 10 . ..... 0011 . 0 ..... ..... @rrx_s + +SQDMLSL_si 0101 1111 01 .. .... 0111 . 0 ..... ..... @rrx_h +SQDMLSL_si 0101 1111 10 . ..... 0111 . 0 ..... ..... @rrx_s + ### Advanced SIMD vector x indexed element FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h @@ -1004,6 +1239,47 @@ SQDMULH_vi 0.00 1111 10 . ..... 1100 . 0 ..... ..... @qrrx_s SQRDMULH_vi 0.00 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h SQRDMULH_vi 0.00 1111 10 . ..... 1101 . 0 ..... ..... @qrrx_s +SQRDMLAH_vi 0.10 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h +SQRDMLAH_vi 0.10 1111 10 .. .... 1101 . 0 ..... ..... @qrrx_s + +SQRDMLSH_vi 0.10 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_h +SQRDMLSH_vi 0.10 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s + +SDOT_vi 0.00 1111 10 .. .... 1110 . 0 ..... ..... @qrrx_s +UDOT_vi 0.10 1111 10 .. .... 1110 . 0 ..... ..... @qrrx_s +SUDOT_vi 0.00 1111 00 .. .... 1111 . 0 ..... ..... @qrrx_s +USDOT_vi 0.00 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s +BFDOT_vi 0.00 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_s +BFMLAL_vi 0.00 1111 11 .. .... 1111 . 0 ..... ..... @qrrx_h + +FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0 +FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1 +FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1 + +SMULL_vi 0.00 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h +SMULL_vi 0.00 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s +UMULL_vi 0.10 1111 01 .. .... 
1010 . 0 ..... ..... @qrrx_h +UMULL_vi 0.10 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s + +SMLAL_vi 0.00 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h +SMLAL_vi 0.00 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s +UMLAL_vi 0.10 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h +UMLAL_vi 0.10 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s + +SMLSL_vi 0.00 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h +SMLSL_vi 0.00 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s +UMLSL_vi 0.10 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h +UMLSL_vi 0.10 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s + +SQDMULL_vi 0.00 1111 01 .. .... 1011 . 0 ..... ..... @qrrx_h +SQDMULL_vi 0.00 1111 10 . ..... 1011 . 0 ..... ..... @qrrx_s + +SQDMLAL_vi 0.00 1111 01 .. .... 0011 . 0 ..... ..... @qrrx_h +SQDMLAL_vi 0.00 1111 10 . ..... 0011 . 0 ..... ..... @qrrx_s + +SQDMLSL_vi 0.00 1111 01 .. .... 0111 . 0 ..... ..... @qrrx_h +SQDMLSL_vi 0.00 1111 10 . ..... 0111 . 0 ..... ..... @qrrx_s + # Floating-point conditional select FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd @@ -1016,3 +1292,605 @@ FMADD 0001 1111 .. 0 ..... 0 ..... ..... ..... @rrrr_hsd FMSUB 0001 1111 .. 0 ..... 1 ..... ..... ..... @rrrr_hsd FNMADD 0001 1111 .. 1 ..... 0 ..... ..... ..... @rrrr_hsd FNMSUB 0001 1111 .. 1 ..... 1 ..... ..... ..... @rrrr_hsd + +# Advanced SIMD Extract + +EXT_d 0010 1110 00 0 rm:5 00 imm:3 0 rn:5 rd:5 +EXT_q 0110 1110 00 0 rm:5 0 imm:4 0 rn:5 rd:5 + +# Advanced SIMD Table Lookup + +TBL_TBX 0 q:1 00 1110 000 rm:5 0 len:2 tbx:1 00 rn:5 rd:5 + +# Advanced SIMD Permute + +UZP1 0.00 1110 .. 0 ..... 0 001 10 ..... ..... @qrrr_e +UZP2 0.00 1110 .. 0 ..... 0 101 10 ..... ..... @qrrr_e +TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e +TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e +ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e +ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e + +# Advanced SIMD Across Lanes + +ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e +SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e +UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e +SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e +UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e +SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e +UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e + +FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h +FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2 + +FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h +FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2 + +FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h +FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2 + +FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h +FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2 + +# Conversion between floating-point and fixed-point (general register) + +&fcvt rd rn esz sf shift +%fcvt_shift32 10:5 !function=rsub_32 +%fcvt_shift64 10:6 !function=rsub_64 + +@fcvt32 0 ....... .. ...... 1..... rn:5 rd:5 \ + &fcvt sf=0 esz=%esz_hsd shift=%fcvt_shift32 +@fcvt64 1 ....... .. ...... ...... rn:5 rd:5 \ + &fcvt sf=1 esz=%esz_hsd shift=%fcvt_shift64 + +SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt32 +SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt64 +UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt32 +UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt64 + +FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt32 +FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt64 +FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt32 +FCVTZU_g . 
0011110 .. 011001 ...... ..... ..... @fcvt64 + +# Conversion between floating-point and integer (general register) + +@icvt sf:1 ....... .. ...... ...... rn:5 rd:5 \ + &fcvt esz=%esz_hsd shift=0 + +SCVTF_g . 0011110 .. 100010 000000 ..... ..... @icvt +UCVTF_g . 0011110 .. 100011 000000 ..... ..... @icvt + +FCVTNS_g . 0011110 .. 100000 000000 ..... ..... @icvt +FCVTNU_g . 0011110 .. 100001 000000 ..... ..... @icvt +FCVTPS_g . 0011110 .. 101000 000000 ..... ..... @icvt +FCVTPU_g . 0011110 .. 101001 000000 ..... ..... @icvt +FCVTMS_g . 0011110 .. 110000 000000 ..... ..... @icvt +FCVTMU_g . 0011110 .. 110001 000000 ..... ..... @icvt +FCVTZS_g . 0011110 .. 111000 000000 ..... ..... @icvt +FCVTZU_g . 0011110 .. 111001 000000 ..... ..... @icvt +FCVTAS_g . 0011110 .. 100100 000000 ..... ..... @icvt +FCVTAU_g . 0011110 .. 100101 000000 ..... ..... @icvt + +FJCVTZS 0 0011110 01 111110 000000 ..... ..... @rr + +FMOV_ws 0 0011110 00 100110 000000 ..... ..... @rr +FMOV_sw 0 0011110 00 100111 000000 ..... ..... @rr + +FMOV_xd 1 0011110 01 100110 000000 ..... ..... @rr +FMOV_dx 1 0011110 01 100111 000000 ..... ..... @rr + +# Move to/from upper half of 128-bit +FMOV_xu 1 0011110 10 101110 000000 ..... ..... @rr +FMOV_ux 1 0011110 10 101111 000000 ..... ..... @rr + +# Half-precision allows both sf=0 and sf=1 with identical results +FMOV_xh - 0011110 11 100110 000000 ..... ..... @rr +FMOV_hx - 0011110 11 100111 000000 ..... ..... @rr + +# Floating-point data processing (1 source) + +FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd +FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd +FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd +FSQRT_s 00011110 .. 1 000011 10000 ..... ..... @rr_hsd + +FRINTN_s 00011110 .. 1 001000 10000 ..... ..... @rr_hsd +FRINTP_s 00011110 .. 1 001001 10000 ..... ..... @rr_hsd +FRINTM_s 00011110 .. 1 001010 10000 ..... ..... @rr_hsd +FRINTZ_s 00011110 .. 1 001011 10000 ..... ..... @rr_hsd +FRINTA_s 00011110 .. 1 001100 10000 ..... ..... @rr_hsd +FRINTX_s 00011110 .. 1 001110 10000 ..... ..... @rr_hsd +FRINTI_s 00011110 .. 1 001111 10000 ..... ..... @rr_hsd + +BFCVT_s 00011110 01 1 000110 10000 ..... ..... @rr_s + +FRINT32Z_s 00011110 0. 1 010000 10000 ..... ..... @rr_sd +FRINT32X_s 00011110 0. 1 010001 10000 ..... ..... @rr_sd +FRINT64Z_s 00011110 0. 1 010010 10000 ..... ..... @rr_sd +FRINT64X_s 00011110 0. 1 010011 10000 ..... ..... @rr_sd + +FCVT_s_ds 00011110 00 1 000101 10000 ..... ..... @rr +FCVT_s_hs 00011110 00 1 000111 10000 ..... ..... @rr +FCVT_s_sd 00011110 01 1 000100 10000 ..... ..... @rr +FCVT_s_hd 00011110 01 1 000111 10000 ..... ..... @rr +FCVT_s_sh 00011110 11 1 000100 10000 ..... ..... @rr +FCVT_s_dh 00011110 11 1 000101 10000 ..... ..... @rr + +# Floating-point Immediate + +FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd + +# Floating-point Compare + +FCMP 00011110 .. 1 rm:5 001000 rn:5 e:1 z:1 000 esz=%esz_hsd + +# Floating-point Conditional Compare + +FCCMP 00011110 .. 1 rm:5 cond:4 01 rn:5 e:1 nzcv:4 esz=%esz_hsd + +# Advanced SIMD Modified Immediate / Shift by Immediate + +%abcdefgh 16:3 5:5 + +# Right shifts are encoded as N - shift, where N is the element size in bits. +%neon_rshift_i6 16:6 !function=rsub_64 +%neon_rshift_i5 16:5 !function=rsub_32 +%neon_rshift_i4 16:4 !function=rsub_16 +%neon_rshift_i3 16:3 !function=rsub_8 + +@q_shri_b . q:1 .. ..... 0001 ... ..... . rn:5 rd:5 \ + &qrri_e esz=0 imm=%neon_rshift_i3 +@q_shri_h . q:1 .. ..... 001 .... ..... . rn:5 rd:5 \ + &qrri_e esz=1 imm=%neon_rshift_i4 +@q_shri_s . q:1 .. ..... 
01 ..... ..... . rn:5 rd:5 \ + &qrri_e esz=2 imm=%neon_rshift_i5 +@q_shri_d . 1 .. ..... 1 ...... ..... . rn:5 rd:5 \ + &qrri_e esz=3 imm=%neon_rshift_i6 q=1 + +@q_shli_b . q:1 .. ..... 0001 imm:3 ..... . rn:5 rd:5 &qrri_e esz=0 +@q_shli_h . q:1 .. ..... 001 imm:4 ..... . rn:5 rd:5 &qrri_e esz=1 +@q_shli_s . q:1 .. ..... 01 imm:5 ..... . rn:5 rd:5 &qrri_e esz=2 +@q_shli_d . 1 .. ..... 1 imm:6 ..... . rn:5 rd:5 &qrri_e esz=3 q=1 + +FMOVI_v_h 0 q:1 00 1111 00000 ... 1111 11 ..... rd:5 %abcdefgh + +# MOVI, MVNI, ORR, BIC, FMOV are all intermixed via cmode. +Vimm 0 q:1 op:1 0 1111 00000 ... cmode:4 01 ..... rd:5 %abcdefgh + +SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_b +SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_h +SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_s +SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_d + +USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_b +USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_h +USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_s +USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_d + +SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_b +SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_h +SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_s +SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_d + +USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_b +USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_h +USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_s +USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_d + +SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_b +SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_h +SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_s +SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_d + +URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_b +URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_h +URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_s +URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_d + +SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_b +SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_h +SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_s +SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_d + +URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_b +URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_h +URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_s +URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_d + +SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_b +SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_h +SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_s +SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_d + +SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_b +SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_h +SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_s +SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_d + +SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_b +SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_h +SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_s +SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_d + +SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_b +SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_h +SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_s + +USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_b +USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_h +USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... 
@q_shli_s + +SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_b +SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_h +SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_s + +RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_b +RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_h +RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_s + +SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_b +SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_h +SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_s +SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_d + +UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_b +UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_h +UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_s +UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_d + +SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_b +SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_h +SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_s +SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_d + +SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_b +SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_h +SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_s + +UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_b +UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_h +UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_s + +SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_b +SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_h +SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_s + +SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_b +SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_h +SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_s + +UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_b +UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_h +UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_s + +SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_b +SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_h +SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_s + +# Advanced SIMD scalar shift by immediate + +@shri_b .... ..... 0001 ... ..... . rn:5 rd:5 \ + &rri_e esz=0 imm=%neon_rshift_i3 +@shri_h .... ..... 001 .... ..... . rn:5 rd:5 \ + &rri_e esz=1 imm=%neon_rshift_i4 +@shri_s .... ..... 01 ..... ..... . rn:5 rd:5 \ + &rri_e esz=2 imm=%neon_rshift_i5 +@shri_d .... ..... 1 ...... ..... . rn:5 rd:5 \ + &rri_e esz=3 imm=%neon_rshift_i6 + +@shli_b .... ..... 0001 imm:3 ..... . rn:5 rd:5 &rri_e esz=0 +@shli_h .... ..... 001 imm:4 ..... . rn:5 rd:5 &rri_e esz=1 +@shli_s .... ..... 01 imm:5 ..... . rn:5 rd:5 &rri_e esz=2 +@shli_d .... ..... 1 imm:6 ..... . rn:5 rd:5 &rri_e esz=3 + +SSHR_s 0101 11110 .... ... 00000 1 ..... ..... @shri_d +USHR_s 0111 11110 .... ... 00000 1 ..... ..... @shri_d +SSRA_s 0101 11110 .... ... 00010 1 ..... ..... @shri_d +USRA_s 0111 11110 .... ... 00010 1 ..... ..... @shri_d +SRSHR_s 0101 11110 .... ... 00100 1 ..... ..... @shri_d +URSHR_s 0111 11110 .... ... 00100 1 ..... ..... @shri_d +SRSRA_s 0101 11110 .... ... 00110 1 ..... ..... @shri_d +URSRA_s 0111 11110 .... ... 00110 1 ..... ..... @shri_d +SRI_s 0111 11110 .... ... 01000 1 ..... ..... @shri_d + +SHL_s 0101 11110 .... ... 01010 1 ..... ..... @shli_d +SLI_s 0111 11110 .... ... 01010 1 ..... ..... @shli_d + +SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_b +SQSHL_si 0101 11110 .... ... 01110 1 ..... 
..... @shli_h +SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_s +SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_d + +UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_b +UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_h +UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_s +UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_d + +SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_b +SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_h +SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_s +SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_d + +SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_b +SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_h +SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_s + +UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_b +UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_h +UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_s + +SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_b +SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_h +SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_s + +SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_b +SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_h +SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_s + +UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_b +UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_h +UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_s + +SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_b +SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_h +SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_s + +# Advanced SIMD scalar two-register miscellaneous + +SQABS_s 0101 1110 ..1 00000 01111 0 ..... ..... @rr_e +SQNEG_s 0111 1110 ..1 00000 01111 0 ..... ..... @rr_e +ABS_s 0101 1110 111 00000 10111 0 ..... ..... @rr +NEG_s 0111 1110 111 00000 10111 0 ..... ..... @rr +CMGT0_s 0101 1110 111 00000 10001 0 ..... ..... @rr +CMGE0_s 0111 1110 111 00000 10001 0 ..... ..... @rr +CMEQ0_s 0101 1110 111 00000 10011 0 ..... ..... @rr +CMLE0_s 0111 1110 111 00000 10011 0 ..... ..... @rr +CMLT0_s 0101 1110 111 00000 10101 0 ..... ..... @rr + +SQXTUN_s 0111 1110 ..1 00001 00101 0 ..... ..... @rr_e +SQXTN_s 0101 1110 ..1 00001 01001 0 ..... ..... @rr_e +UQXTN_s 0111 1110 ..1 00001 01001 0 ..... ..... @rr_e + +FCVTXN_s 0111 1110 011 00001 01101 0 ..... ..... @rr_s + +FCMGT0_s 0101 1110 111 11000 11001 0 ..... ..... @rr_h +FCMGT0_s 0101 1110 1.1 00000 11001 0 ..... ..... @rr_sd + +FCMGE0_s 0111 1110 111 11000 11001 0 ..... ..... @rr_h +FCMGE0_s 0111 1110 1.1 00000 11001 0 ..... ..... @rr_sd + +FCMEQ0_s 0101 1110 111 11000 11011 0 ..... ..... @rr_h +FCMEQ0_s 0101 1110 1.1 00000 11011 0 ..... ..... @rr_sd + +FCMLE0_s 0111 1110 111 11000 11011 0 ..... ..... @rr_h +FCMLE0_s 0111 1110 1.1 00000 11011 0 ..... ..... @rr_sd + +FCMLT0_s 0101 1110 111 11000 11101 0 ..... ..... @rr_h +FCMLT0_s 0101 1110 1.1 00000 11101 0 ..... ..... @rr_sd + +FRECPE_s 0101 1110 111 11001 11011 0 ..... ..... @rr_h +FRECPE_s 0101 1110 1.1 00001 11011 0 ..... ..... @rr_sd + +FRECPX_s 0101 1110 111 11001 11111 0 ..... ..... @rr_h +FRECPX_s 0101 1110 1.1 00001 11111 0 ..... ..... @rr_sd + +FRSQRTE_s 0111 1110 111 11001 11011 0 ..... ..... @rr_h +FRSQRTE_s 0111 1110 1.1 00001 11011 0 ..... ..... @rr_sd + +@icvt_h . ....... .. ...... ...... rn:5 rd:5 \ + &fcvt sf=0 esz=1 shift=0 +@icvt_sd . ....... .. ...... ...... 
rn:5 rd:5 \ + &fcvt sf=0 esz=%esz_sd shift=0 + +SCVTF_f 0101 1110 011 11001 11011 0 ..... ..... @icvt_h +SCVTF_f 0101 1110 0.1 00001 11011 0 ..... ..... @icvt_sd + +UCVTF_f 0111 1110 011 11001 11011 0 ..... ..... @icvt_h +UCVTF_f 0111 1110 0.1 00001 11011 0 ..... ..... @icvt_sd + +FCVTNS_f 0101 1110 011 11001 10101 0 ..... ..... @icvt_h +FCVTNS_f 0101 1110 0.1 00001 10101 0 ..... ..... @icvt_sd +FCVTNU_f 0111 1110 011 11001 10101 0 ..... ..... @icvt_h +FCVTNU_f 0111 1110 0.1 00001 10101 0 ..... ..... @icvt_sd + +FCVTPS_f 0101 1110 111 11001 10101 0 ..... ..... @icvt_h +FCVTPS_f 0101 1110 1.1 00001 10101 0 ..... ..... @icvt_sd +FCVTPU_f 0111 1110 111 11001 10101 0 ..... ..... @icvt_h +FCVTPU_f 0111 1110 1.1 00001 10101 0 ..... ..... @icvt_sd + +FCVTMS_f 0101 1110 011 11001 10111 0 ..... ..... @icvt_h +FCVTMS_f 0101 1110 0.1 00001 10111 0 ..... ..... @icvt_sd +FCVTMU_f 0111 1110 011 11001 10111 0 ..... ..... @icvt_h +FCVTMU_f 0111 1110 0.1 00001 10111 0 ..... ..... @icvt_sd + +FCVTZS_f 0101 1110 111 11001 10111 0 ..... ..... @icvt_h +FCVTZS_f 0101 1110 1.1 00001 10111 0 ..... ..... @icvt_sd +FCVTZU_f 0111 1110 111 11001 10111 0 ..... ..... @icvt_h +FCVTZU_f 0111 1110 1.1 00001 10111 0 ..... ..... @icvt_sd + +FCVTAS_f 0101 1110 011 11001 11001 0 ..... ..... @icvt_h +FCVTAS_f 0101 1110 0.1 00001 11001 0 ..... ..... @icvt_sd +FCVTAU_f 0111 1110 011 11001 11001 0 ..... ..... @icvt_h +FCVTAU_f 0111 1110 0.1 00001 11001 0 ..... ..... @icvt_sd + +%fcvt_f_sh_h 16:4 !function=rsub_16 +%fcvt_f_sh_s 16:5 !function=rsub_32 +%fcvt_f_sh_d 16:6 !function=rsub_64 + +@fcvt_fixed_h .... .... . 001 .... ...... rn:5 rd:5 \ + &fcvt sf=0 esz=1 shift=%fcvt_f_sh_h +@fcvt_fixed_s .... .... . 01 ..... ...... rn:5 rd:5 \ + &fcvt sf=0 esz=2 shift=%fcvt_f_sh_s +@fcvt_fixed_d .... .... . 1 ...... ...... rn:5 rd:5 \ + &fcvt sf=0 esz=3 shift=%fcvt_f_sh_d + +SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h +SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s +SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d + +UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h +UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s +UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d + +FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h +FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s +FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d + +FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h +FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s +FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d + +# Advanced SIMD two-register miscellaneous + +SQABS_v 0.00 1110 ..1 00000 01111 0 ..... ..... @qrr_e +SQNEG_v 0.10 1110 ..1 00000 01111 0 ..... ..... @qrr_e +ABS_v 0.00 1110 ..1 00000 10111 0 ..... ..... @qrr_e +NEG_v 0.10 1110 ..1 00000 10111 0 ..... ..... @qrr_e +CLS_v 0.00 1110 ..1 00000 01001 0 ..... ..... @qrr_e +CLZ_v 0.10 1110 ..1 00000 01001 0 ..... ..... @qrr_e +CNT_v 0.00 1110 001 00000 01011 0 ..... ..... @qrr_b +NOT_v 0.10 1110 001 00000 01011 0 ..... ..... @qrr_b +RBIT_v 0.10 1110 011 00000 01011 0 ..... ..... @qrr_b +CMGT0_v 0.00 1110 ..1 00000 10001 0 ..... ..... @qrr_e +CMGE0_v 0.10 1110 ..1 00000 10001 0 ..... ..... @qrr_e +CMEQ0_v 0.00 1110 ..1 00000 10011 0 ..... ..... @qrr_e +CMLE0_v 0.10 1110 ..1 00000 10011 0 ..... ..... @qrr_e +CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e + +REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b +REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... 
@qrr_bh +REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e + +SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e +UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e +SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e +UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e + +XTN 0.00 1110 ..1 00001 00101 0 ..... ..... @qrr_e +SQXTUN_v 0.10 1110 ..1 00001 00101 0 ..... ..... @qrr_e +SQXTN_v 0.00 1110 ..1 00001 01001 0 ..... ..... @qrr_e +UQXTN_v 0.10 1110 ..1 00001 01001 0 ..... ..... @qrr_e + +FCVTN_v 0.00 1110 0.1 00001 01101 0 ..... ..... @qrr_hs +FCVTXN_v 0.10 1110 011 00001 01101 0 ..... ..... @qrr_s +BFCVTN_v 0.00 1110 101 00001 01101 0 ..... ..... @qrr_h + +SHLL_v 0.10 1110 ..1 00001 00111 0 ..... ..... @qrr_e + +FABS_v 0.00 1110 111 11000 11111 0 ..... ..... @qrr_h +FABS_v 0.00 1110 1.1 00000 11111 0 ..... ..... @qrr_sd + +FNEG_v 0.10 1110 111 11000 11111 0 ..... ..... @qrr_h +FNEG_v 0.10 1110 1.1 00000 11111 0 ..... ..... @qrr_sd + +FSQRT_v 0.10 1110 111 11001 11111 0 ..... ..... @qrr_h +FSQRT_v 0.10 1110 1.1 00001 11111 0 ..... ..... @qrr_sd + +FRINTN_v 0.00 1110 011 11001 10001 0 ..... ..... @qrr_h +FRINTN_v 0.00 1110 0.1 00001 10001 0 ..... ..... @qrr_sd + +FRINTM_v 0.00 1110 011 11001 10011 0 ..... ..... @qrr_h +FRINTM_v 0.00 1110 0.1 00001 10011 0 ..... ..... @qrr_sd + +FRINTP_v 0.00 1110 111 11001 10001 0 ..... ..... @qrr_h +FRINTP_v 0.00 1110 1.1 00001 10001 0 ..... ..... @qrr_sd + +FRINTZ_v 0.00 1110 111 11001 10011 0 ..... ..... @qrr_h +FRINTZ_v 0.00 1110 1.1 00001 10011 0 ..... ..... @qrr_sd + +FRINTA_v 0.10 1110 011 11001 10001 0 ..... ..... @qrr_h +FRINTA_v 0.10 1110 0.1 00001 10001 0 ..... ..... @qrr_sd + +FRINTX_v 0.10 1110 011 11001 10011 0 ..... ..... @qrr_h +FRINTX_v 0.10 1110 0.1 00001 10011 0 ..... ..... @qrr_sd + +FRINTI_v 0.10 1110 111 11001 10011 0 ..... ..... @qrr_h +FRINTI_v 0.10 1110 1.1 00001 10011 0 ..... ..... @qrr_sd + +FRINT32Z_v 0.00 1110 0.1 00001 11101 0 ..... ..... @qrr_sd +FRINT32X_v 0.10 1110 0.1 00001 11101 0 ..... ..... @qrr_sd +FRINT64Z_v 0.00 1110 0.1 00001 11111 0 ..... ..... @qrr_sd +FRINT64X_v 0.10 1110 0.1 00001 11111 0 ..... ..... @qrr_sd + +SCVTF_vi 0.00 1110 011 11001 11011 0 ..... ..... @qrr_h +SCVTF_vi 0.00 1110 0.1 00001 11011 0 ..... ..... @qrr_sd + +UCVTF_vi 0.10 1110 011 11001 11011 0 ..... ..... @qrr_h +UCVTF_vi 0.10 1110 0.1 00001 11011 0 ..... ..... @qrr_sd + +FCVTNS_vi 0.00 1110 011 11001 10101 0 ..... ..... @qrr_h +FCVTNS_vi 0.00 1110 0.1 00001 10101 0 ..... ..... @qrr_sd +FCVTNU_vi 0.10 1110 011 11001 10101 0 ..... ..... @qrr_h +FCVTNU_vi 0.10 1110 0.1 00001 10101 0 ..... ..... @qrr_sd + +FCVTPS_vi 0.00 1110 111 11001 10101 0 ..... ..... @qrr_h +FCVTPS_vi 0.00 1110 1.1 00001 10101 0 ..... ..... @qrr_sd +FCVTPU_vi 0.10 1110 111 11001 10101 0 ..... ..... @qrr_h +FCVTPU_vi 0.10 1110 1.1 00001 10101 0 ..... ..... @qrr_sd + +FCVTMS_vi 0.00 1110 011 11001 10111 0 ..... ..... @qrr_h +FCVTMS_vi 0.00 1110 0.1 00001 10111 0 ..... ..... @qrr_sd +FCVTMU_vi 0.10 1110 011 11001 10111 0 ..... ..... @qrr_h +FCVTMU_vi 0.10 1110 0.1 00001 10111 0 ..... ..... @qrr_sd + +FCVTZS_vi 0.00 1110 111 11001 10111 0 ..... ..... @qrr_h +FCVTZS_vi 0.00 1110 1.1 00001 10111 0 ..... ..... @qrr_sd +FCVTZU_vi 0.10 1110 111 11001 10111 0 ..... ..... @qrr_h +FCVTZU_vi 0.10 1110 1.1 00001 10111 0 ..... ..... @qrr_sd + +FCVTAS_vi 0.00 1110 011 11001 11001 0 ..... ..... @qrr_h +FCVTAS_vi 0.00 1110 0.1 00001 11001 0 ..... ..... @qrr_sd +FCVTAU_vi 0.10 1110 011 11001 11001 0 ..... ..... @qrr_h +FCVTAU_vi 0.10 1110 0.1 00001 11001 0 ..... ..... 
@qrr_sd + +FCMGT0_v 0.00 1110 111 11000 11001 0 ..... ..... @qrr_h +FCMGT0_v 0.00 1110 1.1 00000 11001 0 ..... ..... @qrr_sd + +FCMGE0_v 0.10 1110 111 11000 11001 0 ..... ..... @qrr_h +FCMGE0_v 0.10 1110 1.1 00000 11001 0 ..... ..... @qrr_sd + +FCMEQ0_v 0.00 1110 111 11000 11011 0 ..... ..... @qrr_h +FCMEQ0_v 0.00 1110 1.1 00000 11011 0 ..... ..... @qrr_sd + +FCMLE0_v 0.10 1110 111 11000 11011 0 ..... ..... @qrr_h +FCMLE0_v 0.10 1110 1.1 00000 11011 0 ..... ..... @qrr_sd + +FCMLT0_v 0.00 1110 111 11000 11101 0 ..... ..... @qrr_h +FCMLT0_v 0.00 1110 1.1 00000 11101 0 ..... ..... @qrr_sd + +FRECPE_v 0.00 1110 111 11001 11011 0 ..... ..... @qrr_h +FRECPE_v 0.00 1110 1.1 00001 11011 0 ..... ..... @qrr_sd + +FRSQRTE_v 0.10 1110 111 11001 11011 0 ..... ..... @qrr_h +FRSQRTE_v 0.10 1110 1.1 00001 11011 0 ..... ..... @qrr_sd + +URECPE_v 0.00 1110 101 00001 11001 0 ..... ..... @qrr_s +URSQRTE_v 0.10 1110 101 00001 11001 0 ..... ..... @qrr_s + +FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd + +&fcvt_q rd rn esz q shift +@fcvtq_h . q:1 . ...... 001 .... ...... rn:5 rd:5 \ + &fcvt_q esz=1 shift=%fcvt_f_sh_h +@fcvtq_s . q:1 . ...... 01 ..... ...... rn:5 rd:5 \ + &fcvt_q esz=2 shift=%fcvt_f_sh_s +@fcvtq_d . q:1 . ...... 1 ...... ...... rn:5 rd:5 \ + &fcvt_q esz=3 shift=%fcvt_f_sh_d + +SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_h +SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_s +SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_d + +UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_h +UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_s +UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_d + +FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_h +FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_s +FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_d + +FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_h +FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_s +FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_d diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c new file mode 100644 index 0000000..6701398 --- /dev/null +++ b/target/arm/tcg/arith_helper.c @@ -0,0 +1,297 @@ +/* + * ARM generic helpers for various arithmetical operations. + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu/crc32c.h" +#include <zlib.h> /* for crc32 */ + +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + +/* + * Note that signed overflow is undefined in C. The following routines are + * careful to use unsigned types where modulo arithmetic is required. + * Failure to do so _will_ break on newer gcc. + */ + +/* Signed saturating arithmetic. */ + +/* Perform 16-bit signed saturating addition. */ +static inline uint16_t add16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a + b; + if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { + if (a & 0x8000) { + res = 0x8000; + } else { + res = 0x7fff; + } + } + return res; +} + +/* Perform 8-bit signed saturating addition. */ +static inline uint8_t add8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a + b; + if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { + if (a & 0x80) { + res = 0x80; + } else { + res = 0x7f; + } + } + return res; +} + +/* Perform 16-bit signed saturating subtraction. 
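As with the additions above, signed overflow is detected with the standard XOR test: the result can only have overflowed when its sign differs from the sign of a, i.e. ((res ^ a) & 0x8000) is set, and the operand signs make overflow possible -- equal signs for addition (!((a ^ b) & 0x8000)), differing signs for subtraction (((a ^ b) & 0x8000)). A worked example for add16_sat: a = 0x7000, b = 0x2000 gives res = 0x9000; res ^ a = 0xe000 has the sign bit set while a ^ b = 0x5000 does not, so the helper returns the saturated value 0x7fff.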
*/ +static inline uint16_t sub16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a - b; + if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { + if (a & 0x8000) { + res = 0x8000; + } else { + res = 0x7fff; + } + } + return res; +} + +/* Perform 8-bit signed saturating subtraction. */ +static inline uint8_t sub8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a - b; + if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { + if (a & 0x80) { + res = 0x80; + } else { + res = 0x7f; + } + } + return res; +} + +#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); +#define PFX q + +#include "op_addsub.c.inc" + +/* Unsigned saturating arithmetic. */ +static inline uint16_t add16_usat(uint16_t a, uint16_t b) +{ + uint16_t res; + res = a + b; + if (res < a) { + res = 0xffff; + } + return res; +} + +static inline uint16_t sub16_usat(uint16_t a, uint16_t b) +{ + if (a > b) { + return a - b; + } else { + return 0; + } +} + +static inline uint8_t add8_usat(uint8_t a, uint8_t b) +{ + uint8_t res; + res = a + b; + if (res < a) { + res = 0xff; + } + return res; +} + +static inline uint8_t sub8_usat(uint8_t a, uint8_t b) +{ + if (a > b) { + return a - b; + } else { + return 0; + } +} + +#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); +#define PFX uq + +#include "op_addsub.c.inc" + +/* Signed modulo arithmetic. */ +#define SARITH16(a, b, n, op) do { \ + int32_t sum; \ + sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ + RESULT(sum, n, 16); \ + if (sum >= 0) \ + ge |= 3 << (n * 2); \ + } while (0) + +#define SARITH8(a, b, n, op) do { \ + int32_t sum; \ + sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ + RESULT(sum, n, 8); \ + if (sum >= 0) \ + ge |= 1 << n; \ + } while (0) + + +#define ADD16(a, b, n) SARITH16(a, b, n, +) +#define SUB16(a, b, n) SARITH16(a, b, n, -) +#define ADD8(a, b, n) SARITH8(a, b, n, +) +#define SUB8(a, b, n) SARITH8(a, b, n, -) +#define PFX s +#define ARITH_GE + +#include "op_addsub.c.inc" + +/* Unsigned modulo arithmetic. */ +#define ADD16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 1) \ + ge |= 3 << (n * 2); \ + } while (0) + +#define ADD8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 1) \ + ge |= 1 << n; \ + } while (0) + +#define SUB16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 0) \ + ge |= 3 << (n * 2); \ + } while (0) + +#define SUB8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 0) \ + ge |= 1 << n; \ + } while (0) + +#define PFX u +#define ARITH_GE + +#include "op_addsub.c.inc" + +/* Halved signed arithmetic. 
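The halving macros below widen each lane to int32_t before adding or subtracting and shifting right by one, so the intermediate sum cannot overflow: SHADD16 on lanes of 0x7fff and 0x7fff computes (32767 + 32767) >> 1 = 32767 rather than wrapping, and 0x8000 + 0x8000 halves back to 0x8000 (-32768). The unsigned 'uh' variants further down do the same with uint32_t. Each block of ADD16/SUB16/ADD8/SUB8/PFX definitions feeds another inclusion of op_addsub.c.inc, which (not shown in this patch) presumably expands them into the per-lane parallel add/sub helpers for that prefix.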
*/ +#define ADD16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) +#define PFX sh + +#include "op_addsub.c.inc" + +/* Halved unsigned arithmetic. */ +#define ADD16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define PFX uh + +#include "op_addsub.c.inc" + +static inline uint8_t do_usad(uint8_t a, uint8_t b) +{ + if (a > b) { + return a - b; + } else { + return b - a; + } +} + +/* Unsigned sum of absolute byte differences. */ +uint32_t HELPER(usad8)(uint32_t a, uint32_t b) +{ + uint32_t sum; + sum = do_usad(a, b); + sum += do_usad(a >> 8, b >> 8); + sum += do_usad(a >> 16, b >> 16); + sum += do_usad(a >> 24, b >> 24); + return sum; +} + +/* For ARMv6 SEL instruction. */ +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) +{ + uint32_t mask; + + mask = 0; + if (flags & 1) { + mask |= 0xff; + } + if (flags & 2) { + mask |= 0xff00; + } + if (flags & 4) { + mask |= 0xff0000; + } + if (flags & 8) { + mask |= 0xff000000; + } + return (a & mask) | (b & ~mask); +} + +/* + * CRC helpers. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ +uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + stl_le_p(buf, val); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + stl_le_p(buf, val); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c index c059c68..eddd711 100644 --- a/target/arm/tcg/cpu-v7m.c +++ b/target/arm/tcg/cpu-v7m.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "hw/core/tcg-cpu-ops.h" +#include "accel/tcg/cpu-ops.h" #include "internals.h" #if !defined(CONFIG_USER_ONLY) @@ -19,7 +19,6 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { - CPUClass *cc = CPU_GET_CLASS(cs); ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; bool ret = false; @@ -35,7 +34,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) if (interrupt_request & CPU_INTERRUPT_HARD && (armv7m_nvic_can_take_pending_exception(env->nvic))) { cs->exception_index = EXCP_IRQ; - cc->tcg_ops->do_interrupt(cs); + cs->cc->tcg_ops->do_interrupt(cs); ret = true; } return ret; @@ -46,6 +45,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V6); set_feature(&cpu->env, ARM_FEATURE_M); @@ -59,51 +59,53 @@ static void cortex_m0_initfn(Object *obj) * by looking at ID register fields. We use the same values as * for the M3. 
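The hunks that follow convert the individual cpu->isar.id_* assignments to SET_IDREG()/GET_IDREG() accessors, while non-ID fields such as mvfr0 and dbgdidr keep their named members. The macro definitions are not part of these hunks; a plausible shape, assuming the ID register values have been collected into an idregs[] array indexed by per-register ID_*_EL1_IDX constants, would be roughly
    #define SET_IDREG(isar, r, v)  ((isar)->idregs[r ## _EL1_IDX] = (v))
    #define GET_IDREG(isar, r)     ((isar)->idregs[r ## _EL1_IDX])
so that generic code can iterate over every ID register instead of naming each field, which matches how later hunks also replace read-modify-write sequences with FIELD_DP32_IDREG()/FIELD_DP64_IDREG().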
*/ - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m3_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); set_feature(&cpu->env, ARM_FEATURE_M_MAIN); cpu->midr = 0x410fc231; cpu->pmsav7_dregion = 8; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m4_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -114,26 +116,27 @@ static void cortex_m4_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000000; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 
0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -144,26 +147,27 @@ static void cortex_m7_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00100030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02112000; - cpu->isar.id_isar2 = 0x20232231; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00100030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02112000); + SET_IDREG(isar, ID_ISAR2, 0x20232231); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m33_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_M); @@ -176,21 +180,21 @@ static void cortex_m33_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000210); + SET_IDREG(isar, ID_DFR0, 0x00200000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00101F40; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00101F40); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); cpu->clidr = 0x00000000; cpu->ctr = 0x8000c000; } @@ -198,6 +202,7 @@ static void cortex_m33_initfn(Object *obj) static void cortex_m55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_V8_1M); @@ -213,37 +218,47 @@ static void 
cortex_m55_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12100211; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x20000030; - cpu->isar.id_pfr1 = 0x00000230; - cpu->isar.id_dfr0 = 0x10200000; + SET_IDREG(isar, ID_PFR0, 0x20000030); + SET_IDREG(isar, ID_PFR1, 0x00000230); + SET_IDREG(isar, ID_DFR0, 0x10200000); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00111040; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000011; - cpu->isar.id_isar0 = 0x01103110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_MMFR0, 0x00111040); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000011); + SET_IDREG(isar, ID_ISAR0, 0x01103110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); cpu->clidr = 0x00000000; /* caches not implemented */ cpu->ctr = 0x8303c003; } static const TCGCPUOps arm_v7m_tcg_ops = { + /* ARM processors have a weak memory model */ + .guest_default_memory_order = 0, + .mttcg_supported = true, + .initialize = arm_translate_init, + .translate_code = arm_translate_code, + .get_tb_cpu_state = arm_get_tb_cpu_state, .synchronize_from_tb = arm_cpu_synchronize_from_tb, .debug_excp_handler = arm_debug_excp_handler, .restore_state_to_opc = arm_restore_state_to_opc, + .mmu_index = arm_cpu_mmu_index, #ifdef CONFIG_USER_ONLY .record_sigsegv = arm_cpu_record_sigsegv, .record_sigbus = arm_cpu_record_sigbus, #else - .tlb_fill = arm_cpu_tlb_fill, + .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, + .cpu_exec_halt = arm_cpu_exec_halt, + .cpu_exec_reset = cpu_reset, .do_interrupt = arm_v7m_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, @@ -253,14 +268,13 @@ static const TCGCPUOps arm_v7m_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void arm_v7m_class_init(ObjectClass *oc, void *data) +static void arm_v7m_class_init(ObjectClass *oc, const void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); acc->info = data; cc->tcg_ops = &arm_v7m_tcg_ops; - cc->gdb_core_xml_file = "arm-m-profile.xml"; } static const ARMCPUInfo arm_v7m_cpus[] = { diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index bdd82d9..942b636 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "hw/core/tcg-cpu-ops.h" +#include "accel/tcg/cpu-ops.h" #include "internals.h" #include "target/arm/idau.h" #if !defined(CONFIG_USER_ONLY) @@ -23,18 +23,19 @@ void aa32_max_features(ARMCPU *cpu) { uint32_t t; + ARMISARegisters *isar = &cpu->isar; /* Add additional features supported by QEMU */ - t = cpu->isar.id_isar5; + t = GET_IDREG(isar, ID_ISAR5); t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */ t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */ t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */ t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* 
FEAT_FCMA */ - cpu->isar.id_isar5 = t; + SET_IDREG(isar, ID_ISAR5, t); - t = cpu->isar.id_isar6; + t = GET_IDREG(isar, ID_ISAR6); t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */ t = FIELD_DP32(t, ID_ISAR6, DP, 1); /* Feat_DotProd */ t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */ @@ -42,7 +43,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */ t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */ t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */ - cpu->isar.id_isar6 = t; + SET_IDREG(isar, ID_ISAR6, t); t = cpu->isar.mvfr1; t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */ @@ -54,42 +55,64 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ cpu->isar.mvfr2 = t; - t = cpu->isar.id_mmfr3; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - cpu->isar.id_mmfr3 = t; + FIELD_DP32_IDREG(isar, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - t = cpu->isar.id_mmfr4; + t = GET_IDREG(isar, ID_MMFR4); t = FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */ t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */ - cpu->isar.id_mmfr4 = t; + SET_IDREG(isar, ID_MMFR4, t); - t = cpu->isar.id_mmfr5; - t = FIELD_DP32(t, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - cpu->isar.id_mmfr5 = t; + FIELD_DP32_IDREG(isar, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - t = cpu->isar.id_pfr0; - t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CVS2 */ + t = GET_IDREG(isar, ID_PFR0); + t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CSV2 */ t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */ - cpu->isar.id_pfr0 = t; + SET_IDREG(isar, ID_PFR0, t); - t = cpu->isar.id_pfr2; + t = GET_IDREG(isar, ID_PFR2); t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */ t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */ - cpu->isar.id_pfr2 = t; + SET_IDREG(isar, ID_PFR2, t); - t = cpu->isar.id_dfr0; - t = FIELD_DP32(t, ID_DFR0, COPDBG, 9); /* FEAT_Debugv8p4 */ - t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */ + t = GET_IDREG(isar, ID_DFR0); + t = FIELD_DP32(t, ID_DFR0, COPDBG, 10); /* FEAT_Debugv8p8 */ + t = FIELD_DP32(t, ID_DFR0, COPSDBG, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */ - cpu->isar.id_dfr0 = t; - - t = cpu->isar.id_dfr1; - t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_dfr1 = t; + SET_IDREG(isar, ID_DFR0, t); + + /* Debug ID registers. */ + + /* Bit[15] is RES1, Bit[13] and Bits[11:0] are RES0. */ + t = 0x00008000; + t = FIELD_DP32(t, DBGDIDR, SE_IMP, 1); + t = FIELD_DP32(t, DBGDIDR, NSUHD_IMP, 1); + t = FIELD_DP32(t, DBGDIDR, VERSION, 10); /* FEAT_Debugv8p8 */ + t = FIELD_DP32(t, DBGDIDR, CTX_CMPS, 1); + t = FIELD_DP32(t, DBGDIDR, BRPS, 5); + t = FIELD_DP32(t, DBGDIDR, WRPS, 3); + cpu->isar.dbgdidr = t; + + t = 0; + t = FIELD_DP32(t, DBGDEVID, PCSAMPLE, 3); + t = FIELD_DP32(t, DBGDEVID, WPADDRMASK, 1); + t = FIELD_DP32(t, DBGDEVID, BPADDRMASK, 15); + t = FIELD_DP32(t, DBGDEVID, VECTORCATCH, 0); + t = FIELD_DP32(t, DBGDEVID, VIRTEXTNS, 1); + t = FIELD_DP32(t, DBGDEVID, DOUBLELOCK, 1); + t = FIELD_DP32(t, DBGDEVID, AUXREGS, 0); + t = FIELD_DP32(t, DBGDEVID, CIDMASK, 0); + cpu->isar.dbgdevid = t; + + /* Bits[31:4] are RES0. 
*/ + t = 0; + t = FIELD_DP32(t, DBGDEVID1, PCSROFFSET, 2); + cpu->isar.dbgdevid1 = t; + + FIELD_DP32_IDREG(isar, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ } /* CPU models. These are not needed for the AArch64 linux-user build. */ @@ -112,7 +135,7 @@ static void arm926_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -154,7 +177,7 @@ static void arm1026_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -178,6 +201,7 @@ static void arm1026_initfn(Object *obj) static void arm1136_r2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an * older core than plain "arm1136". In particular this does not @@ -198,24 +222,25 @@ static void arm1136_r2_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static void arm1136_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1136"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -229,24 +254,25 @@ static void arm1136_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static 
void arm1176_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1176"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -261,24 +287,25 @@ static void arm1176_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x33; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x33); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222100; - cpu->isar.id_isar0 = 0x0140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231121; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x01141; + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222100); + SET_IDREG(isar, ID_ISAR0, 0x0140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231121); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x01141); cpu->reset_auxcr = 7; } static void arm11mpcore_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm11mpcore"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -290,18 +317,18 @@ static void arm11mpcore_initfn(Object *obj) cpu->isar.mvfr0 = 0x11111111; cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0); cpu->id_afr0 = 0x2; - cpu->isar.id_mmfr0 = 0x01100103; - cpu->isar.id_mmfr1 = 0x10020302; - cpu->isar.id_mmfr2 = 0x01222000; - cpu->isar.id_isar0 = 0x00100011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11221011; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_MMFR0, 0x01100103); + SET_IDREG(isar, ID_MMFR1, 0x10020302); + SET_IDREG(isar, ID_MMFR2, 0x01222000); + SET_IDREG(isar, ID_ISAR0, 0x00100011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11221011); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 1; } @@ -315,6 +342,7 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = { static void cortex_a8_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a8"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -329,19 +357,19 @@ static void cortex_a8_initfn(Object *obj) cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x400; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x400); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x31100003; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01202000; - cpu->isar.id_mmfr3 = 0x11; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x12112111; - cpu->isar.id_isar2 = 0x21232031; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_MMFR0, 0x31100003); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01202000); + SET_IDREG(isar, ID_MMFR3, 0x11); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x12112111); + SET_IDREG(isar, ID_ISAR2, 
0x21232031); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x15141000; cpu->clidr = (1 << 27) | (2 << 24) | 3; cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ @@ -384,6 +412,7 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = { static void cortex_a9_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a9"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -404,19 +433,19 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x000; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x000); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x00100103; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01230000; - cpu->isar.id_mmfr3 = 0x00002111; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_MMFR0, 0x00100103); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01230000); + SET_IDREG(isar, ID_MMFR3, 0x00002111); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x35141000; cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ @@ -451,6 +480,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a7"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -469,23 +499,23 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); /* * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. 
*/ - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f005; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x1; @@ -500,6 +530,7 @@ static void cortex_a7_initfn(Object *obj) static void cortex_a15_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a15"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -520,19 +551,19 @@ static void cortex_a15_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f021; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x0; @@ -546,9 +577,9 @@ static void cortex_a15_initfn(Object *obj) static const ARMCPRegInfo cortexr5_cp_reginfo[] = { /* Dummy the TCM region regs for the moment */ - { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, + { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_CONST }, - { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, + { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST }, { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP }, @@ -557,27 +588,28 @@ static const ARMCPRegInfo cortexr5_cp_reginfo[] = { static void cortex_r5_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x411fc153; /* r1p3 */ - cpu->isar.id_pfr0 = 0x0131; - cpu->isar.id_pfr1 = 0x001; - cpu->isar.id_dfr0 = 0x010400; + SET_IDREG(isar, ID_PFR0, 0x0131); + SET_IDREG(isar, ID_PFR1, 0x001); + SET_IDREG(isar, ID_DFR0, 0x010400); cpu->id_afr0 = 0x0; - cpu->isar.id_mmfr0 = 0x0210030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0x0211; - cpu->isar.id_isar0 = 0x02101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232141; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x0010142; - cpu->isar.id_isar5 = 0x0; - cpu->isar.id_isar6 = 0x0; + SET_IDREG(isar, ID_MMFR0, 
0x0210030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0x0211); + SET_IDREG(isar, ID_ISAR0, 0x02101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232141); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x0010142); + SET_IDREG(isar, ID_ISAR5, 0x0); + SET_IDREG(isar, ID_ISAR6, 0x0); cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; cpu->isar.reset_pmcr_el0 = 0x41151800; @@ -692,6 +724,7 @@ static const ARMCPRegInfo cortex_r52_cp_reginfo[] = { static void cortex_r52_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -709,21 +742,21 @@ static void cortex_r52_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8144c004; cpu->reset_sctlr = 0x30c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x10111001; - cpu->isar.id_dfr0 = 0x03010006; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x10111001); + SET_IDREG(isar, ID_DFR0, 0x03010006); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00211040; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0xf0102211; - cpu->isar.id_mmfr4 = 0x00000010; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232142; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x00010001; + SET_IDREG(isar, ID_MMFR0, 0x00211040); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0xf0102211); + SET_IDREG(isar, ID_MMFR4, 0x00000010); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232142); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x00010001); cpu->isar.dbgdidr = 0x77168000; cpu->clidr = (1 << 27) | (1 << 24) | 0x3; cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ @@ -921,6 +954,7 @@ static void pxa270c5_initfn(Object *obj) static void arm_max_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* aarch64_a57_initfn, advertising none of the aarch64 features */ cpu->dtb_compatible = "arm,cortex-a57"; @@ -940,24 +974,21 @@ static void arm_max_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.dbgdidr = 0x3516d000; - cpu->isar.dbgdevid = 0x00110f13; - cpu->isar.dbgdevid1 = 0x2; + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + 
SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); cpu->isar.reset_pmcr_el0 = 0x41013000; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ @@ -1001,19 +1032,31 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "ti925t", .initfn = ti925t_initfn }, { .name = "sa1100", .initfn = sa1100_initfn }, { .name = "sa1110", .initfn = sa1110_initfn }, - { .name = "pxa250", .initfn = pxa250_initfn }, - { .name = "pxa255", .initfn = pxa255_initfn }, - { .name = "pxa260", .initfn = pxa260_initfn }, - { .name = "pxa261", .initfn = pxa261_initfn }, - { .name = "pxa262", .initfn = pxa262_initfn }, + { .name = "pxa250", .initfn = pxa250_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa255", .initfn = pxa255_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa260", .initfn = pxa260_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa261", .initfn = pxa261_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa262", .initfn = pxa262_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, /* "pxa270" is an alias for "pxa270-a0" */ - { .name = "pxa270", .initfn = pxa270a0_initfn }, - { .name = "pxa270-a0", .initfn = pxa270a0_initfn }, - { .name = "pxa270-a1", .initfn = pxa270a1_initfn }, - { .name = "pxa270-b0", .initfn = pxa270b0_initfn }, - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, + { .name = "pxa270", .initfn = pxa270a0_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-a0", .initfn = pxa270a0_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-a1", .initfn = pxa270a1_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-b0", .initfn = pxa270b0_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-b1", .initfn = pxa270b1_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn, + .deprecation_note = "iwMMXt CPUs are no longer supported", }, #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 0899251..937f29e2 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -29,35 +29,10 @@ #include "cpu-features.h" #include "cpregs.h" -static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize, - unsigned cachesize) -{ - unsigned lg_linesize = ctz32(linesize); - unsigned sets; - - /* - * The 64-bit CCSIDR_EL1 format is: - * [55:32] number of sets - 1 - * [23:3] associativity - 1 - * [2:0] log2(linesize) - 4 - * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc - */ - assert(assoc != 0); - assert(is_power_of_2(linesize)); - assert(lg_linesize >= 4 && lg_linesize <= 7 + 4); - - /* sets * associativity * linesize == cachesize. 
*/ - sets = cachesize / (assoc * linesize); - assert(cachesize % (assoc * linesize) == 0); - - return ((uint64_t)(sets - 1) << 32) - | ((assoc - 1) << 3) - | (lg_linesize - 4); -} - static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a35"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -74,28 +49,28 @@ static void aarch64_a35_initfn(Object *obj) cpu->midr = 0x411fd040; cpu->revidr = 0; cpu->ctr = 0x84448004; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64pfr1 = 0; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64dfr1 = 0; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64isar1 = 0; - cpu->isar.id_aa64mmfr0 = 0x00101122; - cpu->isar.id_aa64mmfr1 = 0; + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64PFR1, 0); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64ISAR1, 0); + SET_IDREG(isar, ID_AA64MMFR0, 0x00101122); + SET_IDREG(isar, ID_AA64MMFR1, 0); cpu->clidr = 0x0a200023; cpu->dcz_blocksize = 4; @@ -106,9 +81,12 @@ static void aarch64_a35_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x410a3000; /* From B2.29 Cache ID registers */ - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ - cpu->ccsidr[2] = 0x703fe03a; /* 512KB L2 cache */ + /* 32KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); + /* 32KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 2); + /* 512KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 512 * KiB, 7); /* From B3.5 VGIC Type register */ cpu->gic_num_lrs = 4; @@ -180,11 +158,8 @@ static bool cpu_arm_get_rme(Object *obj, Error **errp) static void cpu_arm_set_rme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, RME, value); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, RME, value); } static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name, @@ -221,12 +196,13 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, visit_type_uint32(v, name, &value, errp); } -static Property arm_cpu_lpa2_property = +static const Property arm_cpu_lpa2_property = DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = 
ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a55"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -243,38 +219,41 @@ static void aarch64_a55_initfn(Object *obj) cpu->clidr = 0x82000023; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x0000000010112222ull; - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x0000000010112222ull); + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x412FD050; /* r2p0 */ cpu->revidr = 0; /* From B2.23 CCSIDR_EL1 */ - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */ - cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */ + /* 32KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); + /* 32KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 2); + /* 512KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 512 * KiB, 7); /* From B2.96 SCTLR_EL3 */ cpu->reset_sctlr = 0x30c50838; @@ -296,6 +275,7 @@ static void aarch64_a55_initfn(Object *obj) static void aarch64_a72_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a72"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -315,32 +295,35 @@ static void aarch64_a72_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); 
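/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * SET_IDREG()/GET_IDREG()/FIELD_DP64_IDREG() calls introduced throughout
 * this series replace direct writes to individual cpu->isar.id_* members
 * with accesses to a single indexed array of ID-register values. The real
 * macro definitions are not visible in these hunks, so the enum, struct
 * and array names below are invented for the example; only the overall
 * pattern is what the converted lines rely on. FIELD_DP64() is the
 * existing registerfields helper already used elsewhere in this file.
 */
typedef enum {
    IDX_ID_PFR0, IDX_ID_PFR1, IDX_ID_DFR0,   /* ... one entry per ID reg */
    NUM_IDREG_IDX
} ExampleIdRegIdx;

typedef struct {
    uint64_t idregs[NUM_IDREG_IDX];          /* backing store for ID regs */
    /* non-ID fields such as mvfr0 or dbgdidr remain named members */
} ExampleISARegisters;

/* Accessors keyed by register name instead of by struct member. */
#define GET_IDREG(isar, NAME)        ((isar)->idregs[IDX_ ## NAME])
#define SET_IDREG(isar, NAME, VALUE) ((isar)->idregs[IDX_ ## NAME] = (VALUE))

/* Read-modify-write of a single field of a named ID register. */
#define FIELD_DP64_IDREG(isar, NAME, FIELD, VALUE) \
    SET_IDREG(isar, NAME, \
              FIELD_DP64(GET_IDREG(isar, NAME), NAME, FIELD, VALUE))

/* Usage matching the converted lines: SET_IDREG(isar, ID_PFR0, 0x00000131); */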
+ SET_IDREG(isar, ID_DFR0, 0x03010066); cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41023000; cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ - cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ + /* 32KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); + /* 48KB L1 dcache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 3, 64, 48 * KiB, 2); + /* 1MB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 1 * MiB, 7); cpu->dcz_blocksize = 4; /* 64 bytes */ cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; @@ -352,6 +335,7 @@ static void aarch64_a72_initfn(Object *obj) static void aarch64_a76_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a76"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -368,38 +352,41 @@ static void aarch64_a76_initfn(Object *obj) cpu->clidr = 0x82000023; cpu->ctr = 0x8444C004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull), + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - 
cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0b1; /* r4p1 */ cpu->revidr = 0; /* From B2.18 CCSIDR_EL1 */ - cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */ - cpu->ccsidr[2] = 0x707fe03a; /* 512KB L2 cache */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7); + /* 64KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2); + /* 512KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 512 * KiB, 7); /* From B2.93 SCTLR_EL3 */ cpu->reset_sctlr = 0x30c50838; @@ -422,6 +409,7 @@ static void aarch64_a76_initfn(Object *obj) static void aarch64_a64fx_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,a64fx"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -436,22 +424,25 @@ static void aarch64_a64fx_initfn(Object *obj) cpu->revidr = 0x00000000; cpu->ctr = 0x86668006; cpu->reset_sctlr = 0x30000180; - cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */ - cpu->isar.id_aa64pfr1 = 0x0000000000000000; - cpu->isar.id_aa64dfr0 = 0x0000000010305408; - cpu->isar.id_aa64dfr1 = 0x0000000000000000; + SET_IDREG(isar, ID_AA64PFR0, 0x0000000101111111); /* No RAS Extensions */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408), + SET_IDREG(isar, ID_AA64DFR1, 0x0000000000000000), cpu->id_aa64afr0 = 0x0000000000000000; cpu->id_aa64afr1 = 0x0000000000000000; - cpu->isar.id_aa64mmfr0 = 0x0000000000001122; - cpu->isar.id_aa64mmfr1 = 0x0000000011212100; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011; - cpu->isar.id_aa64isar0 = 0x0000000010211120; - cpu->isar.id_aa64isar1 = 0x0000000000010001; - cpu->isar.id_aa64zfr0 = 0x0000000000000000; + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000001122); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000011212100); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000000010211120); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000010001); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000000000000000); cpu->clidr = 0x0000000080000023; - cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */ - cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */ - cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 7); + /* 64KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 2); + /* 8MB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 256, 8 * MiB, 7); cpu->dcz_blocksize = 6; /* 256 bytes */ cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; @@ -592,6 
+583,7 @@ static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) static void aarch64_neoverse_n1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -608,38 +600,41 @@ static void aarch64_neoverse_n1_initfn(Object *obj) cpu->clidr = 0x82000023; cpu->ctr = 0x8444c004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000110305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0c1; /* r4p1 */ cpu->revidr = 0; /* From B2.23 CCSIDR_EL1 */ - cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */ - cpu->ccsidr[2] = 0x70ffe03a; /* 1MB L2 cache */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7); + /* 64KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2); + /* 1MB L2 dcache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 1 * MiB, 7); /* From B2.98 SCTLR_EL3 */ cpu->reset_sctlr = 0x30c50838; @@ -664,6 +659,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) static void aarch64_neoverse_v1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-v1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -682,32 +678,32 @@ static void aarch64_neoverse_v1_initfn(Object *obj) cpu->dcz_blocksize = 4; 
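/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * make_ccsidr(format, assoc, linesize, cachesize, flags) calls used in
 * this series replace both the hand-coded CCSIDR constants and the
 * make_ccsidr64() helper deleted above. The CCIDX layout below is taken
 * from that deleted helper (sets-1 in [55:32], assoc-1 in [23:3],
 * log2(linesize)-4 in [2:0]); the legacy 32-bit layout puts sets-1 in
 * [27:13], assoc-1 in [12:3], log2(linesize)-4 in [2:0], with the final
 * argument landing in bits [31:28]. This is a sketch of the encoding,
 * not necessarily the exact QEMU helper; ctz32() and is_power_of_2()
 * are the utility functions already used by the deleted code.
 */
typedef enum { EXAMPLE_CCSIDR_LEGACY, EXAMPLE_CCSIDR_CCIDX } ExampleCCSIDRFormat;

static uint64_t example_make_ccsidr(ExampleCCSIDRFormat fmt, unsigned assoc,
                                    unsigned linesize, unsigned cachesize,
                                    unsigned flags)
{
    unsigned lg_line = ctz32(linesize);              /* log2(linesize) */
    unsigned sets = cachesize / (assoc * linesize);

    assert(assoc != 0 && is_power_of_2(linesize));
    assert(cachesize % (assoc * linesize) == 0);
    assert(lg_line >= 4 && lg_line <= 7 + 4);

    if (fmt == EXAMPLE_CCSIDR_LEGACY) {
        /* e.g. (LEGACY, 4, 64, 32 KiB, 7) -> 0x700fe01a, the old a35 value */
        return ((uint64_t)flags << 28) | ((sets - 1) << 13)
               | ((assoc - 1) << 3) | (lg_line - 4);
    }
    /* CCIDX: e.g. (CCIDX, 4, 64, 64 KiB, 0) -> 0x000000ff0000001a */
    return ((uint64_t)(sets - 1) << 32) | ((assoc - 1) << 3) | (lg_line - 4);
}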
cpu->id_aa64afr0 = 0x00000000; cpu->id_aa64afr1 = 0x00000000; - cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; - cpu->isar.id_aa64dfr1 = 0x00000000; - cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0111000001211032ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; - cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + SET_IDREG(isar, ID_AA64DFR0, 0x000001f210305519ull), + SET_IDREG(isar, ID_AA64DFR1, 0x00000000), + SET_IDREG(isar, ID_AA64ISAR0, 0x1011111110212120ull); /* with FEAT_RNG */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0011000001211032ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull), + SET_IDREG(isar, ID_AA64MMFR2, 0x0220011102101011ull), + SET_IDREG(isar, ID_AA64PFR0, 0x1101110120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x15011099; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; - cpu->isar.id_isar6 = 0x01100111; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_DFR0, 0x15011099); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); + SET_IDREG(isar, ID_ISAR6, 0x01100111); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x411FD402; /* r1p2 */ cpu->revidr = 0; @@ -721,9 +717,12 @@ static void aarch64_neoverse_v1_initfn(Object *obj) * L2: 8-way set associative, 64 byte line size, either 512K or 1MB. * L3: No L3 (this matches the CLIDR_EL1 value). 
*/ - cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ - cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ - cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB); /* L2 cache */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0); + /* 64KB L1 icache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; + /* 1MB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 1 * MiB, 0); /* From 3.2.115 SCTLR_EL3 */ cpu->reset_sctlr = 0x30c50838; @@ -740,7 +739,7 @@ static void aarch64_neoverse_v1_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; /* From 3.7.5 ID_AA64ZFR0_EL1 */ - cpu->isar.id_aa64zfr0 = 0x0000100000100000; + SET_IDREG(isar, ID_AA64ZFR0, 0x0000100000100000); cpu->sve_vq.supported = (1 << 0) /* 128bit */ | (1 << 1); /* 256bit */ @@ -887,6 +886,7 @@ static const ARMCPRegInfo cortex_a710_cp_reginfo[] = { static void aarch64_a710_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a710"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -902,38 +902,38 @@ static void aarch64_a710_initfn(Object *obj) /* Ordered by Section B.4: AArch64 registers */ cpu->midr = 0x412FD471; /* r2p1 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x21021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x21021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f010305619ull; - cpu->isar.id_aa64dfr1 = 0; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64DFR0, 0x000011f010305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); cpu->id_aa64afr0 = 0; cpu->id_aa64afr1 = 0; - cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */ - cpu->isar.id_aa64isar1 = 0x0010111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull; + SET_IDREG(isar, ID_AA64ISAR0, 
0x0221111110212120ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0010111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011110101011ull); cpu->clidr = 0x0000001482000023ull; cpu->gm_blocksize = 4; cpu->ctr = 0x000000049444c004ull; @@ -959,9 +959,12 @@ static void aarch64_a710_initfn(Object *obj) * L1: 4-way set associative 64-byte line size, total either 32K or 64K. * L2: 8-way set associative 64 byte line size, total either 256K or 512K. */ - cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ - cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ - cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */ + /* L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0); + /* L1 icache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; + /* L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 512 * KiB, 0); /* FIXME: Not documented -- copied from neoverse-v1 */ cpu->reset_sctlr = 0x30c50838; @@ -985,6 +988,7 @@ static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = { static void aarch64_neoverse_n2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n2"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -1000,38 +1004,38 @@ static void aarch64_neoverse_n2_initfn(Object *obj) /* Ordered by Section B.5: AArch64 ID registers */ cpu->midr = 0x410FD493; /* r0p3 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f210305619ull; - cpu->isar.id_aa64dfr1 = 0; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64DFR0, 0x000011f210305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); cpu->id_aa64afr0 = 0; cpu->id_aa64afr1 = 0; - 
cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0011111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull; + SET_IDREG(isar, ID_AA64ISAR0, 0x1221111110212120ull); /* with Crypto and FEAT_RNG */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0011111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011112101011ull); cpu->clidr = 0x0000001482000023ull; cpu->gm_blocksize = 4; cpu->ctr = 0x00000004b444c004ull; @@ -1057,10 +1061,12 @@ static void aarch64_neoverse_n2_initfn(Object *obj) * L1: 4-way set associative 64-byte line size, total 64K. * L2: 8-way set associative 64 byte line size, total either 512K or 1024K. */ - cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ - cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ - cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */ - + /* L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0); + /* L1 icache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; + /* L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 512 * KiB, 0); /* FIXME: Not documented -- copied from neoverse-v1 */ cpu->reset_sctlr = 0x30c50838; @@ -1083,6 +1089,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) void aarch64_max_tcg_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; uint64_t t; uint32_t u; @@ -1133,7 +1140,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, CTR_EL0, DIC, 1); cpu->ctr = t; - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */ @@ -1148,9 +1155,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */ t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */ t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */ - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */ t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED); t = FIELD_DP64(t, ID_AA64ISAR1, API, 1); @@ -1160,18 +1167,20 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */ t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */ t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */ - t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); /* FEAT_BF16 */ + t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 2); /* FEAT_BF16, FEAT_EBF16 */ t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */ - cpu->isar.id_aa64isar1 = t; + t = FIELD_DP64(t, ID_AA64ISAR1, XS, 1); /* FEAT_XS */ + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64isar2; + t = GET_IDREG(isar, ID_AA64ISAR2); + t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ - t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ + t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ - cpu->isar.id_aa64isar2 = t; + SET_IDREG(isar, ID_AA64ISAR2, t); - t = 
cpu->isar.id_aa64pfr0; + t = GET_IDREG(isar, ID_AA64PFR0); t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */ @@ -1180,9 +1189,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */ - cpu->isar.id_aa64pfr0 = t; + SET_IDREG(isar, ID_AA64PFR0, t); - t = cpu->isar.id_aa64pfr1; + t = GET_IDREG(isar, ID_AA64PFR1); t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */ t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */ /* @@ -1195,9 +1204,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */ - cpu->isar.id_aa64pfr1 = t; + SET_IDREG(isar, ID_AA64PFR1, t); - t = cpu->isar.id_aa64mmfr0; + t = GET_IDREG(isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */ @@ -1205,9 +1214,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */ t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */ t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(isar, ID_AA64MMFR0, t); - t = cpu->isar.id_aa64mmfr1; + t = GET_IDREG(isar, ID_AA64MMFR1); t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ @@ -1217,10 +1226,12 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ + t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ - cpu->isar.id_aa64mmfr1 = t; + t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ + SET_IDREG(isar, ID_AA64MMFR1, t); - t = cpu->isar.id_aa64mmfr2; + t = GET_IDREG(isar, ID_AA64MMFR2); t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */ t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */ @@ -1234,31 +1245,29 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ - cpu->isar.id_aa64mmfr2 = t; + SET_IDREG(isar, ID_AA64MMFR2, t); - t = cpu->isar.id_aa64mmfr3; - t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */ - cpu->isar.id_aa64mmfr3 = t; + FIELD_DP64_IDREG(isar, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */ - t = cpu->isar.id_aa64zfr0; + t = GET_IDREG(isar, ID_AA64ZFR0); t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */ t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */ - t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); /* FEAT_BF16 */ + t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */ t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */ t = 
FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */ t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */ - cpu->isar.id_aa64zfr0 = t; + SET_IDREG(isar, ID_AA64ZFR0, t); - t = cpu->isar.id_aa64dfr0; - t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */ + t = GET_IDREG(isar, ID_AA64DFR0); + t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_aa64dfr0 = t; + SET_IDREG(isar, ID_AA64DFR0, t); - t = cpu->isar.id_aa64smfr0; + t = GET_IDREG(isar, ID_AA64SMFR0); t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */ @@ -1266,7 +1275,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */ - cpu->isar.id_aa64smfr0 = t; + SET_IDREG(isar, ID_AA64SMFR0, t); /* Replicate the same data to the 32-bit id registers. */ aa32_max_features(cpu); @@ -1312,7 +1321,7 @@ static void aarch64_cpu_register_types(void) size_t i; for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { - aarch64_cpu_register(&aarch64_cpus[i]); + arm_cpu_register(&aarch64_cpus[i]); } } diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c index 7cadd61..3428bd1 100644 --- a/target/arm/tcg/crypto_helper.c +++ b/target/arm/tcg/crypto_helper.c @@ -10,14 +10,16 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" -#include "cpu.h" -#include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "crypto/aes-round.h" #include "crypto/sm4.h" #include "vec_internal.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + union CRYPTO_STATE { uint8_t bytes[16]; uint32_t words[4]; diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c index 56a1dc1..01867f8 100644 --- a/target/arm/tcg/gengvec.c +++ b/target/arm/tcg/gengvec.c @@ -88,6 +88,25 @@ GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) #undef GEN_CMP0 +void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + /* Signed shift out of range results in all-sign-bits */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz); +} + +void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + /* Unsigned shift out of range results in all-zero-bits */ + if (shift >= (8 << vece)) { + tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); + } else { + tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz); + } +} + static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) { tcg_gen_vec_sar8i_i64(a, a, shift); @@ -285,7 +304,7 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) tcg_gen_add_i32(d, d, t); } - void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) { TCGv_i64 t = tcg_temp_new_i64(); @@ -297,10 +316,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec ones = 
tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); tcg_gen_shri_vec(vece, t, a, sh - 1); - tcg_gen_dupi_vec(vece, ones, 1); tcg_gen_and_vec(vece, t, t, ones); tcg_gen_sari_vec(vece, d, a, sh); tcg_gen_add_vec(vece, d, d, t); @@ -492,10 +510,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) { TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec ones = tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1); tcg_gen_shri_vec(vece, t, a, shift - 1); - tcg_gen_dupi_vec(vece, ones, 1); tcg_gen_and_vec(vece, t, t, ones); tcg_gen_shri_vec(vece, d, a, shift); tcg_gen_add_vec(vece, d, d, t); @@ -685,9 +702,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); + int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh); + TCGv_vec m = tcg_constant_vec_matching(d, vece, mi); - tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); tcg_gen_shri_vec(vece, t, a, sh); tcg_gen_and_vec(vece, d, d, m); tcg_gen_or_vec(vece, d, d, t); @@ -773,10 +790,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh)); tcg_gen_shli_vec(vece, t, a, sh); - tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); tcg_gen_and_vec(vece, d, d, m); tcg_gen_or_vec(vece, d, d, t); } @@ -1044,14 +1060,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, TCGv_vec rval = tcg_temp_new_vec_matching(dst); TCGv_vec lsh = tcg_temp_new_vec_matching(dst); TCGv_vec rsh = tcg_temp_new_vec_matching(dst); - TCGv_vec msk, max; + TCGv_vec max, zero; tcg_gen_neg_vec(vece, rsh, shift); if (vece == MO_8) { tcg_gen_mov_vec(lsh, shift); } else { - msk = tcg_temp_new_vec_matching(dst); - tcg_gen_dupi_vec(vece, msk, 0xff); + TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff); tcg_gen_and_vec(vece, lsh, shift, msk); tcg_gen_and_vec(vece, rsh, rsh, msk); } @@ -1064,26 +1079,21 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, tcg_gen_shlv_vec(vece, lval, src, lsh); tcg_gen_shrv_vec(vece, rval, src, rsh); - max = tcg_temp_new_vec_matching(dst); - tcg_gen_dupi_vec(vece, max, 8 << vece); - /* - * The choice of LT (signed) and GEU (unsigned) are biased toward + * The choice of GE (signed) and GEU (unsigned) are biased toward * the instructions of the x86_64 host. For MO_8, the whole byte * is significant so we must use an unsigned compare; otherwise we * have already masked to a byte and so a signed compare works. * Other tcg hosts have a full set of comparisons and do not care. 
*/ + zero = tcg_constant_vec_matching(dst, vece, 0); + max = tcg_constant_vec_matching(dst, vece, 8 << vece); if (vece == MO_8) { - tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); - tcg_gen_andc_vec(vece, lval, lval, lsh); - tcg_gen_andc_vec(vece, rval, rval, rsh); + tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval); + tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval); } else { - tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); - tcg_gen_and_vec(vece, lval, lval, lsh); - tcg_gen_and_vec(vece, rval, rval, rsh); + tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval); + tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval); } tcg_gen_or_vec(vece, dst, lval, rval); } @@ -1093,7 +1103,7 @@ void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, { static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0 }; static const GVecGen3 ops[4] = { { .fniv = gen_ushl_vec, @@ -1169,7 +1179,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst, TCGv_vec rval = tcg_temp_new_vec_matching(dst); TCGv_vec lsh = tcg_temp_new_vec_matching(dst); TCGv_vec rsh = tcg_temp_new_vec_matching(dst); - TCGv_vec tmp = tcg_temp_new_vec_matching(dst); + TCGv_vec max, zero; /* * Rely on the TCG guarantee that out of range shifts produce @@ -1180,29 +1190,28 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst, if (vece == MO_8) { tcg_gen_mov_vec(lsh, shift); } else { - tcg_gen_dupi_vec(vece, tmp, 0xff); - tcg_gen_and_vec(vece, lsh, shift, tmp); - tcg_gen_and_vec(vece, rsh, rsh, tmp); + TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff); + tcg_gen_and_vec(vece, lsh, shift, msk); + tcg_gen_and_vec(vece, rsh, rsh, msk); } /* Bound rsh so out of bound right shift gets -1. */ - tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); - tcg_gen_umin_vec(vece, rsh, rsh, tmp); - tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); + max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1); + tcg_gen_umin_vec(vece, rsh, rsh, max); tcg_gen_shlv_vec(vece, lval, src, lsh); tcg_gen_sarv_vec(vece, rval, src, rsh); /* Select in-bound left shift. */ - tcg_gen_andc_vec(vece, lval, lval, tmp); + zero = tcg_constant_vec_matching(dst, vece, 0); + tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval); /* Select between left and right shift. 
*/ if (vece == MO_8) { - tcg_gen_dupi_vec(vece, tmp, 0); - tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval); } else { - tcg_gen_dupi_vec(vece, tmp, 0x80); - tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); + TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval); } } @@ -1211,7 +1220,7 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, { static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, - INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0 }; static const GVecGen3 ops[4] = { { .fniv = gen_sshl_vec, @@ -1304,6 +1313,42 @@ void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, opr_sz, max_sz, 0, fns[vece]); } +void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_2_ptr * const fns[] = { + gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h, + gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(c >= 0 && c <= (8 << vece)); + tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]); +} + +void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_2_ptr * const fns[] = { + gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h, + gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(c >= 0 && c <= (8 << vece)); + tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]); +} + +void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_2_ptr * const fns[] = { + gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h, + gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(c >= 0 && c <= (8 << vece)); + tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]); +} + void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) { uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); @@ -2313,3 +2358,372 @@ void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, assert(vece <= MO_32); tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); } + +void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const GVecGen2 g[] = { + { .fni4 = gen_helper_neon_cls_s8, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_cls_s16, + .vece = MO_16 }, + { .fni4 = tcg_gen_clrsb_i32, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_clz32_i32(TCGv_i32 d, TCGv_i32 n) +{ + tcg_gen_clzi_i32(d, n, 32); +} + +void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const GVecGen2 g[] = { + { .fni4 = gen_helper_neon_clz_u8, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_clz_u16, + .vece = MO_16 }, + { .fni4 = gen_clz32_i32, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t 
max_sz) +{ + assert(vece == MO_8); + tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0, + gen_helper_gvec_cnt_b); +} + +void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + assert(vece == MO_8); + tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0, + gen_helper_gvec_rbit_b); +} + +void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + assert(vece == MO_8); + tcg_gen_gvec_rotli(MO_16, rd_ofs, rn_ofs, 8, opr_sz, max_sz); +} + +static void gen_bswap32_i64(TCGv_i64 d, TCGv_i64 n) +{ + tcg_gen_bswap64_i64(d, n); + tcg_gen_rotli_i64(d, d, 32); +} + +void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const GVecGen2 g = { + .fni8 = gen_bswap32_i64, + .fni4 = tcg_gen_bswap32_i32, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_32 + }; + + switch (vece) { + case MO_16: + tcg_gen_gvec_rotli(MO_32, rd_ofs, rn_ofs, 16, opr_sz, max_sz); + break; + case MO_8: + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g); + break; + default: + g_assert_not_reached(); + } +} + +void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const GVecGen2 g[] = { + { .fni8 = tcg_gen_bswap64_i64, + .vece = MO_64 }, + { .fni8 = tcg_gen_hswap_i64, + .vece = MO_64 }, + }; + + switch (vece) { + case MO_32: + tcg_gen_gvec_rotli(MO_64, rd_ofs, rn_ofs, 32, opr_sz, max_sz); + break; + case MO_8: + case MO_16: + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); + break; + default: + g_assert_not_reached(); + } +} + +static void gen_saddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n) +{ + int half = 4 << vece; + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_shli_vec(vece, t, n, half); + tcg_gen_sari_vec(vece, d, n, half); + tcg_gen_sari_vec(vece, t, t, half); + tcg_gen_add_vec(vece, d, d, t); +} + +static void gen_saddlp_s_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_ext32s_i64(t, n); + tcg_gen_sari_i64(d, n, 32); + tcg_gen_add_i64(d, d, t); +} + +void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2 g[] = { + { .fniv = gen_saddlp_vec, + .fni8 = gen_helper_neon_addlp_s8, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_saddlp_vec, + .fni8 = gen_helper_neon_addlp_s16, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_saddlp_vec, + .fni8 = gen_saddlp_s_i64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_sadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + gen_saddlp_vec(vece, t, n); + tcg_gen_add_vec(vece, d, d, t); +} + +static void gen_sadalp_b_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_helper_neon_addlp_s8(t, n); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_sadalp_h_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_helper_neon_addlp_s16(t, n); + tcg_gen_vec_add32_i64(d, d, t); +} + +static void gen_sadalp_s_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_saddlp_s_i64(t, n); + tcg_gen_add_i64(d, d, t); +} + +void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t 
opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2 g[] = { + { .fniv = gen_sadalp_vec, + .fni8 = gen_sadalp_b_i64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fniv = gen_sadalp_vec, + .fni8 = gen_sadalp_h_i64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fniv = gen_sadalp_vec, + .fni8 = gen_sadalp_s_i64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uaddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n) +{ + int half = 4 << vece; + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, half)); + + tcg_gen_shri_vec(vece, t, n, half); + tcg_gen_and_vec(vece, d, n, m); + tcg_gen_add_vec(vece, d, d, t); +} + +static void gen_uaddlp_b_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0xff)); + + tcg_gen_shri_i64(t, n, 8); + tcg_gen_and_i64(d, n, m); + tcg_gen_and_i64(t, t, m); + /* No carry between widened unsigned elements. */ + tcg_gen_add_i64(d, d, t); +} + +static void gen_uaddlp_h_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + TCGv_i64 m = tcg_constant_i64(dup_const(MO_32, 0xffff)); + + tcg_gen_shri_i64(t, n, 16); + tcg_gen_and_i64(d, n, m); + tcg_gen_and_i64(t, t, m); + /* No carry between widened unsigned elements. */ + tcg_gen_add_i64(d, d, t); +} + +static void gen_uaddlp_s_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_ext32u_i64(t, n); + tcg_gen_shri_i64(d, n, 32); + tcg_gen_add_i64(d, d, t); +} + +void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2 g[] = { + { .fniv = gen_uaddlp_vec, + .fni8 = gen_uaddlp_b_i64, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uaddlp_vec, + .fni8 = gen_uaddlp_h_i64, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uaddlp_vec, + .fni8 = gen_uaddlp_s_i64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + gen_uaddlp_vec(vece, t, n); + tcg_gen_add_vec(vece, d, d, t); +} + +static void gen_uadalp_b_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_uaddlp_b_i64(t, n); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_uadalp_h_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_uaddlp_h_i64(t, n); + tcg_gen_vec_add32_i64(d, d, t); +} + +static void gen_uadalp_s_i64(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_uaddlp_s_i64(t, n); + tcg_gen_add_i64(d, d, t); +} + +void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2 g[] = { + { .fniv = gen_uadalp_vec, + .fni8 = gen_uadalp_b_i64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uadalp_vec, + .fni8 = gen_uadalp_h_i64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = 
gen_uadalp_vec, + .fni8 = gen_uadalp_s_i64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]); +} + +void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz) +{ + uint64_t s_bit = 1ull << ((8 << vece) - 1); + tcg_gen_gvec_andi(vece, dofs, aofs, s_bit - 1, oprsz, maxsz); +} + +void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz) +{ + uint64_t s_bit = 1ull << ((8 << vece) - 1); + tcg_gen_gvec_xori(vece, dofs, aofs, s_bit, oprsz, maxsz); +} + +void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + assert(vece == MO_32); + tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0, + gen_helper_gvec_urecpe_s); +} + +void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + assert(vece == MO_32); + tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0, + gen_helper_gvec_ursqrte_s); +} diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 0ea8668..4f618ae 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -28,12 +28,20 @@ #include "qemu/bitops.h" #include "internals.h" #include "qemu/crc32c.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "exec/cpu-common.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "qemu/int128.h" #include "qemu/atomic128.h" #include "fpu/softfloat.h" -#include <zlib.h> /* For crc32 */ +#include <zlib.h> /* for crc32 */ +#ifdef CONFIG_USER_ONLY +#include "user/page-protection.h" +#endif +#include "vec_internal.h" /* C2.4.7 Multiply and divide */ /* special cases for 0 and LLONG_MIN are mandated by the standard */ @@ -130,40 +138,38 @@ static inline uint32_t float_rel_to_flags(int res) return flags; } -uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status) +uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status) { return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); } -uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status) +uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status) { return float_rel_to_flags(float16_compare(x, y, fp_status)); } -uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status) +uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status) { return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); } -uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status) +uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status) { return float_rel_to_flags(float32_compare(x, y, fp_status)); } -uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status) +uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status) { return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); } -uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) +uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status) { return float_rel_to_flags(float64_compare(x, y, fp_status)); } -float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) +float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst) { - float_status *fpst = fpstp; - a = float32_squash_input_denormal(a, fpst); b = 
float32_squash_input_denormal(b, fpst); @@ -176,10 +182,8 @@ float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) return float32_mul(a, b, fpst); } -float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) +float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst) { - float_status *fpst = fpstp; - a = float64_squash_input_denormal(a, fpst); b = float64_squash_input_denormal(b, fpst); @@ -193,184 +197,71 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) } /* 64bit/double versions of the neon float compare functions */ -uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) +uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst) { - float_status *fpst = fpstp; return -float64_eq_quiet(a, b, fpst); } -uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp) +uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst) { - float_status *fpst = fpstp; return -float64_le(b, a, fpst); } -uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) +uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) { - float_status *fpst = fpstp; return -float64_lt(b, a, fpst); } -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 +/* + * Reciprocal step and sqrt step. Note that unlike the A32/T32 * versions, these do a fully fused multiply-add or * multiply-add-and-halve. + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. */ - -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_two; - } - return float16_muladd(a, b, float16_two, 0, fpst); -} - -float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_two; - } - return float32_muladd(a, b, float32_two, 0, fpst); -} - -float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_two; - } - return float64_muladd(a, b, float64_two, 0, fpst); -} - -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_one_point_five; - } - return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst); -} - -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_one_point_five; - } - return float32_muladd(a, b, float32_three, 
float_muladd_halve_result, fpst); -} - -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_one_point_five; - } - return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); -} - -/* Pairwise long add: add pairs of adjacent elements into - * double-width elements in the result (eg _s8 is an 8x8->16 op) - */ -uint64_t HELPER(neon_addlp_s8)(uint64_t a) -{ - uint64_t nsignmask = 0x0080008000800080ULL; - uint64_t wsignmask = 0x8000800080008000ULL; - uint64_t elementmask = 0x00ff00ff00ff00ffULL; - uint64_t tmp1, tmp2; - uint64_t res, signres; - - /* Extract odd elements, sign extend each to a 16 bit field */ - tmp1 = a & elementmask; - tmp1 ^= nsignmask; - tmp1 |= wsignmask; - tmp1 = (tmp1 - nsignmask) ^ wsignmask; - /* Ditto for the even elements */ - tmp2 = (a >> 8) & elementmask; - tmp2 ^= nsignmask; - tmp2 |= wsignmask; - tmp2 = (tmp2 - nsignmask) ^ wsignmask; - - /* calculate the result by summing bits 0..14, 16..22, etc, - * and then adjusting the sign bits 15, 23, etc manually. - * This ensures the addition can't overflow the 16 bit field. - */ - signres = (tmp1 ^ tmp2) & wsignmask; - res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); - res ^= signres; - - return res; -} - -uint64_t HELPER(neon_addlp_u8)(uint64_t a) -{ - uint64_t tmp; - - tmp = a & 0x00ff00ff00ff00ffULL; - tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; - return tmp; -} - -uint64_t HELPER(neon_addlp_s16)(uint64_t a) -{ - int32_t reslo, reshi; - - reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); - reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); - - return (uint32_t)reslo | (((uint64_t)reshi) << 32); -} - -uint64_t HELPER(neon_addlp_u16)(uint64_t a) -{ - uint64_t tmp; - - tmp = a & 0x0000ffff0000ffffULL; - tmp += (a >> 16) & 0x0000ffff0000ffffULL; - return tmp; -} +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ + { \ + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ + a = FLOATTYPE ## _ ## CHSFN(a); \ + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ + return FLOATTYPE ## _two; \ + } \ + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ + } + +DO_RECPS(recpsf_f16, uint32_t, float16, chs) +DO_RECPS(recpsf_f32, float32, float32, chs) +DO_RECPS(recpsf_f64, float64, float64, chs) +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) + +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ + { \ + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ + a = FLOATTYPE ## _ ## CHSFN(a); \ + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ + return FLOATTYPE ## _one_point_five; \ + } \ + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ + -1, 0, fpst); \ + } \ + +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) +DO_RSQRTSF(rsqrtsf_f64, float64, 
float64, chs) +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ -uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp) +uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) { - float_status *fpst = fpstp; uint16_t val16, sbit; int16_t exp; @@ -401,9 +292,8 @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp) } } -float32 HELPER(frecpx_f32)(float32 a, void *fpstp) +float32 HELPER(frecpx_f32)(float32 a, float_status *fpst) { - float_status *fpst = fpstp; uint32_t val32, sbit; int32_t exp; @@ -434,9 +324,8 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp) } } -float64 HELPER(frecpx_f64)(float64 a, void *fpstp) +float64 HELPER(frecpx_f64)(float64 a, float_status *fpst) { - float_status *fpst = fpstp; uint64_t val64, sbit; int64_t exp; @@ -467,28 +356,53 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp) } } -float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) +float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst) { - /* Von Neumann rounding is implemented by using round-to-zero - * and then setting the LSB of the result if Inexact was raised. - */ float32 r; - float_status *fpst = &env->vfp.fp_status; - float_status tstat = *fpst; - int exflags; - - set_float_rounding_mode(float_round_to_zero, &tstat); - set_float_exception_flags(0, &tstat); - r = float64_to_float32(a, &tstat); - exflags = get_float_exception_flags(&tstat); - if (exflags & float_flag_inexact) { - r = make_float32(float32_val(r) | 1); - } - exflags |= get_float_exception_flags(fpst); - set_float_exception_flags(exflags, fpst); + int old = get_float_rounding_mode(fpst); + + set_float_rounding_mode(float_round_to_odd, fpst); + r = float64_to_float32(a, fpst); + set_float_rounding_mode(old, fpst); return r; } +/* + * AH=1 min/max have some odd special cases: + * comparing two zeroes (regardless of sign), (NaN, anything), + * or (anything, NaN) should return the second argument (possibly + * squashed to zero). + * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16. + */ +#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \ + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ + { \ + bool save; \ + CTYPE r; \ + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ + if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \ + return b; \ + } \ + if (FLOATTYPE ## _is_any_nan(a) || \ + FLOATTYPE ## _is_any_nan(b)) { \ + float_raise(float_flag_invalid, fpst); \ + return b; \ + } \ + save = get_flush_to_zero(fpst); \ + set_flush_to_zero(false, fpst); \ + r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \ + set_flush_to_zero(save, fpst); \ + return r; \ + } + +AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min) +AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min) +AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) +AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) +AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) +AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) + /* 64-bit versions of the CRC helpers. 
Note that although the operation * (and the prototypes of crc32c() and crc32() mean that only the bottom * 32 bits of the accumulator and result are used, we pass and return @@ -524,27 +438,17 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix)) #define ADVSIMD_HALFOP(name) \ -uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \ +uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ return float16_ ## name(a, b, fpst); \ } -ADVSIMD_HALFOP(add) -ADVSIMD_HALFOP(sub) -ADVSIMD_HALFOP(mul) -ADVSIMD_HALFOP(div) -ADVSIMD_HALFOP(min) -ADVSIMD_HALFOP(max) -ADVSIMD_HALFOP(minnum) -ADVSIMD_HALFOP(maxnum) - #define ADVSIMD_TWOHALFOP(name) \ -uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \ +uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \ + float_status *fpst) \ { \ float16 a1, a2, b1, b2; \ uint32_t r1, r2; \ - float_status *fpst = fpstp; \ a1 = extract32(two_a, 0, 16); \ a2 = extract32(two_a, 16, 16); \ b1 = extract32(two_b, 0, 16); \ @@ -564,10 +468,8 @@ ADVSIMD_TWOHALFOP(minnum) ADVSIMD_TWOHALFOP(maxnum) /* Data processing - scalar floating-point and advanced SIMD */ -static float16 float16_mulx(float16 a, float16 b, void *fpstp) +static float16 float16_mulx(float16 a, float16 b, float_status *fpst) { - float_status *fpst = fpstp; - a = float16_squash_input_denormal(a, fpst); b = float16_squash_input_denormal(b, fpst); @@ -585,16 +487,14 @@ ADVSIMD_TWOHALFOP(mulx) /* fused multiply-accumulate */ uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, - void *fpstp) + float_status *fpst) { - float_status *fpst = fpstp; return float16_muladd(a, b, c, 0, fpst); } uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, - uint32_t two_c, void *fpstp) + uint32_t two_c, float_status *fpst) { - float_status *fpst = fpstp; float16 a1, a2, b1, b2, c1, c2; uint32_t r1, r2; a1 = extract32(two_a, 0, 16); @@ -616,31 +516,27 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, #define ADVSIMD_CMPRES(test) (test) ? 
0xffff : 0 -uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; int compare = float16_compare_quiet(a, b, fpst); return ADVSIMD_CMPRES(compare == float_relation_equal); } -uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; int compare = float16_compare(a, b, fpst); return ADVSIMD_CMPRES(compare == float_relation_greater || compare == float_relation_equal); } -uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; int compare = float16_compare(a, b, fpst); return ADVSIMD_CMPRES(compare == float_relation_greater); } -uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; float16 f0 = float16_abs(a); float16 f1 = float16_abs(b); int compare = float16_compare(f0, f1, fpst); @@ -648,9 +544,8 @@ uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp) compare == float_relation_equal); } -uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; float16 f0 = float16_abs(a); float16 f1 = float16_abs(b); int compare = float16_compare(f0, f1, fpst); @@ -658,12 +553,12 @@ uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp) } /* round to integral */ -uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status) +uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status) { return float16_round_to_int(x, fp_status); } -uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status) +uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; float16 ret; @@ -679,38 +574,6 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status) return ret; } -/* - * Half-precision floating point conversion functions - * - * There are a multitude of conversion functions with various - * different rounding modes. This is dealt with by the calling code - * setting the mode appropriately before calling the helper. 
- */ - -uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp) -{ - float_status *fpst = fpstp; - - /* Invalid if we are passed a NaN */ - if (float16_is_any_nan(a)) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_int16(a, fpst); -} - -uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp) -{ - float_status *fpst = fpstp; - - /* Invalid if we are passed a NaN */ - if (float16_is_any_nan(a)) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_uint16(a, fpst); -} - static int el_from_spsr(uint32_t spsr) { /* Return the exception level that this SPSR is requesting a return to, @@ -771,6 +634,7 @@ static void cpsr_write_from_spsr_elx(CPUARMState *env, void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) { + ARMCPU *cpu = env_archcpu(env); int cur_el = arm_current_el(env); unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); uint32_t spsr = env->banked_spsr[spsr_idx]; @@ -817,12 +681,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) { + /* Return to AArch32 when CPU is AArch64-only */ + goto illegal_return; + } + if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) { goto illegal_return; } bql_lock(); - arm_call_pre_el_change_hook(env_archcpu(env)); + arm_call_pre_el_change_hook(cpu); bql_unlock(); if (!return_to_aa64) { @@ -850,7 +719,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) int tbii; env->aarch64 = true; - spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar); + spsr &= aarch64_pstate_valid_mask(&cpu->isar); pstate_write(env, spsr); if (!arm_singlestep_active(env)) { env->pstate &= ~PSTATE_SS; @@ -889,7 +758,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); bql_lock(); - arm_call_el_change_hook(env_archcpu(env)); + arm_call_el_change_hook(cpu); bql_unlock(); return; @@ -915,19 +784,10 @@ illegal_return: "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); } -/* - * Square Root and Reciprocal square root - */ - -uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) -{ - float_status *s = fpstp; - - return float16_sqrt(a, s); -} - void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) { + uintptr_t ra = GETPC(); + /* * Implement DC ZVA, which zeroes a fixed-length block of memory. * Note that we do not implement the (architecturally mandated) @@ -948,8 +808,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) #ifndef CONFIG_USER_ONLY if (unlikely(!mem)) { - uintptr_t ra = GETPC(); - /* * Trap if accessing an invalid page. DC_ZVA requires that we supply * the original pointer for an invalid page. 
But watchpoints require @@ -971,7 +829,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) } #endif + set_helper_retaddr(ra); memset(mem, 0, blocklen); + clear_helper_retaddr(); } void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, @@ -1120,7 +980,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr, } #endif /* Easy case: just memset the host memory */ + set_helper_retaddr(ra); memset(mem, data, setsize); + clear_helper_retaddr(); return setsize; } @@ -1163,7 +1025,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, } #endif /* Easy case: just memset the host memory */ + set_helper_retaddr(ra); memset(mem, data, setsize); + clear_helper_retaddr(); mte_mops_set_tags(env, toaddr, setsize, *mtedesc); return setsize; } @@ -1286,7 +1150,6 @@ static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, env->ZF = 1; /* our env->ZF encoding is inverted */ env->CF = 0; env->VF = 0; - return; } void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) @@ -1342,7 +1205,7 @@ static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, /* Do the actual memset: we leave the last partial page to SETE */ stagesetsize = setsize & TARGET_PAGE_MASK; while (stagesetsize > 0) { - step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); + step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); toaddr += step; setsize -= step; stagesetsize -= step; @@ -1497,7 +1360,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, } #endif /* Easy case: just memmove the host memory */ + set_helper_retaddr(ra); memmove(wmem, rmem, copysize); + clear_helper_retaddr(); return copysize; } @@ -1572,7 +1437,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, * Easy case: just memmove the host memory. Note that wmem and * rmem here point to the *last* byte to copy. */ + set_helper_retaddr(ra); memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); + clear_helper_retaddr(); return copysize; } @@ -1682,7 +1549,6 @@ static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, env->ZF = 1; /* our env->ZF encoding is inverted */ env->CF = 0; env->VF = 0; - return; } void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, @@ -1867,3 +1733,42 @@ void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, { do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); } + +static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + return page_get_flags(addr) & PAGE_BTI; +#else + CPUTLBEntryFull *full; + void *host; + int mmu_idx = cpu_mmu_index(env_cpu(env), true); + int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, + false, &host, &full, ra); + + assert(!(flags & TLB_INVALID_MASK)); + return full->extra.arm.guarded; +#endif +} + +void HELPER(guarded_page_check)(CPUARMState *env) +{ + /* + * We have already verified that bti is enabled, and that the + * instruction at PC is not ok for BTYPE. This is always at + * the beginning of a block, so PC is always up-to-date and + * no unwind is required. + */ + if (is_guarded_page(env, env->pc, 0)) { + raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype), + exception_target_el(env)); + } +} + +void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc) +{ + /* + * We have already checked for branch via x16 and x17. + * What remains for choosing BTYPE is checking for a guarded page. 
+ */ + env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1; +} diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 371388f..8502346 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -23,64 +23,62 @@ DEF_HELPER_2(msr_i_spsel, void, env, i32) DEF_HELPER_2(msr_i_daifset, void, env, i32) DEF_HELPER_2(msr_i_daifclear, void, env, i32) DEF_HELPER_1(msr_set_allint_el1, void, env) -DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr) -DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr) -DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) -DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) -DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) -DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) -DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) -DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) -DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) -DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) -DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) -DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) -DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) -DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) -DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) -DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64) -DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) -DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr) -DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) +DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, fpst) +DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, fpst) +DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, fpst) +DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, fpst) +DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, fpst) +DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, fpst) +DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) +DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) +DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) +DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) +DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) 
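/*
 * Illustrative sketch, not part of the patch: the recpsf_ah_* / rsqrtsf_ah_*
 * helpers declared in this hunk are generated by the DO_RECPS/DO_RSQRTSF
 * macros in helper-a64.c with CHSFN=ah_chs, since with FPCR.AH == 1 the
 * sign-change step must not flip the sign of a NaN input. The ah_chs
 * function itself is presumably provided elsewhere in the series; a minimal
 * version consistent with that comment could look like the following
 * (name and placement assumed; needs fpu/softfloat.h):
 */
static inline float32 sketch_float32_ah_chs(float32 a)
{
    /* Toggle the sign bit as usual, but leave NaN inputs untouched. */
    return float32_is_any_nan(a) ? a : float32_chs(a);
}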
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, fpst) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) -DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_FLAGS_3(advsimd_minnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_addh, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_subh, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_mulh, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_divh, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, ptr) -DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, ptr) -DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, ptr) -DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, ptr) -DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, ptr) -DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr) -DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr) -DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr) -DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr) -DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr) -DEF_HELPER_2(advsimd_rinth_exact, f16, f16, ptr) -DEF_HELPER_2(advsimd_rinth, f16, f16, ptr) -DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr) -DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr) -DEF_HELPER_2(sqrt_f16, f16, f16, ptr) +DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, fpst) +DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, fpst) +DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, fpst) +DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, fpst) +DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, fpst) +DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, fpst) +DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, fpst) +DEF_HELPER_3(advsimd_add2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_div2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_max2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_min2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, fpst) +DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, fpst) +DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst) +DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst) +DEF_HELPER_2(advsimd_rinth, f16, f16, fpst) + +DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) DEF_HELPER_2(exception_return, void, env, i64) DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) @@ -133,14 +131,17 @@ DEF_HELPER_4(cpyfp, 
void, env, i32, i32, i32) DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) -DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_1(guarded_page_check, TCG_CALL_NO_WG, void, env) +DEF_HELPER_FLAGS_2(guarded_page_br, TCG_CALL_NO_RWG, void, env, tl) + +DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h index 27eef49..858d691 100644 --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -121,13 +121,13 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index cc4e1d8..0b1b588 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -541,10 +541,18 @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
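/*
 * Illustrative sketch, not part of the patch: the sve_ah_fabs_* (and the
 * matching sve_ah_fneg_*) entry points added in this hunk parallel the other
 * FPCR.AH == 1 variants in this series; the assumption here is that the AH
 * forms leave the sign of a NaN element unchanged. A hypothetical
 * per-element body for the single-precision case (name invented, needs
 * fpu/softfloat.h) might be:
 */
static inline float32 sketch_ah_fabs_s(float32 v)
{
    /* Clear the sign bit only for non-NaN inputs. */
    return float32_is_any_nan(v) ? v : float32_abs(v);
}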
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -959,433 +967,545 @@ DEF_HELPER_FLAGS_4(sve_umini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(sve_umini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_5(gvec_recps_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_recps_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_recps_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_rsqrts_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_faddv_d, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxnmv_h, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxnmv_s, 
TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxnmv_d, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminnmv_h, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminnmv_s, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminnmv_d, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxv_h, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxv_s, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fmaxv_d, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminv_h, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG, - i64, ptr, ptr, ptr, i32) + i64, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, + i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, - i64, i64, ptr, ptr, ptr, i32) + i64, i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, - i64, i64, ptr, ptr, ptr, i32) + i64, i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fadda_d, TCG_CALL_NO_RWG, - i64, i64, ptr, ptr, ptr, i32) + i64, i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmge0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmge0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmge0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmgt0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmgt0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmgt0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmlt0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmlt0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmlt0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmle0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmle0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmle0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmeq0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmeq0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, 
i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmeq0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmne0_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fdiv_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, 
fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fabd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fscalbn_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmulx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmulx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmulx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadds_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadds_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadds_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubs_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubs_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubs_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmuls_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmuls_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmuls_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubrs_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubrs_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsubrs_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnms_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, 
ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnms_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnms_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnms_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnms_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnms_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxs_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxs_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxs_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmins_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, i64, ptr, i32) + void, ptr, ptr, ptr, i64, fpst, i32) + +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) + +DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i64, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_hs, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_hd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvt_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_bfcvt, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_hh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_hs, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_ss, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_hd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_dd, TCG_CALL_NO_RWG, - void, 
ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_hh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_hs, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_ss, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_hd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fcvtzu_dd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frint_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frint_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frint_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frintx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frintx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frintx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frecpx_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frecpx_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_frecpx_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fsqrt_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fsqrt_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fsqrt_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_hh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_sh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_dh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_ss, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_scvt_dd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_hh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_sh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_dh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_ss, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, 
i32) DEF_HELPER_FLAGS_5(sve_ucvt_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_ucvt_dd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmge_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmge_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmge_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmgt_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmgt_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmgt_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmeq_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmeq_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmeq_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmne_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmne_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmne_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmuo_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmuo_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcmuo_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facge_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facge_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facge_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facgt_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facgt_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_facgt_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcadd_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, 
TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve2_saddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_saddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -2582,39 +2702,39 @@ 
DEF_HELPER_FLAGS_4(sve2_xar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_xar_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_s, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve2_eor3, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_bcax, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -2682,8 +2802,8 @@ DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve2_sqrdmlah_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -2755,20 +2875,20 @@ DEF_HELPER_FLAGS_5(sve2_cdot_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_fcvtnt_sh, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve2_fcvtnt_ds, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_bfcvtnt, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve2_fcvtlt_hs, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve2_fcvtlt_sd, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, i32) + 
void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(flogb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(flogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(flogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(flogb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(flogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(flogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h new file mode 100644 index 0000000..80db7c2 --- /dev/null +++ b/target/arm/tcg/helper.h @@ -0,0 +1,1153 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) + +DEF_HELPER_3(add_setq, i32, env, i32, i32) +DEF_HELPER_3(add_saturate, i32, env, i32, i32) +DEF_HELPER_3(sub_saturate, i32, env, i32, i32) +DEF_HELPER_3(add_usaturate, i32, env, i32, i32) +DEF_HELPER_3(sub_usaturate, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32) +DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32) + +#define PAS_OP(pfx) \ + DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr) + +PAS_OP(s) +PAS_OP(u) +#undef PAS_OP + +#define PAS_OP(pfx) \ + DEF_HELPER_2(pfx ## add8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## add16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \ + DEF_HELPER_2(pfx ## subaddx, i32, i32, i32) +PAS_OP(q) +PAS_OP(sh) +PAS_OP(uq) +PAS_OP(uh) +#undef PAS_OP + +DEF_HELPER_3(ssat, i32, env, i32, i32) +DEF_HELPER_3(usat, i32, env, i32, i32) +DEF_HELPER_3(ssat16, i32, env, i32, i32) +DEF_HELPER_3(usat16, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) + +DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, + i32, i32, i32, i32) +DEF_HELPER_2(exception_internal, noreturn, env, i32) +DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32) +DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32) +DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32) +DEF_HELPER_2(exception_swstep, noreturn, env, i32) +DEF_HELPER_2(exception_pc_alignment, noreturn, env, vaddr) +DEF_HELPER_1(setend, void, env) +DEF_HELPER_2(wfi, void, env, i32) +DEF_HELPER_1(wfe, void, env) +DEF_HELPER_2(wfit, void, env, i64) +DEF_HELPER_1(yield, void, env) +DEF_HELPER_1(pre_hvc, void, env) +DEF_HELPER_2(pre_smc, void, env, i32) +DEF_HELPER_1(vesb, void, env) + +DEF_HELPER_3(cpsr_write, void, env, i32, i32) +DEF_HELPER_2(cpsr_write_eret, void, env, i32) +DEF_HELPER_1(cpsr_read, i32, env) + +DEF_HELPER_3(v7m_msr, void, env, i32, i32) +DEF_HELPER_2(v7m_mrs, i32, env, i32) + +DEF_HELPER_2(v7m_bxns, void, env, i32) +DEF_HELPER_2(v7m_blxns, void, env, i32) + +DEF_HELPER_3(v7m_tt, i32, env, i32, i32) + +DEF_HELPER_1(v7m_preserve_fp_state, void, env) + +DEF_HELPER_2(v7m_vlstm, void, env, i32) +DEF_HELPER_2(v7m_vlldm, void, env, i32) + +DEF_HELPER_2(v8m_stackcheck, void, env, i32) + 
+DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) + +DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) +DEF_HELPER_2(get_cp_reg, i32, env, cptr) +DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) +DEF_HELPER_2(get_cp_reg64, i64, env, cptr) + +DEF_HELPER_2(get_r13_banked, i32, env, i32) +DEF_HELPER_3(set_r13_banked, void, env, i32, i32) + +DEF_HELPER_3(mrs_banked, i32, env, i32, i32) +DEF_HELPER_4(msr_banked, void, env, i32, i32, i32) + +DEF_HELPER_2(get_user_reg, i32, env, i32) +DEF_HELPER_3(set_user_reg, void, env, i32, i32) + +DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) + +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, vaddr, i32, i32, i32) + +DEF_HELPER_1(vfp_get_fpscr, i32, env) +DEF_HELPER_2(vfp_set_fpscr, void, env, i32) + +DEF_HELPER_3(vfp_addh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_adds, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_addd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_subh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_subs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_subd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_mulh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_muls, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_muld, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_divh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_divs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_divd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_maxh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_maxs, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_maxd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_minh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_mins, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_mind, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_maxnums, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, fpst) +DEF_HELPER_3(vfp_minnumh, f16, f16, f16, fpst) +DEF_HELPER_3(vfp_minnums, f32, f32, f32, fpst) +DEF_HELPER_3(vfp_minnumd, f64, f64, f64, fpst) +DEF_HELPER_2(vfp_sqrth, f16, f16, fpst) +DEF_HELPER_2(vfp_sqrts, f32, f32, fpst) +DEF_HELPER_2(vfp_sqrtd, f64, f64, fpst) +DEF_HELPER_3(vfp_cmph, void, f16, f16, env) +DEF_HELPER_3(vfp_cmps, void, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) +DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env) +DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) +DEF_HELPER_3(vfp_cmped, void, f64, f64, env) + +DEF_HELPER_2(vfp_fcvtds, f64, f32, fpst) +DEF_HELPER_2(vfp_fcvtsd, f32, f64, fpst) +DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, fpst) +DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, fpst) + +DEF_HELPER_2(vfp_uitoh, f16, i32, fpst) +DEF_HELPER_2(vfp_uitos, f32, i32, fpst) +DEF_HELPER_2(vfp_uitod, f64, i32, fpst) +DEF_HELPER_2(vfp_sitoh, f16, i32, fpst) +DEF_HELPER_2(vfp_sitos, f32, i32, fpst) +DEF_HELPER_2(vfp_sitod, f64, i32, fpst) + +DEF_HELPER_2(vfp_touih, i32, f16, fpst) +DEF_HELPER_2(vfp_touis, i32, f32, fpst) +DEF_HELPER_2(vfp_touid, i32, f64, fpst) +DEF_HELPER_2(vfp_touizh, i32, f16, fpst) +DEF_HELPER_2(vfp_touizs, i32, f32, fpst) +DEF_HELPER_2(vfp_touizd, i32, f64, fpst) 
+DEF_HELPER_2(vfp_tosih, s32, f16, fpst) +DEF_HELPER_2(vfp_tosis, s32, f32, fpst) +DEF_HELPER_2(vfp_tosid, s32, f64, fpst) +DEF_HELPER_2(vfp_tosizh, s32, f16, fpst) +DEF_HELPER_2(vfp_tosizs, s32, f32, fpst) +DEF_HELPER_2(vfp_tosizd, s32, f64, fpst) + +DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toshh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toulh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_toslh, i32, f16, i32, fpst) +DEF_HELPER_3(vfp_touqh, i64, f16, i32, fpst) +DEF_HELPER_3(vfp_tosqh, i64, f16, i32, fpst) +DEF_HELPER_3(vfp_toshs, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosls, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_tosqs, i64, f32, i32, fpst) +DEF_HELPER_3(vfp_touhs, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touls, i32, f32, i32, fpst) +DEF_HELPER_3(vfp_touqs, i64, f32, i32, fpst) +DEF_HELPER_3(vfp_toshd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosld, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tosqd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touhd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_tould, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_touqd, i64, f64, i32, fpst) +DEF_HELPER_3(vfp_shtos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sltos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sqtos, f32, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_ultos, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_uqtos, f32, i64, i32, fpst) +DEF_HELPER_3(vfp_shtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sltod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sqtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_ultod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uqtod, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_shtoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sltoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_ultoh, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, fpst) +DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, fpst) + +DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, fpst) +DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, fpst) +DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, fpst) +DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, fpst) + +DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, 
i32, i32, fpst) + +DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, fpst, i32) +DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, fpst, i32) + +DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, fpst) +DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, fpst) +DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst) + +DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64) + +DEF_HELPER_3(shl_cc, i32, env, i32, i32) +DEF_HELPER_3(shr_cc, i32, env, i32, i32) +DEF_HELPER_3(sar_cc, i32, env, i32, i32) +DEF_HELPER_3(ror_cc, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, fpst) +DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, fpst) + +DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env) +DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, fpst) + +DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) + +/* neon_helper.c */ +DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) + +DEF_HELPER_2(neon_shl_u16, i32, i32, i32) +DEF_HELPER_2(neon_shl_s16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) +DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) +DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s16, i32, env, 
i32, i32) +DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) +DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_2(neon_add_u8, i32, i32, i32) +DEF_HELPER_2(neon_add_u16, i32, i32, i32) +DEF_HELPER_2(neon_sub_u8, i32, i32, i32) +DEF_HELPER_2(neon_sub_u16, i32, i32, i32) +DEF_HELPER_2(neon_mul_u8, i32, i32, i32) +DEF_HELPER_2(neon_mul_u16, i32, i32, i32) + +DEF_HELPER_2(neon_tst_u8, i32, i32, i32) +DEF_HELPER_2(neon_tst_u16, i32, i32, i32) +DEF_HELPER_2(neon_tst_u32, i32, i32, i32) + +DEF_HELPER_1(neon_clz_u8, i32, i32) +DEF_HELPER_1(neon_clz_u16, i32, i32) 
+DEF_HELPER_1(neon_cls_s8, i32, i32) +DEF_HELPER_1(neon_cls_s16, i32, i32) +DEF_HELPER_1(neon_cls_s32, i32, i32) +DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) +DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32) +DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32) +DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) +DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32) +DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32) + +DEF_HELPER_1(neon_narrow_u8, i64, i64) +DEF_HELPER_1(neon_narrow_u16, i64, i64) +DEF_HELPER_2(neon_unarrow_sat8, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u8, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s8, i64, env, i64) +DEF_HELPER_2(neon_unarrow_sat16, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u16, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s16, i64, env, i64) +DEF_HELPER_2(neon_unarrow_sat32, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_u32, i64, env, i64) +DEF_HELPER_2(neon_narrow_sat_s32, i64, env, i64) +DEF_HELPER_1(neon_narrow_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_high_u16, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64) +DEF_HELPER_1(neon_widen_u8, i64, i32) +DEF_HELPER_1(neon_widen_s8, i64, i32) +DEF_HELPER_1(neon_widen_u16, i64, i32) +DEF_HELPER_1(neon_widen_s16, i64, i32) + +DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) +DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u64, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s64, i64, i32, i32) +DEF_HELPER_2(neon_mull_u8, i64, i32, i32) +DEF_HELPER_2(neon_mull_s8, i64, i32, i32) +DEF_HELPER_2(neon_mull_u16, i64, i32, i32) +DEF_HELPER_2(neon_mull_s16, i64, i32, i32) + +DEF_HELPER_1(neon_negl_u16, i64, i64) +DEF_HELPER_1(neon_negl_u32, i64, i64) + +DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_cge_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acge_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst) +DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst) +DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst) + +/* iwmmxt_helper.c */ +DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) +DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) +DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64) +DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64) 
+DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) +DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) + +#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ +DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ +DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ +DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ + +DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) +DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) + +DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) + +DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) +DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) +DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) + +DEF_IWMMXT_HELPER_SIZE_ENV(mins) +DEF_IWMMXT_HELPER_SIZE_ENV(minu) +DEF_IWMMXT_HELPER_SIZE_ENV(maxs) +DEF_IWMMXT_HELPER_SIZE_ENV(maxu) + +DEF_IWMMXT_HELPER_SIZE_ENV(subn) +DEF_IWMMXT_HELPER_SIZE_ENV(addn) +DEF_IWMMXT_HELPER_SIZE_ENV(subu) +DEF_IWMMXT_HELPER_SIZE_ENV(addu) +DEF_IWMMXT_HELPER_SIZE_ENV(subs) +DEF_IWMMXT_HELPER_SIZE_ENV(adds) + +DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) + +DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) +DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) + +DEF_HELPER_1(iwmmxt_bcstb, i64, i32) +DEF_HELPER_1(iwmmxt_bcstw, i64, i32) +DEF_HELPER_1(iwmmxt_bcstl, i64, i32) + +DEF_HELPER_1(iwmmxt_addcb, i64, i64) +DEF_HELPER_1(iwmmxt_addcw, i64, i64) +DEF_HELPER_1(iwmmxt_addcl, i64, i64) + +DEF_HELPER_1(iwmmxt_msbb, i32, i64) +DEF_HELPER_1(iwmmxt_msbw, i32, i64) +DEF_HELPER_1(iwmmxt_msbl, i32, i64) + +DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) + +DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) + +DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) +DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) +DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) + +DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip8, 
TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) + +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) +DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) + +DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, 
TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) 
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + 
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_s, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, fpst) +DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, fpst) +DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, fpst) + +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + 
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, 
ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index f03977b..1ccec63 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -9,9 +9,13 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/helper-proto.h" +#include "exec/translation-block.h" +#include "accel/tcg/cpu-ops.h" #include "cpregs.h" +#define 
HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + static inline bool fgt_svc(CPUARMState *env, int el) { /* @@ -63,6 +67,15 @@ static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr) #endif } +bool access_secure_reg(CPUARMState *env) +{ + bool ret = (arm_feature(env, ARM_FEATURE_EL3) && + !arm_el_is_aa64(env, 3) && + !(env->cp15.scr_el3 & SCR_NS)); + + return ret; +} + static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el, ARMMMUIdx mmu_idx, CPUARMTBFlags flags) @@ -404,6 +417,19 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); } + if (env->vfp.fpcr & FPCR_AH) { + DP_TBFLAG_A64(flags, AH, 1); + } + if (env->vfp.fpcr & FPCR_NEP) { + /* + * In streaming-SVE without FA64, NEP behaves as if zero; + * compare pseudocode IsMerging() + */ + if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) { + DP_TBFLAG_A64(flags, NEP, 1); + } + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } @@ -476,7 +502,7 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el) env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx); } -void assert_hflags_rebuild_correctly(CPUARMState *env) +static void assert_hflags_rebuild_correctly(CPUARMState *env) { #ifdef CONFIG_DEBUG_TCG CPUARMTBFlags c = env->hflags; @@ -484,10 +510,123 @@ void assert_hflags_rebuild_correctly(CPUARMState *env) if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) { fprintf(stderr, "TCG hflags mismatch " - "(current:(0x%08x,0x" TARGET_FMT_lx ")" - " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n", + "(current:(0x%08x,0x%016" PRIx64 ")" + " rebuilt:(0x%08x,0x%016" PRIx64 ")\n", c.flags, c.flags2, r.flags, r.flags2); abort(); } #endif } + +static bool mve_no_pred(CPUARMState *env) +{ + /* + * Return true if there is definitely no predication of MVE + * instructions by VPR or LTPSIZE. (Returning false even if there + * isn't any predication is OK; generated code will just be + * a little worse.) + * If the CPU does not implement MVE then this TB flag is always 0. + * + * NOTE: if you change this logic, the "recalculate s->mve_no_pred" + * logic in gen_update_fp_context() needs to be updated to match. + * + * We do not include the effect of the ECI bits here -- they are + * tracked in other TB flags. This simplifies the logic for + * "when did we emit code that changes the MVE_NO_PRED TB flag + * and thus need to end the TB?". 
+ */ + if (!cpu_isar_feature(aa32_mve, env_archcpu(env))) { + return false; + } + if (env->v7m.vpr) { + return false; + } + if (env->v7m.ltpsize < 4) { + return false; + } + return true; +} + +TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs) +{ + CPUARMState *env = cpu_env(cs); + CPUARMTBFlags flags; + vaddr pc; + + assert_hflags_rebuild_correctly(env); + flags = env->hflags; + + if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) { + pc = env->pc; + if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { + DP_TBFLAG_A64(flags, BTYPE, env->btype); + } + } else { + pc = env->regs[15]; + + if (arm_feature(env, ARM_FEATURE_M)) { + if (arm_feature(env, ARM_FEATURE_M_SECURITY) && + FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) + != env->v7m.secure) { + DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1); + } + + if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) && + (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) || + (env->v7m.secure && + !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) { + /* + * ASPEN is set, but FPCA/SFPA indicate that there is no + * active FP context; we must create a new FP context before + * executing any FP insn. + */ + DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1); + } + + bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; + if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { + DP_TBFLAG_M32(flags, LSPACT, 1); + } + + if (mve_no_pred(env)) { + DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); + } + } else { + /* + * Note that XSCALE_CPAR shares bits with VECSTRIDE. + * Note that VECLEN+VECSTRIDE are RES0 for M-profile. + */ + if (arm_feature(env, ARM_FEATURE_XSCALE)) { + DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar); + } else { + DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); + DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { + DP_TBFLAG_A32(flags, VFPEN, 1); + } + } + + DP_TBFLAG_AM32(flags, THUMB, env->thumb); + DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits); + } + + /* + * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB. + */ + if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) { + DP_TBFLAG_ANY(flags, PSTATE__SS, 1); + } + + return (TCGTBCPUState){ + .pc = pc, + .flags = flags.flags, + .cs_base = flags.flags2, + }; +} diff --git a/target/arm/tcg/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c index 610b1b2..ba054b6 100644 --- a/target/arm/tcg/iwmmxt_helper.c +++ b/target/arm/tcg/iwmmxt_helper.c @@ -22,7 +22,9 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/helper-proto.h" + +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" /* iwMMXt macros extracted from GNU gdb. 
*/ diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index 23d7f73..6614719 100644 --- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -15,10 +15,9 @@ #include "qemu/main-loop.h" #include "qemu/bitops.h" #include "qemu/log.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #ifdef CONFIG_TCG -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "semihosting/common-semi.h" #endif #if !defined(CONFIG_USER_ONLY) @@ -222,7 +221,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, int exc; bool exc_secure; - if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &res, &fi)) { + if (get_phys_addr(env, addr, MMU_DATA_STORE, 0, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -311,7 +310,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, bool exc_secure; uint32_t value; - if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) { + if (get_phys_addr(env, addr, MMU_DATA_LOAD, 0, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -2009,7 +2008,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool secure, "...really SecureFault with SFSR.INVEP\n"); return false; } - if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &res, &fi)) { + if (get_phys_addr(env, addr, MMU_INST_FETCH, 0, mmu_idx, &res, &fi)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); @@ -2045,7 +2044,7 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx, ARMMMUFaultInfo fi = {}; uint32_t value; - if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) { + if (get_phys_addr(env, addr, MMU_DATA_LOAD, 0, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index 508932a..c59f0f0 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -30,14 +30,9 @@ arm_ss.add(files( 'translate-mve.c', 'translate-neon.c', 'translate-vfp.c', - 'crypto_helper.c', - 'hflags.c', - 'iwmmxt_helper.c', 'm_helper.c', 'mve_helper.c', - 'neon_helper.c', 'op_helper.c', - 'tlb_helper.c', 'vec_helper.c', )) @@ -60,3 +55,26 @@ arm_system_ss.add(files( arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) + +arm_common_ss.add(zlib) + +arm_common_ss.add(files( + 'arith_helper.c', + 'crypto_helper.c', +)) + +arm_common_system_ss.add(files( + 'hflags.c', + 'iwmmxt_helper.c', + 'neon_helper.c', + 'tlb_helper.c', + 'tlb-insns.c', + 'vfp_helper.c', +)) +arm_user_ss.add(files( + 'hflags.c', + 'iwmmxt_helper.c', + 'neon_helper.c', + 'tlb_helper.c', + 'vfp_helper.c', +)) diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index 037ac6d..0efc18a 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -21,15 +21,21 @@ #include "qemu/log.h" #include "cpu.h" #include "internals.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" -#include "exec/ram_addr.h" -#include "exec/cpu_ldst.h" +#ifdef CONFIG_USER_ONLY +#include "user/cpu_loop.h" +#include "user/page-protection.h" +#else +#include "system/ram_addr.h" +#endif +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include 
"exec/helper-proto.h" -#include "hw/core/tcg-cpu-ops.h" +#include "exec/tlb-flags.h" +#include "accel/tcg/cpu-ops.h" #include "qapi/error.h" #include "qemu/guest-random.h" - +#include "mte_helper.h" static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) { @@ -50,44 +56,13 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) return tag; } -/** - * allocation_tag_mem_probe: - * @env: the cpu environment - * @ptr_mmu_idx: the addressing regime to use for the virtual address - * @ptr: the virtual address for which to look up tag memory - * @ptr_access: the access to use for the virtual address - * @ptr_size: the number of bytes in the normal memory access - * @tag_access: the access to use for the tag memory - * @probe: true to merely probe, never taking an exception - * @ra: the return address for exception handling - * - * Our tag memory is formatted as a sequence of little-endian nibbles. - * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two - * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] - * for the higher addr. - * - * Here, resolve the physical address from the virtual address, and return - * a pointer to the corresponding tag byte. - * - * If there is no tag storage corresponding to @ptr, return NULL. - * - * If the page is inaccessible for @ptr_access, or has a watchpoint, there are - * three options: - * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not - * accessible, and do not take watchpoint traps. The calling code must - * handle those cases in the right priority compared to MTE traps. - * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees - * that the page is going to be accessible. We will take watchpoint traps. - * (3) probe = false, ra != 0 : non-probe -- we will take both memory access - * traps and watchpoint traps. - * (probe = true, ra != 0 is invalid and will assert.) - */ -static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, - uint64_t ptr, MMUAccessType ptr_access, - int ptr_size, MMUAccessType tag_access, - bool probe, uintptr_t ra) +uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + bool probe, uintptr_t ra) { #ifdef CONFIG_USER_ONLY + const size_t page_data_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1); uint64_t clean_ptr = useronly_clean_ptr(ptr); int flags = page_get_flags(clean_ptr); uint8_t *tags; @@ -96,6 +71,9 @@ static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, assert(!(probe && ra)); if (!(flags & (ptr_access == MMU_DATA_STORE ? 
PAGE_WRITE_ORG : PAGE_READ))) { + if (probe) { + return NULL; + } cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access, !(flags & PAGE_VALID), ra); } @@ -105,7 +83,7 @@ static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, return NULL; } - tags = page_get_target_data(clean_ptr); + tags = page_get_target_data(clean_ptr, page_data_size); index = extract32(ptr, LOG2_TAG_GRANULE + 1, TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1); @@ -284,7 +262,7 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, return address_with_allocation_tag(ptr + offset, rtag); } -static int load_tag1(uint64_t ptr, uint8_t *mem) +int load_tag1(uint64_t ptr, uint8_t *mem) { int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; return extract32(*mem, ofs, 4); @@ -318,7 +296,7 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) } /* For use in a non-parallel context, store to the given nibble. */ -static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +void store_tag1(uint64_t ptr, uint8_t *mem, int tag) { int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; *mem = deposit32(*mem, ofs, 4, tag); diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h new file mode 100644 index 0000000..1f471fb --- /dev/null +++ b/target/arm/tcg/mte_helper.h @@ -0,0 +1,66 @@ +/* + * ARM MemTag operation helpers. + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef TARGET_ARM_MTE_H +#define TARGET_ARM_MTE_H + +#include "exec/mmu-access-type.h" + +/** + * allocation_tag_mem_probe: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @probe: true to merely probe, never taking an exception + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. + * + * If there is no tag storage corresponding to @ptr, return NULL. + * + * If the page is inaccessible for @ptr_access, or has a watchpoint, there are + * three options: + * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not + * accessible, and do not take watchpoint traps. The calling code must + * handle those cases in the right priority compared to MTE traps. + * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees + * that the page is going to be accessible. We will take watchpoint traps. + * (3) probe = false, ra != 0 : non-probe -- we will take both memory access + * traps and watchpoint traps. + * (probe = true, ra != 0 is invalid and will assert.) 
+ */ +uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + bool probe, uintptr_t ra); + +/** + * load_tag1 - Load 1 tag (nibble) from byte + * @ptr: The tagged address + * @mem: The tag address (packed, 2 tags in byte) + */ +int load_tag1(uint64_t ptr, uint8_t *mem); + +/** + * store_tag1 - Store 1 tag (nibble) into byte + * @ptr: The tagged address + * @mem: The tag address (packed, 2 tags in byte) + * @tag: The tag to be stored in the nibble + */ +void store_tag1(uint64_t ptr, uint8_t *mem, int tag); + +#endif /* TARGET_ARM_MTE_H */ diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c index 8b99736..506d1c3 100644 --- a/target/arm/tcg/mve_helper.c +++ b/target/arm/tcg/mve_helper.c @@ -22,8 +22,7 @@ #include "internals.h" #include "vec_internal.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/tcg.h" #include "fpu/softfloat.h" #include "crypto/clmul.h" @@ -1115,21 +1114,21 @@ static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m, if (update_flags) { /* Store C, clear NZV. */ - env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK; - env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C; + env->vfp.fpsr &= ~FPSR_NZCV_MASK; + env->vfp.fpsr |= carry_in * FPSR_C; } mve_advance_vpt(env); } void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm) { - bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C; + bool carry_in = env->vfp.fpsr & FPSR_C; do_vadc(env, vd, vn, vm, 0, carry_in, false); } void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm) { - bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C; + bool carry_in = env->vfp.fpsr & FPSR_C; do_vadc(env, vd, vn, vm, -1, carry_in, false); } @@ -2814,8 +2813,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -2888,8 +2886,7 @@ DO_2OP_FP_ALL(vminnma, minnuma) r[e] = 0; \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(tm & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -2926,8 +2923,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -2964,8 +2960,7 @@ DO_VFMA(vfmss, 4, float32, true) if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \ continue; \ } \ - fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ fpst1 = fpst0; \ if (!(mask & 1)) { \ scratch_fpst = *fpst0; \ @@ -3049,8 +3044,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? 
&env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -3084,8 +3078,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -3116,9 +3109,8 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS) unsigned e; \ TYPE *m = vm; \ TYPE ra = (TYPE)ra_in; \ - float_status *fpst = (ESIZE == 2) ? \ - &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + float_status *fpst = \ + &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ if (mask & 1) { \ TYPE v = m[H##ESIZE(e)]; \ @@ -3168,8 +3160,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) if ((mask & emask) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & (1 << (e * ESIZE)))) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -3202,8 +3193,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum) if ((mask & emask) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & (1 << (e * ESIZE)))) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -3267,8 +3257,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ @@ -3300,9 +3289,8 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero) unsigned e; \ float_status *fpst; \ float_status scratch_fpst; \ - float_status *base_fpst = (ESIZE == 2) ? \ - &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + float_status *base_fpst = \ + &env->vfp.fp_status[ESIZE == 2 ? 
FPST_STD_F16 : FPST_STD]; \ uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \ set_float_rounding_mode(rmode, base_fpst); \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ @@ -3343,11 +3331,11 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) uint32_t *m = vm; uint16_t r; uint16_t mask = mve_element_mask(env); - bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); + bool ieee = !(env->vfp.fpcr & FPCR_AHP); unsigned e; float_status *fpst; float_status scratch_fpst; - float_status *base_fpst = &env->vfp.standard_fp_status; + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; bool old_fz = get_flush_to_zero(base_fpst); set_flush_to_zero(false, base_fpst); for (e = 0; e < 16 / 4; e++, mask >>= 4) { @@ -3373,11 +3361,11 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) uint16_t *m = vm; uint32_t r; uint16_t mask = mve_element_mask(env); - bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); + bool ieee = !(env->vfp.fpcr & FPCR_AHP); unsigned e; float_status *fpst; float_status scratch_fpst; - float_status *base_fpst = &env->vfp.standard_fp_status; + float_status *base_fpst = &env->vfp.fp_status[FPST_STD]; bool old_fiz = get_flush_inputs_to_zero(base_fpst); set_flush_inputs_to_zero(false, base_fpst); for (e = 0; e < 16 / 4; e++, mask >>= 4) { @@ -3427,8 +3415,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \ continue; \ } \ - fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \ - &env->vfp.standard_fp_status; \ + fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \ if (!(mask & 1)) { \ /* We need the result but without updating flags */ \ scratch_fpst = *fpst; \ diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode index 788578c..e883c6a 100644 --- a/target/arm/tcg/neon-dp.decode +++ b/target/arm/tcg/neon-dp.decode @@ -291,17 +291,17 @@ VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b -VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b -VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b -VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... 
@2reg_shl_b diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index 082bfd8..2cc8241 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -9,11 +9,13 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "vec_internal.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" + #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) @@ -130,17 +132,28 @@ void HELPER(name)(void *vd, void *vn, void *vm, uint32_t desc) \ } #define NEON_GVEC_VOP2_ENV(name, vtype) \ -void HELPER(name)(void *vd, void *vn, void *vm, void *venv, uint32_t desc) \ +void HELPER(name)(void *vd, void *vn, void *vm, CPUARMState *env, uint32_t desc) \ { \ intptr_t i, opr_sz = simd_oprsz(desc); \ vtype *d = vd, *n = vn, *m = vm; \ - CPUARMState *env = venv; \ for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ NEON_FN(d[i], n[i], m[i]); \ } \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } +#define NEON_GVEC_VOP2i_ENV(name, vtype) \ +void HELPER(name)(void *vd, void *vn, CPUARMState *env, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + int imm = simd_data(desc); \ + vtype *d = vd, *n = vn; \ + for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ + NEON_FN(d[i], n[i], imm); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + /* Pairwise operations. */ /* For 32-bit elements each segment only contains a single element, so the elementwise and pairwise operations are the same. */ @@ -271,22 +284,26 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift) (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u8, neon_u8, 4) NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t) +NEON_GVEC_VOP2i_ENV(neon_uqshli_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u16, neon_u16, 2) NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t) +NEON_GVEC_VOP2i_ENV(neon_uqshli_h, uint16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t) +NEON_GVEC_VOP2i_ENV(neon_uqshli_s, uint32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t) +NEON_GVEC_VOP2i_ENV(neon_uqshli_d, uint64_t) #undef NEON_FN uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -303,22 +320,26 @@ uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s8, neon_s8, 4) NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t) +NEON_GVEC_VOP2i_ENV(neon_sqshli_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s16, neon_s16, 2) NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t) +NEON_GVEC_VOP2i_ENV(neon_sqshli_h, int16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t) +NEON_GVEC_VOP2i_ENV(neon_sqshli_s, int32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t) +NEON_GVEC_VOP2i_ENV(neon_sqshli_d, int64_t) #undef NEON_FN uint32_t 
HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -334,11 +355,13 @@ uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshlu_s8, neon_s8, 4) +NEON_GVEC_VOP2i_ENV(neon_sqshlui_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshlu_s16, neon_s16, 2) +NEON_GVEC_VOP2i_ENV(neon_sqshlui_h, int16_t) #undef NEON_FN uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -352,6 +375,16 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift) } #define NEON_FN(dest, src1, src2) \ + (dest = do_suqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) +NEON_GVEC_VOP2i_ENV(neon_sqshlui_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_suqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) +NEON_GVEC_VOP2i_ENV(neon_sqshlui_d, int64_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u8, neon_u8, 4) NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t) @@ -492,27 +525,6 @@ uint32_t HELPER(neon_cls_s32)(uint32_t x) return count - 1; } -/* Bit count. */ -uint32_t HELPER(neon_cnt_u8)(uint32_t x) -{ - x = (x & 0x55555555) + ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); - return x; -} - -/* Reverse bits in each 8 bit word */ -uint32_t HELPER(neon_rbit_u8)(uint32_t x) -{ - x = ((x & 0xf0f0f0f0) >> 4) - | ((x & 0x0f0f0f0f) << 4); - x = ((x & 0x88888888) >> 3) - | ((x & 0x44444444) >> 1) - | ((x & 0x22222222) << 1) - | ((x & 0x11111111) << 3); - return x; -} - #define NEON_QDMULH16(dest, src1, src2, round) do { \ uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ @@ -565,13 +577,15 @@ NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) #undef NEON_FN #undef NEON_QDMULH32 -uint32_t HELPER(neon_narrow_u8)(uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_u8)(uint64_t x) { return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) | ((x >> 24) & 0xff000000u); } -uint32_t HELPER(neon_narrow_u16)(uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_u16)(uint64_t x) { return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); } @@ -602,7 +616,8 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); } -uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x) { uint16_t s; uint8_t d; @@ -629,7 +644,8 @@ uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x) return res; } -uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x) { uint16_t s; uint8_t d; @@ -652,7 +668,8 @@ uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x) return res; } -uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. 
*/ +uint64_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x) { int16_t s; uint8_t d; @@ -675,7 +692,8 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x) return res; } -uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x) { uint32_t high; uint32_t low; @@ -695,10 +713,11 @@ uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x) high = 0xffff; SET_QC(); } - return low | (high << 16); + return deposit32(low, 16, 16, high); } -uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x) { uint32_t high; uint32_t low; @@ -712,10 +731,11 @@ uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x) high = 0xffff; SET_QC(); } - return low | (high << 16); + return deposit32(low, 16, 16, high); } -uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x) { int32_t low; int32_t high; @@ -729,10 +749,11 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x) high = (high >> 31) ^ 0x7fff; SET_QC(); } - return (uint16_t)low | (high << 16); + return deposit32(low, 16, 16, high); } -uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x) { if (x & 0x8000000000000000ull) { SET_QC(); @@ -745,7 +766,8 @@ uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x) return x; } -uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. */ +uint64_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x) { if (x > 0xffffffffu) { SET_QC(); @@ -754,13 +776,14 @@ uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x) return x; } -uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x) +/* Only the low 32-bits of output are significant. 
*/ +uint64_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x) { if ((int64_t)x != (int32_t)x) { SET_QC(); - return ((int64_t)x >> 63) ^ 0x7fffffff; + return (uint32_t)((int64_t)x >> 63) ^ 0x7fffffff; } - return x; + return (uint32_t)x; } uint64_t HELPER(neon_widen_u8)(uint32_t x) @@ -803,62 +826,47 @@ uint64_t HELPER(neon_widen_s16)(uint32_t x) return ((uint32_t)(int16_t)x) | (high << 32); } -uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) -{ - uint64_t mask; - mask = (a ^ b) & 0x8000800080008000ull; - a &= ~0x8000800080008000ull; - b &= ~0x8000800080008000ull; - return (a + b) ^ mask; -} - -uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) -{ - uint64_t mask; - mask = (a ^ b) & 0x8000000080000000ull; - a &= ~0x8000000080000000ull; - b &= ~0x8000000080000000ull; - return (a + b) ^ mask; -} - -uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) -{ - uint64_t tmp; - uint64_t tmp2; +/* Pairwise long add: add pairs of adjacent elements into + * double-width elements in the result (eg _s8 is an 8x8->16 op) + */ +uint64_t HELPER(neon_addlp_s8)(uint64_t a) +{ + uint64_t nsignmask = 0x0080008000800080ULL; + uint64_t wsignmask = 0x8000800080008000ULL; + uint64_t elementmask = 0x00ff00ff00ff00ffULL; + uint64_t tmp1, tmp2; + uint64_t res, signres; + + /* Extract odd elements, sign extend each to a 16 bit field */ + tmp1 = a & elementmask; + tmp1 ^= nsignmask; + tmp1 |= wsignmask; + tmp1 = (tmp1 - nsignmask) ^ wsignmask; + /* Ditto for the even elements */ + tmp2 = (a >> 8) & elementmask; + tmp2 ^= nsignmask; + tmp2 |= wsignmask; + tmp2 = (tmp2 - nsignmask) ^ wsignmask; + + /* calculate the result by summing bits 0..14, 16..22, etc, + * and then adjusting the sign bits 15, 23, etc manually. + * This ensures the addition can't overflow the 16 bit field. + */ + signres = (tmp1 ^ tmp2) & wsignmask; + res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); + res ^= signres; - tmp = a & 0x0000ffff0000ffffull; - tmp += (a >> 16) & 0x0000ffff0000ffffull; - tmp2 = b & 0xffff0000ffff0000ull; - tmp2 += (b << 16) & 0xffff0000ffff0000ull; - return ( tmp & 0xffff) - | ((tmp >> 16) & 0xffff0000ull) - | ((tmp2 << 16) & 0xffff00000000ull) - | ( tmp2 & 0xffff000000000000ull); + return res; } -uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) +uint64_t HELPER(neon_addlp_s16)(uint64_t a) { - uint32_t low = a + (a >> 32); - uint32_t high = b + (b >> 32); - return low + ((uint64_t)high << 32); -} + int32_t reslo, reshi; -uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) -{ - uint64_t mask; - mask = (a ^ ~b) & 0x8000800080008000ull; - a |= 0x8000800080008000ull; - b &= ~0x8000800080008000ull; - return (a - b) ^ mask; -} + reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); + reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); -uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) -{ - uint64_t mask; - mask = (a ^ ~b) & 0x8000000080000000ull; - a |= 0x8000000080000000ull; - b &= ~0x8000000080000000ull; - return (a - b) ^ mask; + return (uint32_t)reslo | (((uint64_t)reshi) << 32); } uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b) @@ -1172,51 +1180,44 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires. 
*/ -uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; return -float32_eq_quiet(make_float32(a), make_float32(b), fpst); } -uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; return -float32_le(make_float32(b), make_float32(a), fpst); } -uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; return -float32_lt(make_float32(b), make_float32(a), fpst); } -uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; float32 f0 = float32_abs(make_float32(a)); float32 f1 = float32_abs(make_float32(b)); return -float32_le(f1, f0, fpst); } -uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp) +uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, float_status *fpst) { - float_status *fpst = fpstp; float32 f0 = float32_abs(make_float32(a)); float32 f1 = float32_abs(make_float32(b)); return -float32_lt(f1, f0, fpst); } -uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp) +uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, float_status *fpst) { - float_status *fpst = fpstp; float64 f0 = float64_abs(make_float64(a)); float64 f1 = float64_abs(make_float64(b)); return -float64_le(f1, f0, fpst); } -uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp) +uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, float_status *fpst) { - float_status *fpst = fpstp; float64 f0 = float64_abs(make_float64(a)); float64 f1 = float64_abs(make_float64(b)); return -float64_lt(f1, f0, fpst); diff --git a/target/arm/op_addsub.h b/target/arm/tcg/op_addsub.c.inc index ca4a189..ca4a189 100644 --- a/target/arm/op_addsub.h +++ b/target/arm/tcg/op_addsub.c.inc diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index c083e5c..575e566 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -20,10 +20,11 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "cpregs.h" #define SIGNBIT (uint32_t)0x80000000 @@ -313,15 +314,19 @@ void HELPER(check_bxj_trap)(CPUARMState *env, uint32_t rm) } #ifndef CONFIG_USER_ONLY -/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped. +/* + * Function checks whether WFx (WFI/WFE) instructions are set up to be trapped. * The function returns the target EL (1-3) if the instruction is to be trapped; * otherwise it returns 0 indicating it is not trapped. + * For a trap, *excp is updated with the EXCP_* trap type to use. */ -static inline int check_wfx_trap(CPUARMState *env, bool is_wfe) +static inline int check_wfx_trap(CPUARMState *env, bool is_wfe, uint32_t *excp) { int cur_el = arm_current_el(env); uint64_t mask; + *excp = EXCP_UDEF; + if (arm_feature(env, ARM_FEATURE_M)) { /* M profile cores can never trap WFI/WFE. */ return 0; @@ -331,18 +336,9 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe) * WFx instructions being trapped to EL1. These trap bits don't exist in v7. 
*/ if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) { - int target_el; - mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI; - if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) { - /* Secure EL0 and Secure PL1 is at EL3 */ - target_el = 3; - } else { - target_el = 1; - } - - if (!(env->cp15.sctlr_el[target_el] & mask)) { - return target_el; + if (!(arm_sctlr(env, cur_el) & mask)) { + return exception_target_el(env); } } @@ -358,9 +354,12 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe) } /* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */ - if (cur_el < 3) { + if (arm_feature(env, ARM_FEATURE_V8) && !arm_is_el3_or_mon(env)) { mask = (is_wfe) ? SCR_TWE : SCR_TWI; if (env->cp15.scr_el3 & mask) { + if (!arm_el_is_aa64(env, 3)) { + *excp = EXCP_MON_TRAP; + } return 3; } } @@ -383,7 +382,8 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len) return; #else CPUState *cs = env_cpu(env); - int target_el = check_wfx_trap(env, false); + uint32_t excp; + int target_el = check_wfx_trap(env, false, &excp); if (cpu_has_work(cs)) { /* Don't bother to go into our "low power state" if @@ -399,7 +399,7 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len) env->regs[15] -= insn_len; } - raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, insn_len == 2), + raise_exception(env, excp, syn_wfx(1, 0xe, 0, insn_len == 2), target_el); } @@ -424,10 +424,17 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout) #else ARMCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); - int target_el = check_wfx_trap(env, false); + uint32_t excp; + int target_el = check_wfx_trap(env, false, &excp); /* The WFIT should time out when CNTVCT_EL0 >= the specified value. */ uint64_t cntval = gt_get_countervalue(env); - uint64_t offset = gt_virt_cnt_offset(env); + /* + * We want the value that we would get if we read CNTVCT_EL0 from + * the current exception level, so the direct_access offset, not + * the indirect_access one. Compare the pseudocode LocalTimeoutEvent(), + * which calls VirtualCounterTimer(). + */ + uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT); uint64_t cntvct = cntval - offset; uint64_t nexttick; @@ -441,8 +448,7 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout) if (target_el) { env->pc -= 4; - raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, false), - target_el); + raise_exception(env, excp, syn_wfx(1, 0xe, 0, false), target_el); } if (uadd64_overflow(timeout, offset, &nexttick)) { @@ -758,12 +764,13 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key); CPAccessResult res = CP_ACCESS_OK; int target_el; + uint32_t excp; assert(ri != NULL); if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14 && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) { - res = CP_ACCESS_TRAP; + res = CP_ACCESS_UNDEFINED; goto fail; } @@ -780,7 +787,7 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, * the other trap takes priority. So we take the "check HSTR_EL2" path * for all of those cases.) 
*/ - if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) == 0) && + if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) < 2) && arm_current_el(env) == 0) { goto fail; } @@ -817,6 +824,7 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, unsigned int idx = FIELD_EX32(ri->fgt, FGT, IDX); unsigned int bitpos = FIELD_EX32(ri->fgt, FGT, BITPOS); bool rev = FIELD_EX32(ri->fgt, FGT, REV); + bool nxs = FIELD_EX32(ri->fgt, FGT, NXS); bool trapbit; if (ri->fgt & FGT_EXEC) { @@ -830,7 +838,15 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, trapword = env->cp15.fgt_write[idx]; } - trapbit = extract64(trapword, bitpos, 1); + if (nxs && (arm_hcrx_el2_eff(env) & HCRX_FGTNXS)) { + /* + * If HCRX_EL2.FGTnXS is 1 then the fine-grained trap for + * TLBI maintenance insns does *not* apply to the nXS variant. + */ + trapbit = 0; + } else { + trapbit = extract64(trapword, bitpos, 1); + } if (trapbit != rev) { res = CP_ACCESS_TRAP_EL2; goto fail; @@ -842,12 +858,25 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, } fail: - switch (res & ~CP_ACCESS_EL_MASK) { - case CP_ACCESS_TRAP: + excp = EXCP_UDEF; + switch (res) { + /* CP_ACCESS_TRAP* traps are always direct to a specified EL */ + case CP_ACCESS_TRAP_EL3: + /* + * If EL3 is AArch32 then there's no syndrome register; the cases + * where we would raise a SystemAccessTrap to AArch64 EL3 all become + * raising a Monitor trap exception. (Because there's no visible + * syndrome it doesn't matter what we pass to raise_exception().) + */ + if (!arm_el_is_aa64(env, 3)) { + excp = EXCP_MON_TRAP; + } break; - case CP_ACCESS_TRAP_UNCATEGORIZED: - /* Only CP_ACCESS_TRAP traps are direct to a specified EL */ - assert((res & CP_ACCESS_EL_MASK) == 0); + case CP_ACCESS_TRAP_EL2: + case CP_ACCESS_TRAP_EL1: + break; + case CP_ACCESS_UNDEFINED: + /* CP_ACCESS_UNDEFINED is never direct to a specified EL */ if (cpu_isar_feature(aa64_ids, cpu) && isread && arm_cpreg_in_idspace(ri)) { /* @@ -867,6 +896,9 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, case 0: target_el = exception_target_el(env); break; + case 1: + assert(arm_current_el(env) < 2); + break; case 2: assert(arm_current_el(env) != 3); assert(arm_is_el2_enabled(env)); @@ -875,11 +907,10 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, assert(arm_feature(env, ARM_FEATURE_EL3)); break; default: - /* No "direct" traps to EL1 */ g_assert_not_reached(); } - raise_exception(env, EXCP_UDEF, syndrome, target_el); + raise_exception(env, excp, syndrome, target_el); } const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key) @@ -912,7 +943,19 @@ void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome) { /* See arm_sctlr(), but we also need the sctlr el. */ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 
2 : 1; + int target_el; + + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + target_el = 2; + break; + case ARMMMUIdx_E30_0: + target_el = 3; + break; + default: + target_el = 1; + break; + } /* * The bit is not valid unless the target el is aa64, but since the @@ -1179,7 +1222,7 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) } } -void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, +void HELPER(probe_access)(CPUARMState *env, vaddr ptr, uint32_t access_type, uint32_t mmu_idx, uint32_t size) { diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c index c4b1430..c591c30 100644 --- a/target/arm/tcg/pauth_helper.c +++ b/target/arm/tcg/pauth_helper.c @@ -21,8 +21,7 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "qemu/xxhash.h" diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c index 51d2ca3..cabed43 100644 --- a/target/arm/tcg/psci.c +++ b/target/arm/tcg/psci.c @@ -21,7 +21,7 @@ #include "exec/helper-proto.h" #include "kvm-consts.h" #include "qemu/main-loop.h" -#include "sysemu/runstate.h" +#include "system/runstate.h" #include "internals.h" #include "arm-powerctl.h" #include "target/arm/multiprocessing.h" diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index e2e0575..de0c6e5 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -22,8 +22,8 @@ #include "internals.h" #include "tcg/tcg-gvec-desc.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" #include "qemu/int128.h" #include "fpu/softfloat.h" #include "vec_internal.h" @@ -517,6 +517,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, clr_fn(za, 0, reg_off); } + set_helper_retaddr(ra); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -529,6 +531,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -543,6 +547,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(ra); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -554,6 +560,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, reg_off += esize; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -701,6 +709,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(ra); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -711,6 +721,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. 
@@ -725,6 +737,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(ra); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -734,6 +748,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_off += 1 << esz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -888,7 +904,7 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, } void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, void *vst, uint32_t desc) + void *vpm, float_status *fpst_in, uint32_t desc) { intptr_t row, col, oprsz = simd_maxsz(desc); uint32_t neg = simd_data(desc) << 31; @@ -900,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, * update the cumulative fp exception status. It also produces * default nans. */ - fpst = *(float_status *)vst; + fpst = *fpst_in; set_default_nan_mode(true, &fpst); for (row = 0; row < oprsz; ) { @@ -916,7 +932,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, if (pb & 1) { uint32_t *a = vza_row + H1_4(col); uint32_t *m = vzm + H1_4(col); - *a = float32_muladd(n, *m, *a, 0, vst); + *a = float32_muladd(n, *m, *a, 0, &fpst); } col += 4; pb >>= 4; @@ -930,13 +946,13 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, } void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, void *vst, uint32_t desc) + void *vpm, float_status *fpst_in, uint32_t desc) { intptr_t row, col, oprsz = simd_oprsz(desc) / 8; uint64_t neg = (uint64_t)simd_data(desc) << 63; uint64_t *za = vza, *zn = vzn, *zm = vzm; uint8_t *pn = vpn, *pm = vpm; - float_status fpst = *(float_status *)vst; + float_status fpst = *fpst_in; set_default_nan_mode(true, &fpst); @@ -976,12 +992,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) } static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, - float_status *s_std, float_status *s_odd) + float_status *s_f16, float_status *s_std, + float_status *s_odd) { - float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std); - float64 e1c = float16_to_float64(e1 >> 16, true, s_std); - float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std); - float64 e2c = float16_to_float64(e2 >> 16, true, s_std); + /* + * We need three different float_status for different parts of this + * operation: + * - the input conversion of the float16 values must use the + * f16-specific float_status, so that the FPCR.FZ16 control is applied + * - operations on float32 including the final accumulation must use + * the normal float_status, so that FPCR.FZ is applied + * - we have pre-set-up copy of s_std which is set to round-to-odd, + * for the multiply (see below) + */ + float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16); + float64 e1c = float16_to_float64(e1 >> 16, true, s_f16); + float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16); + float64 e2c = float16_to_float64(e2 >> 16, true, s_f16); float64 t64; float32 t32; @@ -1003,20 +1030,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, } void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, void *vst, uint32_t desc) + void *vpm, CPUARMState *env, uint32_t desc) { intptr_t row, col, oprsz = simd_maxsz(desc); uint32_t neg = simd_data(desc) * 0x80008000u; uint16_t *pn = vpn, *pm = vpm; - float_status fpst_odd, fpst_std; + float_status fpst_odd, fpst_std, fpst_f16; /* - * Make a copy of float_status because this 
operation does not - * update the cumulative fp exception status. It also produces - * default nans. Make a second copy with round-to-odd -- see above. + * Make copies of the fp status fields we use, because this operation + * does not update the cumulative fp exception status. It also + * produces default NaNs. We also need a second copy of fp_status with + * round-to-odd -- see above. */ - fpst_std = *(float_status *)vst; + fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; + fpst_std = env->vfp.fp_status[FPST_A64]; set_default_nan_mode(true, &fpst_std); + set_default_nan_mode(true, &fpst_f16); fpst_odd = fpst_std; set_float_rounding_mode(float_round_to_odd, &fpst_odd); @@ -1036,7 +1066,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, uint32_t m = *(uint32_t *)(vzm + H1_4(col)); m = f16mop_adj_pair(m, pcol, 0); - *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd); + *a = f16_dotadd(*a, n, m, + &fpst_f16, &fpst_std, &fpst_odd); } col += 4; pcol >>= 4; @@ -1048,38 +1079,68 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, } } -void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, uint32_t desc) +void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, + void *vpn, void *vpm, CPUARMState *env, uint32_t desc) { intptr_t row, col, oprsz = simd_maxsz(desc); uint32_t neg = simd_data(desc) * 0x80008000u; uint16_t *pn = vpn, *pm = vpm; + float_status fpst, fpst_odd; - for (row = 0; row < oprsz; ) { - uint16_t prow = pn[H2(row >> 4)]; - do { - void *vza_row = vza + tile_vslice_offset(row); - uint32_t n = *(uint32_t *)(vzn + H1_4(row)); + if (is_ebf(env, &fpst, &fpst_odd)) { + for (row = 0; row < oprsz; ) { + uint16_t prow = pn[H2(row >> 4)]; + do { + void *vza_row = vza + tile_vslice_offset(row); + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + n = f16mop_adj_pair(n, prow, neg); - for (col = 0; col < oprsz; ) { - uint16_t pcol = pm[H2(col >> 4)]; - do { - if (prow & pcol & 0b0101) { - uint32_t *a = vza_row + H1_4(col); - uint32_t m = *(uint32_t *)(vzm + H1_4(col)); + for (col = 0; col < oprsz; ) { + uint16_t pcol = pm[H2(col >> 4)]; + do { + if (prow & pcol & 0b0101) { + uint32_t *a = vza_row + H1_4(col); + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); - m = f16mop_adj_pair(m, pcol, 0); - *a = bfdotadd(*a, n, m); - } - col += 4; - pcol >>= 4; - } while (col & 15); - } - row += 4; - prow >>= 4; - } while (row & 15); + m = f16mop_adj_pair(m, pcol, 0); + *a = bfdotadd_ebf(*a, n, m, &fpst, &fpst_odd); + } + col += 4; + pcol >>= 4; + } while (col & 15); + } + row += 4; + prow >>= 4; + } while (row & 15); + } + } else { + for (row = 0; row < oprsz; ) { + uint16_t prow = pn[H2(row >> 4)]; + do { + void *vza_row = vza + tile_vslice_offset(row); + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); + + n = f16mop_adj_pair(n, prow, neg); + + for (col = 0; col < oprsz; ) { + uint16_t pcol = pm[H2(col >> 4)]; + do { + if (prow & pcol & 0b0101) { + uint32_t *a = vza_row + H1_4(col); + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); + + m = f16mop_adj_pair(m, pcol, 0); + *a = bfdotadd(*a, n, m, &fpst); + } + col += 4; + pcol >>= 4; + } while (col & 15); + } + row += 4; + prow >>= 4; + } while (row & 15); + } } } @@ -1146,10 +1207,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ uint64_t sum = 0; \ /* Apply P to N as a mask, making the inactive elements 0. 
*/ \ n &= expand_pred_h(p); \ - sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ - sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ - sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ - sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ + sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ + sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ + sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ + sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ return neg ? a - sum : a + sum; \ } diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index dd49e67..a2c363a 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -20,15 +20,22 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internals.h" -#include "exec/exec-all.h" #include "exec/page-protection.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "tcg/tcg.h" #include "vec_internal.h" #include "sve_ldst_internal.h" -#include "hw/core/tcg-cpu-ops.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/helper-retaddr.h" +#include "accel/tcg/cpu-ops.h" +#include "accel/tcg/probe.h" +#ifdef CONFIG_USER_ONLY +#include "user/page-protection.h" +#endif /* Return a value for NZCV as per the ARM PredTest pseudofunction. @@ -730,7 +737,7 @@ DO_ZPZZ_PAIR_D(sve2_sminp_zpzz_d, int64_t, DO_MIN) #define DO_ZPZZ_PAIR_FP(NAME, TYPE, H, OP) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ intptr_t i, opr_sz = simd_oprsz(desc); \ for (i = 0; i < opr_sz; ) { \ @@ -876,12 +883,28 @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) +#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N)) +#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N)) +#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N)) + +DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H) +DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S) +DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D) + #define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) +#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N)) +#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N)) +#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? 
(N) : DO_FNEG(N)) + +DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H) +DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S) +DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D) + #define DO_NOT(N) (~N) DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) @@ -2536,6 +2559,7 @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 2; + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); uint16_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz; i += 1) { uint16_t nn = n[i]; @@ -2543,13 +2567,17 @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) if (mm & 1) { nn = float16_one; } - d[i] = nn ^ (mm & 2) << 14; + if (mm & 2) { + nn = float16_maybe_ah_chs(nn, fpcr_ah); + } + d[i] = nn; } } void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 4; + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); uint32_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz; i += 1) { uint32_t nn = n[i]; @@ -2557,13 +2585,17 @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) if (mm & 1) { nn = float32_one; } - d[i] = nn ^ (mm & 2) << 30; + if (mm & 2) { + nn = float32_maybe_ah_chs(nn, fpcr_ah); + } + d[i] = nn; } } void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); uint64_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz; i += 1) { uint64_t nn = n[i]; @@ -2571,7 +2603,10 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) if (mm & 1) { nn = float64_one; } - d[i] = nn ^ (mm & 2) << 62; + if (mm & 2) { + nn = float64_maybe_ah_chs(nn, fpcr_ah); + } + d[i] = nn; } } @@ -4187,10 +4222,10 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ uintptr_t half = n / 2; \ TYPE lo = NAME##_reduce(data, status, half); \ TYPE hi = NAME##_reduce(data + half, status, half); \ - return TYPE##_##FUNC(lo, hi, status); \ + return FUNC(lo, hi, status); \ } \ } \ -uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ +uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ { \ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ @@ -4205,34 +4240,45 @@ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ for (; i < maxsz; i += sizeof(TYPE)) { \ *(TYPE *)((void *)data + i) = IDENT; \ } \ - return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ + return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ } -DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) -DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero) +DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) +DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) +DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) /* Identity is floatN_default_nan, without the function call. 
*/ -DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) -DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) +DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) +DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) +DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) +DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) +DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) -DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) -DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity) +DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) +DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) -DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) -DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity)) +DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) +DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) +DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) + +DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, + float16_chs(float16_infinity)) +DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, + float32_chs(float32_infinity)) +DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, + float64_chs(float64_infinity)) #undef DO_REDUCE uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) + float_status *status, uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float16 result = nn; @@ -4252,7 +4298,7 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, } uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) + float_status *status, uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float32 result = nn; @@ -4272,7 +4318,7 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, } uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) + float_status *status, uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; uint64_t *m = vm; @@ -4292,7 +4338,7 @@ uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, */ #define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc); \ uint64_t *g = vg; \ @@ -4333,6 +4379,14 @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) +DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) 
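The DO_REDUCE expander in the hunk above gathers the predicated elements into a power-of-two sized buffer, pads the tail with the operation's identity value (zero for FADDV, the default NaN for FMINNMV/FMAXNMV, +/-infinity for FMINV/FMAXV), and then folds the buffer as a balanced tree. A minimal standalone sketch of that reduction shape, using plain C float max over a buffer padded with -INFINITY rather than the QEMU softfloat types, is:

#include <stddef.h>

/*
 * Illustrative pairwise tree reduction in the style of DO_REDUCE above.
 * "n" must be a power of two, and the unused tail of "data" is assumed to
 * be pre-filled with the identity element (-INFINITY for max), so the
 * padding cannot change the result.  Hypothetical helper, not QEMU code;
 * NaN handling is deliberately ignored here.
 */
static float reduce_max(const float *data, size_t n)
{
    if (n == 1) {
        return data[0];
    } else {
        size_t half = n / 2;
        float lo = reduce_max(data, half);
        float hi = reduce_max(data + half, half);
        return lo > hi ? lo : hi;
    }
}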
+DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) +DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) + +DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) +DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) +DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) + DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) @@ -4356,9 +4410,31 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) return float64_abs(float64_sub(a, b, s)); } +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ +static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat) +{ + float16 r = float16_sub(op1, op2, stat); + return float16_is_any_nan(r) ? r : float16_abs(r); +} + +static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat) +{ + float32 r = float32_sub(op1, op2, stat); + return float32_is_any_nan(r) ? r : float32_abs(r); +} + +static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat) +{ + float64 r = float64_sub(op1, op2, stat); + return float64_is_any_nan(r) ? r : float64_abs(r); +} + DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d) +DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h) +DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s) +DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d) static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) { @@ -4381,7 +4457,7 @@ DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd) */ #define DO_ZPZS_FP(NAME, TYPE, H, OP) \ void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ - void *status, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc); \ uint64_t *g = vg; \ @@ -4445,11 +4521,20 @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min) +DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh) +DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs) +DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd) + +DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh) +DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins) +DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind) + /* Fully general two-operand expander, controlled by a predicate, * With the extra float_status parameter. 
*/ #define DO_ZPZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc); \ uint64_t *g = vg; \ @@ -4654,7 +4739,7 @@ static int16_t do_float16_logb_as_int(float16 a, float_status *s) return -15 - clz32(frac); } /* flush to zero */ - float_raise(float_flag_input_denormal, s); + float_raise(float_flag_input_denormal_flushed, s); } } else if (unlikely(exp == 0x1f)) { if (frac == 0) { @@ -4682,7 +4767,7 @@ static int32_t do_float32_logb_as_int(float32 a, float_status *s) return -127 - clz32(frac); } /* flush to zero */ - float_raise(float_flag_input_denormal, s); + float_raise(float_flag_input_denormal_flushed, s); } } else if (unlikely(exp == 0xff)) { if (frac == 0) { @@ -4710,7 +4795,7 @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s) return -1023 - clz64(frac); } /* flush to zero */ - float_raise(float_flag_input_denormal, s); + float_raise(float_flag_input_denormal_flushed, s); } } else if (unlikely(exp == 0x7ff)) { if (frac == 0) { @@ -4733,7 +4818,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint16_t neg1, uint16_t neg3) + uint16_t neg1, uint16_t neg3, int flags) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4748,7 +4833,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; e2 = *(uint16_t *)(vm + H1_2(i)); e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; - r = float16_muladd(e1, e2, e3, 0, status); + r = float16_muladd(e1, e2, e3, flags, status); *(uint16_t *)(vd + H1_2(i)) = r; } } while (i & 63); @@ -4756,32 +4841,53 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, } void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0); } void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); } void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); } void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | 
float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); } static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint32_t neg1, uint32_t neg3) + uint32_t neg1, uint32_t neg3, int flags) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4796,7 +4902,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1; e2 = *(uint32_t *)(vm + H1_4(i)); e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3; - r = float32_muladd(e1, e2, e3, 0, status); + r = float32_muladd(e1, e2, e3, flags, status); *(uint32_t *)(vd + H1_4(i)) = r; } } while (i & 63); @@ -4804,32 +4910,53 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, } void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0); } void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0); } void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0); } void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); } static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, - uint64_t neg1, uint64_t neg3) + uint64_t neg1, uint64_t neg3, int flags) { intptr_t i = simd_oprsz(desc); uint64_t *g = vg; @@ -4844,7 +4971,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, e1 = *(uint64_t *)(vn + i) ^ neg1; e2 = *(uint64_t *)(vm + i); e3 = *(uint64_t *)(va + i) ^ neg3; - r = float64_muladd(e1, e2, e3, 0, status); + r = float64_muladd(e1, e2, e3, flags, status); *(uint64_t *)(vd + i) = r; } } while (i & 63); @@ -4852,27 +4979,48 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, } void 
HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0); } void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0); } void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0); } void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) { - do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); } /* Two operand floating-point comparison controlled by a predicate. @@ -4882,7 +5030,7 @@ void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, */ #define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ uint64_t *d = vd, *g = vg; \ @@ -4944,7 +5092,7 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) */ #define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ void HELPER(NAME)(void *vd, void *vn, void *vg, \ - void *status, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ uint64_t *d = vd, *g = vg; \ @@ -4982,27 +5130,37 @@ DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE) /* FP Trig Multiply-Add. 
*/ -void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, + float_status *s, uint32_t desc) { static const float16 coeff[16] = { 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); - intptr_t x = simd_data(desc); + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); float16 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { float16 mm = m[i]; intptr_t xx = x; + int flags = 0; + if (float16_is_neg(mm)) { - mm = float16_abs(mm); + if (fpcr_ah) { + flags = float_muladd_negate_product; + } else { + mm = float16_abs(mm); + } xx += 8; } - d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs); + d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s); } } -void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, + float_status *s, uint32_t desc) { static const float32 coeff[16] = { 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, @@ -5011,20 +5169,29 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); - intptr_t x = simd_data(desc); + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); float32 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { float32 mm = m[i]; intptr_t xx = x; + int flags = 0; + if (float32_is_neg(mm)) { - mm = float32_abs(mm); + if (fpcr_ah) { + flags = float_muladd_negate_product; + } else { + mm = float32_abs(mm); + } xx += 8; } - d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs); + d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s); } } -void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, + float_status *s, uint32_t desc) { static const float64 coeff[16] = { 0x3ff0000000000000ull, 0xbfc5555555555543ull, @@ -5037,16 +5204,24 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); - intptr_t x = simd_data(desc); + intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1); float64 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { float64 mm = m[i]; intptr_t xx = x; + int flags = 0; + if (float64_is_neg(mm)) { - mm = float64_abs(mm); + if (fpcr_ah) { + flags = float_muladd_negate_product; + } else { + mm = float64_abs(mm); + } xx += 8; } - d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs); + d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s); } } @@ -5055,12 +5230,12 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) */ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) + float_status *s, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; - float16 neg_imag = float16_set_sign(0, simd_data(desc)); - float16 neg_real = float16_chs(neg_imag); + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5072,27 +5247,33 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void 
*vg, i -= 2 * sizeof(float16); e0 = *(float16 *)(vn + H1_2(i)); - e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real; + e1 = *(float16 *)(vm + H1_2(j)); e2 = *(float16 *)(vn + H1_2(j)); - e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag; + e3 = *(float16 *)(vm + H1_2(i)); + + if (rot) { + e3 = float16_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float16_maybe_ah_chs(e1, fpcr_ah); + } if (likely((pg >> (i & 63)) & 1)) { - *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, vs); + *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s); } if (likely((pg >> (j & 63)) & 1)) { - *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, vs); + *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, s); } } while (i & 63); } while (i != 0); } void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) + float_status *s, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; - float32 neg_imag = float32_set_sign(0, simd_data(desc)); - float32 neg_real = float32_chs(neg_imag); + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5104,27 +5285,33 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, i -= 2 * sizeof(float32); e0 = *(float32 *)(vn + H1_2(i)); - e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real; + e1 = *(float32 *)(vm + H1_2(j)); e2 = *(float32 *)(vn + H1_2(j)); - e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag; + e3 = *(float32 *)(vm + H1_2(i)); + + if (rot) { + e3 = float32_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float32_maybe_ah_chs(e1, fpcr_ah); + } if (likely((pg >> (i & 63)) & 1)) { - *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, vs); + *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s); } if (likely((pg >> (j & 63)) & 1)) { - *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, vs); + *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, s); } } while (i & 63); } while (i != 0); } void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) + float_status *s, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; - float64 neg_imag = float64_set_sign(0, simd_data(desc)); - float64 neg_real = float64_chs(neg_imag); + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5136,15 +5323,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, i -= 2 * sizeof(float64); e0 = *(float64 *)(vn + H1_2(i)); - e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real; + e1 = *(float64 *)(vm + H1_2(j)); e2 = *(float64 *)(vn + H1_2(j)); - e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag; + e3 = *(float64 *)(vm + H1_2(i)); + + if (rot) { + e3 = float64_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float64_maybe_ah_chs(e1, fpcr_ah); + } if (likely((pg >> (i & 63)) & 1)) { - *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, vs); + *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s); } if (likely((pg >> (j & 63)) & 1)) { - *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, vs); + *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, s); } } while (i & 63); } while (i != 0); @@ -5155,16 +5348,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, */ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float16 neg_imag, neg_real; + bool flip = extract32(desc, 
SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float16 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float16_set_sign(0, (rot & 2) != 0); - neg_real = float16_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 15; + negx_imag = (negf_imag & ~fpcr_ah) << 15; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5181,18 +5379,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, mi = *(float16 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)(va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, status); + d = float16_muladd(e2, e1, d, negf_real, status); *(float16 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)(va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, status); + d = float16_muladd(e4, e3, d, negf_imag, status); *(float16 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -5200,16 +5398,21 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, } void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float32 neg_imag, neg_real; + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float32 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float32_set_sign(0, (rot & 2) != 0); - neg_real = float32_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 31; + negx_imag = (negf_imag & ~fpcr_ah) << 31; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5226,18 +5429,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, mi = *(float32 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? 
mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)(va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, status); + d = float32_muladd(e2, e1, d, negf_real, status); *(float32 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)(va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, status); + d = float32_muladd(e4, e3, d, negf_imag, status); *(float32 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -5245,16 +5448,21 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, } void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, - void *vg, void *status, uint32_t desc) + void *vg, float_status *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rot = simd_data(desc); - bool flip = rot & 1; - float64 neg_imag, neg_real; + bool flip = extract32(desc, SIMD_DATA_SHIFT, 1); + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float64 negx_imag, negx_real; uint64_t *g = vg; - neg_imag = float64_set_sign(0, (rot & 2) != 0); - neg_real = float64_set_sign(0, rot == 1 || rot == 2); + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); do { uint64_t pg = g[(i - 1) >> 6]; @@ -5271,18 +5479,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, mi = *(float64 *)(vm + H1_2(j)); e2 = (flip ? ni : nr); - e1 = (flip ? mi : mr) ^ neg_real; + e1 = (flip ? mi : mr) ^ negx_real; e4 = e2; - e3 = (flip ? mr : mi) ^ neg_imag; + e3 = (flip ? mr : mi) ^ negx_imag; if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)(va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, status); + d = float64_muladd(e2, e1, d, negf_real, status); *(float64 *)(vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)(va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, status); + d = float64_muladd(e4, e3, d, negf_imag, status); *(float64 *)(vd + H1_2(j)) = d; } } while (i & 63); @@ -5738,6 +5946,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -5752,6 +5962,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -5771,6 +5983,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -5784,6 +5998,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, mem_off += N << msz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -5934,15 +6150,11 @@ DO_LDN_2(4, dd, MO_64) /* * Load contiguous data, first-fault and no-fault. * - * For user-only, one could argue that we should hold the mmap_lock during - * the operation so that there is no race between page_check_range and the - * load operation. 
However, unmapping pages out from under a running thread - * is extraordinarily unlikely. This theoretical race condition also affects - * linux-user/ in its get_user/put_user macros. - * - * TODO: Construct some helpers, written in assembly, that interact with - * host_signal_handler to produce memory ops which can properly report errors - * without racing. + * For user-only, we control the race between page_check_range and + * another thread's munmap by using set/clear_helper_retaddr. Any + * SEGV that occurs between those markers is assumed to be because + * the guest page vanished. Keep that block as small as possible + * so that unrelated QEMU bugs are not blamed on the guest. */ /* Fault on byte I. All bits in FFR from I are cleared. The vector @@ -6093,6 +6305,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3)); do { @@ -6101,9 +6315,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, (cpu_watchpoint_address_matches (env_cpu(env), addr + mem_off, 1 << msz) & BP_MEM_READ)) { + clear_helper_retaddr(); goto do_fault; } if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) { + clear_helper_retaddr(); goto do_fault; } host_fn(vd, reg_off, host + mem_off); @@ -6113,6 +6329,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } while (reg_off <= reg_last); + clear_helper_retaddr(); + /* * MemSingleNF is allowed to fail for any reason. We have special * code above to handle the first element crossing a page boundary. @@ -6307,9 +6525,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { -#ifdef CONFIG_USER_ONLY - g_assert_not_reached(); -#else /* * At least one page includes MMIO. * Any bus operation can fail with cpu_transaction_failed, @@ -6340,7 +6555,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, } while (reg_off & 63); } while (reg_off <= reg_last); return; -#endif } mem_off = info.mem_off_first[0]; @@ -6348,6 +6562,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -6362,6 +6578,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -6381,6 +6599,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -6394,6 +6614,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, mem_off += N << msz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -6560,7 +6782,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, if (unlikely(info.flags & TLB_MMIO)) { tlb_fn(env, &scratch, reg_off, addr, retaddr); } else { + set_helper_retaddr(retaddr); host_fn(&scratch, reg_off, info.host); + clear_helper_retaddr(); } } else { /* Element crosses the page boundary. 
*/ @@ -6782,7 +7006,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, goto fault; } + set_helper_retaddr(retaddr); host_fn(vd, reg_off, info.host); + clear_helper_retaddr(); } reg_off += esize; } while (reg_off & 63); @@ -6986,7 +7212,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, do { void *h = host[i]; if (likely(h != NULL)) { + set_helper_retaddr(retaddr); host_fn(vd, reg_off, h); + clear_helper_retaddr(); } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { target_ulong addr = base + (off_fn(vm, reg_off) << scale); tlb_fn(env, vd, reg_off, addr, retaddr); @@ -7369,7 +7597,7 @@ void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc) } void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va, - void *status, uint32_t desc) + float_status *status, uint32_t desc) { intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4); @@ -7407,7 +7635,7 @@ void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va, } void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va, - void *status, uint32_t desc) + float_status *status, uint32_t desc) { intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4); @@ -7443,7 +7671,8 @@ void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va, } #define DO_FCVTNT(NAME, TYPEW, TYPEN, HW, HN, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc); \ uint64_t *g = vg; \ @@ -7464,7 +7693,8 @@ DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16) DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32) #define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + float_status *status, uint32_t desc) \ { \ intptr_t i = simd_oprsz(desc); \ uint64_t *g = vg; \ diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h index 4f159ec..f2243da 100644 --- a/target/arm/tcg/sve_ldst_internal.h +++ b/target/arm/tcg/sve_ldst_internal.h @@ -20,7 +20,7 @@ #ifndef TARGET_ARM_SVE_LDST_INTERNAL_H #define TARGET_ARM_SVE_LDST_INTERNAL_H -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" /* * Load one element into @vd + @reg_off from @host. diff --git a/target/arm/tcg/tlb-insns.c b/target/arm/tcg/tlb-insns.c new file mode 100644 index 0000000..95c26c6 --- /dev/null +++ b/target/arm/tcg/tlb-insns.c @@ -0,0 +1,1306 @@ +/* + * Helpers for TLBI insns + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "exec/cputlb.h" +#include "exec/target_page.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-features.h" +#include "cpregs.h" + +/* Check for traps from EL1 due to HCR_EL2.TTLB. */ +static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TTLB)) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} + +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. 
*/ +static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} + +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */ +static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} + +/* IS variants of TLB operations must affect all cores */ +static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_all_cpus_synced(cs); +} + +static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); +} + +/* + * Non-IS variants of TLB operations are upgraded to + * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to + * force broadcast of these operations. + */ +static bool tlb_force_broadcast(CPUARMState *env) +{ + return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB); +} + +static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate all (TLBIALL) */ + CPUState *cs = env_cpu(env); + + if (tlb_force_broadcast(env)) { + tlb_flush_all_cpus_synced(cs); + } else { + tlb_flush(cs); + } +} + +static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ + CPUState *cs = env_cpu(env); + + value &= TARGET_PAGE_MASK; + if (tlb_force_broadcast(env)) { + tlb_flush_page_all_cpus_synced(cs, value); + } else { + tlb_flush_page(cs, value); + } +} + +static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by ASID (TLBIASID) */ + CPUState *cs = env_cpu(env); + + if (tlb_force_broadcast(env)) { + tlb_flush_all_cpus_synced(cs); + } else { + tlb_flush(cs); + } +} + +static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ + CPUState *cs = env_cpu(env); + + value &= TARGET_PAGE_MASK; + if (tlb_force_broadcast(env)) { + tlb_flush_page_all_cpus_synced(cs, value); + } else { + tlb_flush_page(cs, value); + } +} + +static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); + + tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2); +} + +static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); + + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_E2); +} + +static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; + 
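The TLBIIPAS2 helpers above reconstruct a 4KB-aligned intermediate physical address from the low 28 bits of the written value; a small worked example of that computation (standalone C, with MAKE_64BIT_MASK(0, 28) written out as 0x0fffffff and a made-up register value) is:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t value = 0x00123456;                 /* made-up register write */
    /* Keep bits [27:0] of the written value and shift them up to bit 12,
     * i.e. treat them as a 4KB page number, as in the helpers above. */
    uint64_t pageaddr = (value & 0x0fffffffULL) << 12;

    printf("0x%" PRIx64 "\n", pageaddr);         /* prints 0x123456000 */
    return 0;
}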
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); +} + +static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; + + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2); +} + +static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_by_mmuidx(cs, alle1_tlbmask(env)); +} + +static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env)); +} + + +static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2); +} + +static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2); +} + +/* + * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions + * Page D4-1736 (DDI0487A.b) + */ + +static int vae1_tlbmask(CPUARMState *env) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + uint16_t mask; + + assert(arm_feature(env, ARM_FEATURE_AARCH64)); + + if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { + mask = ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E20_0; + } else { + /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */ + mask = ARMMMUIdxBit_E10_1 | + ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_0; + } + return mask; +} + +static int vae2_tlbmask(CPUARMState *env) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + uint16_t mask; + + if (hcr & HCR_E2H) { + mask = ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E20_0; + } else { + mask = ARMMMUIdxBit_E2; + } + return mask; +} + +/* Return 56 if TBI is enabled, 64 otherwise. */ +static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, + uint64_t addr) +{ + uint64_t tcr = regime_tcr(env, mmu_idx); + int tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + int select = extract64(addr, 55, 1); + + return (tbi >> select) & 1 ? 56 : 64; +} + +static int vae1_tlbbits(CPUARMState *env, uint64_t addr) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + ARMMMUIdx mmu_idx; + + assert(arm_feature(env, ARM_FEATURE_AARCH64)); + + /* Only the regime of the mmu_idx below is significant. */ + if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { + mmu_idx = ARMMMUIdx_E20_0; + } else { + mmu_idx = ARMMMUIdx_E10_0; + } + + return tlbbits_for_regime(env, mmu_idx, addr); +} + +static int vae2_tlbbits(CPUARMState *env, uint64_t addr) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + ARMMMUIdx mmu_idx; + + /* + * Only the regime of the mmu_idx below is significant. + * Regime EL2&0 has two ranges with separate TBI configuration, while EL2 + * only has one. 
+ */ + if (hcr & HCR_E2H) { + mmu_idx = ARMMMUIdx_E20_2; + } else { + mmu_idx = ARMMMUIdx_E2; + } + + return tlbbits_for_regime(env, mmu_idx, addr); +} + +static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = vae1_tlbmask(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); +} + +static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = vae1_tlbmask(env); + + if (tlb_force_broadcast(env)) { + tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); + } else { + tlb_flush_by_mmuidx(cs, mask); + } +} + +static int e2_tlbmask(CPUARMState *env) +{ + return (ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E2); +} + +static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = alle1_tlbmask(env); + + tlb_flush_by_mmuidx(cs, mask); +} + +static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = e2_tlbmask(env); + + tlb_flush_by_mmuidx(cs, mask); +} + +static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); +} + +static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = alle1_tlbmask(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); +} + +static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = e2_tlbmask(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, mask); +} + +static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); +} + +static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA, EL2 + * Currently handles both VAE2 and VALE2, since we don't support + * flush-last-level-only. + */ + CPUState *cs = env_cpu(env); + int mask = vae2_tlbmask(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae2_tlbbits(env, pageaddr); + + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); +} + +static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA, EL3 + * Currently handles both VAE3 and VALE3, since we don't support + * flush-last-level-only. + */ + ARMCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); +} + +static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = vae1_tlbmask(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); + + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); +} + +static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA, EL1&0 (AArch64 version). 
+ * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + CPUState *cs = env_cpu(env); + int mask = vae1_tlbmask(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); + + if (tlb_force_broadcast(env)) { + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); + } else { + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); + } +} + +static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = vae2_tlbmask(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae2_tlbbits(env, pageaddr); + + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); +} + +static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); + + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_E3, bits); +} + +static int ipas2e1_tlbmask(CPUARMState *env, int64_t value) +{ + /* + * The MSB of value is the NS field, which only applies if SEL2 + * is implemented and SCR_EL3.NS is not set (i.e. in secure mode). + */ + return (value >= 0 + && cpu_isar_feature(aa64_sel2, env_archcpu(env)) + && arm_is_secure_below_el3(env) + ? ARMMMUIdxBit_Stage2_S + : ARMMMUIdxBit_Stage2); +} + +static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = ipas2e1_tlbmask(env, value); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, mask); + } +} + +static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = ipas2e1_tlbmask(env, value); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); +} + +static const ARMCPRegInfo tlbi_not_v7_cp_reginfo[] = { + /* + * MMU TLB control. Note that the wildcarding means we cover not just + * the unified TLB ops but also the dside/iside/inner-shareable variants. 
+ */ + { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write, + .type = ARM_CP_NO_RAW }, +}; + +static const ARMCPRegInfo tlbi_v7_cp_reginfo[] = { + /* 32 bit ITLB invalidates */ + { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiall_write }, + { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimva_write }, + { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiasid_write }, + /* 32 bit DTLB invalidates */ + { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiall_write }, + { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimva_write }, + { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiasid_write }, + /* 32 bit TLB invalidates */ + { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiall_write }, + { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimva_write }, + { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbiasid_write }, + { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimvaa_write }, +}; + +static const ARMCPRegInfo tlbi_v7mp_cp_reginfo[] = { + /* 32 bit TLB invalidates, Inner Shareable */ + { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbiall_is_write }, + { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbimva_is_write }, + { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbiasid_is_write }, + { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbimvaa_is_write }, +}; + +static const ARMCPRegInfo tlbi_v8_cp_reginfo[] = { + /* AArch32 TLB invalidate last level of translation table walk */ + { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 
5, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbimva_is_write }, + { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, + .writefn = tlbimvaa_is_write }, + { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimva_write }, + { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .writefn = tlbimvaa_write }, + { .name = "TLBIMVALH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbimva_hyp_write }, + { .name = "TLBIMVALHIS", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbimva_hyp_is_write }, + { .name = "TLBIIPAS2", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2_hyp_write }, + { .name = "TLBIIPAS2IS", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2is_hyp_write }, + { .name = "TLBIIPAS2L", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2_hyp_write }, + { .name = "TLBIIPAS2LIS", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2is_hyp_write }, + /* AArch64 TLBI operations */ + { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVMALLE1IS, + .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAE1IS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIASIDE1IS, + .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAAE1IS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVALE1IS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAALE1IS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVMALLE1, + .writefn = tlbi_aa64_vmalle1_write }, + { .name = "TLBI_VAE1", .state = 
ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAE1, + .writefn = tlbi_aa64_vae1_write }, + { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIASIDE1, + .writefn = tlbi_aa64_vmalle1_write }, + { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAAE1, + .writefn = tlbi_aa64_vae1_write }, + { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVALE1, + .writefn = tlbi_aa64_vae1_write }, + { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAALE1, + .writefn = tlbi_aa64_vae1_write }, + { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ipas2e1is_write }, + { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ipas2e1is_write }, + { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1is_write }, + { .name = "TLBI_VMALLS12E1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1is_write }, + { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ipas2e1_write }, + { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ipas2e1_write }, + { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1_write }, + { .name = "TLBI_VMALLS12E1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1is_write }, +}; + +static const ARMCPRegInfo tlbi_el2_cp_reginfo[] = { + { .name = "TLBIALLNSNH", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiall_nsnh_write }, + { .name = "TLBIALLNSNHIS", + .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiall_nsnh_is_write }, + { .name = "TLBIALLH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiall_hyp_write }, + { .name = 
"TLBIALLHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiall_hyp_is_write }, + { .name = "TLBIMVAH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbimva_hyp_write }, + { .name = "TLBIMVAHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbimva_hyp_is_write }, + { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_alle2_write }, + { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2_write }, + { .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2_write }, + { .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_alle2is_write }, + { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, + { .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, +}; + +static const ARMCPRegInfo tlbi_el3_cp_reginfo[] = { + { .name = "TLBI_ALLE3IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle3is_write }, + { .name = "TLBI_VAE3IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3is_write }, + { .name = "TLBI_VALE3IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3is_write }, + { .name = "TLBI_ALLE3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle3_write }, + { .name = "TLBI_VAE3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3_write }, + { .name = "TLBI_VALE3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3_write }, +}; + +typedef struct { + uint64_t base; + uint64_t length; +} TLBIRange; + +static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg) +{ + /* + * Note that the TLBI range TG field encoding differs from both + * TG0 and TG1 encodings. 
+ */ + switch (tg) { + case 1: + return Gran4K; + case 2: + return Gran16K; + case 3: + return Gran64K; + default: + return GranInvalid; + } +} + +static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx, + uint64_t value) +{ + unsigned int page_size_granule, page_shift, num, scale, exponent; + /* Extract one bit to represent the va selector in use. */ + uint64_t select = sextract64(value, 36, 1); + ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false); + TLBIRange ret = { }; + ARMGranuleSize gran; + + page_size_granule = extract64(value, 46, 2); + gran = tlbi_range_tg_to_gran_size(page_size_granule); + + /* The granule encoded in value must match the granule in use. */ + if (gran != param.gran) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n", + page_size_granule); + return ret; + } + + page_shift = arm_granule_bits(gran); + num = extract64(value, 39, 5); + scale = extract64(value, 44, 2); + exponent = (5 * scale) + 1; + + ret.length = (num + 1) << (exponent + page_shift); + + if (param.select) { + ret.base = sextract64(value, 0, 37); + } else { + ret.base = extract64(value, 0, 37); + } + if (param.ds) { + /* + * With DS=1, BaseADDR is always shifted 16 so that it is able + * to address all 52 va bits. The input address is perforce + * aligned on a 64k boundary regardless of translation granule. + */ + page_shift = 16; + } + ret.base <<= page_shift; + + return ret; +} + +static void do_rvae_write(CPUARMState *env, uint64_t value, + int idxmap, bool synced) +{ + ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap); + TLBIRange range; + int bits; + + range = tlbi_aa64_get_range(env, one_idx, value); + bits = tlbbits_for_regime(env, one_idx, range.base); + + if (synced) { + tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env), + range.base, + range.length, + idxmap, + bits); + } else { + tlb_flush_range_by_mmuidx(env_cpu(env), range.base, + range.length, idxmap, bits); + } +} + +static void tlbi_aa64_rvae1_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, EL1&0. + * Currently handles all of RVAE1, RVAAE1, RVAALE1 and RVALE1, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + + do_rvae_write(env, value, vae1_tlbmask(env), + tlb_force_broadcast(env)); +} + +static void tlbi_aa64_rvae1is_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, Inner/Outer Shareable EL1&0. + * Currently handles all of RVAE1IS, RVAE1OS, RVAAE1IS, RVAAE1OS, + * RVAALE1IS, RVAALE1OS, RVALE1IS and RVALE1OS, since we don't support + * flush-for-specific-ASID-only, flush-last-level-only or inner/outer + * shareable specific flushes. + */ + + do_rvae_write(env, value, vae1_tlbmask(env), true); +} + +static void tlbi_aa64_rvae2_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, EL2. + * Currently handles all of RVAE2 and RVALE2, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + + do_rvae_write(env, value, vae2_tlbmask(env), + tlb_force_broadcast(env)); + + +} + +static void tlbi_aa64_rvae2is_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, Inner/Outer Shareable, EL2. + * Currently handles all of RVAE2IS, RVAE2OS, RVALE2IS and RVALE2OS, + * since we don't support flush-for-specific-ASID-only, + * flush-last-level-only or inner/outer shareable specific flushes. 
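+ * These are always treated as broadcast flushes: note the final 'true'
+ * (synced) argument to do_rvae_write below.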
+ */ + + do_rvae_write(env, value, vae2_tlbmask(env), true); + +} + +static void tlbi_aa64_rvae3_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, EL3. + * Currently handles all of RVAE3 and RVALE3, + * since we don't support flush-for-specific-ASID-only or + * flush-last-level-only. + */ + + do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); +} + +static void tlbi_aa64_rvae3is_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Invalidate by VA range, EL3, Inner/Outer Shareable. + * Currently handles all of RVAE3IS, RVAE3OS, RVALE3IS and RVALE3OS, + * since we don't support flush-for-specific-ASID-only, + * flush-last-level-only or inner/outer specific flushes. + */ + + do_rvae_write(env, value, ARMMMUIdxBit_E3, true); +} + +static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + do_rvae_write(env, value, ipas2e1_tlbmask(env, value), + tlb_force_broadcast(env)); +} + +static void tlbi_aa64_ripas2e1is_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true); +} + +static const ARMCPRegInfo tlbirange_reginfo[] = { + { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAE1IS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAAE1IS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVALE1IS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlbis, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAALE1IS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAE1OS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAAE1OS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVALE1OS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAALE1OS, + .writefn = tlbi_aa64_rvae1is_write }, + { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, + .access = 
PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAE1, + .writefn = tlbi_aa64_rvae1_write }, + { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAAE1, + .writefn = tlbi_aa64_rvae1_write }, + { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVALE1, + .writefn = tlbi_aa64_rvae1_write }, + { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIRVAALE1, + .writefn = tlbi_aa64_rvae1_write }, + { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ripas2e1is_write }, + { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ripas2e1is_write }, + { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2is_write }, + { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2is_write }, + { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ripas2e1_write }, + { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_ripas2e1_write }, + { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2is_write }, + { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2is_write }, + { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2_write }, + { .name = "TLBI_RVALE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_rvae2_write }, + { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3is_write }, + { .name = "TLBI_RVALE3IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, 
.opc1 = 6, .crn = 8, .crm = 2, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3is_write }, + { .name = "TLBI_RVAE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3is_write }, + { .name = "TLBI_RVALE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3is_write }, + { .name = "TLBI_RVAE3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3_write }, + { .name = "TLBI_RVALE3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_rvae3_write }, +}; + +static const ARMCPRegInfo tlbios_reginfo[] = { + { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVMALLE1OS, + .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1, + .fgt = FGT_TLBIVAE1OS, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIASIDE1OS, + .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAAE1OS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVALE1OS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlbos, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .fgt = FGT_TLBIVAALE1OS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_alle2is_write }, + { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, + { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4, + .access = PL2_W, + .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1is_write }, + { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL2_W, + .type = ARM_CP_NO_RAW | 
ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, + { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle1is_write }, + { .name = "TLBI_IPAS2E1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 0, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS }, + { .name = "TLBI_RIPAS2E1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 3, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS }, + { .name = "TLBI_IPAS2LE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS }, + { .name = "TLBI_RIPAS2LE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 7, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS }, + { .name = "TLBI_ALLE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_alle3is_write }, + { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3is_write }, + { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS, + .writefn = tlbi_aa64_vae3is_write }, +}; + +static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush(cs); +} + +static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + + tlb_flush_all_cpus_synced(cs); +} + +static const ARMCPRegInfo tlbi_rme_reginfo[] = { + { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paall_write }, + { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + /* + * QEMU does not have a way to invalidate by physical address, thus + * invalidating a range of physical addresses is accomplished by + * flushing all tlb entries in the outer shareable domain, + * just like PAALLOS. 
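+ * (Hence TLBI RPAOS and RPALOS below share the PAALLOS write function.)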
+ */ + { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, +}; + +void define_tlb_insn_regs(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + + if (!arm_feature(env, ARM_FEATURE_V7)) { + define_arm_cp_regs(cpu, tlbi_not_v7_cp_reginfo); + } else { + define_arm_cp_regs(cpu, tlbi_v7_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V7MP) && + !arm_feature(env, ARM_FEATURE_PMSA)) { + define_arm_cp_regs(cpu, tlbi_v7mp_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V8)) { + define_arm_cp_regs(cpu, tlbi_v8_cp_reginfo); + } + /* + * We retain the existing logic for when to register these TLBI + * ops (i.e. matching the condition for el2_cp_reginfo[] in + * helper.c), but we will be able to simplify this later. + */ + if (arm_feature(env, ARM_FEATURE_EL2)) { + define_arm_cp_regs(cpu, tlbi_el2_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_EL3)) { + define_arm_cp_regs(cpu, tlbi_el3_cp_reginfo); + } + if (cpu_isar_feature(aa64_tlbirange, cpu)) { + define_arm_cp_regs(cpu, tlbirange_reginfo); + } + if (cpu_isar_feature(aa64_tlbios, cpu)) { + define_arm_cp_regs(cpu, tlbios_reginfo); + } + if (cpu_isar_feature(aa64_rme, cpu)) { + define_arm_cp_regs(cpu, tlbi_rme_reginfo); + } +} diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c index 885bf4e..23c72a9 100644 --- a/target/arm/tcg/tlb_helper.c +++ b/target/arm/tcg/tlb_helper.c @@ -9,9 +9,9 @@ #include "cpu.h" #include "internals.h" #include "cpu-features.h" -#include "exec/exec-all.h" -#include "exec/helper-proto.h" +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" /* * Returns true if the stage 1 translation regime is using LPAE format page @@ -277,7 +277,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); } -void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc) +void helper_exception_pc_alignment(CPUARMState *env, vaddr pc) { ARMMMUFaultInfo fi = { .type = ARMFault_Alignment }; int target_el = exception_target_el(env); @@ -318,14 +318,13 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi); } -bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) +bool arm_cpu_tlb_fill_align(CPUState *cs, CPUTLBEntryFull *out, vaddr address, + MMUAccessType access_type, int mmu_idx, + MemOp memop, int size, bool probe, uintptr_t ra) { ARMCPU *cpu = ARM_CPU(cs); GetPhysAddrResult res = {}; ARMMMUFaultInfo local_fi, *fi; - int ret; /* * Allow S1_ptw_translate to see any fault generated here. @@ -339,37 +338,27 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } /* - * Walk the page table and (if the mapping exists) add the page - * to the TLB. On success, return true. Otherwise, if probing, - * return false. Otherwise populate fsr with ARM DFSR/IFSR fault - * register format, and signal the fault. + * Per R_XCHFJ, alignment fault not due to memory type has + * highest precedence. Otherwise, walk the page table and + * and collect the page description. 
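+ * The alignment requirement checked here is the one encoded in the MemOp
+ * (via memop_alignment_bits).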
*/ - ret = get_phys_addr(&cpu->env, address, access_type, - core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &res, fi); - if (likely(!ret)) { - /* - * Map a single [sub]page. Regions smaller than our declared - * target page size are handled specially, so for those we - * pass in the exact addresses. - */ - if (res.f.lg_page_size >= TARGET_PAGE_BITS) { - res.f.phys_addr &= TARGET_PAGE_MASK; - address &= TARGET_PAGE_MASK; - } - + if (address & ((1 << memop_alignment_bits(memop)) - 1)) { + fi->type = ARMFault_Alignment; + } else if (!get_phys_addr(&cpu->env, address, access_type, memop, + core_to_arm_mmu_idx(&cpu->env, mmu_idx), + &res, fi)) { res.f.extra.arm.pte_attrs = res.cacheattrs.attrs; res.f.extra.arm.shareability = res.cacheattrs.shareability; - - tlb_set_page_full(cs, mmu_idx, address, &res.f); + *out = res.f; return true; - } else if (probe) { + } + if (probe) { return false; - } else { - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr); - arm_deliver_fault(cpu, address, access_type, mmu_idx, fi); } + + /* Now we have a real cpu fault. */ + cpu_restore_state(cs, ra); + arm_deliver_fault(cpu, address, access_type, mmu_idx, fi); } #else void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr, diff --git a/target/arm/tcg/translate-a32.h b/target/arm/tcg/translate-a32.h index 19de6e0..0b1fa57 100644 --- a/target/arm/tcg/translate-a32.h +++ b/target/arm/tcg/translate-a32.h @@ -83,6 +83,13 @@ void store_cpu_offset(TCGv_i32 var, int offset, int size); sizeof_field(CPUARMState, name)); \ }) +/* Store to the low half of a 64-bit field from a TCGv_i32 */ +#define store_cpu_field_low32(val, name) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8); \ + store_cpu_offset(val, offsetoflow32(CPUARMState, name), 4); \ + }) + #define store_cpu_field_constant(val, name) \ store_cpu_field(tcg_constant_i32(val), name) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 93543da..815225b 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -17,8 +17,7 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" - -#include "exec/exec-all.h" +#include "exec/target_page.h" #include "translate.h" #include "translate-a64.h" #include "qemu/log.h" @@ -75,17 +74,6 @@ static int scale_by_log2_tag_granule(DisasContext *s, int x) #include "decode-sme-fa64.c.inc" #include "decode-a64.c.inc" -/* Table based decoder typedefs - used when the relevant bits for decode - * are too awkwardly scattered across the instruction (eg SIMD). - */ -typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); - -typedef struct AArch64DecodeTable { - uint32_t pattern; - uint32_t mask; - AArch64DecodeFn *disas_fn; -} AArch64DecodeTable; - /* initialize TCG globals. 
*/ void a64_translate_init(void) { @@ -294,7 +282,7 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); ret = tcg_temp_new_i64(); @@ -326,7 +314,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); ret = tcg_temp_new_i64(); @@ -445,12 +433,6 @@ static void gen_rebuild_hflags(DisasContext *s) gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); } -static void gen_exception_internal(int excp) -{ - assert(excp_is_internal(excp)); - gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); -} - static void gen_exception_internal_insn(DisasContext *s, int excp) { gen_a64_update_pc(s, 0); @@ -628,7 +610,16 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) return v; } -/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). +static void clear_vec(DisasContext *s, int rd) +{ + unsigned ofs = fp_reg_offset(s, rd, MO_64); + unsigned vsz = vec_full_reg_size(s); + + tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); +} + +/* + * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). * If SVE is not enabled, then there are only 128 bits in the vector. */ static void clear_vec_high(DisasContext *s, bool is_q, int rd) @@ -656,6 +647,68 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) write_fp_dreg(s, reg, tmp); } +/* + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: + * - if FPCR.NEP == 0, clear the high elements of reg + * - if FPCR.NEP == 1, set the high elements of reg from mergereg + * (i.e. merge the result with those high elements) + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). + */ +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i64 v) +{ + if (!s->fpcr_nep) { + write_fp_dreg(s, reg, v); + return; + } + + /* + * Move from mergereg to reg; this sets the high elements and + * clears the bits above 128 as a side effect. + */ + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); +} + +/* + * Write a single-prec result, but only clear the higher elements + * of the destination register if FPCR.NEP is 0; otherwise preserve them. + */ +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i32 v) +{ + if (!s->fpcr_nep) { + write_fp_sreg(s, reg, v); + return; + } + + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); +} + +/* + * Write a half-prec result, but only clear the higher elements + * of the destination register if FPCR.NEP is 0; otherwise preserve them. + * The caller must ensure that the top 16 bits of v are zero. 
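+ * (The FPCR.NEP == 0 path writes a full 32-bit sreg, so stray high bits
+ * would otherwise end up in the destination register.)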
+ */ +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i32 v) +{ + if (!s->fpcr_nep) { + write_fp_sreg(s, reg, v); + return; + } + + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); +} + /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) @@ -714,10 +767,10 @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, * an out-of-line helper. */ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, - int rm, bool is_fp16, int data, + int rm, ARMFPStatusFlavour fpsttype, int data, gen_helper_gvec_3_ptr *fn) { - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); + TCGv_ptr fpst = fpstatus_ptr(fpsttype); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), fpst, @@ -736,14 +789,31 @@ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, } /* + * Expand a 4-operand operation using an out-of-line helper that takes + * a pointer to the CPU env. + */ +static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, + int rm, int ra, int data, + gen_helper_gvec_4_ptr *fn) +{ + tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, ra), + tcg_env, + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); +} + +/* * Expand a 4-operand + fpstatus pointer + simd data value operation using * an out-of-line helper. */ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, - int rm, int ra, bool is_fp16, int data, + int rm, int ra, ARMFPStatusFlavour fpsttype, + int data, gen_helper_gvec_4_ptr *fn) { - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); + TCGv_ptr fpst = fpstatus_ptr(fpsttype); tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), @@ -751,6 +821,111 @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } +/* + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. + * These functions implement + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) + * which for float32 is + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) + * and similarly for the other float sizes. + */ +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); + + gen_vfp_negh(chs_s, s); + gen_vfp_absh(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7c00), + s, chs_s); +} + +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); + + gen_vfp_negs(chs_s, s); + gen_vfp_abss(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7f800000UL), + s, chs_s); +} + +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); + + gen_vfp_negd(chs_s, s); + gen_vfp_absd(abs_s, s); + tcg_gen_movcond_i64(TCG_COND_GTU, d, + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), + s, chs_s); +} + +/* + * These functions implement + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) + * which for float32 is + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? 
s : (s & ~(1 << 31)) + * and similarly for the other float sizes. + */ +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(); + + gen_vfp_absh(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7c00), + s, abs_s); +} + +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(); + + gen_vfp_abss(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7f800000UL), + s, abs_s); +} + +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 abs_s = tcg_temp_new_i64(); + + gen_vfp_absd(abs_s, s); + tcg_gen_movcond_i64(TCG_COND_GTU, d, + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), + s, abs_s); +} + +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negh(d, s); + } else { + gen_vfp_negh(d, s); + } +} + +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negs(d, s); + } else { + gen_vfp_negs(d, s); + } +} + +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negd(d, s); + } else { + gen_vfp_negd(d, s); + } +} + /* Set ZF and NF based on a 64 bit result. This is alas fiddlier * than the 32 bit equivalent. */ @@ -894,11 +1069,9 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i64 cf_64 = tcg_temp_new_i64(); TCGv_i64 vf_64 = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_extu_i32_i64(cf_64, cpu_CF); - tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); - tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); + tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64); tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); @@ -912,12 +1085,10 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 t0_32 = tcg_temp_new_i32(); TCGv_i32 t1_32 = tcg_temp_new_i32(); TCGv_i32 tmp = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_extrl_i64_i32(t0_32, t0); tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); @@ -1199,14 +1370,14 @@ static bool fp_access_check_only(DisasContext *s) { if (s->fp_excp_el) { assert(!s->fp_access_checked); - s->fp_access_checked = true; + s->fp_access_checked = -1; gen_exception_insn_el(s, 0, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false, 0), s->fp_excp_el); return false; } - s->fp_access_checked = true; + s->fp_access_checked = 1; return true; } @@ -1224,6 +1395,49 @@ static bool fp_access_check(DisasContext *s) } /* + * Return <0 for non-supported element sizes, with MO_16 controlled by + * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. + */ +static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) +{ + switch (esz) { + case MO_64: + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return -1; + } + break; + default: + return -1; + } + return fp_access_check(s); +} + +/* Likewise, but vector MO_64 must have two elements. 
*/ +static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) +{ + switch (esz) { + case MO_64: + if (!is_q) { + return -1; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return -1; + } + break; + default: + return -1; + } + return fp_access_check(s); +} + +/* * Check that SVE access is enabled. If it is, return true. * If not, emit code to generate an appropriate exception and return false. * This function corresponds to CheckSVEEnabled(). @@ -1231,23 +1445,23 @@ static bool fp_access_check(DisasContext *s) bool sve_access_check(DisasContext *s) { if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { + bool ret; + assert(dc_isar_feature(aa64_sme, s)); - if (!sme_sm_enabled_check(s)) { - goto fail_exit; - } - } else if (s->sve_excp_el) { + ret = sme_sm_enabled_check(s); + s->sve_access_checked = (ret ? 1 : -1); + return ret; + } + if (s->sve_excp_el) { + /* Assert that we only raise one exception per instruction. */ + assert(!s->sve_access_checked); gen_exception_insn_el(s, 0, EXCP_UDEF, syn_sve_access_trap(), s->sve_excp_el); - goto fail_exit; + s->sve_access_checked = -1; + return false; } - s->sve_access_checked = true; + s->sve_access_checked = 1; return fp_access_check(s); - - fail_exit: - /* Assert that we only raise one exception per instruction. */ - assert(!s->sve_access_checked); - s->sve_access_checked = true; - return false; } /* @@ -1275,8 +1489,9 @@ bool sme_enabled_check(DisasContext *s) * sme_excp_el by itself for cpregs access checks. */ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { - s->fp_access_checked = true; - return sme_access_check(s); + bool ret = sme_access_check(s); + s->fp_access_checked = (ret ? 1 : -1); + return ret; } return fp_access_check_only(s); } @@ -1398,31 +1613,6 @@ static inline void gen_check_sp_alignment(DisasContext *s) } /* - * This provides a simple table based table lookup decoder. It is - * intended to be used when the relevant bits for decode are too - * awkwardly placed and switch/if based logic would be confusing and - * deeply nested. Since it's a linear search through the table, tables - * should be kept small. - * - * It returns the first handler where insn & mask == pattern, or - * NULL if there is no match. - * The table is terminated by an empty mask (i.e. 0) - */ -static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, - uint32_t insn) -{ - const AArch64DecodeTable *tptr = table; - - while (tptr->mask) { - if ((insn & tptr->mask) == tptr->pattern) { - return tptr->disas_fn; - } - tptr++; - } - return NULL; -} - -/* * The instruction disassembly implemented here matches * the instruction encoding classifications in chapter C4 * of the ARM Architecture Reference Manual (DDI0487B_a); @@ -1507,7 +1697,14 @@ static void set_btype_for_br(DisasContext *s, int rn) { if (dc_isar_feature(aa64_bti, s)) { /* BR to {x16,x17} or !guard -> 1, else 3. */ - set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 
1 : 3); + if (rn == 16 || rn == 17) { + set_btype(s, 1); + } else { + TCGv_i64 pc = tcg_temp_new_i64(); + gen_pc_plus_diff(s, pc, 0); + gen_helper_guarded_page_br(tcg_env, pc); + s->btype = -1; + } } } @@ -1521,8 +1718,8 @@ static void set_btype_for_blr(DisasContext *s) static bool trans_BR(DisasContext *s, arg_r *a) { - gen_a64_set_pc(s, cpu_reg(s, a->rn)); set_btype_for_br(s, a->rn); + gen_a64_set_pc(s, cpu_reg(s, a->rn)); s->base.is_jmp = DISAS_JUMP; return true; } @@ -1581,8 +1778,8 @@ static bool trans_BRAZ(DisasContext *s, arg_braz *a) } dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); - gen_a64_set_pc(s, dst); set_btype_for_br(s, a->rn); + gen_a64_set_pc(s, dst); s->base.is_jmp = DISAS_JUMP; return true; } @@ -1613,6 +1810,10 @@ static bool trans_RETA(DisasContext *s, arg_reta *a) { TCGv_i64 dst; + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); gen_a64_set_pc(s, dst); s->base.is_jmp = DISAS_JUMP; @@ -1936,6 +2137,15 @@ static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) return true; } +static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) +{ + if (!dc_isar_feature(aa64_xs, s)) { + return false; + } + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + return true; +} + static bool trans_ISB(DisasContext *s, arg_ISB *a) { /* @@ -3543,7 +3753,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) if (a->rn == 31) { gen_check_sp_alignment(s); } - mop = check_atomic_align(s, a->rn, a->sz); + mop = check_ordered_align(s, a->rn, 0, false, a->sz); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, a->rn != 31, mop); /* @@ -4657,6 +4867,88 @@ static bool trans_EXTR(DisasContext *s, arg_extract *a) return true; } +static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) +{ + if (fp_access_check(s)) { + int len = (a->len + 1) * 16; + + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rm), tcg_env, + a->q ? 16 : 8, vec_full_reg_size(s), + (len << 6) | (a->tbx << 5) | a->rn, + gen_helper_simd_tblx); + } + return true; +} + +typedef int simd_permute_idx_fn(int i, int part, int elements); + +static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, + simd_permute_idx_fn *fn, int part) +{ + MemOp esz = a->esz; + int datasize = a->q ? 16 : 8; + int elements = datasize >> esz; + TCGv_i64 tcg_res[2], tcg_ele; + + if (esz == MO_64 && !a->q) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; + tcg_ele = tcg_temp_new_i64(); + + for (int i = 0; i < elements; i++) { + int o, w, idx; + + idx = fn(i, part, elements); + read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), + idx & (elements - 1), esz); + + w = (i << (esz + 3)) / 64; + o = (i << (esz + 3)) % 64; + if (o == 0) { + tcg_gen_mov_i64(tcg_res[w], tcg_ele); + } else { + tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); + } + } + + for (int i = a->q; i >= 0; --i) { + write_vec_element(s, tcg_res[i], a->rd, i, MO_64); + } + clear_vec_high(s, a->q, a->rd); + return true; +} + +static int permute_load_uzp(int i, int part, int elements) +{ + return 2 * i + part; +} + +TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) +TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) + +static int permute_load_trn(int i, int part, int elements) +{ + return (i & 1) * elements + (i & ~1) + part; +} + +TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) +TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) + +static int permute_load_zip(int i, int part, int elements) +{ + return (i & 1) * elements + ((part * elements + i) >> 1); +} + +TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) +TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) + /* * Cryptographic AES, SHA, SHA512 */ @@ -4703,7 +4995,6 @@ static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) TCGv_i32 tcg_op2 = tcg_temp_new_i32(); TCGv_i32 tcg_op3 = tcg_temp_new_i32(); TCGv_i32 tcg_res = tcg_temp_new_i32(); - unsigned vsz, dofs; read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); @@ -4715,9 +5006,7 @@ static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) tcg_gen_rotri_i32(tcg_res, tcg_res, 25); /* Clear the whole register first, then store bits [127:96]. */ - vsz = vec_full_reg_size(s); - dofs = vec_full_reg_offset(s, a->rd); - tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); + clear_vec(s, a->rd); write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); } return true; @@ -4898,23 +5187,25 @@ typedef struct FPScalar { void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); } FPScalar; -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, + const FPScalar *f, int mergereg, + ARMFPStatusFlavour fpsttype) { switch (a->esz) { case MO_64: if (fp_access_check(s)) { TCGv_i64 t0 = read_fp_dreg(s, a->rn); TCGv_i64 t1 = read_fp_dreg(s, a->rm); - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); - write_fp_dreg(s, a->rd, t0); + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_dreg_merging(s, a->rd, mergereg, t0); } break; case MO_32: if (fp_access_check(s)) { TCGv_i32 t0 = read_fp_sreg(s, a->rn); TCGv_i32 t1 = read_fp_sreg(s, a->rm); - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); - write_fp_sreg(s, a->rd, t0); + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_sreg_merging(s, a->rd, mergereg, t0); } break; case MO_16: @@ -4924,8 +5215,8 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) if (fp_access_check(s)) { TCGv_i32 t0 = read_fp_hreg(s, a->rn); TCGv_i32 t1 = read_fp_hreg(s, a->rm); - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); - write_fp_sreg(s, a->rd, t0); + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_hreg_merging(s, a->rd, mergereg, t0); } break; default: @@ -4934,68 +5225,103 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) return true; } +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, + int mergereg) +{ + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, + a->esz == MO_16 ? 
+ FPST_A64_F16 : FPST_A64); +} + +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, + const FPScalar *fnormal, const FPScalar *fah, + int mergereg) +{ + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, + mergereg, select_ah_fpst(s, a->esz)); +} + +/* Some insns need to call different helpers when FPCR.AH == 1 */ +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, + const FPScalar *fnormal, + const FPScalar *fah, + int mergereg) +{ + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); +} + static const FPScalar f_scalar_fadd = { gen_helper_vfp_addh, gen_helper_vfp_adds, gen_helper_vfp_addd, }; -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) static const FPScalar f_scalar_fsub = { gen_helper_vfp_subh, gen_helper_vfp_subs, gen_helper_vfp_subd, }; -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) static const FPScalar f_scalar_fdiv = { gen_helper_vfp_divh, gen_helper_vfp_divs, gen_helper_vfp_divd, }; -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) static const FPScalar f_scalar_fmul = { gen_helper_vfp_mulh, gen_helper_vfp_muls, gen_helper_vfp_muld, }; -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) static const FPScalar f_scalar_fmax = { - gen_helper_advsimd_maxh, + gen_helper_vfp_maxh, gen_helper_vfp_maxs, gen_helper_vfp_maxd, }; -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) +static const FPScalar f_scalar_fmax_ah = { + gen_helper_vfp_ah_maxh, + gen_helper_vfp_ah_maxs, + gen_helper_vfp_ah_maxd, +}; +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) static const FPScalar f_scalar_fmin = { - gen_helper_advsimd_minh, + gen_helper_vfp_minh, gen_helper_vfp_mins, gen_helper_vfp_mind, }; -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) +static const FPScalar f_scalar_fmin_ah = { + gen_helper_vfp_ah_minh, + gen_helper_vfp_ah_mins, + gen_helper_vfp_ah_mind, +}; +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) static const FPScalar f_scalar_fmaxnm = { - gen_helper_advsimd_maxnumh, + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnums, gen_helper_vfp_maxnumd, }; -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) static const FPScalar f_scalar_fminnm = { - gen_helper_advsimd_minnumh, + gen_helper_vfp_minnumh, gen_helper_vfp_minnums, gen_helper_vfp_minnumd, }; -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) static const FPScalar f_scalar_fmulx = { gen_helper_advsimd_mulxh, gen_helper_vfp_mulxs, gen_helper_vfp_mulxd, }; -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) { @@ -5015,47 +5341,70 @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) gen_vfp_negd(d, d); } +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_mulh(d, n, m, s); + gen_vfp_ah_negh(d, d); +} + +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_muls(d, n, m, s); + gen_vfp_ah_negs(d, d); +} + +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_muld(d, n, m, s); + gen_vfp_ah_negd(d, d); +} + static 
const FPScalar f_scalar_fnmul = { gen_fnmul_h, gen_fnmul_s, gen_fnmul_d, }; -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) +static const FPScalar f_scalar_ah_fnmul = { + gen_fnmul_ah_h, + gen_fnmul_ah_s, + gen_fnmul_ah_d, +}; +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) static const FPScalar f_scalar_fcmeq = { gen_helper_advsimd_ceq_f16, gen_helper_neon_ceq_f32, gen_helper_neon_ceq_f64, }; -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) static const FPScalar f_scalar_fcmge = { gen_helper_advsimd_cge_f16, gen_helper_neon_cge_f32, gen_helper_neon_cge_f64, }; -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) static const FPScalar f_scalar_fcmgt = { gen_helper_advsimd_cgt_f16, gen_helper_neon_cgt_f32, gen_helper_neon_cgt_f64, }; -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) static const FPScalar f_scalar_facge = { gen_helper_advsimd_acge_f16, gen_helper_neon_acge_f32, gen_helper_neon_acge_f64, }; -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) static const FPScalar f_scalar_facgt = { gen_helper_advsimd_acgt_f16, gen_helper_neon_acgt_f32, gen_helper_neon_acgt_f64, }; -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) { @@ -5075,26 +5424,116 @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) gen_vfp_absd(d, d); } +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subh(d, n, m, s); + gen_vfp_ah_absh(d, d); +} + +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subs(d, n, m, s); + gen_vfp_ah_abss(d, d); +} + +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_subd(d, n, m, s); + gen_vfp_ah_absd(d, d); +} + static const FPScalar f_scalar_fabd = { gen_fabd_h, gen_fabd_s, gen_fabd_d, }; -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) +static const FPScalar f_scalar_ah_fabd = { + gen_fabd_ah_h, + gen_fabd_ah_s, + gen_fabd_ah_d, +}; +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) static const FPScalar f_scalar_frecps = { gen_helper_recpsf_f16, gen_helper_recpsf_f32, gen_helper_recpsf_f64, }; -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) +static const FPScalar f_scalar_ah_frecps = { + gen_helper_recpsf_ah_f16, + gen_helper_recpsf_ah_f32, + gen_helper_recpsf_ah_f64, +}; +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) static const FPScalar f_scalar_frsqrts = { gen_helper_rsqrtsf_f16, gen_helper_rsqrtsf_f32, gen_helper_rsqrtsf_f64, }; -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) +static const FPScalar f_scalar_ah_frsqrts = { + gen_helper_rsqrtsf_ah_f16, + gen_helper_rsqrtsf_ah_f32, + gen_helper_rsqrtsf_ah_f64, +}; +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) + +static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, + const FPScalar *f, bool swap) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = tcg_constant_i64(0); + if (swap) { + f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); + } else { + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); + } + 
write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rn); + TCGv_i32 t1 = tcg_constant_i32(0); + if (swap) { + f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); + } else { + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); + } + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rn); + TCGv_i32 t1 = tcg_constant_i32(0); + if (swap) { + f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); + } else { + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); + } + write_fp_sreg(s, a->rd, t0); + } + break; + default: + return false; + } + return true; +} + +TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) +TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) +TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) +TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) +TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, MemOp sgn_n, MemOp sgn_m, @@ -5235,6 +5674,43 @@ static const ENVScalar2 f_scalar_sqrdmulh = { }; TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) +typedef struct ENVScalar3 { + NeonGenThreeOpEnvFn *gen_hs[2]; +} ENVScalar3; + +static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, + const ENVScalar3 *f) +{ + TCGv_i32 t0, t1, t2; + + if (a->esz != MO_16 && a->esz != MO_32) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, 0, a->esz); + read_vec_element_i32(s, t2, a->rd, 0, a->esz); + f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); + write_fp_sreg(s, a->rd, t0); + return true; +} + +static const ENVScalar3 f_scalar_sqrdmlah = { + { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } +}; +TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) + +static const ENVScalar3 f_scalar_sqrdmlsh = { + { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } +}; +TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) + static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) { if (fp_access_check(s)) { @@ -5253,201 +5729,253 @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, - gen_helper_gvec_3_ptr * const fns[3]) +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, + int data, + gen_helper_gvec_3_ptr * const fns[3], + ARMFPStatusFlavour fpsttype) { MemOp esz = a->esz; + int check = fp_access_check_vector_hsd(s, a->q, esz); - switch (esz) { - case MO_64: - if (!a->q) { - return false; - } - break; - case MO_32: - break; - case MO_16: - if (!dc_isar_feature(aa64_fp16, s)) { - return false; - } - break; - default: - return false; - } - if (fp_access_check(s)) { - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, - esz == MO_16, 0, fns[esz - 1]); + if (check <= 0) { + return check == 0; } + + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, + data, fns[esz - 1]); return true; } +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fns[3]) +{ + return do_fp3_vector_with_fpsttype(s, a, data, fns, + a->esz == MO_16 ? 
+ FPST_A64_F16 : FPST_A64); +} + +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fnormal[3], + gen_helper_gvec_3_ptr * const fah[3]) +{ + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); +} + +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fnormal[3], + gen_helper_gvec_3_ptr * const fah[3]) +{ + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, + select_ah_fpst(s, a->esz)); +} + static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { gen_helper_gvec_fadd_h, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_d, }; -TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd) +TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { gen_helper_gvec_fsub_h, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_d, }; -TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub) +TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { gen_helper_gvec_fdiv_h, gen_helper_gvec_fdiv_s, gen_helper_gvec_fdiv_d, }; -TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv) +TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { gen_helper_gvec_fmul_h, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_d, }; -TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul) +TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { gen_helper_gvec_fmax_h, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_d, }; -TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax) +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { + gen_helper_gvec_ah_fmax_h, + gen_helper_gvec_ah_fmax_s, + gen_helper_gvec_ah_fmax_d, +}; +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { gen_helper_gvec_fmin_h, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_d, }; -TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin) +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { + gen_helper_gvec_ah_fmin_h, + gen_helper_gvec_ah_fmin_s, + gen_helper_gvec_ah_fmin_d, +}; +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { gen_helper_gvec_fmaxnum_h, gen_helper_gvec_fmaxnum_s, gen_helper_gvec_fmaxnum_d, }; -TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm) +TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { gen_helper_gvec_fminnum_h, gen_helper_gvec_fminnum_s, gen_helper_gvec_fminnum_d, }; -TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm) +TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { gen_helper_gvec_fmulx_h, gen_helper_gvec_fmulx_s, gen_helper_gvec_fmulx_d, }; -TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx) +TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { gen_helper_gvec_vfma_h, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_d, }; -TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla) +TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { gen_helper_gvec_vfms_h, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_d, }; -TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls) +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { + gen_helper_gvec_ah_vfms_h, + gen_helper_gvec_ah_vfms_s, 
+ gen_helper_gvec_ah_vfms_d, +}; +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { gen_helper_gvec_fceq_h, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_d, }; -TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq) +TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { gen_helper_gvec_fcge_h, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_d, }; -TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge) +TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { gen_helper_gvec_fcgt_h, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_d, }; -TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt) +TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { gen_helper_gvec_facge_h, gen_helper_gvec_facge_s, gen_helper_gvec_facge_d, }; -TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge) +TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { gen_helper_gvec_facgt_h, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_d, }; -TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt) +TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { gen_helper_gvec_fabd_h, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_d, }; -TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd) +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { + gen_helper_gvec_ah_fabd_h, + gen_helper_gvec_ah_fabd_s, + gen_helper_gvec_ah_fabd_d, +}; +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { gen_helper_gvec_recps_h, gen_helper_gvec_recps_s, gen_helper_gvec_recps_d, }; -TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps) +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { + gen_helper_gvec_ah_recps_h, + gen_helper_gvec_ah_recps_s, + gen_helper_gvec_ah_recps_d, +}; +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { gen_helper_gvec_rsqrts_h, gen_helper_gvec_rsqrts_s, gen_helper_gvec_rsqrts_d, }; -TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts) +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { + gen_helper_gvec_ah_rsqrts_h, + gen_helper_gvec_ah_rsqrts_s, + gen_helper_gvec_ah_rsqrts_d, +}; +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { gen_helper_gvec_faddp_h, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_d, }; -TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp) +TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { gen_helper_gvec_fmaxp_h, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_d, }; -TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp) +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { + gen_helper_gvec_ah_fmaxp_h, + gen_helper_gvec_ah_fmaxp_s, + gen_helper_gvec_ah_fmaxp_d, +}; +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { gen_helper_gvec_fminp_h, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_d, }; -TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp) +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { + gen_helper_gvec_ah_fminp_h, + gen_helper_gvec_ah_fminp_s, + 
gen_helper_gvec_ah_fminp_d, +}; +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { gen_helper_gvec_fmaxnump_h, gen_helper_gvec_fmaxnump_s, gen_helper_gvec_fmaxnump_d, }; -TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp) +TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { gen_helper_gvec_fminnump_h, gen_helper_gvec_fminnump_s, gen_helper_gvec_fminnump_d, }; -TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp) +TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) { @@ -5552,6 +6080,438 @@ TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) +TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) +TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) + +static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, + gen_helper_gvec_4 *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); + } + return true; +} + +static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, + gen_helper_gvec_4_ptr *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); + } + return true; +} + +TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) +TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) +TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) +TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) +TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) +TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) +TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) +TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) + +static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) +{ + if (!dc_isar_feature(aa64_bf16, s)) { + return false; + } + if (fp_access_check(s)) { + /* Q bit selects BFMLALB vs BFMLALT. */ + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, + gen_helper_gvec_bfmlal); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { + gen_helper_gvec_fcaddh, + gen_helper_gvec_fcadds, + gen_helper_gvec_fcaddd, +}; +/* + * Encode FPCR.AH into the data so the helper knows whether the + * negations it does should avoid flipping the sign bit on a NaN + */ +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), + f_vector_fcadd) +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), + f_vector_fcadd) + +static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) +{ + static gen_helper_gvec_4_ptr * const fn[] = { + [MO_16] = gen_helper_gvec_fcmlah, + [MO_32] = gen_helper_gvec_fcmlas, + [MO_64] = gen_helper_gvec_fcmlad, + }; + int check; + + if (!dc_isar_feature(aa64_fcma, s)) { + return false; + } + + check = fp_access_check_vector_hsd(s, a->q, a->esz); + if (check <= 0) { + return check == 0; + } + + gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->rot | (s->fpcr_ah << 2), fn[a->esz]); + return true; +} + +/* + * Widening vector x vector/indexed. 
+ * + * These read from the top or bottom half of a 128-bit vector. + * After widening, optionally accumulate with a 128-bit vector. + * Implement these inline, as the number of elements are limited + * and the related SVE and SME operations on larger vectors use + * even/odd elements instead of top/bottom half. + * + * If idx >= 0, operand 2 is indexed, otherwise vector. + * If acc, operand 0 is loaded with rd. + */ + +/* For low half, iterating up. */ +static bool do_3op_widening(DisasContext *s, MemOp memop, int top, + int rd, int rn, int rm, int idx, + NeonGenTwo64OpFn *fn, bool acc) +{ + TCGv_i64 tcg_op0 = tcg_temp_new_i64(); + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + MemOp esz = memop & MO_SIZE; + int half = 8 >> esz; + int top_swap, top_half; + + /* There are no 64x64->128 bit operations. */ + if (esz >= MO_64) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + + if (idx >= 0) { + read_vec_element(s, tcg_op2, rm, idx, memop); + } + + /* + * For top half inputs, iterate forward; backward for bottom half. + * This means the store to the destination will not occur until + * overlapping input inputs are consumed. + * Use top_swap to conditionally invert the forward iteration index. + */ + top_swap = top ? 0 : half - 1; + top_half = top ? half : 0; + + for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { + int elt = elt_fwd ^ top_swap; + + read_vec_element(s, tcg_op1, rn, elt + top_half, memop); + if (idx < 0) { + read_vec_element(s, tcg_op2, rm, elt + top_half, memop); + } + if (acc) { + read_vec_element(s, tcg_op0, rd, elt, memop + 1); + } + fn(tcg_op0, tcg_op1, tcg_op2); + write_vec_element(s, tcg_op0, rd, elt, esz + 1); + } + clear_vec_high(s, 1, rd); + return true; +} + +static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_mul_i64(t, n, m); + tcg_gen_add_i64(d, d, t); +} + +static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_mul_i64(t, n, m); + tcg_gen_sub_i64(d, d, t); +} + +TRANS(SMULL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_mul_i64, false) +TRANS(UMULL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_mul_i64, false) +TRANS(SMLAL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + gen_muladd_i64, true) +TRANS(UMLAL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + gen_muladd_i64, true) +TRANS(SMLSL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + gen_mulsub_i64, true) +TRANS(UMLSL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + gen_mulsub_i64, true) + +TRANS(SMULL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + tcg_gen_mul_i64, false) +TRANS(UMULL_vi, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, a->idx, + tcg_gen_mul_i64, false) +TRANS(SMLAL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + gen_muladd_i64, true) +TRANS(UMLAL_vi, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, a->idx, + gen_muladd_i64, true) +TRANS(SMLSL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + gen_mulsub_i64, true) +TRANS(UMLSL_vi, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, a->idx, + gen_mulsub_i64, true) + +static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t1, n, m); + 
tcg_gen_sub_i64(t2, m, n); + tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); +} + +static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t1, n, m); + tcg_gen_sub_i64(t2, m, n); + tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); +} + +static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_sabd_i64(t, n, m); + tcg_gen_add_i64(d, d, t); +} + +static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_uabd_i64(t, n, m); + tcg_gen_add_i64(d, d, t); +} + +TRANS(SADDL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_add_i64, false) +TRANS(UADDL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_add_i64, false) +TRANS(SSUBL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_sub_i64, false) +TRANS(USUBL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + tcg_gen_sub_i64, false) +TRANS(SABDL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + gen_sabd_i64, false) +TRANS(UABDL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + gen_uabd_i64, false) +TRANS(SABAL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + gen_saba_i64, true) +TRANS(UABAL_v, do_3op_widening, + a->esz, a->q, a->rd, a->rn, a->rm, -1, + gen_uaba_i64, true) + +static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_mul_i64(d, n, m); + gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); +} + +static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_mul_i64(d, n, m); + gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); +} + +static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, n, m); + gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); + gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); +} + +static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, n, m); + gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); + gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); +} + +static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, n, m); + gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); + tcg_gen_neg_i64(t, t); + gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); +} + +static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, n, m); + gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); + tcg_gen_neg_i64(t, t); + gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); +} + +TRANS(SQDMULL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) +TRANS(SQDMLAL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) +TRANS(SQDMLSL_v, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, + a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) + +TRANS(SQDMULL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) +TRANS(SQDMLAL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + a->esz == MO_16 ? 
gen_sqdmlal_h : gen_sqdmlal_s, true) +TRANS(SQDMLSL_vi, do_3op_widening, + a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, + a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) + +static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, + MemOp sign, bool sub) +{ + TCGv_i64 tcg_op0, tcg_op1; + MemOp esz = a->esz; + int half = 8 >> esz; + bool top = a->q; + int top_swap = top ? 0 : half - 1; + int top_half = top ? half : 0; + + /* There are no 64x64->128 bit operations. */ + if (esz >= MO_64) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + tcg_op0 = tcg_temp_new_i64(); + tcg_op1 = tcg_temp_new_i64(); + + for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { + int elt = elt_fwd ^ top_swap; + + read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); + read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); + if (sub) { + tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); + } else { + tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); + } + write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); + } + clear_vec_high(s, 1, a->rd); + return true; +} + +TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) +TRANS(UADDW, do_addsub_wide, a, 0, false) +TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) +TRANS(USUBW, do_addsub_wide, a, 0, true) + +static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, + bool sub, bool round) +{ + TCGv_i64 tcg_op0, tcg_op1; + MemOp esz = a->esz; + int half = 8 >> esz; + bool top = a->q; + int ebits = 8 << esz; + uint64_t rbit = 1ull << (ebits - 1); + int top_swap, top_half; + + /* There are no 128x128->64 bit operations. */ + if (esz >= MO_64) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + tcg_op0 = tcg_temp_new_i64(); + tcg_op1 = tcg_temp_new_i64(); + + /* + * For top half inputs, iterate backward; forward for bottom half. + * This means the store to the destination will not occur until + * overlapping input inputs are consumed. + */ + top_swap = top ? half - 1 : 0; + top_half = top ? half : 0; + + for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { + int elt = elt_fwd ^ top_swap; + + read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); + read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); + if (sub) { + tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); + } else { + tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); + } + if (round) { + tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); + } + tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); + write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); + } + clear_vec_high(s, top, a->rd); + return true; +} + +TRANS(ADDHN, do_addsub_highnarrow, a, false, false) +TRANS(SUBHN, do_addsub_highnarrow, a, true, false) +TRANS(RADDHN, do_addsub_highnarrow, a, false, true) +TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) + +static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) +{ + if (fp_access_check(s)) { + /* The Q field specifies lo/hi half input for these insns. 
*/ + gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); + } + return true; +} + +TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) +TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) /* * Advanced SIMD scalar/vector x indexed element @@ -5566,8 +6526,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) TCGv_i64 t1 = tcg_temp_new_i64(); read_vec_element(s, t1, a->rm, a->idx, MO_64); - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); - write_fp_dreg(s, a->rd, t0); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); + write_fp_dreg_merging(s, a->rd, a->rn, t0); } break; case MO_32: @@ -5576,8 +6536,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) TCGv_i32 t1 = tcg_temp_new_i32(); read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); - write_fp_sreg(s, a->rd, t0); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); + write_fp_sreg_merging(s, a->rd, a->rn, t0); } break; case MO_16: @@ -5589,8 +6549,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) TCGv_i32 t1 = tcg_temp_new_i32(); read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); - write_fp_sreg(s, a->rd, t0); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); + write_fp_hreg_merging(s, a->rd, a->rn, t0); } break; default: @@ -5613,10 +6573,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element(s, t2, a->rm, a->idx, MO_64); if (neg) { - gen_vfp_negd(t1, t1); + gen_vfp_maybe_ah_negd(s, t1, t1); } - gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); - write_fp_dreg(s, a->rd, t0); + gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); + write_fp_dreg_merging(s, a->rd, a->rd, t0); } break; case MO_32: @@ -5627,10 +6587,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); if (neg) { - gen_vfp_negs(t1, t1); + gen_vfp_maybe_ah_negs(s, t1, t1); } - gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); - write_fp_sreg(s, a->rd, t0); + gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); + write_fp_sreg_merging(s, a->rd, a->rd, t0); } break; case MO_16: @@ -5644,11 +6604,11 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); if (neg) { - gen_vfp_negh(t1, t1); + gen_vfp_maybe_ah_negh(s, t1, t1); } gen_helper_advsimd_muladdh(t0, t1, t2, t0, - fpstatus_ptr(FPST_FPCR_F16)); - write_fp_sreg(s, a->rd, t0); + fpstatus_ptr(FPST_A64_F16)); + write_fp_hreg_merging(s, a->rd, a->rd, t0); } break; default: @@ -5681,31 +6641,71 @@ static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) +static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, + const ENVScalar3 *f) +{ + if (a->esz < MO_16 || a->esz > MO_32) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); + read_vec_element_i32(s, t2, a->rd, 0, a->esz); + f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); + write_fp_sreg(s, a->rd, t0); + } + return true; +} + +TRANS_FEAT(SQRDMLAH_si, aa64_rdm, 
do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) +TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) + +static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, + NeonGenTwo64OpFn *fn, bool acc) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + if (acc) { + read_vec_element(s, t0, a->rd, 0, a->esz + 1); + } + read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); + read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); + fn(t0, t1, t2); + + /* Clear the whole register first, then store scalar. */ + clear_vec(s, a->rd); + write_vec_element(s, t0, a->rd, 0, a->esz + 1); + } + return true; +} + +TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, + a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) +TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, + a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) +TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, + a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) + static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, gen_helper_gvec_3_ptr * const fns[3]) { MemOp esz = a->esz; + int check = fp_access_check_vector_hsd(s, a->q, esz); - switch (esz) { - case MO_64: - if (!a->q) { - return false; - } - break; - case MO_32: - break; - case MO_16: - if (!dc_isar_feature(aa64_fp16, s)) { - return false; - } - break; - default: - g_assert_not_reached(); - } - if (fp_access_check(s)) { - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, - esz == MO_16, a->idx, fns[esz - 1]); + if (check <= 0) { + return check == 0; } + + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, + esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->idx, fns[esz - 1]); return true; } @@ -5725,34 +6725,27 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) { - static gen_helper_gvec_4_ptr * const fns[3] = { - gen_helper_gvec_fmla_idx_h, - gen_helper_gvec_fmla_idx_s, - gen_helper_gvec_fmla_idx_d, + static gen_helper_gvec_4_ptr * const fns[3][3] = { + { gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_fmla_idx_s, + gen_helper_gvec_fmla_idx_d }, + { gen_helper_gvec_fmls_idx_h, + gen_helper_gvec_fmls_idx_s, + gen_helper_gvec_fmls_idx_d }, + { gen_helper_gvec_ah_fmls_idx_h, + gen_helper_gvec_ah_fmls_idx_s, + gen_helper_gvec_ah_fmls_idx_d }, }; MemOp esz = a->esz; + int check = fp_access_check_vector_hsd(s, a->q, esz); - switch (esz) { - case MO_64: - if (!a->q) { - return false; - } - break; - case MO_32: - break; - case MO_16: - if (!dc_isar_feature(aa64_fp16, s)) { - return false; - } - break; - default: - g_assert_not_reached(); - } - if (fp_access_check(s)) { - gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, - esz == MO_16, (a->idx << 1) | neg, - fns[esz - 1]); + if (check <= 0) { + return check == 0; } + + gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, + esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->idx, fns[neg ? 
1 + s->fpcr_ah : 0][esz - 1]); return true; } @@ -5838,6 +6831,90 @@ static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { }; TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) +static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { + gen_helper_neon_sqrdmlah_idx_h, + gen_helper_neon_sqrdmlah_idx_s, +}; +TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, + f_vector_idx_sqrdmlah) + +static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { + gen_helper_neon_sqrdmlsh_idx_h, + gen_helper_neon_sqrdmlsh_idx_s, +}; +TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, + f_vector_idx_sqrdmlsh) + +static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_4 *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); + } + return true; +} + +static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_4_ptr *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); + } + return true; +} + +TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) +TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) +TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, + gen_helper_gvec_sudot_idx_b) +TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, + gen_helper_gvec_usdot_idx_b) +TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, + gen_helper_gvec_bfdot_idx) + +static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) +{ + if (!dc_isar_feature(aa64_bf16, s)) { + return false; + } + if (fp_access_check(s)) { + /* Q bit selects BFMLALB vs BFMLALT. */ + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, + s->fpcr_ah ? FPST_AH : FPST_A64, + (a->idx << 1) | a->q, + gen_helper_gvec_bfmlal_idx); + } + return true; +} + +static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) +{ + gen_helper_gvec_4_ptr *fn; + + if (!dc_isar_feature(aa64_fcma, s)) { + return false; + } + switch (a->esz) { + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + fn = gen_helper_gvec_fcmlah_idx; + break; + case MO_32: + fn = gen_helper_gvec_fcmlas_idx; + break; + default: + g_assert_not_reached(); + } + if (fp_access_check(s)) { + gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, + a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); + } + return true; +} + /* * Advanced SIMD scalar pairwise */ @@ -5852,7 +6929,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) read_vec_element(s, t0, a->rn, 0, MO_64); read_vec_element(s, t1, a->rn, 1, MO_64); - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); write_fp_dreg(s, a->rd, t0); } break; @@ -5863,7 +6940,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) read_vec_element_i32(s, t0, a->rn, 0, MO_32); read_vec_element_i32(s, t1, a->rn, 1, MO_32); - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); write_fp_sreg(s, a->rd, t0); } break; @@ -5877,7 +6954,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) read_vec_element_i32(s, t0, a->rn, 0, MO_16); read_vec_element_i32(s, t1, a->rn, 1, MO_16); - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); write_fp_sreg(s, a->rd, t0); } break; @@ -5887,9 +6964,16 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) return true; } +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, + const FPScalar *fnormal, + const FPScalar *fah) +{ + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); +} + TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) @@ -5915,22 +6999,10 @@ static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) { TCGv_i64 t_true, t_false; DisasCompare64 c; + int check = fp_access_check_scalar_hsd(s, a->esz); - switch (a->esz) { - case MO_32: - case MO_64: - break; - case MO_16: - if (!dc_isar_feature(aa64_fp16, s)) { - return false; - } - break; - default: - return false; - } - - if (!fp_access_check(s)) { - return true; + if (check <= 0) { + return check == 0; } /* Zero extend sreg & hreg inputs to 64 bits now. */ @@ -5952,6 +7024,54 @@ static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) } /* + * Advanced SIMD Extract + */ + +static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) +{ + if (fp_access_check(s)) { + TCGv_i64 lo = read_fp_dreg(s, a->rn); + if (a->imm != 0) { + TCGv_i64 hi = read_fp_dreg(s, a->rm); + tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); + } + write_fp_dreg(s, a->rd, lo); + } + return true; +} + +static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) +{ + TCGv_i64 lo, hi; + int pos = (a->imm & 7) * 8; + int elt = a->imm >> 3; + + if (!fp_access_check(s)) { + return true; + } + + lo = tcg_temp_new_i64(); + hi = tcg_temp_new_i64(); + + read_vec_element(s, lo, a->rn, elt, MO_64); + elt++; + read_vec_element(s, hi, elt & 2 ? 
a->rm : a->rn, elt & 1, MO_64); + elt++; + + if (pos != 0) { + TCGv_i64 hh = tcg_temp_new_i64(); + tcg_gen_extract2_i64(lo, lo, hi, pos); + read_vec_element(s, hh, a->rm, elt & 1, MO_64); + tcg_gen_extract2_i64(hi, hi, hh, pos); + } + + write_vec_element(s, lo, a->rd, 0, MO_64); + write_vec_element(s, hi, a->rd, 1, MO_64); + clear_vec_high(s, true, a->rd); + return true; +} + +/* * Floating-point data-processing (3 source) */ @@ -5972,14 +7092,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i64 ta = read_fp_dreg(s, a->ra); if (neg_a) { - gen_vfp_negd(ta, ta); + gen_vfp_maybe_ah_negd(s, ta, ta); } if (neg_n) { - gen_vfp_negd(tn, tn); + gen_vfp_maybe_ah_negd(s, tn, tn); } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); - write_fp_dreg(s, a->rd, ta); + write_fp_dreg_merging(s, a->rd, a->ra, ta); } break; @@ -5990,14 +7110,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i32 ta = read_fp_sreg(s, a->ra); if (neg_a) { - gen_vfp_negs(ta, ta); + gen_vfp_maybe_ah_negs(s, ta, ta); } if (neg_n) { - gen_vfp_negs(tn, tn); + gen_vfp_maybe_ah_negs(s, tn, tn); } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); - write_fp_sreg(s, a->rd, ta); + write_fp_sreg_merging(s, a->rd, a->ra, ta); } break; @@ -6011,14 +7131,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i32 ta = read_fp_hreg(s, a->ra); if (neg_a) { - gen_vfp_negh(ta, ta); + gen_vfp_maybe_ah_negh(s, ta, ta); } if (neg_n) { - gen_vfp_negh(tn, tn); + gen_vfp_maybe_ah_negh(s, tn, tn); } - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A64_F16); gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); - write_fp_sreg(s, a->rd, ta); + write_fp_hreg_merging(s, a->rd, a->ra, ta); } break; @@ -6033,6543 +7153,2879 @@ TRANS(FNMADD, do_fmadd, a, true, true) TRANS(FMSUB, do_fmadd, a, false, true) TRANS(FNMSUB, do_fmadd, a, true, false) -/* Shift a TCGv src by TCGv shift_amount, put result in dst. - * Note that it is the caller's responsibility to ensure that the - * shift amount is in range (ie 0..31 or 0..63) and provide the ARM - * mandated semantics for out of range shifts. - */ -static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, - enum a64_shift_type shift_type, TCGv_i64 shift_amount) -{ - switch (shift_type) { - case A64_SHIFT_TYPE_LSL: - tcg_gen_shl_i64(dst, src, shift_amount); - break; - case A64_SHIFT_TYPE_LSR: - tcg_gen_shr_i64(dst, src, shift_amount); - break; - case A64_SHIFT_TYPE_ASR: - if (!sf) { - tcg_gen_ext32s_i64(dst, src); - } - tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); - break; - case A64_SHIFT_TYPE_ROR: - if (sf) { - tcg_gen_rotr_i64(dst, src, shift_amount); - } else { - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t0, src); - tcg_gen_extrl_i64_i32(t1, shift_amount); - tcg_gen_rotr_i32(t0, t0, t1); - tcg_gen_extu_i32_i64(dst, t0); - } - break; - default: - assert(FALSE); /* all shift types should be handled */ - break; - } - - if (!sf) { /* zero extend final result */ - tcg_gen_ext32u_i64(dst, dst); - } -} - -/* Shift a TCGv src by immediate, put result in dst. - * The shift amount must be in range (this should always be true as the - * relevant instructions will UNDEF on bad shift immediates). 
+/* + * Advanced SIMD Across Lanes */ -static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, - enum a64_shift_type shift_type, unsigned int shift_i) -{ - assert(shift_i < (sf ? 64 : 32)); - if (shift_i == 0) { - tcg_gen_mov_i64(dst, src); - } else { - shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); - } -} - -/* Logical (shifted register) - * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 - * +----+-----+-----------+-------+---+------+--------+------+------+ - * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | - * +----+-----+-----------+-------+---+------+--------+------+------+ - */ -static void disas_logic_reg(DisasContext *s, uint32_t insn) +static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, + MemOp src_sign, NeonGenTwo64OpFn *fn) { - TCGv_i64 tcg_rd, tcg_rn, tcg_rm; - unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; - - sf = extract32(insn, 31, 1); - opc = extract32(insn, 29, 2); - shift_type = extract32(insn, 22, 2); - invert = extract32(insn, 21, 1); - rm = extract32(insn, 16, 5); - shift_amount = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - - if (!sf && (shift_amount & (1 << 5))) { - unallocated_encoding(s); - return; - } - - tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_res, tcg_elt; + MemOp src_mop = a->esz | src_sign; + int elements = (a->q ? 16 : 8) >> a->esz; - if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { - /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for - * register-register MOV and MVN, so it is worth special casing. - */ - tcg_rm = cpu_reg(s, rm); - if (invert) { - tcg_gen_not_i64(tcg_rd, tcg_rm); - if (!sf) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); - } - } else { - if (sf) { - tcg_gen_mov_i64(tcg_rd, tcg_rm); - } else { - tcg_gen_ext32u_i64(tcg_rd, tcg_rm); - } - } - return; + /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. 
*/ + if (elements < 4) { + return false; } - - tcg_rm = read_cpu_reg(s, rm, sf); - - if (shift_amount) { - shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); + if (!fp_access_check(s)) { + return true; } - tcg_rn = cpu_reg(s, rn); - - switch (opc | (invert << 2)) { - case 0: /* AND */ - case 3: /* ANDS */ - tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 1: /* ORR */ - tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 2: /* EOR */ - tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 4: /* BIC */ - case 7: /* BICS */ - tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 5: /* ORN */ - tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 6: /* EON */ - tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); - break; - default: - assert(FALSE); - break; - } + tcg_res = tcg_temp_new_i64(); + tcg_elt = tcg_temp_new_i64(); - if (!sf) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + read_vec_element(s, tcg_res, a->rn, 0, src_mop); + for (int i = 1; i < elements; i++) { + read_vec_element(s, tcg_elt, a->rn, i, src_mop); + fn(tcg_res, tcg_res, tcg_elt); } - if (opc == 3) { - gen_logic_CC(sf, tcg_rd); - } + tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); + write_fp_dreg(s, a->rd, tcg_res); + return true; } +TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) +TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) +TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) +TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) +TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) +TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) +TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) + /* - * Add/subtract (extended register) - * - * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| - * +--+--+--+-----------+-----+--+-------+------+------+----+----+ - * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | - * +--+--+--+-----------+-----+--+-------+------+------+----+----+ + * do_fp_reduction helper * - * sf: 0 -> 32bit, 1 -> 64bit - * op: 0 -> add , 1 -> sub - * S: 1 -> set flags - * opt: 00 - * option: extension type (see DecodeRegExtend) - * imm3: optional shift to Rm + * This mirrors the Reduce() pseudocode in the ARM ARM. It is + * important for correct NaN propagation that we do these + * operations in exactly the order specified by the pseudocode. * - * Rd = Rn + LSL(extend(Rm), amount) + * This is a recursive function. 
*/ -static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm3 = extract32(insn, 10, 3); - int option = extract32(insn, 13, 3); - int rm = extract32(insn, 16, 5); - int opt = extract32(insn, 22, 2); - bool setflags = extract32(insn, 29, 1); - bool sub_op = extract32(insn, 30, 1); - bool sf = extract32(insn, 31, 1); - - TCGv_i64 tcg_rm, tcg_rn; /* temps */ - TCGv_i64 tcg_rd; - TCGv_i64 tcg_result; - - if (imm3 > 4 || opt != 0) { - unallocated_encoding(s); - return; - } - - /* non-flag setting ops may use SP */ - if (!setflags) { - tcg_rd = cpu_reg_sp(s, rd); +static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, + int ebase, int ecount, TCGv_ptr fpst, + NeonGenTwoSingleOpFn *fn) +{ + if (ecount == 1) { + TCGv_i32 tcg_elem = tcg_temp_new_i32(); + read_vec_element_i32(s, tcg_elem, rn, ebase, esz); + return tcg_elem; } else { - tcg_rd = cpu_reg(s, rd); - } - tcg_rn = read_cpu_reg_sp(s, rn, sf); - - tcg_rm = read_cpu_reg(s, rm, sf); - ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); + int half = ecount >> 1; + TCGv_i32 tcg_hi, tcg_lo, tcg_res; - tcg_result = tcg_temp_new_i64(); + tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); + tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); + tcg_res = tcg_temp_new_i32(); - if (!setflags) { - if (sub_op) { - tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); - } else { - tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); - } - } else { - if (sub_op) { - gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); - } else { - gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); - } + fn(tcg_res, tcg_lo, tcg_hi, fpst); + return tcg_res; } +} - if (sf) { - tcg_gen_mov_i64(tcg_rd, tcg_result); - } else { - tcg_gen_ext32u_i64(tcg_rd, tcg_result); +static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, + NeonGenTwoSingleOpFn *fnormal, + NeonGenTwoSingleOpFn *fah) +{ + if (fp_access_check(s)) { + MemOp esz = a->esz; + int elts = (a->q ? 16 : 8) >> esz; + TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, + s->fpcr_ah ? 
fah : fnormal); + write_fp_sreg(s, a->rd, res); } + return true; } -/* - * Add/subtract (shifted register) - * - * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 - * +--+--+--+-----------+-----+--+-------+---------+------+------+ - * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | - * +--+--+--+-----------+-----+--+-------+---------+------+------+ - * - * sf: 0 -> 32bit, 1 -> 64bit - * op: 0 -> add , 1 -> sub - * S: 1 -> set flags - * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED - * imm6: Shift amount to apply to Rm before the add/sub - */ -static void disas_add_sub_reg(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm6 = extract32(insn, 10, 6); - int rm = extract32(insn, 16, 5); - int shift_type = extract32(insn, 22, 2); - bool setflags = extract32(insn, 29, 1); - bool sub_op = extract32(insn, 30, 1); - bool sf = extract32(insn, 31, 1); - - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_rn, tcg_rm; - TCGv_i64 tcg_result; - - if ((shift_type == 3) || (!sf && (imm6 > 31))) { - unallocated_encoding(s); - return; - } +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) - tcg_rn = read_cpu_reg(s, rn, sf); - tcg_rm = read_cpu_reg(s, rm, sf); +TRANS(FMAXNMV_s, do_fp_reduction, a, + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) +TRANS(FMINNMV_s, do_fp_reduction, a, + gen_helper_vfp_minnums, gen_helper_vfp_minnums) +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) - shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); +/* + * Floating-point Immediate + */ - tcg_result = tcg_temp_new_i64(); +static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) +{ + int check = fp_access_check_scalar_hsd(s, a->esz); + uint64_t imm; - if (!setflags) { - if (sub_op) { - tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); - } else { - tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); - } - } else { - if (sub_op) { - gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); - } else { - gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); - } + if (check <= 0) { + return check == 0; } - if (sf) { - tcg_gen_mov_i64(tcg_rd, tcg_result); - } else { - tcg_gen_ext32u_i64(tcg_rd, tcg_result); - } + imm = vfp_expand_imm(a->esz, a->imm); + write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); + return true; } -/* Data-processing (3 source) - * - * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 - * +--+------+-----------+------+------+----+------+------+------+ - * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | - * +--+------+-----------+------+------+----+------+------+------+ +/* + * Floating point compare, conditional compare */ -static void disas_data_proc_3src(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int ra = extract32(insn, 10, 5); - int rm = extract32(insn, 16, 5); - int op_id = (extract32(insn, 29, 3) << 4) | - (extract32(insn, 21, 3) << 1) | - extract32(insn, 15, 1); - bool sf = extract32(insn, 31, 1); - bool is_sub = extract32(op_id, 0, 1); - bool is_high = extract32(op_id, 2, 1); - bool is_signed = false; - TCGv_i64 tcg_op1; - TCGv_i64 tcg_op2; - 
TCGv_i64 tcg_tmp; - - /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ - switch (op_id) { - case 0x42: /* SMADDL */ - case 0x43: /* SMSUBL */ - case 0x44: /* SMULH */ - is_signed = true; - break; - case 0x0: /* MADD (32bit) */ - case 0x1: /* MSUB (32bit) */ - case 0x40: /* MADD (64bit) */ - case 0x41: /* MSUB (64bit) */ - case 0x4a: /* UMADDL */ - case 0x4b: /* UMSUBL */ - case 0x4c: /* UMULH */ - break; - default: - unallocated_encoding(s); - return; - } - if (is_high) { - TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_rn = cpu_reg(s, rn); - TCGv_i64 tcg_rm = cpu_reg(s, rm); +static void handle_fp_compare(DisasContext *s, int size, + unsigned int rn, unsigned int rm, + bool cmp_with_zero, bool signal_all_nans) +{ + TCGv_i64 tcg_flags = tcg_temp_new_i64(); + TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); - if (is_signed) { - tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); + if (size == MO_64) { + TCGv_i64 tcg_vn, tcg_vm; + + tcg_vn = read_fp_dreg(s, rn); + if (cmp_with_zero) { + tcg_vm = tcg_constant_i64(0); } else { - tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); + tcg_vm = read_fp_dreg(s, rm); } - return; - } - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_tmp = tcg_temp_new_i64(); - - if (op_id < 0x42) { - tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); - tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); - } else { - if (is_signed) { - tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); - tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); + if (signal_all_nans) { + gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); } else { - tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); - tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); + gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); } - } - - if (ra == 31 && !is_sub) { - /* Special-case MADD with rA == XZR; it is the standard MUL alias */ - tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); } else { - tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); - if (is_sub) { - tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); + TCGv_i32 tcg_vn = tcg_temp_new_i32(); + TCGv_i32 tcg_vm = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_vn, rn, 0, size); + if (cmp_with_zero) { + tcg_gen_movi_i32(tcg_vm, 0); } else { - tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); + read_vec_element_i32(s, tcg_vm, rm, 0, size); } - } - if (!sf) { - tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); + switch (size) { + case MO_32: + if (signal_all_nans) { + gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + break; + case MO_16: + if (signal_all_nans) { + gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + break; + default: + g_assert_not_reached(); + } } -} -/* Add/subtract (with carry) - * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 - * +--+--+--+------------------------+------+-------------+------+-----+ - * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | - * +--+--+--+------------------------+------+-------------+------+-----+ - */ + gen_set_nzcv(tcg_flags); +} -static void disas_adc_sbc(DisasContext *s, uint32_t insn) +/* FCMP, FCMPE */ +static bool trans_FCMP(DisasContext *s, arg_FCMP *a) { - unsigned int sf, op, setflags, rm, rn, rd; - TCGv_i64 tcg_y, tcg_rn, tcg_rd; + int check = fp_access_check_scalar_hsd(s, a->esz); - sf = 
extract32(insn, 31, 1); - op = extract32(insn, 30, 1); - setflags = extract32(insn, 29, 1); - rm = extract32(insn, 16, 5); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - - tcg_rd = cpu_reg(s, rd); - tcg_rn = cpu_reg(s, rn); - - if (op) { - tcg_y = tcg_temp_new_i64(); - tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); - } else { - tcg_y = cpu_reg(s, rm); + if (check <= 0) { + return check == 0; } - if (setflags) { - gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); - } else { - gen_adc(sf, tcg_rd, tcg_rn, tcg_y); - } + handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); + return true; } -/* - * Rotate right into flags - * 31 30 29 21 15 10 5 4 0 - * +--+--+--+-----------------+--------+-----------+------+--+------+ - * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | - * +--+--+--+-----------------+--------+-----------+------+--+------+ - */ -static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) +/* FCCMP, FCCMPE */ +static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) { - int mask = extract32(insn, 0, 4); - int o2 = extract32(insn, 4, 1); - int rn = extract32(insn, 5, 5); - int imm6 = extract32(insn, 15, 6); - int sf_op_s = extract32(insn, 29, 3); - TCGv_i64 tcg_rn; - TCGv_i32 nzcv; + TCGLabel *label_continue = NULL; + int check = fp_access_check_scalar_hsd(s, a->esz); - if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { - unallocated_encoding(s); - return; + if (check <= 0) { + return check == 0; } - tcg_rn = read_cpu_reg(s, rn, 1); - tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); + if (a->cond < 0x0e) { /* not always */ + TCGLabel *label_match = gen_new_label(); + label_continue = gen_new_label(); + arm_gen_test_cc(a->cond, label_match); + /* nomatch: */ + gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); + tcg_gen_br(label_continue); + gen_set_label(label_match); + } - nzcv = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(nzcv, tcg_rn); + handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); - if (mask & 8) { /* N */ - tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); - } - if (mask & 4) { /* Z */ - tcg_gen_not_i32(cpu_ZF, nzcv); - tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); - } - if (mask & 2) { /* C */ - tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); - } - if (mask & 1) { /* V */ - tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); + if (label_continue) { + gen_set_label(label_continue); } + return true; } /* - * Evaluate into flags - * 31 30 29 21 15 14 10 5 4 0 - * +--+--+--+-----------------+---------+----+---------+------+--+------+ - * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | - * +--+--+--+-----------------+---------+----+---------+------+--+------+ + * Advanced SIMD Modified Immediate */ -static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) -{ - int o3_mask = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int o2 = extract32(insn, 15, 6); - int sz = extract32(insn, 14, 1); - int sf_op_s = extract32(insn, 29, 3); - TCGv_i32 tmp; - int shift; - if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || - !dc_isar_feature(aa64_condm_4, s)) { - unallocated_encoding(s); - return; +static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) +{ + if (!dc_isar_feature(aa64_fp16, s)) { + return false; } - shift = sz ? 16 : 24; /* SETF16 or SETF8 */ + if (fp_access_check(s)) { + tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), + a->q ? 
16 : 8, vec_full_reg_size(s), + vfp_expand_imm(MO_16, a->abcdefgh)); + } + return true; +} - tmp = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); - tcg_gen_shli_i32(cpu_NF, tmp, shift); - tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); - tcg_gen_mov_i32(cpu_ZF, cpu_NF); - tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); +static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); } -/* Conditional compare (immediate / register) - * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 - * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ - * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | - * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ - * [1] y [0] [0] - */ -static void disas_cc(DisasContext *s, uint32_t insn) +static bool trans_Vimm(DisasContext *s, arg_Vimm *a) { - unsigned int sf, op, y, cond, rn, nzcv, is_imm; - TCGv_i32 tcg_t0, tcg_t1, tcg_t2; - TCGv_i64 tcg_tmp, tcg_y, tcg_rn; - DisasCompare c; + GVecGen2iFn *fn; - if (!extract32(insn, 29, 1)) { - unallocated_encoding(s); - return; + /* Handle decode of cmode/op here between ORR/BIC/MOVI */ + if ((a->cmode & 1) && a->cmode < 12) { + /* For op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1 && a->q == 0) { + return false; + } + fn = gen_movi; } - if (insn & (1 << 10 | 1 << 4)) { - unallocated_encoding(s); - return; + + if (fp_access_check(s)) { + uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); + gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); } - sf = extract32(insn, 31, 1); - op = extract32(insn, 30, 1); - is_imm = extract32(insn, 11, 1); - y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ - cond = extract32(insn, 12, 4); - rn = extract32(insn, 5, 5); - nzcv = extract32(insn, 0, 4); + return true; +} - /* Set T0 = !COND. */ - tcg_t0 = tcg_temp_new_i32(); - arm_test_cc(&c, cond); - tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); +/* + * Advanced SIMD Shift by Immediate + */ - /* Load the arguments for the new comparison. */ - if (is_imm) { - tcg_y = tcg_temp_new_i64(); - tcg_gen_movi_i64(tcg_y, y); - } else { - tcg_y = cpu_reg(s, y); +static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) +{ + if (fp_access_check(s)) { + gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); } - tcg_rn = cpu_reg(s, rn); + return true; +} - /* Set the flags for the new comparison. 
*/ - tcg_tmp = tcg_temp_new_i64(); - if (op) { - gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); - } else { - gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); +TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) +TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) +TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) +TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) +TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) +TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) +TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) +TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) +TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) +TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) +TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); +TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) +TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) +TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) + +static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) +{ + TCGv_i64 tcg_rn, tcg_rd; + int esz = a->esz; + int esize; + + if (!fp_access_check(s)) { + return true; } - /* If COND was false, force the flags to #nzcv. Compute two masks - * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). - * For tcg hosts that support ANDC, we can make do with just T1. - * In either case, allow the tcg optimizer to delete any unused mask. + /* + * For the LL variants the store is larger than the load, + * so if rd == rn we would overwrite parts of our input. + * So load everything right now and use shifts in the main loop. */ - tcg_t1 = tcg_temp_new_i32(); - tcg_t2 = tcg_temp_new_i32(); - tcg_gen_neg_i32(tcg_t1, tcg_t0); - tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); + tcg_rd = tcg_temp_new_i64(); + tcg_rn = tcg_temp_new_i64(); + read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); - if (nzcv & 8) { /* N */ - tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); - } else { - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); - } else { - tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); - } - } - if (nzcv & 4) { /* Z */ - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); - } else { - tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); - } - } else { - tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); - } - if (nzcv & 2) { /* C */ - tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); - } else { - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); - } else { - tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); - } - } - if (nzcv & 1) { /* V */ - tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); - } else { - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); + esize = 8 << esz; + for (int i = 0, elements = 8 >> esz; i < elements; i++) { + if (is_u) { + tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); } else { - tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); + tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); } + tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); + write_vec_element(s, tcg_rd, a->rd, i, esz + 1); } + clear_vec_high(s, true, a->rd); + return true; } -/* Conditional select - * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 - * +----+----+---+-----------------+------+------+-----+------+------+ - * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | - * +----+----+---+-----------------+------+------+-----+------+------+ - */ -static void disas_cond_select(DisasContext *s, uint32_t insn) +TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) +TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) + +static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - unsigned int sf, else_inv, rm, cond, 
else_inc, rn, rd; - TCGv_i64 tcg_rd, zero; - DisasCompare64 c; + assert(shift >= 0 && shift <= 64); + tcg_gen_sari_i64(dst, src, MIN(shift, 63)); +} - if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { - /* S == 1 or op2<1> == 1 */ - unallocated_encoding(s); - return; +static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) +{ + assert(shift >= 0 && shift <= 64); + if (shift == 64) { + tcg_gen_movi_i64(dst, 0); + } else { + tcg_gen_shri_i64(dst, src, shift); } - sf = extract32(insn, 31, 1); - else_inv = extract32(insn, 30, 1); - rm = extract32(insn, 16, 5); - cond = extract32(insn, 12, 4); - else_inc = extract32(insn, 10, 1); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); +} - tcg_rd = cpu_reg(s, rd); +static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) +{ + gen_sshr_d(src, src, shift); + tcg_gen_add_i64(dst, dst, src); +} - a64_test_cc(&c, cond); - zero = tcg_constant_i64(0); +static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) +{ + gen_ushr_d(src, src, shift); + tcg_gen_add_i64(dst, dst, src); +} - if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { - /* CSET & CSETM. */ - if (else_inv) { - tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), - tcg_rd, c.value, zero); - } else { - tcg_gen_setcond_i64(tcg_invert_cond(c.cond), - tcg_rd, c.value, zero); - } +static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) +{ + assert(shift >= 0 && shift <= 32); + if (shift) { + TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); + tcg_gen_add_i64(dst, src, rnd); + tcg_gen_sari_i64(dst, dst, shift); } else { - TCGv_i64 t_true = cpu_reg(s, rn); - TCGv_i64 t_false = read_cpu_reg(s, rm, 1); - if (else_inv && else_inc) { - tcg_gen_neg_i64(t_false, t_false); - } else if (else_inv) { - tcg_gen_not_i64(t_false, t_false); - } else if (else_inc) { - tcg_gen_addi_i64(t_false, t_false, 1); - } - tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); - } - - if (!sf) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + tcg_gen_mov_i64(dst, src); } } -static void handle_clz(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - TCGv_i64 tcg_rd, tcg_rn; - tcg_rd = cpu_reg(s, rd); - tcg_rn = cpu_reg(s, rn); - - if (sf) { - tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); + assert(shift >= 0 && shift <= 32); + if (shift) { + TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); + tcg_gen_add_i64(dst, src, rnd); + tcg_gen_shri_i64(dst, dst, shift); } else { - TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); - tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); - tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + tcg_gen_mov_i64(dst, src); } } -static void handle_cls(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - TCGv_i64 tcg_rd, tcg_rn; - tcg_rd = cpu_reg(s, rd); - tcg_rn = cpu_reg(s, rn); - - if (sf) { - tcg_gen_clrsb_i64(tcg_rd, tcg_rn); + assert(shift >= 0 && shift <= 64); + if (shift == 0) { + tcg_gen_mov_i64(dst, src); + } else if (shift == 64) { + /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. 
*/ + tcg_gen_movi_i64(dst, 0); } else { - TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); - tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); - tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + TCGv_i64 rnd = tcg_temp_new_i64(); + tcg_gen_extract_i64(rnd, src, shift - 1, 1); + tcg_gen_sari_i64(dst, src, shift); + tcg_gen_add_i64(dst, dst, rnd); } } -static void handle_rbit(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - TCGv_i64 tcg_rd, tcg_rn; - tcg_rd = cpu_reg(s, rd); - tcg_rn = cpu_reg(s, rn); - - if (sf) { - gen_helper_rbit64(tcg_rd, tcg_rn); + assert(shift >= 0 && shift <= 64); + if (shift == 0) { + tcg_gen_mov_i64(dst, src); + } else if (shift == 64) { + /* Rounding will propagate bit 63 into bit 64. */ + tcg_gen_shri_i64(dst, src, 63); } else { - TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); - gen_helper_rbit(tcg_tmp32, tcg_tmp32); - tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + TCGv_i64 rnd = tcg_temp_new_i64(); + tcg_gen_extract_i64(rnd, src, shift - 1, 1); + tcg_gen_shri_i64(dst, src, shift); + tcg_gen_add_i64(dst, dst, rnd); } } -/* REV with sf==1, opcode==3 ("REV64") */ -static void handle_rev64(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - if (!sf) { - unallocated_encoding(s); - return; - } - tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); + gen_srshr_d(src, src, shift); + tcg_gen_add_i64(dst, dst, src); } -/* REV with sf==0, opcode==2 - * REV32 (sf==1, opcode==2) - */ -static void handle_rev32(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_rn = cpu_reg(s, rn); + gen_urshr_d(src, src, shift); + tcg_gen_add_i64(dst, dst, src); +} - if (sf) { - tcg_gen_bswap64_i64(tcg_rd, tcg_rn); - tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); - } else { - tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); +static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) +{ + /* If shift is 64, dst is unchanged. */ + if (shift != 64) { + tcg_gen_shri_i64(src, src, shift); + tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); } } -/* REV16 (opcode==1) */ -static void handle_rev16(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) { - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); - - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); - tcg_gen_and_i64(tcg_rd, tcg_rn, mask); - tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); - tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); - tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); + tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); } -/* Data-processing (1 source) - * 31 30 29 28 21 20 16 15 10 9 5 4 0 - * +----+---+---+-----------------+---------+--------+------+------+ - * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | - * +----+---+---+-----------------+---------+--------+------+------+ - */ -static void disas_data_proc_1src(DisasContext *s, uint32_t insn) +static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, + WideShiftImmFn * const fns[3], MemOp sign) { - unsigned int sf, opcode, opcode2, rn, rd; - TCGv_i64 tcg_rd; + TCGv_i64 tcg_rn, tcg_rd; + int esz = a->esz; + int esize; + WideShiftImmFn *fn; - if (extract32(insn, 29, 1)) { - unallocated_encoding(s); - return; - } + tcg_debug_assert(esz >= MO_8 && esz <= MO_32); - sf = extract32(insn, 31, 1); - opcode = extract32(insn, 10, 6); - opcode2 = extract32(insn, 16, 5); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); + if (!fp_access_check(s)) { + return true; + } -#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_gen_movi_i64(tcg_rd, 0); - switch (MAP(sf, opcode2, opcode)) { - case MAP(0, 0x00, 0x00): /* RBIT */ - case MAP(1, 0x00, 0x00): - handle_rbit(s, sf, rn, rd); - break; - case MAP(0, 0x00, 0x01): /* REV16 */ - case MAP(1, 0x00, 0x01): - handle_rev16(s, sf, rn, rd); - break; - case MAP(0, 0x00, 0x02): /* REV/REV32 */ - case MAP(1, 0x00, 0x02): - handle_rev32(s, sf, rn, rd); - break; - case MAP(1, 0x00, 0x03): /* REV64 */ - handle_rev64(s, sf, rn, rd); - break; - case MAP(0, 0x00, 0x04): /* CLZ */ - case MAP(1, 0x00, 0x04): - handle_clz(s, sf, rn, rd); - break; - case MAP(0, 0x00, 0x05): /* CLS */ - case MAP(1, 0x00, 0x05): - handle_cls(s, sf, rn, rd); - break; - case MAP(1, 0x01, 0x00): /* PACIA */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x01): /* PACIB */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x02): /* PACDA */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x03): /* PACDB */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x04): /* AUTIA */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x05): /* AUTIB */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x06): /* AUTDA */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, 
rd); - gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x07): /* AUTDB */ - if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); - } else if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - break; - case MAP(1, 0x01, 0x08): /* PACIZA */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x09): /* PACIZB */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0a): /* PACDZA */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0b): /* PACDZB */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0c): /* AUTIZA */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0d): /* AUTIZB */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0e): /* AUTDZA */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x0f): /* AUTDZB */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); - } - break; - case MAP(1, 0x01, 0x10): /* XPACI */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); - } - break; - case MAP(1, 0x01, 0x11): /* XPACD */ - if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { - goto do_unallocated; - } else if (s->pauth_active) { - tcg_rd = cpu_reg(s, rd); - gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); - } - break; - default: - do_unallocated: - unallocated_encoding(s); - break; + fn = fns[esz]; + esize = 8 << esz; + for (int i = 0, elements = 8 >> esz; i < elements; i++) { + read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); + fn(tcg_rn, tcg_rn, a->imm); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); } -#undef MAP + write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); + clear_vec_high(s, a->q, a->rd); + return true; } -static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, - unsigned int rm, unsigned int rn, unsigned int rd) +static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) { - TCGv_i64 tcg_n, tcg_m, tcg_rd; - tcg_rd = 
cpu_reg(s, rd); - - if (!sf && is_signed) { - tcg_n = tcg_temp_new_i64(); - tcg_m = tcg_temp_new_i64(); - tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); - tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); - } else { - tcg_n = read_cpu_reg(s, rn, sf); - tcg_m = read_cpu_reg(s, rm, sf); - } - - if (is_signed) { - gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); - } else { - gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); - } + tcg_gen_sari_i64(d, s, i); + tcg_gen_ext16u_i64(d, d); + gen_helper_neon_narrow_sat_s8(d, tcg_env, d); +} - if (!sf) { /* zero extend final result */ - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); - } +static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + tcg_gen_sari_i64(d, s, i); + tcg_gen_ext32u_i64(d, d); + gen_helper_neon_narrow_sat_s16(d, tcg_env, d); } -/* LSLV, LSRV, ASRV, RORV */ -static void handle_shift_reg(DisasContext *s, - enum a64_shift_type shift_type, unsigned int sf, - unsigned int rm, unsigned int rn, unsigned int rd) +static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) { - TCGv_i64 tcg_shift = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); + gen_sshr_d(d, s, i); + gen_helper_neon_narrow_sat_s32(d, tcg_env, d); +} - tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); - shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); +static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + tcg_gen_shri_i64(d, s, i); + gen_helper_neon_narrow_sat_u8(d, tcg_env, d); } -/* CRC32[BHWX], CRC32C[BHWX] */ -static void handle_crc32(DisasContext *s, - unsigned int sf, unsigned int sz, bool crc32c, - unsigned int rm, unsigned int rn, unsigned int rd) +static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) { - TCGv_i64 tcg_acc, tcg_val; - TCGv_i32 tcg_bytes; + tcg_gen_shri_i64(d, s, i); + gen_helper_neon_narrow_sat_u16(d, tcg_env, d); +} - if (!dc_isar_feature(aa64_crc32, s) - || (sf == 1 && sz != 3) - || (sf == 0 && sz == 3)) { - unallocated_encoding(s); - return; - } +static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_ushr_d(d, s, i); + gen_helper_neon_narrow_sat_u32(d, tcg_env, d); +} - if (sz == 3) { - tcg_val = cpu_reg(s, rm); - } else { - uint64_t mask; - switch (sz) { - case 0: - mask = 0xFF; - break; - case 1: - mask = 0xFFFF; - break; - case 2: - mask = 0xFFFFFFFF; - break; - default: - g_assert_not_reached(); - } - tcg_val = tcg_temp_new_i64(); - tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); - } +static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + tcg_gen_sari_i64(d, s, i); + tcg_gen_ext16u_i64(d, d); + gen_helper_neon_unarrow_sat8(d, tcg_env, d); +} - tcg_acc = cpu_reg(s, rn); - tcg_bytes = tcg_constant_i32(1 << sz); +static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + tcg_gen_sari_i64(d, s, i); + tcg_gen_ext32u_i64(d, d); + gen_helper_neon_unarrow_sat16(d, tcg_env, d); +} - if (crc32c) { - gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); - } else { - gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); - } +static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_sshr_d(d, s, i); + gen_helper_neon_unarrow_sat32(d, tcg_env, d); } -/* Data-processing (2 source) - * 31 30 29 28 21 20 16 15 10 9 5 4 0 - * +----+---+---+-----------------+------+--------+------+------+ - * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | - * +----+---+---+-----------------+------+--------+------+------+ - */ -static void disas_data_proc_2src(DisasContext *s, uint32_t insn) +static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, 
int64_t i) { - unsigned int sf, rm, opcode, rn, rd, setflag; - sf = extract32(insn, 31, 1); - setflag = extract32(insn, 29, 1); - rm = extract32(insn, 16, 5); - opcode = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); + gen_srshr_bhs(d, s, i); + tcg_gen_ext16u_i64(d, d); + gen_helper_neon_narrow_sat_s8(d, tcg_env, d); +} - if (setflag && opcode != 0) { - unallocated_encoding(s); - return; - } +static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_srshr_bhs(d, s, i); + tcg_gen_ext32u_i64(d, d); + gen_helper_neon_narrow_sat_s16(d, tcg_env, d); +} - switch (opcode) { - case 0: /* SUBP(S) */ - if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { - goto do_unallocated; - } else { - TCGv_i64 tcg_n, tcg_m, tcg_d; +static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_srshr_d(d, s, i); + gen_helper_neon_narrow_sat_s32(d, tcg_env, d); +} - tcg_n = read_cpu_reg_sp(s, rn, true); - tcg_m = read_cpu_reg_sp(s, rm, true); - tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); - tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); - tcg_d = cpu_reg(s, rd); +static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_urshr_bhs(d, s, i); + gen_helper_neon_narrow_sat_u8(d, tcg_env, d); +} - if (setflag) { - gen_sub_CC(true, tcg_d, tcg_n, tcg_m); - } else { - tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); - } - } - break; - case 2: /* UDIV */ - handle_div(s, false, sf, rm, rn, rd); - break; - case 3: /* SDIV */ - handle_div(s, true, sf, rm, rn, rd); - break; - case 4: /* IRG */ - if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { - goto do_unallocated; - } - if (s->ata[0]) { - gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, - cpu_reg_sp(s, rn), cpu_reg(s, rm)); - } else { - gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), - cpu_reg_sp(s, rn)); - } - break; - case 5: /* GMI */ - if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { - goto do_unallocated; - } else { - TCGv_i64 t = tcg_temp_new_i64(); +static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_urshr_bhs(d, s, i); + gen_helper_neon_narrow_sat_u16(d, tcg_env, d); +} - tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); - tcg_gen_shl_i64(t, tcg_constant_i64(1), t); - tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); - } - break; - case 8: /* LSLV */ - handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); - break; - case 9: /* LSRV */ - handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); - break; - case 10: /* ASRV */ - handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); - break; - case 11: /* RORV */ - handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); - break; - case 12: /* PACGA */ - if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - gen_helper_pacga(cpu_reg(s, rd), tcg_env, - cpu_reg(s, rn), cpu_reg_sp(s, rm)); - break; - case 16: - case 17: - case 18: - case 19: - case 20: - case 21: - case 22: - case 23: /* CRC32 */ - { - int sz = extract32(opcode, 0, 2); - bool crc32c = extract32(opcode, 2, 1); - handle_crc32(s, sf, sz, crc32c, rm, rn, rd); - break; - } - default: - do_unallocated: - unallocated_encoding(s); - break; - } +static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_urshr_d(d, s, i); + gen_helper_neon_narrow_sat_u32(d, tcg_env, d); } -/* - * Data processing - register - * 31 30 29 28 25 21 20 16 10 0 - * +--+---+--+---+-------+-----+-------+-------+---------+ - * | |op0| |op1| 1 0 1 | op2 | | op3 | | - * +--+---+--+---+-------+-----+-------+-------+---------+ - */ -static void 
disas_data_proc_reg(DisasContext *s, uint32_t insn) -{ - int op0 = extract32(insn, 30, 1); - int op1 = extract32(insn, 28, 1); - int op2 = extract32(insn, 21, 4); - int op3 = extract32(insn, 10, 6); - - if (!op1) { - if (op2 & 8) { - if (op2 & 1) { - /* Add/sub (extended register) */ - disas_add_sub_ext_reg(s, insn); - } else { - /* Add/sub (shifted register) */ - disas_add_sub_reg(s, insn); - } - } else { - /* Logical (shifted register) */ - disas_logic_reg(s, insn); - } - return; - } +static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_srshr_bhs(d, s, i); + tcg_gen_ext16u_i64(d, d); + gen_helper_neon_unarrow_sat8(d, tcg_env, d); +} - switch (op2) { - case 0x0: - switch (op3) { - case 0x00: /* Add/subtract (with carry) */ - disas_adc_sbc(s, insn); - break; +static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_srshr_bhs(d, s, i); + tcg_gen_ext32u_i64(d, d); + gen_helper_neon_unarrow_sat16(d, tcg_env, d); +} - case 0x01: /* Rotate right into flags */ - case 0x21: - disas_rotate_right_into_flags(s, insn); - break; +static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_srshr_d(d, s, i); + gen_helper_neon_unarrow_sat32(d, tcg_env, d); +} - case 0x02: /* Evaluate into flags */ - case 0x12: - case 0x22: - case 0x32: - disas_evaluate_into_flags(s, insn); - break; +static WideShiftImmFn * const shrn_fns[] = { + tcg_gen_shri_i64, + tcg_gen_shri_i64, + gen_ushr_d, +}; +TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) - default: - goto do_unallocated; - } - break; +static WideShiftImmFn * const rshrn_fns[] = { + gen_urshr_bhs, + gen_urshr_bhs, + gen_urshr_d, +}; +TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) - case 0x2: /* Conditional compare */ - disas_cc(s, insn); /* both imm and reg forms */ - break; +static WideShiftImmFn * const sqshrn_fns[] = { + gen_sqshrn_b, + gen_sqshrn_h, + gen_sqshrn_s, +}; +TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) - case 0x4: /* Conditional select */ - disas_cond_select(s, insn); - break; +static WideShiftImmFn * const uqshrn_fns[] = { + gen_uqshrn_b, + gen_uqshrn_h, + gen_uqshrn_s, +}; +TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) - case 0x6: /* Data-processing */ - if (op0) { /* (1 source) */ - disas_data_proc_1src(s, insn); - } else { /* (2 source) */ - disas_data_proc_2src(s, insn); - } - break; - case 0x8 ... 0xf: /* (3 source) */ - disas_data_proc_3src(s, insn); - break; +static WideShiftImmFn * const sqshrun_fns[] = { + gen_sqshrun_b, + gen_sqshrun_h, + gen_sqshrun_s, +}; +TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) - default: - do_unallocated: - unallocated_encoding(s); - break; - } -} +static WideShiftImmFn * const sqrshrn_fns[] = { + gen_sqrshrn_b, + gen_sqrshrn_h, + gen_sqrshrn_s, +}; +TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) -static void handle_fp_compare(DisasContext *s, int size, - unsigned int rn, unsigned int rm, - bool cmp_with_zero, bool signal_all_nans) -{ - TCGv_i64 tcg_flags = tcg_temp_new_i64(); - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); +static WideShiftImmFn * const uqrshrn_fns[] = { + gen_uqrshrn_b, + gen_uqrshrn_h, + gen_uqrshrn_s, +}; +TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) - if (size == MO_64) { - TCGv_i64 tcg_vn, tcg_vm; +static WideShiftImmFn * const sqrshrun_fns[] = { + gen_sqrshrun_b, + gen_sqrshrun_h, + gen_sqrshrun_s, +}; +TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) - tcg_vn = read_fp_dreg(s, rn); - if (cmp_with_zero) { - tcg_vm = tcg_constant_i64(0); - } else { - tcg_vm = read_fp_dreg(s, rm); - } - if (signal_all_nans) { - gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } else { - gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } - } else { - TCGv_i32 tcg_vn = tcg_temp_new_i32(); - TCGv_i32 tcg_vm = tcg_temp_new_i32(); +/* + * Advanced SIMD Scalar Shift by Immediate + */ - read_vec_element_i32(s, tcg_vn, rn, 0, size); - if (cmp_with_zero) { - tcg_gen_movi_i32(tcg_vm, 0); - } else { - read_vec_element_i32(s, tcg_vm, rm, 0, size); - } +static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, + WideShiftImmFn *fn, bool accumulate, + MemOp sign) +{ + if (fp_access_check(s)) { + TCGv_i64 rd = tcg_temp_new_i64(); + TCGv_i64 rn = tcg_temp_new_i64(); - switch (size) { - case MO_32: - if (signal_all_nans) { - gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } else { - gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } - break; - case MO_16: - if (signal_all_nans) { - gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } else { - gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); - } - break; - default: - g_assert_not_reached(); + read_vec_element(s, rn, a->rn, 0, a->esz | sign); + if (accumulate) { + read_vec_element(s, rd, a->rd, 0, a->esz | sign); } + fn(rd, rn, a->imm); + write_fp_dreg(s, a->rd, rd); } - - gen_set_nzcv(tcg_flags); + return true; } -/* Floating point compare - * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | - * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ - */ -static void disas_fp_compare(DisasContext *s, uint32_t insn) -{ - unsigned int mos, type, rm, op, rn, opc, op2r; - int size; +TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) +TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) +TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) +TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) +TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) +TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) +TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) +TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) +TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) - mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); - rm = extract32(insn, 16, 5); - op = extract32(insn, 14, 2); - rn = extract32(insn, 5, 5); - opc = extract32(insn, 3, 2); - op2r = extract32(insn, 0, 3); +TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0) +TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) - if (mos || op || op2r) { - unallocated_encoding(s); - return; - } +static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, + NeonGenTwoOpEnvFn *fn) +{ + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t, s); + fn(t, tcg_env, t, tcg_constant_i32(i)); + tcg_gen_extu_i32_i64(d, t); +} - switch (type) { - 
case 0: - size = MO_32; - break; - case 1: - size = MO_64; - break; - case 3: - size = MO_16; - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ - default: - unallocated_encoding(s); - return; - } +static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); +} - if (!fp_access_check(s)) { - return; - } +static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); +} - handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); +static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); } -/* Floating point conditional compare - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 - * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | - * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ - */ -static void disas_fp_ccomp(DisasContext *s, uint32_t insn) +static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) { - unsigned int mos, type, rm, cond, rn, op, nzcv; - TCGLabel *label_continue = NULL; - int size; + gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); +} - mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); - rm = extract32(insn, 16, 5); - cond = extract32(insn, 12, 4); - rn = extract32(insn, 5, 5); - op = extract32(insn, 4, 1); - nzcv = extract32(insn, 0, 4); +static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); +} - if (mos) { - unallocated_encoding(s); - return; - } +static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); +} - switch (type) { - case 0: - size = MO_32; - break; - case 1: - size = MO_64; - break; - case 3: - size = MO_16; - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ - default: - unallocated_encoding(s); - return; - } +static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); +} - if (!fp_access_check(s)) { - return; - } +static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); +} - if (cond < 0x0e) { /* not always */ - TCGLabel *label_match = gen_new_label(); - label_continue = gen_new_label(); - arm_gen_test_cc(cond, label_match); - /* nomatch: */ - gen_set_nzcv(tcg_constant_i64(nzcv << 28)); - tcg_gen_br(label_continue); - gen_set_label(label_match); - } +static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); +} - handle_fp_compare(s, size, rn, rm, false, op); +static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); +} - if (cond < 0x0e) { - gen_set_label(label_continue); - } +static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) +{ + trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); } -/* Floating-point data-processing (1 source) - half precision */ -static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) +static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) { - TCGv_ptr fpst = NULL; - TCGv_i32 tcg_op = read_fp_hreg(s, rn); - TCGv_i32 tcg_res = tcg_temp_new_i32(); + gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); +} - switch (opcode) { - case 0x0: /* FMOV */ - 
tcg_gen_mov_i32(tcg_res, tcg_op); - break; - case 0x1: /* FABS */ - gen_vfp_absh(tcg_res, tcg_op); - break; - case 0x2: /* FNEG */ - gen_vfp_negh(tcg_res, tcg_op); - break; - case 0x3: /* FSQRT */ - fpst = fpstatus_ptr(FPST_FPCR_F16); - gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); - break; - case 0x8: /* FRINTN */ - case 0x9: /* FRINTP */ - case 0xa: /* FRINTM */ - case 0xb: /* FRINTZ */ - case 0xc: /* FRINTA */ - { - TCGv_i32 tcg_rmode; +static WideShiftImmFn * const f_scalar_sqshli[] = { + gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d +}; - fpst = fpstatus_ptr(FPST_FPCR_F16); - tcg_rmode = gen_set_rmode(opcode & 7, fpst); - gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); - gen_restore_rmode(tcg_rmode, fpst); - break; - } - case 0xe: /* FRINTX */ - fpst = fpstatus_ptr(FPST_FPCR_F16); - gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); - break; - case 0xf: /* FRINTI */ - fpst = fpstatus_ptr(FPST_FPCR_F16); - gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); - break; - default: - g_assert_not_reached(); - } +static WideShiftImmFn * const f_scalar_uqshli[] = { + gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d +}; - write_fp_sreg(s, rd, tcg_res); -} +static WideShiftImmFn * const f_scalar_sqshlui[] = { + gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d +}; -/* Floating-point data-processing (1 source) - single precision */ -static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) -{ - void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); - TCGv_i32 tcg_op, tcg_res; - TCGv_ptr fpst; - int rmode = -1; - - tcg_op = read_fp_sreg(s, rn); - tcg_res = tcg_temp_new_i32(); - - switch (opcode) { - case 0x0: /* FMOV */ - tcg_gen_mov_i32(tcg_res, tcg_op); - goto done; - case 0x1: /* FABS */ - gen_vfp_abss(tcg_res, tcg_op); - goto done; - case 0x2: /* FNEG */ - gen_vfp_negs(tcg_res, tcg_op); - goto done; - case 0x3: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); - goto done; - case 0x6: /* BFCVT */ - gen_fpst = gen_helper_bfcvt; - break; - case 0x8: /* FRINTN */ - case 0x9: /* FRINTP */ - case 0xa: /* FRINTM */ - case 0xb: /* FRINTZ */ - case 0xc: /* FRINTA */ - rmode = opcode & 7; - gen_fpst = gen_helper_rints; - break; - case 0xe: /* FRINTX */ - gen_fpst = gen_helper_rints_exact; - break; - case 0xf: /* FRINTI */ - gen_fpst = gen_helper_rints; - break; - case 0x10: /* FRINT32Z */ - rmode = FPROUNDING_ZERO; - gen_fpst = gen_helper_frint32_s; - break; - case 0x11: /* FRINT32X */ - gen_fpst = gen_helper_frint32_s; - break; - case 0x12: /* FRINT64Z */ - rmode = FPROUNDING_ZERO; - gen_fpst = gen_helper_frint64_s; - break; - case 0x13: /* FRINT64X */ - gen_fpst = gen_helper_frint64_s; - break; - default: - g_assert_not_reached(); - } +/* Note that the helpers sign-extend their inputs, so don't do it here. 
*/ +TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) +TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) +TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) - fpst = fpstatus_ptr(FPST_FPCR); - if (rmode >= 0) { - TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); - gen_fpst(tcg_res, tcg_op, fpst); - gen_restore_rmode(tcg_rmode, fpst); - } else { - gen_fpst(tcg_res, tcg_op, fpst); - } +static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, + WideShiftImmFn * const fns[3], + MemOp sign, bool zext) +{ + MemOp esz = a->esz; - done: - write_fp_sreg(s, rd, tcg_res); -} + tcg_debug_assert(esz >= MO_8 && esz <= MO_32); -/* Floating-point data-processing (1 source) - double precision */ -static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) -{ - void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); - TCGv_i64 tcg_op, tcg_res; - TCGv_ptr fpst; - int rmode = -1; + if (fp_access_check(s)) { + TCGv_i64 rd = tcg_temp_new_i64(); + TCGv_i64 rn = tcg_temp_new_i64(); - switch (opcode) { - case 0x0: /* FMOV */ - gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); - return; + read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); + fns[esz](rd, rn, a->imm); + if (zext) { + tcg_gen_ext_i64(rd, rd, esz); + } + write_fp_dreg(s, a->rd, rd); } + return true; +} - tcg_op = read_fp_dreg(s, rn); - tcg_res = tcg_temp_new_i64(); +TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) +TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) +TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) +TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) +TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) +TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) - switch (opcode) { - case 0x1: /* FABS */ - gen_vfp_absd(tcg_res, tcg_op); - goto done; - case 0x2: /* FNEG */ - gen_vfp_negd(tcg_res, tcg_op); - goto done; - case 0x3: /* FSQRT */ - gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); - goto done; - case 0x8: /* FRINTN */ - case 0x9: /* FRINTP */ - case 0xa: /* FRINTM */ - case 0xb: /* FRINTZ */ - case 0xc: /* FRINTA */ - rmode = opcode & 7; - gen_fpst = gen_helper_rintd; - break; - case 0xe: /* FRINTX */ - gen_fpst = gen_helper_rintd_exact; - break; - case 0xf: /* FRINTI */ - gen_fpst = gen_helper_rintd; - break; - case 0x10: /* FRINT32Z */ - rmode = FPROUNDING_ZERO; - gen_fpst = gen_helper_frint32_d; - break; - case 0x11: /* FRINT32X */ - gen_fpst = gen_helper_frint32_d; - break; - case 0x12: /* FRINT64Z */ - rmode = FPROUNDING_ZERO; - gen_fpst = gen_helper_frint64_d; - break; - case 0x13: /* FRINT64X */ - gen_fpst = gen_helper_frint64_d; - break; - default: - g_assert_not_reached(); +static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) +{ + TCGv_i64 tcg_n, tcg_m, tcg_rd; + tcg_rd = cpu_reg(s, a->rd); + + if (!a->sf && is_signed) { + tcg_n = tcg_temp_new_i64(); + tcg_m = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); + tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); + } else { + tcg_n = read_cpu_reg(s, a->rn, a->sf); + tcg_m = read_cpu_reg(s, a->rm, a->sf); } - fpst = fpstatus_ptr(FPST_FPCR); - if (rmode >= 0) { - TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); - gen_fpst(tcg_res, tcg_op, fpst); - gen_restore_rmode(tcg_rmode, fpst); + if (is_signed) { + gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); } else { - gen_fpst(tcg_res, tcg_op, fpst); + 
gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); } - done: - write_fp_dreg(s, rd, tcg_res); + if (!a->sf) { /* zero extend final result */ + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + return true; } -static void handle_fp_fcvt(DisasContext *s, int opcode, - int rd, int rn, int dtype, int ntype) -{ - switch (ntype) { - case 0x0: - { - TCGv_i32 tcg_rn = read_fp_sreg(s, rn); - if (dtype == 1) { - /* Single to double */ - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); - write_fp_dreg(s, rd, tcg_rd); - } else { - /* Single to half */ - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - TCGv_i32 ahp = get_ahp_flag(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); +TRANS(SDIV, do_div, a, true) +TRANS(UDIV, do_div, a, false) - gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ - write_fp_sreg(s, rd, tcg_rd); - } +/* Shift a TCGv src by TCGv shift_amount, put result in dst. + * Note that it is the caller's responsibility to ensure that the + * shift amount is in range (ie 0..31 or 0..63) and provide the ARM + * mandated semantics for out of range shifts. + */ +static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, + enum a64_shift_type shift_type, TCGv_i64 shift_amount) +{ + switch (shift_type) { + case A64_SHIFT_TYPE_LSL: + tcg_gen_shl_i64(dst, src, shift_amount); break; - } - case 0x1: - { - TCGv_i64 tcg_rn = read_fp_dreg(s, rn); - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - if (dtype == 0) { - /* Double to single */ - gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); - } else { - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - TCGv_i32 ahp = get_ahp_flag(); - /* Double to half */ - gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ + case A64_SHIFT_TYPE_LSR: + tcg_gen_shr_i64(dst, src, shift_amount); + break; + case A64_SHIFT_TYPE_ASR: + if (!sf) { + tcg_gen_ext32s_i64(dst, src); } - write_fp_sreg(s, rd, tcg_rd); + tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); break; - } - case 0x3: - { - TCGv_i32 tcg_rn = read_fp_sreg(s, rn); - TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); - TCGv_i32 tcg_ahp = get_ahp_flag(); - tcg_gen_ext16u_i32(tcg_rn, tcg_rn); - if (dtype == 0) { - /* Half to single */ - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); - write_fp_sreg(s, rd, tcg_rd); + case A64_SHIFT_TYPE_ROR: + if (sf) { + tcg_gen_rotr_i64(dst, src, shift_amount); } else { - /* Half to double */ - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); - write_fp_dreg(s, rd, tcg_rd); + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t0, src); + tcg_gen_extrl_i64_i32(t1, shift_amount); + tcg_gen_rotr_i32(t0, t0, t1); + tcg_gen_extu_i32_i64(dst, t0); } break; - } default: - g_assert_not_reached(); + assert(FALSE); /* all shift types should be handled */ + break; + } + + if (!sf) { /* zero extend final result */ + tcg_gen_ext32u_i64(dst, dst); } } -/* Floating point data-processing (1 source) - * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 - * +---+---+---+-----------+------+---+--------+-----------+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | - * +---+---+---+-----------+------+---+--------+-----------+------+------+ +/* Shift a TCGv src by immediate, put result in dst. 
+ * The shift amount must be in range (this should always be true as the + * relevant instructions will UNDEF on bad shift immediates). */ -static void disas_fp_1src(DisasContext *s, uint32_t insn) +static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, + enum a64_shift_type shift_type, unsigned int shift_i) { - int mos = extract32(insn, 29, 3); - int type = extract32(insn, 22, 2); - int opcode = extract32(insn, 15, 6); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - if (mos) { - goto do_unallocated; - } - - switch (opcode) { - case 0x4: case 0x5: case 0x7: - { - /* FCVT between half, single and double precision */ - int dtype = extract32(opcode, 0, 2); - if (type == 2 || dtype == type) { - goto do_unallocated; - } - if (!fp_access_check(s)) { - return; - } + assert(shift_i < (sf ? 64 : 32)); - handle_fp_fcvt(s, opcode, rd, rn, dtype, type); - break; + if (shift_i == 0) { + tcg_gen_mov_i64(dst, src); + } else { + shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); } +} - case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ - if (type > 1 || !dc_isar_feature(aa64_frint, s)) { - goto do_unallocated; - } - /* fall through */ - case 0x0 ... 0x3: - case 0x8 ... 0xc: - case 0xe ... 0xf: - /* 32-to-32 and 64-to-64 ops */ - switch (type) { - case 0: - if (!fp_access_check(s)) { - return; - } - handle_fp_1src_single(s, opcode, rd, rn); - break; - case 1: - if (!fp_access_check(s)) { - return; - } - handle_fp_1src_double(s, opcode, rd, rn); - break; - case 3: - if (!dc_isar_feature(aa64_fp16, s)) { - goto do_unallocated; - } - - if (!fp_access_check(s)) { - return; - } - handle_fp_1src_half(s, opcode, rd, rn); - break; - default: - goto do_unallocated; - } - break; - - case 0x6: - switch (type) { - case 1: /* BFCVT */ - if (!dc_isar_feature(aa64_bf16, s)) { - goto do_unallocated; - } - if (!fp_access_check(s)) { - return; - } - handle_fp_1src_single(s, opcode, rd, rn); - break; - default: - goto do_unallocated; - } - break; +static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, + enum a64_shift_type shift_type) +{ + TCGv_i64 tcg_shift = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); - default: - do_unallocated: - unallocated_encoding(s); - break; - } + tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 
63 : 31); + shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); + return true; } -/* Floating point immediate - * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 - * +---+---+---+-----------+------+---+------------+-------+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | - * +---+---+---+-----------+------+---+------------+-------+------+------+ - */ -static void disas_fp_imm(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int imm5 = extract32(insn, 5, 5); - int imm8 = extract32(insn, 13, 8); - int type = extract32(insn, 22, 2); - int mos = extract32(insn, 29, 3); - uint64_t imm; - MemOp sz; +TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) +TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) +TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) +TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) - if (mos || imm5) { - unallocated_encoding(s); - return; - } +static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) +{ + TCGv_i64 tcg_acc, tcg_val, tcg_rd; + TCGv_i32 tcg_bytes; - switch (type) { - case 0: - sz = MO_32; + switch (a->esz) { + case MO_8: + case MO_16: + case MO_32: + tcg_val = tcg_temp_new_i64(); + tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); break; - case 1: - sz = MO_64; + case MO_64: + tcg_val = cpu_reg(s, a->rm); break; - case 3: - sz = MO_16; - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ default: - unallocated_encoding(s); - return; + g_assert_not_reached(); } + tcg_acc = cpu_reg(s, a->rn); + tcg_bytes = tcg_constant_i32(1 << a->esz); + tcg_rd = cpu_reg(s, a->rd); - if (!fp_access_check(s)) { - return; + if (crc32c) { + gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); + } else { + gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); } - - imm = vfp_expand_imm(sz, imm8); - write_fp_dreg(s, rd, tcg_constant_i64(imm)); + return true; } -/* Handle floating point <=> fixed point conversions. Note that we can - * also deal with fp <=> integer conversions as a special case (scale == 64) - * OPTME: consider handling that special case specially or at least skipping - * the call to scalbn in the helpers for zero shifts. - */ -static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, - bool itof, int rmode, int scale, int sf, int type) -{ - bool is_signed = !(opcode & 1); - TCGv_ptr tcg_fpstatus; - TCGv_i32 tcg_shift, tcg_single; - TCGv_i64 tcg_double; +TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) +TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) - tcg_fpstatus = fpstatus_ptr(type == 3 ? 
FPST_FPCR_F16 : FPST_FPCR); - - tcg_shift = tcg_constant_i32(64 - scale); - - if (itof) { - TCGv_i64 tcg_int = cpu_reg(s, rn); - if (!sf) { - TCGv_i64 tcg_extend = tcg_temp_new_i64(); - - if (is_signed) { - tcg_gen_ext32s_i64(tcg_extend, tcg_int); - } else { - tcg_gen_ext32u_i64(tcg_extend, tcg_int); - } - - tcg_int = tcg_extend; - } - - switch (type) { - case 1: /* float64 */ - tcg_double = tcg_temp_new_i64(); - if (is_signed) { - gen_helper_vfp_sqtod(tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_uqtod(tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); - } - write_fp_dreg(s, rd, tcg_double); - break; - - case 0: /* float32 */ - tcg_single = tcg_temp_new_i32(); - if (is_signed) { - gen_helper_vfp_sqtos(tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_uqtos(tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); - } - write_fp_sreg(s, rd, tcg_single); - break; +static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) +{ + TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); + TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); + TCGv_i64 tcg_d = cpu_reg(s, a->rd); - case 3: /* float16 */ - tcg_single = tcg_temp_new_i32(); - if (is_signed) { - gen_helper_vfp_sqtoh(tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_uqtoh(tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); - } - write_fp_sreg(s, rd, tcg_single); - break; + tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); - default: - g_assert_not_reached(); - } + if (setflag) { + gen_sub_CC(true, tcg_d, tcg_n, tcg_m); } else { - TCGv_i64 tcg_int = cpu_reg(s, rd); - TCGv_i32 tcg_rmode; - - if (extract32(opcode, 2, 1)) { - /* There are too many rounding modes to all fit into rmode, - * so FCVTA[US] is a special case. 
- */ - rmode = FPROUNDING_TIEAWAY; - } - - tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); - - switch (type) { - case 1: /* float64 */ - tcg_double = read_fp_dreg(s, rn); - if (is_signed) { - if (!sf) { - gen_helper_vfp_tosld(tcg_int, tcg_double, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_tosqd(tcg_int, tcg_double, - tcg_shift, tcg_fpstatus); - } - } else { - if (!sf) { - gen_helper_vfp_tould(tcg_int, tcg_double, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_touqd(tcg_int, tcg_double, - tcg_shift, tcg_fpstatus); - } - } - if (!sf) { - tcg_gen_ext32u_i64(tcg_int, tcg_int); - } - break; + tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); + } + return true; +} - case 0: /* float32 */ - tcg_single = read_fp_sreg(s, rn); - if (sf) { - if (is_signed) { - gen_helper_vfp_tosqs(tcg_int, tcg_single, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_touqs(tcg_int, tcg_single, - tcg_shift, tcg_fpstatus); - } - } else { - TCGv_i32 tcg_dest = tcg_temp_new_i32(); - if (is_signed) { - gen_helper_vfp_tosls(tcg_dest, tcg_single, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_touls(tcg_dest, tcg_single, - tcg_shift, tcg_fpstatus); - } - tcg_gen_extu_i32_i64(tcg_int, tcg_dest); - } - break; +TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) +TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) - case 3: /* float16 */ - tcg_single = read_fp_sreg(s, rn); - if (sf) { - if (is_signed) { - gen_helper_vfp_tosqh(tcg_int, tcg_single, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_touqh(tcg_int, tcg_single, - tcg_shift, tcg_fpstatus); - } - } else { - TCGv_i32 tcg_dest = tcg_temp_new_i32(); - if (is_signed) { - gen_helper_vfp_toslh(tcg_dest, tcg_single, - tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_toulh(tcg_dest, tcg_single, - tcg_shift, tcg_fpstatus); - } - tcg_gen_extu_i32_i64(tcg_int, tcg_dest); - } - break; +static bool trans_IRG(DisasContext *s, arg_rrr *a) +{ + if (dc_isar_feature(aa64_mte_insn_reg, s)) { + TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); + TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); - default: - g_assert_not_reached(); + if (s->ata[0]) { + gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); + } else { + gen_address_with_allocation_tag0(tcg_rd, tcg_rn); } - - gen_restore_rmode(tcg_rmode, tcg_fpstatus); + return true; } + return false; } -/* Floating point <-> fixed point conversions - * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 - * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ - * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | - * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ - */ -static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int scale = extract32(insn, 10, 6); - int opcode = extract32(insn, 16, 3); - int rmode = extract32(insn, 19, 2); - int type = extract32(insn, 22, 2); - bool sbit = extract32(insn, 29, 1); - bool sf = extract32(insn, 31, 1); - bool itof; - - if (sbit || (!sf && scale < 32)) { - unallocated_encoding(s); - return; - } +static bool trans_GMI(DisasContext *s, arg_rrr *a) +{ + if (dc_isar_feature(aa64_mte_insn_reg, s)) { + TCGv_i64 t = tcg_temp_new_i64(); - switch (type) { - case 0: /* float32 */ - case 1: /* float64 */ - break; - case 3: /* float16 */ - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ - default: - unallocated_encoding(s); - return; + tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); + tcg_gen_shl_i64(t, 
tcg_constant_i64(1), t); + tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); + return true; } + return false; +} - switch ((rmode << 3) | opcode) { - case 0x2: /* SCVTF */ - case 0x3: /* UCVTF */ - itof = true; - break; - case 0x18: /* FCVTZS */ - case 0x19: /* FCVTZU */ - itof = false; - break; - default: - unallocated_encoding(s); - return; +static bool trans_PACGA(DisasContext *s, arg_rrr *a) +{ + if (dc_isar_feature(aa64_pauth, s)) { + gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, + cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); + return true; } + return false; +} - if (!fp_access_check(s)) { - return; - } +typedef void ArithOneOp(TCGv_i64, TCGv_i64); - handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); +static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) +{ + fn(cpu_reg(s, rd), cpu_reg(s, rn)); + return true; } -static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) +static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { - /* FMOV: gpr to or from float, double, or top half of quad fp reg, - * without conversion. - */ + TCGv_i32 t32 = tcg_temp_new_i32(); - if (itof) { - TCGv_i64 tcg_rn = cpu_reg(s, rn); - TCGv_i64 tmp; + tcg_gen_extrl_i64_i32(t32, tcg_rn); + gen_helper_rbit(t32, t32); + tcg_gen_extu_i32_i64(tcg_rd, t32); +} - switch (type) { - case 0: - /* 32 bit */ - tmp = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(tmp, tcg_rn); - write_fp_dreg(s, rd, tmp); - break; - case 1: - /* 64 bit */ - write_fp_dreg(s, rd, tcg_rn); - break; - case 2: - /* 64 bit to top half. */ - tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); - clear_vec_high(s, true, rd); - break; - case 3: - /* 16 bit */ - tmp = tcg_temp_new_i64(); - tcg_gen_ext16u_i64(tmp, tcg_rn); - write_fp_dreg(s, rd, tmp); - break; - default: - g_assert_not_reached(); - } - } else { - TCGv_i64 tcg_rd = cpu_reg(s, rd); +static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) +{ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - switch (type) { - case 0: - /* 32 bit */ - tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); - break; - case 1: - /* 64 bit */ - tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); - break; - case 2: - /* 64 bits from top half */ - tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); - break; - case 3: - /* 16 bit */ - tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); - break; - default: - g_assert_not_reached(); - } - } + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); + tcg_gen_and_i64(tcg_rd, tcg_rn, mask); + tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); + tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); } -static void handle_fjcvtzs(DisasContext *s, int rd, int rn) +static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { - TCGv_i64 t = read_fp_dreg(s, rn); - TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); + gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); +} - gen_helper_fjcvtzs(t, t, fpstatus); +static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); +} - tcg_gen_ext32u_i64(cpu_reg(s, rd), t); - tcg_gen_extrh_i64_i32(cpu_ZF, t); - tcg_gen_movi_i32(cpu_CF, 0); - tcg_gen_movi_i32(cpu_NF, 0); - tcg_gen_movi_i32(cpu_VF, 0); +static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); } -/* Floating point <-> integer conversions - * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 - * 
+----+---+---+-----------+------+---+-------+-----+-------------+----+----+ - * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | - * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ - */ -static void disas_fp_int_conv(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 16, 3); - int rmode = extract32(insn, 19, 2); - int type = extract32(insn, 22, 2); - bool sbit = extract32(insn, 29, 1); - bool sf = extract32(insn, 31, 1); - bool itof = false; - - if (sbit) { - goto do_unallocated; - } - - switch (opcode) { - case 2: /* SCVTF */ - case 3: /* UCVTF */ - itof = true; - /* fallthru */ - case 4: /* FCVTAS */ - case 5: /* FCVTAU */ - if (rmode != 0) { - goto do_unallocated; - } - /* fallthru */ - case 0: /* FCVT[NPMZ]S */ - case 1: /* FCVT[NPMZ]U */ - switch (type) { - case 0: /* float32 */ - case 1: /* float64 */ - break; - case 3: /* float16 */ - if (!dc_isar_feature(aa64_fp16, s)) { - goto do_unallocated; - } - break; - default: - goto do_unallocated; - } - if (!fp_access_check(s)) { - return; - } - handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); - break; +static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + tcg_gen_bswap64_i64(tcg_rd, tcg_rn); + tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); +} - default: - switch (sf << 7 | type << 5 | rmode << 3 | opcode) { - case 0b01100110: /* FMOV half <-> 32-bit int */ - case 0b01100111: - case 0b11100110: /* FMOV half <-> 64-bit int */ - case 0b11100111: - if (!dc_isar_feature(aa64_fp16, s)) { - goto do_unallocated; - } - /* fallthru */ - case 0b00000110: /* FMOV 32-bit */ - case 0b00000111: - case 0b10100110: /* FMOV 64-bit */ - case 0b10100111: - case 0b11001110: /* FMOV top half of 128-bit */ - case 0b11001111: - if (!fp_access_check(s)) { - return; - } - itof = opcode & 1; - handle_fmov(s, rd, rn, type, itof); - break; +TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) +TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) +TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? 
gen_rev32 : gen_rev_32) +TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) - case 0b00111110: /* FJCVTZS */ - if (!dc_isar_feature(aa64_jscvt, s)) { - goto do_unallocated; - } else if (fp_access_check(s)) { - handle_fjcvtzs(s, rd, rn); - } - break; +static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + TCGv_i32 t32 = tcg_temp_new_i32(); - default: - do_unallocated: - unallocated_encoding(s); - return; - } - break; - } + tcg_gen_extrl_i64_i32(t32, tcg_rn); + tcg_gen_clzi_i32(t32, t32, 32); + tcg_gen_extu_i32_i64(tcg_rd, t32); } -/* FP-specific subcases of table C3-6 (SIMD and FP data processing) - * 31 30 29 28 25 24 0 - * +---+---+---+---------+-----------------------------+ - * | | 0 | | 1 1 1 1 | | - * +---+---+---+---------+-----------------------------+ - */ -static void disas_data_proc_fp(DisasContext *s, uint32_t insn) +static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { - if (extract32(insn, 24, 1)) { - unallocated_encoding(s); /* in decodetree */ - } else if (extract32(insn, 21, 1) == 0) { - /* Floating point to fixed point conversions */ - disas_fp_fixed_conv(s, insn); - } else { - switch (extract32(insn, 10, 2)) { - case 1: - /* Floating point conditional compare */ - disas_fp_ccomp(s, insn); - break; - case 2: - /* Floating point data-processing (2 source) */ - unallocated_encoding(s); /* in decodetree */ - break; - case 3: - /* Floating point conditional select */ - unallocated_encoding(s); /* in decodetree */ - break; - case 0: - switch (ctz32(extract32(insn, 12, 4))) { - case 0: /* [15:12] == xxx1 */ - /* Floating point immediate */ - disas_fp_imm(s, insn); - break; - case 1: /* [15:12] == xx10 */ - /* Floating point compare */ - disas_fp_compare(s, insn); - break; - case 2: /* [15:12] == x100 */ - /* Floating point data-processing (1 source) */ - disas_fp_1src(s, insn); - break; - case 3: /* [15:12] == 1000 */ - unallocated_encoding(s); - break; - default: /* [15:12] == 0000 */ - /* Floating point <-> integer conversions */ - disas_fp_int_conv(s, insn); - break; - } - break; - } - } + tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); } -static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, - int pos) +static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { - /* Extract 64 bits from the middle of two concatenated 64 bit - * vector register slices left:right. The extracted bits start - * at 'pos' bits into the right (least significant) side. - * We return the result in tcg_right, and guarantee not to - * trash tcg_left. 
- */ - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - assert(pos > 0 && pos < 64); + TCGv_i32 t32 = tcg_temp_new_i32(); - tcg_gen_shri_i64(tcg_right, tcg_right, pos); - tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); - tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); + tcg_gen_extrl_i64_i32(t32, tcg_rn); + tcg_gen_clrsb_i32(t32, t32); + tcg_gen_extu_i32_i64(tcg_rd, t32); } -/* EXT - * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 - * +---+---+-------------+-----+---+------+---+------+---+------+------+ - * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | - * +---+---+-------------+-----+---+------+---+------+---+------+------+ - */ -static void disas_simd_ext(DisasContext *s, uint32_t insn) -{ - int is_q = extract32(insn, 30, 1); - int op2 = extract32(insn, 22, 2); - int imm4 = extract32(insn, 11, 4); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - int pos = imm4 << 3; - TCGv_i64 tcg_resl, tcg_resh; - - if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } +TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) +TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32) - tcg_resh = tcg_temp_new_i64(); - tcg_resl = tcg_temp_new_i64(); +static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) +{ + TCGv_i64 tcg_rd, tcg_rn; - /* Vd gets bits starting at pos bits into Vm:Vn. This is - * either extracting 128 bits from a 128:128 concatenation, or - * extracting 64 bits from a 64:64 concatenation. - */ - if (!is_q) { - read_vec_element(s, tcg_resl, rn, 0, MO_64); - if (pos != 0) { - read_vec_element(s, tcg_resh, rm, 0, MO_64); - do_ext64(s, tcg_resh, tcg_resl, pos); + if (a->z) { + if (a->rn != 31) { + return false; } + tcg_rn = tcg_constant_i64(0); } else { - TCGv_i64 tcg_hh; - typedef struct { - int reg; - int elt; - } EltPosns; - EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; - EltPosns *elt = eltposns; - - if (pos >= 64) { - elt++; - pos -= 64; - } - - read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); - elt++; - read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); - elt++; - if (pos != 0) { - do_ext64(s, tcg_resh, tcg_resl, pos); - tcg_hh = tcg_temp_new_i64(); - read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); - do_ext64(s, tcg_hh, tcg_resh, pos); - } + tcg_rn = cpu_reg_sp(s, a->rn); } - - write_vec_element(s, tcg_resl, rd, 0, MO_64); - if (is_q) { - write_vec_element(s, tcg_resh, rd, 1, MO_64); + if (s->pauth_active) { + tcg_rd = cpu_reg(s, a->rd); + fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); } - clear_vec_high(s, is_q, rd); + return true; } -/* TBL/TBX - * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 - * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ - * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | - * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ - */ -static void disas_simd_tb(DisasContext *s, uint32_t insn) -{ - int op2 = extract32(insn, 22, 2); - int is_q = extract32(insn, 30, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - int is_tbx = extract32(insn, 12, 1); - int len = (extract32(insn, 13, 2) + 1) * 16; +TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) +TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) +TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) +TRANS_FEAT(PACDB, 
aa64_pauth, gen_pacaut, a, gen_helper_pacdb) - if (op2 != 0) { - unallocated_encoding(s); - return; - } +TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) +TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) +TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) +TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) - if (!fp_access_check(s)) { - return; +static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) +{ + if (s->pauth_active) { + TCGv_i64 tcg_rd = cpu_reg(s, rd); + fn(tcg_rd, tcg_env, tcg_rd); } - - tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rm), tcg_env, - is_q ? 16 : 8, vec_full_reg_size(s), - (len << 6) | (is_tbx << 5) | rn, - gen_helper_simd_tblx); + return true; } -/* ZIP/UZP/TRN - * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 - * +---+---+-------------+------+---+------+---+------------------+------+ - * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | - * +---+---+-------------+------+---+------+---+------------------+------+ - */ -static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - /* opc field bits [1:0] indicate ZIP/UZP/TRN; - * bit 2 indicates 1 vs 2 variant of the insn. - */ - int opcode = extract32(insn, 12, 2); - bool part = extract32(insn, 14, 1); - bool is_q = extract32(insn, 30, 1); - int esize = 8 << size; - int i; - int datasize = is_q ? 128 : 64; - int elements = datasize / esize; - TCGv_i64 tcg_res[2], tcg_ele; +TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) +TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) - if (opcode == 0 || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; +static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, + ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) +{ + TCGv_i64 tcg_rd, tcg_rn, tcg_rm; + + if (!a->sf && (a->sa & (1 << 5))) { + return false; } - if (!fp_access_check(s)) { - return; + tcg_rd = cpu_reg(s, a->rd); + tcg_rn = cpu_reg(s, a->rn); + + tcg_rm = read_cpu_reg(s, a->rm, a->sf); + if (a->sa) { + shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); } - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; - tcg_ele = tcg_temp_new_i64(); + (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + if (setflags) { + gen_logic_CC(a->sf, tcg_rd); + } + return true; +} - for (i = 0; i < elements; i++) { - int o, w; +static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) +{ + /* + * Unshifted ORR and ORN with WZR/XZR is the standard encoding for + * register-register MOV and MVN, so it is worth special casing. 
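+ * (That is, "ORR Xd, XZR, Xm" is the MOV alias and "ORN Xd, XZR, Xm"
+ * is the MVN alias, so no shift or further ALU op is needed here.)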
+ */ + if (a->sa == 0 && a->st == 0 && a->rn == 31) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_rm = cpu_reg(s, a->rm); - switch (opcode) { - case 1: /* UZP1/2 */ - { - int midpoint = elements / 2; - if (i < midpoint) { - read_vec_element(s, tcg_ele, rn, 2 * i + part, size); - } else { - read_vec_element(s, tcg_ele, rm, - 2 * (i - midpoint) + part, size); - } - break; - } - case 2: /* TRN1/2 */ - if (i & 1) { - read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); - } else { - read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); + if (a->n) { + tcg_gen_not_i64(tcg_rd, tcg_rm); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } - break; - case 3: /* ZIP1/2 */ - { - int base = part * elements / 2; - if (i & 1) { - read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); + } else { + if (a->sf) { + tcg_gen_mov_i64(tcg_rd, tcg_rm); } else { - read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); + tcg_gen_ext32u_i64(tcg_rd, tcg_rm); } - break; - } - default: - g_assert_not_reached(); - } - - w = (i * esize) / 64; - o = (i * esize) % 64; - if (o == 0) { - tcg_gen_mov_i64(tcg_res[w], tcg_ele); - } else { - tcg_gen_shli_i64(tcg_ele, tcg_ele, o); - tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); } + return true; } - for (i = 0; i <= is_q; ++i) { - write_vec_element(s, tcg_res[i], rd, i, MO_64); - } - clear_vec_high(s, is_q, rd); + return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); } -/* - * do_reduction_op helper - * - * This mirrors the Reduce() pseudocode in the ARM ARM. It is - * important for correct NaN propagation that we do these - * operations in exactly the order specified by the pseudocode. - * - * This is a recursive function, TCG temps should be freed by the - * calling function once it is done with the values. - */ -static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, - int esize, int size, int vmap, TCGv_ptr fpst) +TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) +TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) +TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) + +static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, + bool sub_op, bool setflags) { - if (esize == size) { - int element; - MemOp msize = esize == 16 ? 
MO_16 : MO_32; - TCGv_i32 tcg_elem; + TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; - /* We should have one register left here */ - assert(ctpop8(vmap) == 1); - element = ctz32(vmap); - assert(element < 8); + if (a->sa > 4) { + return false; + } - tcg_elem = tcg_temp_new_i32(); - read_vec_element_i32(s, tcg_elem, rn, element, msize); - return tcg_elem; + /* non-flag setting ops may use SP */ + if (!setflags) { + tcg_rd = cpu_reg_sp(s, a->rd); } else { - int bits = size / 2; - int shift = ctpop8(vmap) / 2; - int vmap_lo = (vmap >> shift) & vmap; - int vmap_hi = (vmap & ~vmap_lo); - TCGv_i32 tcg_hi, tcg_lo, tcg_res; - - tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); - tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); - tcg_res = tcg_temp_new_i32(); - - switch (fpopcode) { - case 0x0c: /* fmaxnmv half-precision */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x0f: /* fmaxv half-precision */ - gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x1c: /* fminnmv half-precision */ - gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x1f: /* fminv half-precision */ - gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x2c: /* fmaxnmv */ - gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x2f: /* fmaxv */ - gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x3c: /* fminnmv */ - gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); - break; - case 0x3f: /* fminv */ - gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); - break; - default: - g_assert_not_reached(); - } - return tcg_res; + tcg_rd = cpu_reg(s, a->rd); } -} + tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); -/* AdvSIMD across lanes - * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+-----------+--------+-----+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | - * +---+---+---+-----------+------+-----------+--------+-----+------+------+ - */ -static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - bool is_q = extract32(insn, 30, 1); - bool is_u = extract32(insn, 29, 1); - bool is_fp = false; - bool is_min = false; - int esize; - int elements; - int i; - TCGv_i64 tcg_res, tcg_elt; + tcg_rm = read_cpu_reg(s, a->rm, a->sf); + ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); - switch (opcode) { - case 0x1b: /* ADDV */ - if (is_u) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x3: /* SADDLV, UADDLV */ - case 0xa: /* SMAXV, UMAXV */ - case 0x1a: /* SMINV, UMINV */ - if (size == 3 || (size == 2 && !is_q)) { - unallocated_encoding(s); - return; + tcg_result = tcg_temp_new_i64(); + if (!setflags) { + if (sub_op) { + tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); } - break; - case 0xc: /* FMAXNMV, FMINNMV */ - case 0xf: /* FMAXV, FMINV */ - /* Bit 1 of size field encodes min vs max and the actual size - * depends on the encoding of the U bit. If not set (and FP16 - * enabled) then we do half-precision float instead of single - * precision. 
- */ - is_min = extract32(size, 1, 1); - is_fp = true; - if (!is_u && dc_isar_feature(aa64_fp16, s)) { - size = 1; - } else if (!is_u || !is_q || extract32(size, 0, 1)) { - unallocated_encoding(s); - return; + } else { + if (sub_op) { + gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); } else { - size = 2; + gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); } - break; - default: - unallocated_encoding(s); - return; } - if (!fp_access_check(s)) { - return; + if (a->sf) { + tcg_gen_mov_i64(tcg_rd, tcg_result); + } else { + tcg_gen_ext32u_i64(tcg_rd, tcg_result); } + return true; +} - esize = 8 << size; - elements = (is_q ? 128 : 64) / esize; - - tcg_res = tcg_temp_new_i64(); - tcg_elt = tcg_temp_new_i64(); - - /* These instructions operate across all lanes of a vector - * to produce a single result. We can guarantee that a 64 - * bit intermediate is sufficient: - * + for [US]ADDLV the maximum element size is 32 bits, and - * the result type is 64 bits - * + for FMAX*V, FMIN*V, ADDV the intermediate type is the - * same as the element size, which is 32 bits at most - * For the integer operations we can choose to work at 64 - * or 32 bits and truncate at the end; for simplicity - * we use 64 bits always. The floating point - * ops do require 32 bit intermediates, though. - */ - if (!is_fp) { - read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); - - for (i = 1; i < elements; i++) { - read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); - - switch (opcode) { - case 0x03: /* SADDLV / UADDLV */ - case 0x1b: /* ADDV */ - tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); - break; - case 0x0a: /* SMAXV / UMAXV */ - if (is_u) { - tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); - } else { - tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); - } - break; - case 0x1a: /* SMINV / UMINV */ - if (is_u) { - tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); - } else { - tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); - } - break; - default: - g_assert_not_reached(); - } +TRANS(ADD_ext, do_addsub_ext, a, false, false) +TRANS(SUB_ext, do_addsub_ext, a, true, false) +TRANS(ADDS_ext, do_addsub_ext, a, false, true) +TRANS(SUBS_ext, do_addsub_ext, a, true, true) - } - } else { - /* Floating point vector reduction ops which work across 32 - * bit (single) or 16 bit (half-precision) intermediates. - * Note that correct NaN propagation requires that we do these - * operations in exactly the order specified by the pseudocode. - */ - TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - int fpopcode = opcode | is_min << 4 | is_u << 5; - int vmap = (1 << elements) - 1; - TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, - (is_q ? 
128 : 64), vmap, fpst); - tcg_gen_extu_i32_i64(tcg_res, tcg_res32); - } +static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, + bool sub_op, bool setflags) +{ + TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; - /* Now truncate the result to the width required for the final output */ - if (opcode == 0x03) { - /* SADDLV, UADDLV: result is 2*esize */ - size++; + if (a->st == 3 || (!a->sf && (a->sa & 32))) { + return false; } - switch (size) { - case 0: - tcg_gen_ext8u_i64(tcg_res, tcg_res); - break; - case 1: - tcg_gen_ext16u_i64(tcg_res, tcg_res); - break; - case 2: - tcg_gen_ext32u_i64(tcg_res, tcg_res); - break; - case 3: - break; - default: - g_assert_not_reached(); - } + tcg_rd = cpu_reg(s, a->rd); + tcg_rn = read_cpu_reg(s, a->rn, a->sf); + tcg_rm = read_cpu_reg(s, a->rm, a->sf); - write_fp_dreg(s, rd, tcg_res); -} + shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); -/* AdvSIMD modified immediate - * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 - * +---+---+----+---------------------+-----+-------+----+---+-------+------+ - * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | - * +---+---+----+---------------------+-----+-------+----+---+-------+------+ - * - * There are a number of operations that can be carried out here: - * MOVI - move (shifted) imm into register - * MVNI - move inverted (shifted) imm into register - * ORR - bitwise OR of (shifted) imm with register - * BIC - bitwise clear of (shifted) imm with register - * With ARMv8.2 we also have: - * FMOV half-precision - */ -static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int cmode = extract32(insn, 12, 4); - int o2 = extract32(insn, 11, 1); - uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); - bool is_neg = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - uint64_t imm = 0; - - if (o2) { - if (cmode != 0xf || is_neg) { - unallocated_encoding(s); - return; - } - /* FMOV (vector, immediate) - half-precision */ - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; + tcg_result = tcg_temp_new_i64(); + if (!setflags) { + if (sub_op) { + tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); } - imm = vfp_expand_imm(MO_16, abcdefgh); - /* now duplicate across the lanes */ - imm = dup_const(MO_16, imm); } else { - if (cmode == 0xf && is_neg && !is_q) { - unallocated_encoding(s); - return; + if (sub_op) { + gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); + } else { + gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); } - imm = asimd_imm_const(abcdefgh, cmode, is_neg); - } - - if (!fp_access_check(s)) { - return; } - if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { - /* MOVI or MVNI, with MVNI negation handled above. */ - tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, - vec_full_reg_size(s), imm); + if (a->sf) { + tcg_gen_mov_i64(tcg_rd, tcg_result); } else { - /* ORR or BIC, with BIC negation to AND handled above. */ - if (is_neg) { - gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); - } else { - gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); - } + tcg_gen_ext32u_i64(tcg_rd, tcg_result); } + return true; } -/* - * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) - * - * This code is handles the common shifting code and is used by both - * the vector and scalar code. 
- */ -static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, - TCGv_i64 tcg_rnd, bool accumulate, - bool is_u, int size, int shift) -{ - bool extended_result = false; - bool round = tcg_rnd != NULL; - int ext_lshift = 0; - TCGv_i64 tcg_src_hi; - - if (round && size == 3) { - extended_result = true; - ext_lshift = 64 - shift; - tcg_src_hi = tcg_temp_new_i64(); - } else if (shift == 64) { - if (!accumulate && is_u) { - /* result is zero */ - tcg_gen_movi_i64(tcg_res, 0); - return; - } - } +TRANS(ADD_r, do_addsub_reg, a, false, false) +TRANS(SUB_r, do_addsub_reg, a, true, false) +TRANS(ADDS_r, do_addsub_reg, a, false, true) +TRANS(SUBS_r, do_addsub_reg, a, true, true) - /* Deal with the rounding step */ - if (round) { - if (extended_result) { - TCGv_i64 tcg_zero = tcg_constant_i64(0); - if (!is_u) { - /* take care of sign extending tcg_res */ - tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); - tcg_gen_add2_i64(tcg_src, tcg_src_hi, - tcg_src, tcg_src_hi, - tcg_rnd, tcg_zero); - } else { - tcg_gen_add2_i64(tcg_src, tcg_src_hi, - tcg_src, tcg_zero, - tcg_rnd, tcg_zero); - } - } else { - tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); - } +static bool do_mulh(DisasContext *s, arg_rrr *a, + void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 discard = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + TCGv_i64 tcg_rm = cpu_reg(s, a->rm); + + fn(discard, tcg_rd, tcg_rn, tcg_rm); + return true; +} + +TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) +TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) + +static bool do_muladd(DisasContext *s, arg_rrrr *a, + bool sf, bool is_sub, MemOp mop) +{ + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_op1, tcg_op2; + + if (mop == MO_64) { + tcg_op1 = cpu_reg(s, a->rn); + tcg_op2 = cpu_reg(s, a->rm); + } else { + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); + tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); } - /* Now do the shift right */ - if (round && extended_result) { - /* extended case, >64 bit precision required */ - if (ext_lshift == 0) { - /* special case, only high bits matter */ - tcg_gen_mov_i64(tcg_src, tcg_src_hi); - } else { - tcg_gen_shri_i64(tcg_src, tcg_src, shift); - tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); - tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); - } + if (a->ra == 31 && !is_sub) { + /* Special-case MADD with rA == XZR; it is the standard MUL alias */ + tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); } else { - if (is_u) { - if (shift == 64) { - /* essentially shifting in 64 zeros */ - tcg_gen_movi_i64(tcg_src, 0); - } else { - tcg_gen_shri_i64(tcg_src, tcg_src, shift); - } + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + TCGv_i64 tcg_ra = cpu_reg(s, a->ra); + + tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); + if (is_sub) { + tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); } else { - if (shift == 64) { - /* effectively extending the sign-bit */ - tcg_gen_sari_i64(tcg_src, tcg_src, 63); - } else { - tcg_gen_sari_i64(tcg_src, tcg_src, shift); - } + tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); } } - if (accumulate) { - tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); - } else { - tcg_gen_mov_i64(tcg_res, tcg_src); + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } + return true; } -/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ -static void handle_scalar_simd_shri(DisasContext *s, - bool is_u, int immh, int immb, - int opcode, int rn, int rd) -{ - const int size = 3; - int immhb = immh << 3 | 
immb; - int shift = 2 * (8 << size) - immhb; - bool accumulate = false; - bool round = false; - bool insert = false; - TCGv_i64 tcg_rn; - TCGv_i64 tcg_rd; - TCGv_i64 tcg_round; +TRANS(MADD_w, do_muladd, a, false, false, MO_64) +TRANS(MSUB_w, do_muladd, a, false, true, MO_64) +TRANS(MADD_x, do_muladd, a, true, false, MO_64) +TRANS(MSUB_x, do_muladd, a, true, true, MO_64) - if (!extract32(immh, 3, 1)) { - unallocated_encoding(s); - return; - } +TRANS(SMADDL, do_muladd, a, true, false, MO_SL) +TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) +TRANS(UMADDL, do_muladd, a, true, false, MO_UL) +TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) - if (!fp_access_check(s)) { - return; - } +static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, + bool is_sub, bool setflags) +{ + TCGv_i64 tcg_y, tcg_rn, tcg_rd; - switch (opcode) { - case 0x02: /* SSRA / USRA (accumulate) */ - accumulate = true; - break; - case 0x04: /* SRSHR / URSHR (rounding) */ - round = true; - break; - case 0x06: /* SRSRA / URSRA (accum + rounding) */ - accumulate = round = true; - break; - case 0x08: /* SRI */ - insert = true; - break; - } + tcg_rd = cpu_reg(s, a->rd); + tcg_rn = cpu_reg(s, a->rn); - if (round) { - tcg_round = tcg_constant_i64(1ULL << (shift - 1)); + if (is_sub) { + tcg_y = tcg_temp_new_i64(); + tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); } else { - tcg_round = NULL; + tcg_y = cpu_reg(s, a->rm); } - tcg_rn = read_fp_dreg(s, rn); - tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); - - if (insert) { - /* shift count same as element size is valid but does nothing; - * special case to avoid potential shift by 64. - */ - int esize = 8 << size; - if (shift != esize) { - tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); - } + if (setflags) { + gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); } else { - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); } - - write_fp_dreg(s, rd, tcg_rd); + return true; } -/* SHL/SLI - Scalar shift left */ -static void handle_scalar_simd_shli(DisasContext *s, bool insert, - int immh, int immb, int opcode, - int rn, int rd) +TRANS(ADC, do_adc_sbc, a, false, false) +TRANS(SBC, do_adc_sbc, a, true, false) +TRANS(ADCS, do_adc_sbc, a, false, true) +TRANS(SBCS, do_adc_sbc, a, true, true) + +static bool trans_RMIF(DisasContext *s, arg_RMIF *a) { - int size = 32 - clz32(immh) - 1; - int immhb = immh << 3 | immb; - int shift = immhb - (8 << size); + int mask = a->mask; TCGv_i64 tcg_rn; - TCGv_i64 tcg_rd; + TCGv_i32 nzcv; - if (!extract32(immh, 3, 1)) { - unallocated_encoding(s); - return; + if (!dc_isar_feature(aa64_condm_4, s)) { + return false; } - if (!fp_access_check(s)) { - return; + tcg_rn = read_cpu_reg(s, a->rn, 1); + tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); + + nzcv = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(nzcv, tcg_rn); + + if (mask & 8) { /* N */ + tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); } + if (mask & 4) { /* Z */ + tcg_gen_not_i32(cpu_ZF, nzcv); + tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); + } + if (mask & 2) { /* C */ + tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); + } + if (mask & 1) { /* V */ + tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); + } + return true; +} - tcg_rn = read_fp_dreg(s, rn); - tcg_rd = insert ? 
read_fp_dreg(s, rd) : tcg_temp_new_i64(); +static bool do_setf(DisasContext *s, int rn, int shift) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); - if (insert) { - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); - } else { - tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); - } - - write_fp_dreg(s, rd, tcg_rd); -} - -/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with - * (signed/unsigned) narrowing */ -static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, - bool is_u_shift, bool is_u_narrow, - int immh, int immb, int opcode, - int rn, int rd) -{ - int immhb = immh << 3 | immb; - int size = 32 - clz32(immh) - 1; - int esize = 8 << size; - int shift = (2 * esize) - immhb; - int elements = is_scalar ? 1 : (64 / esize); - bool round = extract32(opcode, 0, 1); - MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); - TCGv_i64 tcg_rn, tcg_rd, tcg_round; - TCGv_i32 tcg_rd_narrowed; - TCGv_i64 tcg_final; - - static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_unarrow_sat8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_unarrow_sat16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_unarrow_sat32 }, - { NULL, NULL }, - }; - static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { - gen_helper_neon_narrow_sat_u8, - gen_helper_neon_narrow_sat_u16, - gen_helper_neon_narrow_sat_u32, - NULL - }; - NeonGenNarrowEnvFn *narrowfn; + tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); + tcg_gen_shli_i32(cpu_NF, tmp, shift); + tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); + return true; +} - int i; +TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) +TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) - assert(size < 4); +/* CCMP, CCMN */ +static bool trans_CCMP(DisasContext *s, arg_CCMP *a) +{ + TCGv_i32 tcg_t0 = tcg_temp_new_i32(); + TCGv_i32 tcg_t1 = tcg_temp_new_i32(); + TCGv_i32 tcg_t2 = tcg_temp_new_i32(); + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + TCGv_i64 tcg_rn, tcg_y; + DisasCompare c; + unsigned nzcv; + bool has_andc; - if (extract32(immh, 3, 1)) { - unallocated_encoding(s); - return; - } + /* Set T0 = !COND. */ + arm_test_cc(&c, a->cond); + tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); - if (!fp_access_check(s)) { - return; + /* Load the arguments for the new comparison. */ + if (a->imm) { + tcg_y = tcg_constant_i64(a->y); + } else { + tcg_y = cpu_reg(s, a->y); } + tcg_rn = cpu_reg(s, a->rn); - if (is_u_shift) { - narrowfn = unsigned_narrow_fns[size]; + /* Set the flags for the new comparison. */ + if (a->op) { + gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); } else { - narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0]; + gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); } - tcg_rn = tcg_temp_new_i64(); - tcg_rd = tcg_temp_new_i64(); - tcg_rd_narrowed = tcg_temp_new_i32(); - tcg_final = tcg_temp_new_i64(); + /* + * If COND was false, force the flags to #nzcv. Compute two masks + * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). + * For tcg hosts that support ANDC, we can make do with just T1. + * In either case, allow the tcg optimizer to delete any unused mask. 
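+ * When COND is true, T0 = T1 = 0 and T2 = -1, so the ORs and ANDs
+ * below are all no-ops and the flags from the comparison are kept;
+ * when COND is false they force each flag to its #nzcv value.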
+ */ + tcg_gen_neg_i32(tcg_t1, tcg_t0); + tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); - if (round) { - tcg_round = tcg_constant_i64(1ULL << (shift - 1)); + nzcv = a->nzcv; + has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); + if (nzcv & 8) { /* N */ + tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); } else { - tcg_round = NULL; + if (has_andc) { + tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); + } else { + tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); + } } - - for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, ldop); - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - false, is_u_shift, size+1, shift); - narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); - tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); - if (i == 0) { - tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize); + if (nzcv & 4) { /* Z */ + if (has_andc) { + tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); } else { - tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); } + } else { + tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); } - - if (!is_q) { - write_vec_element(s, tcg_final, rd, 0, MO_64); + if (nzcv & 2) { /* C */ + tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); + } else { + if (has_andc) { + tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); + } else { + tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); + } + } + if (nzcv & 1) { /* V */ + tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); } else { - write_vec_element(s, tcg_final, rd, 1, MO_64); + if (has_andc) { + tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); + } else { + tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); + } } - clear_vec_high(s, is_q, rd); + return true; } -/* SQSHLU, UQSHL, SQSHL: saturating left shifts */ -static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, - bool src_unsigned, bool dst_unsigned, - int immh, int immb, int rn, int rd) +static bool trans_CSEL(DisasContext *s, arg_CSEL *a) { - int immhb = immh << 3 | immb; - int size = 32 - clz32(immh) - 1; - int shift = immhb - (8 << size); - int pass; + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 zero = tcg_constant_i64(0); + DisasCompare64 c; - assert(immh != 0); - assert(!(scalar && is_q)); + a64_test_cc(&c, a->cond); - if (!scalar) { - if (!is_q && extract32(immh, 3, 1)) { - unallocated_encoding(s); - return; + if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { + /* CSET & CSETM. */ + if (a->else_inv) { + tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), + tcg_rd, c.value, zero); + } else { + tcg_gen_setcond_i64(tcg_invert_cond(c.cond), + tcg_rd, c.value, zero); } + } else { + TCGv_i64 t_true = cpu_reg(s, a->rn); + TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); - /* Since we use the variable-shift helpers we must - * replicate the shift count into each element of - * the tcg_shift value. 
- */ - switch (size) { - case 0: - shift |= shift << 8; - /* fall through */ - case 1: - shift |= shift << 16; - break; - case 2: - case 3: - break; - default: - g_assert_not_reached(); + if (a->else_inv && a->else_inc) { + tcg_gen_neg_i64(t_false, t_false); + } else if (a->else_inv) { + tcg_gen_not_i64(t_false, t_false); + } else if (a->else_inc) { + tcg_gen_addi_i64(t_false, t_false, 1); } + tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); } - if (!fp_access_check(s)) { - return; + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } + return true; +} - if (size == 3) { - TCGv_i64 tcg_shift = tcg_constant_i64(shift); - static NeonGenTwo64OpEnvFn * const fns[2][2] = { - { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, - { NULL, gen_helper_neon_qshl_u64 }, - }; - NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; - int maxpass = is_q ? 2 : 1; - - for (pass = 0; pass < maxpass; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); +typedef struct FPScalar1Int { + void (*gen_h)(TCGv_i32, TCGv_i32); + void (*gen_s)(TCGv_i32, TCGv_i32); + void (*gen_d)(TCGv_i64, TCGv_i64); +} FPScalar1Int; - read_vec_element(s, tcg_op, rn, pass, MO_64); - genfn(tcg_op, tcg_env, tcg_op, tcg_shift); - write_vec_element(s, tcg_op, rd, pass, MO_64); - } - clear_vec_high(s, is_q, rd); - } else { - TCGv_i32 tcg_shift = tcg_constant_i32(shift); - static NeonGenTwoOpEnvFn * const fns[2][2][3] = { - { - { gen_helper_neon_qshl_s8, - gen_helper_neon_qshl_s16, - gen_helper_neon_qshl_s32 }, - { gen_helper_neon_qshlu_s8, - gen_helper_neon_qshlu_s16, - gen_helper_neon_qshlu_s32 } - }, { - { NULL, NULL, NULL }, - { gen_helper_neon_qshl_u8, - gen_helper_neon_qshl_u16, - gen_helper_neon_qshl_u32 } +static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, + const FPScalar1Int *f, + bool merging) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + f->gen_d(t, t); + if (merging) { + write_fp_dreg_merging(s, a->rd, a->rd, t); + } else { + write_fp_dreg(s, a->rd, t); } - }; - NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; - MemOp memop = scalar ? size : MO_32; - int maxpass = scalar ? 1 : is_q ? 
4 : 2; - - for (pass = 0; pass < maxpass; pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op, rn, pass, memop); - genfn(tcg_op, tcg_env, tcg_op, tcg_shift); - if (scalar) { - switch (size) { - case 0: - tcg_gen_ext8u_i32(tcg_op, tcg_op); - break; - case 1: - tcg_gen_ext16u_i32(tcg_op, tcg_op); - break; - case 2: - break; - default: - g_assert_not_reached(); - } - write_fp_sreg(s, rd, tcg_op); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t = read_fp_sreg(s, a->rn); + f->gen_s(t, t); + if (merging) { + write_fp_sreg_merging(s, a->rd, a->rd, t); } else { - write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + write_fp_sreg(s, a->rd, t); } } - - if (!scalar) { - clear_vec_high(s, is_q, rd); + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; } + if (fp_access_check(s)) { + TCGv_i32 t = read_fp_hreg(s, a->rn); + f->gen_h(t, t); + if (merging) { + write_fp_hreg_merging(s, a->rd, a->rd, t); + } else { + write_fp_sreg(s, a->rd, t); + } + } + break; + default: + return false; } + return true; } -/* Common vector code for handling integer to FP conversion */ -static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, - int elements, int is_signed, - int fracbits, int size) +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, + const FPScalar1Int *fnormal, + const FPScalar1Int *fah) { - TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - TCGv_i32 tcg_shift = NULL; + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); +} - MemOp mop = size | (is_signed ? MO_SIGN : 0); - int pass; +static const FPScalar1Int f_scalar_fmov = { + tcg_gen_mov_i32, + tcg_gen_mov_i32, + tcg_gen_mov_i64, +}; +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) - if (fracbits || size == MO_64) { - tcg_shift = tcg_constant_i32(fracbits); - } +static const FPScalar1Int f_scalar_fabs = { + gen_vfp_absh, + gen_vfp_abss, + gen_vfp_absd, +}; +static const FPScalar1Int f_scalar_ah_fabs = { + gen_vfp_ah_absh, + gen_vfp_ah_abss, + gen_vfp_ah_absd, +}; +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) - if (size == MO_64) { - TCGv_i64 tcg_int64 = tcg_temp_new_i64(); - TCGv_i64 tcg_double = tcg_temp_new_i64(); +static const FPScalar1Int f_scalar_fneg = { + gen_vfp_negh, + gen_vfp_negs, + gen_vfp_negd, +}; +static const FPScalar1Int f_scalar_ah_fneg = { + gen_vfp_ah_negh, + gen_vfp_ah_negs, + gen_vfp_ah_negd, +}; +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) - for (pass = 0; pass < elements; pass++) { - read_vec_element(s, tcg_int64, rn, pass, mop); +typedef struct FPScalar1 { + void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); +} FPScalar1; - if (is_signed) { - gen_helper_vfp_sqtod(tcg_double, tcg_int64, - tcg_shift, tcg_fpst); - } else { - gen_helper_vfp_uqtod(tcg_double, tcg_int64, - tcg_shift, tcg_fpst); - } - if (elements == 1) { - write_fp_dreg(s, rd, tcg_double); - } else { - write_vec_element(s, tcg_double, rd, pass, MO_64); - } - } - } else { - TCGv_i32 tcg_int32 = tcg_temp_new_i32(); - TCGv_i32 tcg_float = tcg_temp_new_i32(); - - for (pass = 0; pass < elements; pass++) { - read_vec_element_i32(s, tcg_int32, rn, pass, mop); - - switch (size) { - case MO_32: - if (fracbits) { - if (is_signed) { - gen_helper_vfp_sltos(tcg_float, tcg_int32, - tcg_shift, tcg_fpst); - } else { - gen_helper_vfp_ultos(tcg_float, tcg_int32, - tcg_shift, tcg_fpst); 
- } - } else { - if (is_signed) { - gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); - } else { - gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); - } - } - break; - case MO_16: - if (fracbits) { - if (is_signed) { - gen_helper_vfp_sltoh(tcg_float, tcg_int32, - tcg_shift, tcg_fpst); - } else { - gen_helper_vfp_ultoh(tcg_float, tcg_int32, - tcg_shift, tcg_fpst); - } - } else { - if (is_signed) { - gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); - } else { - gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); - } - } - break; - default: - g_assert_not_reached(); - } +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode, + ARMFPStatusFlavour fpsttype) +{ + TCGv_i32 tcg_rmode = NULL; + TCGv_ptr fpst; + TCGv_i64 t64; + TCGv_i32 t32; + int check = fp_access_check_scalar_hsd(s, a->esz); - if (elements == 1) { - write_fp_sreg(s, rd, tcg_float); - } else { - write_vec_element_i32(s, tcg_float, rd, pass, size); - } - } + if (check <= 0) { + return check == 0; } - clear_vec_high(s, elements << size == 16, rd); -} - -/* UCVTF/SCVTF - Integer to FP conversion */ -static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int opcode, - int rn, int rd) -{ - int size, elements, fracbits; - int immhb = immh << 3 | immb; - - if (immh & 8) { - size = MO_64; - if (!is_scalar && !is_q) { - unallocated_encoding(s); - return; - } - } else if (immh & 4) { - size = MO_32; - } else if (immh & 2) { - size = MO_16; - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - } else { - /* immh == 0 would be a failure of the decode logic */ - g_assert(immh == 1); - unallocated_encoding(s); - return; + fpst = fpstatus_ptr(fpsttype); + if (rmode >= 0) { + tcg_rmode = gen_set_rmode(rmode, fpst); } - if (is_scalar) { - elements = 1; - } else { - elements = (8 << is_q) >> size; + switch (a->esz) { + case MO_64: + t64 = read_fp_dreg(s, a->rn); + f->gen_d(t64, t64, fpst); + write_fp_dreg_merging(s, a->rd, a->rd, t64); + break; + case MO_32: + t32 = read_fp_sreg(s, a->rn); + f->gen_s(t32, t32, fpst); + write_fp_sreg_merging(s, a->rd, a->rd, t32); + break; + case MO_16: + t32 = read_fp_hreg(s, a->rn); + f->gen_h(t32, t32, fpst); + write_fp_hreg_merging(s, a->rd, a->rd, t32); + break; + default: + g_assert_not_reached(); } - fracbits = (16 << size) - immhb; - if (!fp_access_check(s)) { - return; + if (rmode >= 0) { + gen_restore_rmode(tcg_rmode, fpst); } + return true; +} - handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode) +{ + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, + a->esz == MO_16 ? 
+ FPST_A64_F16 : FPST_A64); } -/* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ -static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int rn, int rd) +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode) { - int immhb = immh << 3 | immb; - int pass, size, fracbits; - TCGv_ptr tcg_fpstatus; - TCGv_i32 tcg_rmode, tcg_shift; + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); +} - if (immh & 0x8) { - size = MO_64; - if (!is_scalar && !is_q) { - unallocated_encoding(s); - return; - } - } else if (immh & 0x4) { - size = MO_32; - } else if (immh & 0x2) { - size = MO_16; - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - } else { - /* Should have split out AdvSIMD modified immediate earlier. */ - assert(immh == 1); - unallocated_encoding(s); - return; - } +static const FPScalar1 f_scalar_fsqrt = { + gen_helper_vfp_sqrth, + gen_helper_vfp_sqrts, + gen_helper_vfp_sqrtd, +}; +TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) - if (!fp_access_check(s)) { - return; - } +static const FPScalar1 f_scalar_frint = { + gen_helper_advsimd_rinth, + gen_helper_rints, + gen_helper_rintd, +}; +TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) +TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) +TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) +TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) +TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) +TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) + +static const FPScalar1 f_scalar_frintx = { + gen_helper_advsimd_rinth_exact, + gen_helper_rints_exact, + gen_helper_rintd_exact, +}; +TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) - assert(!(is_scalar && is_q)); +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) +{ + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; + TCGv_i32 t32; + int check; - tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); - fracbits = (16 << size) - immhb; - tcg_shift = tcg_constant_i32(fracbits); + if (!dc_isar_feature(aa64_bf16, s)) { + return false; + } - if (size == MO_64) { - int maxpass = is_scalar ? 1 : 2; + check = fp_access_check_scalar_hsd(s, a->esz); - for (pass = 0; pass < maxpass; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); + if (check <= 0) { + return check == 0; + } - read_vec_element(s, tcg_op, rn, pass, MO_64); - if (is_u) { - gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); - } - write_vec_element(s, tcg_op, rd, pass, MO_64); - } - clear_vec_high(s, is_q, rd); - } else { - void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); - int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); + t32 = read_fp_sreg(s, a->rn); + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); + write_fp_hreg_merging(s, a->rd, a->rd, t32); + return true; +} - switch (size) { - case MO_16: - if (is_u) { - fn = gen_helper_vfp_touhh; - } else { - fn = gen_helper_vfp_toshh; - } - break; - case MO_32: - if (is_u) { - fn = gen_helper_vfp_touls; - } else { - fn = gen_helper_vfp_tosls; - } - break; - default: - g_assert_not_reached(); - } +static const FPScalar1 f_scalar_frint32 = { + NULL, + gen_helper_frint32_s, + gen_helper_frint32_d, +}; +TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, + &f_scalar_frint32, FPROUNDING_ZERO) +TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) + +static const FPScalar1 f_scalar_frint64 = { + NULL, + gen_helper_frint64_s, + gen_helper_frint64_d, +}; +TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, + &f_scalar_frint64, FPROUNDING_ZERO) +TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) + +static const FPScalar1 f_scalar_frecpe = { + gen_helper_recpe_f16, + gen_helper_recpe_f32, + gen_helper_recpe_f64, +}; +static const FPScalar1 f_scalar_frecpe_rpres = { + gen_helper_recpe_f16, + gen_helper_recpe_rpres_f32, + gen_helper_recpe_f64, +}; +TRANS(FRECPE_s, do_fp1_scalar_ah, a, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) + +static const FPScalar1 f_scalar_frecpx = { + gen_helper_frecpx_f16, + gen_helper_frecpx_f32, + gen_helper_frecpx_f64, +}; +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) - for (pass = 0; pass < maxpass; pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); +static const FPScalar1 f_scalar_frsqrte = { + gen_helper_rsqrte_f16, + gen_helper_rsqrte_f32, + gen_helper_rsqrte_f64, +}; +static const FPScalar1 f_scalar_frsqrte_rpres = { + gen_helper_rsqrte_f16, + gen_helper_rsqrte_rpres_f32, + gen_helper_rsqrte_f64, +}; +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
+ &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) - read_vec_element_i32(s, tcg_op, rn, pass, size); - fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); - if (is_scalar) { - if (size == MO_16 && !is_u) { - tcg_gen_ext16u_i32(tcg_op, tcg_op); - } - write_fp_sreg(s, rd, tcg_op); - } else { - write_vec_element_i32(s, tcg_op, rd, pass, size); - } - } - if (!is_scalar) { - clear_vec_high(s, is_q, rd); - } - } +static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); - gen_restore_rmode(tcg_rmode, tcg_fpstatus); + gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); + } + return true; } -/* AdvSIMD scalar shift by immediate - * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 - * +-----+---+-------------+------+------+--------+---+------+------+ - * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | - * +-----+---+-------------+------+------+--------+---+------+------+ - * - * This is the scalar version so it works on a fixed sized registers - */ -static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) { - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 5); - int immb = extract32(insn, 16, 3); - int immh = extract32(insn, 19, 4); - bool is_u = extract32(insn, 29, 1); + if (fp_access_check(s)) { + TCGv_i32 tmp = read_fp_sreg(s, a->rn); + TCGv_i32 ahp = get_ahp_flag(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); - if (immh == 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0x08: /* SRI */ - if (!is_u) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x00: /* SSHR / USHR */ - case 0x02: /* SSRA / USRA */ - case 0x04: /* SRSHR / URSHR */ - case 0x06: /* SRSRA / URSRA */ - handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); - break; - case 0x0a: /* SHL / SLI */ - handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); - break; - case 0x1c: /* SCVTF, UCVTF */ - handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, - opcode, rn, rd); - break; - case 0x10: /* SQSHRUN, SQSHRUN2 */ - case 0x11: /* SQRSHRUN, SQRSHRUN2 */ - if (!is_u) { - unallocated_encoding(s); - return; - } - handle_vec_simd_sqshrn(s, true, false, false, true, - immh, immb, opcode, rn, rd); - break; - case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ - case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ - handle_vec_simd_sqshrn(s, true, false, is_u, is_u, - immh, immb, opcode, rn, rd); - break; - case 0xc: /* SQSHLU */ - if (!is_u) { - unallocated_encoding(s); - return; - } - handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); - break; - case 0xe: /* SQSHL, UQSHL */ - handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); - break; - case 0x1f: /* FCVTZS, FCVTZU */ - handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); - break; - default: - unallocated_encoding(s); - break; + gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); + /* write_fp_hreg_merging is OK here because top half of result is zero */ + write_fp_hreg_merging(s, a->rd, a->rd, tmp); } + return true; } -/* AdvSIMD scalar three different - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +-----+---+-----------+------+---+------+--------+-----+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | - * 
+-----+---+-----------+------+---+------+--------+-----+------+------+ - */ -static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) +static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) { - bool is_u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 4); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); - if (is_u) { - unallocated_encoding(s); - return; + gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); } + return true; +} - switch (opcode) { - case 0x9: /* SQDMLAL, SQDMLAL2 */ - case 0xb: /* SQDMLSL, SQDMLSL2 */ - case 0xd: /* SQDMULL, SQDMULL2 */ - if (size == 0 || size == 3) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; - } +static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + TCGv_i32 ahp = get_ahp_flag(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); - if (!fp_access_check(s)) { - return; + gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); } + return true; +} - if (size == 2) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); - read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); - - tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res); - - switch (opcode) { - case 0xd: /* SQDMULL, SQDMULL2 */ - break; - case 0xb: /* SQDMLSL, SQDMLSL2 */ - tcg_gen_neg_i64(tcg_res, tcg_res); - /* fall through */ - case 0x9: /* SQDMLAL, SQDMLAL2 */ - read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, - tcg_res, tcg_op1); - break; - default: - g_assert_not_reached(); - } - - write_fp_dreg(s, rd, tcg_res); - } else { - TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); - TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res); - - switch (opcode) { - case 0xd: /* SQDMULL, SQDMULL2 */ - break; - case 0xb: /* SQDMLSL, SQDMLSL2 */ - gen_helper_neon_negl_u32(tcg_res, tcg_res); - /* fall through */ - case 0x9: /* SQDMLAL, SQDMLAL2 */ - { - TCGv_i64 tcg_op3 = tcg_temp_new_i64(); - read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, - tcg_res, tcg_op3); - break; - } - default: - g_assert_not_reached(); - } +static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); + TCGv_i32 tcg_ahp = get_ahp_flag(); - tcg_gen_ext32u_i64(tcg_res, tcg_res); - write_fp_dreg(s, rd, tcg_res); + gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); } + return true; } -/* AdvSIMD scalar three same extra - * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 - * 
+-----+---+-----------+------+---+------+---+--------+---+----+----+ - * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | - * +-----+---+-----------+------+---+------+---+--------+---+----+----+ - */ -static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, - uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 4); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool u = extract32(insn, 29, 1); - TCGv_i32 ele1, ele2, ele3; - TCGv_i64 res; - bool feature; - - switch (u * 16 + opcode) { - case 0x10: /* SQRDMLAH (vector) */ - case 0x11: /* SQRDMLSH (vector) */ - if (size != 1 && size != 2) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_rdm, s); - break; - default: - unallocated_encoding(s); - return; - } - if (!feature) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } +static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); + TCGv_i32 tcg_ahp = get_ahp_flag(); - /* Do a single operation on the lowest element in the vector. - * We use the standard Neon helpers and rely on 0 OP 0 == 0 - * with no side effects for all these operations. - * OPTME: special-purpose helpers would avoid doing some - * unnecessary work in the helper for the 16 bit cases. - */ - ele1 = tcg_temp_new_i32(); - ele2 = tcg_temp_new_i32(); - ele3 = tcg_temp_new_i32(); - - read_vec_element_i32(s, ele1, rn, 0, size); - read_vec_element_i32(s, ele2, rm, 0, size); - read_vec_element_i32(s, ele3, rd, 0, size); - - switch (opcode) { - case 0x0: /* SQRDMLAH */ - if (size == 1) { - gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3); - } else { - gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3); - } - break; - case 0x1: /* SQRDMLSH */ - if (size == 1) { - gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3); - } else { - gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3); - } - break; - default: - g_assert_not_reached(); + gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); } - - res = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(res, ele3); - write_fp_dreg(s, rd, res); + return true; } -static void handle_2misc_64(DisasContext *s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, - TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) +static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, + TCGv_i64 tcg_int, bool is_signed) { - /* Handle 64->64 opcodes which are shared between the scalar and - * vector 2-reg-misc groups. We cover every integer opcode where size == 3 - * is valid in either group and also the double-precision fp ops. - * The caller only need provide tcg_rmode and tcg_fpstatus if the op - * requires them. - */ - TCGCond cond; + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_shift, tcg_single; + TCGv_i64 tcg_double; + + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); + tcg_shift = tcg_constant_i32(shift); - switch (opcode) { - case 0x4: /* CLS, CLZ */ - if (u) { - tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); + switch (esz) { + case MO_64: + tcg_double = tcg_temp_new_i64(); + if (is_signed) { + gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); } else { - tcg_gen_clrsb_i64(tcg_rd, tcg_rn); + gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); } + write_fp_dreg_merging(s, rd, rd, tcg_double); break; - case 0x5: /* NOT */ - /* This opcode is shared with CNT and RBIT but we have earlier - * enforced that size == 3 if and only if this is the NOT insn. - */ - tcg_gen_not_i64(tcg_rd, tcg_rn); - break; - case 0x7: /* SQABS, SQNEG */ - if (u) { - gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); + + case MO_32: + tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } else { - gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); + gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } + write_fp_sreg_merging(s, rd, rd, tcg_single); break; - case 0xa: /* CMLT */ - cond = TCG_COND_LT; - do_cmop: - /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ - tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); - break; - case 0x8: /* CMGT, CMGE */ - cond = u ? TCG_COND_GE : TCG_COND_GT; - goto do_cmop; - case 0x9: /* CMEQ, CMLE */ - cond = u ? TCG_COND_LE : TCG_COND_EQ; - goto do_cmop; - case 0xb: /* ABS, NEG */ - if (u) { - tcg_gen_neg_i64(tcg_rd, tcg_rn); + + case MO_16: + tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } else { - tcg_gen_abs_i64(tcg_rd, tcg_rn); + gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } + write_fp_hreg_merging(s, rd, rd, tcg_single); break; - case 0x2f: /* FABS */ - gen_vfp_absd(tcg_rd, tcg_rn); - break; - case 0x6f: /* FNEG */ - gen_vfp_negd(tcg_rd, tcg_rn); - break; - case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); - break; - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); - break; - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); - break; - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ - case 0x58: /* FRINTA */ - case 0x79: /* FRINTI */ - gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); - break; - case 0x59: /* FRINTX */ - gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); - break; - case 0x1e: /* FRINT32Z */ - case 0x5e: /* FRINT32X */ - gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); - break; - case 0x1f: /* FRINT64Z */ - case 0x5f: /* FRINT64X */ - gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); - break; + default: g_assert_not_reached(); } + return true; } -static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) { - bool is_double = (size == MO_64); - TCGv_ptr fpst; + TCGv_i64 tcg_int; + int check = fp_access_check_scalar_hsd(s, a->esz); - if (!fp_access_check(s)) { - return; + if (check <= 0) { + return check == 0; } - fpst = fpstatus_ptr(size 
== MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - - if (is_double) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_zero = tcg_constant_i64(0); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - NeonGenTwoDoubleOpFn *genfn; - bool swap = false; - int pass; - - switch (opcode) { - case 0x2e: /* FCMLT (zero) */ - swap = true; - /* fallthrough */ - case 0x2c: /* FCMGT (zero) */ - genfn = gen_helper_neon_cgt_f64; - break; - case 0x2d: /* FCMEQ (zero) */ - genfn = gen_helper_neon_ceq_f64; - break; - case 0x6d: /* FCMLE (zero) */ - swap = true; - /* fall through */ - case 0x6c: /* FCMGE (zero) */ - genfn = gen_helper_neon_cge_f64; - break; - default: - g_assert_not_reached(); - } - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - read_vec_element(s, tcg_op, rn, pass, MO_64); - if (swap) { - genfn(tcg_res, tcg_zero, tcg_op, fpst); - } else { - genfn(tcg_res, tcg_op, tcg_zero, fpst); - } - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - - clear_vec_high(s, !is_scalar, rd); + if (a->sf) { + tcg_int = cpu_reg(s, a->rn); } else { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_zero = tcg_constant_i32(0); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - NeonGenTwoSingleOpFn *genfn; - bool swap = false; - int pass, maxpasses; - - if (size == MO_16) { - switch (opcode) { - case 0x2e: /* FCMLT (zero) */ - swap = true; - /* fall through */ - case 0x2c: /* FCMGT (zero) */ - genfn = gen_helper_advsimd_cgt_f16; - break; - case 0x2d: /* FCMEQ (zero) */ - genfn = gen_helper_advsimd_ceq_f16; - break; - case 0x6d: /* FCMLE (zero) */ - swap = true; - /* fall through */ - case 0x6c: /* FCMGE (zero) */ - genfn = gen_helper_advsimd_cge_f16; - break; - default: - g_assert_not_reached(); - } - } else { - switch (opcode) { - case 0x2e: /* FCMLT (zero) */ - swap = true; - /* fall through */ - case 0x2c: /* FCMGT (zero) */ - genfn = gen_helper_neon_cgt_f32; - break; - case 0x2d: /* FCMEQ (zero) */ - genfn = gen_helper_neon_ceq_f32; - break; - case 0x6d: /* FCMLE (zero) */ - swap = true; - /* fall through */ - case 0x6c: /* FCMGE (zero) */ - genfn = gen_helper_neon_cge_f32; - break; - default: - g_assert_not_reached(); - } - } - - if (is_scalar) { - maxpasses = 1; + tcg_int = read_cpu_reg(s, a->rn, true); + if (is_signed) { + tcg_gen_ext32s_i64(tcg_int, tcg_int); } else { - int vector_size = 8 << is_q; - maxpasses = vector_size >> size; - } - - for (pass = 0; pass < maxpasses; pass++) { - read_vec_element_i32(s, tcg_op, rn, pass, size); - if (swap) { - genfn(tcg_res, tcg_zero, tcg_op, fpst); - } else { - genfn(tcg_res, tcg_op, tcg_zero, fpst); - } - if (is_scalar) { - write_fp_sreg(s, rd, tcg_res); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, size); - } - } - - if (!is_scalar) { - clear_vec_high(s, is_q, rd); + tcg_gen_ext32u_i64(tcg_int, tcg_int); } } + return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); } -static void handle_2misc_reciprocal(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) -{ - bool is_double = (size == 3); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); +TRANS(SCVTF_g, do_cvtf_g, a, true) +TRANS(UCVTF_g, do_cvtf_g, a, false) - if (is_double) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - int pass; - - for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { - read_vec_element(s, tcg_op, rn, pass, MO_64); - switch (opcode) { - case 0x3d: /* FRECPE */ - gen_helper_recpe_f64(tcg_res, tcg_op, fpst); - break; - case 0x3f: /* FRECPX */ - gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); - break; - case 0x7d: /* FRSQRTE */ - gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); - break; - default: - g_assert_not_reached(); - } - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - clear_vec_high(s, !is_scalar, rd); - } else { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - int pass, maxpasses; - - if (is_scalar) { - maxpasses = 1; - } else { - maxpasses = is_q ? 4 : 2; - } - - for (pass = 0; pass < maxpasses; pass++) { - read_vec_element_i32(s, tcg_op, rn, pass, MO_32); - - switch (opcode) { - case 0x3c: /* URECPE */ - gen_helper_recpe_u32(tcg_res, tcg_op); - break; - case 0x3d: /* FRECPE */ - gen_helper_recpe_f32(tcg_res, tcg_op, fpst); - break; - case 0x3f: /* FRECPX */ - gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); - break; - case 0x7d: /* FRSQRTE */ - gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); - break; - default: - g_assert_not_reached(); - } +/* + * [US]CVTF (vector), scalar version. + * Which sounds weird, but really just means input from fp register + * instead of input from general register. Input and output element + * size are always equal. + */ +static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) +{ + TCGv_i64 tcg_int; + int check = fp_access_check_scalar_hsd(s, a->esz); - if (is_scalar) { - write_fp_sreg(s, rd, tcg_res); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } - } - if (!is_scalar) { - clear_vec_high(s, is_q, rd); - } + if (check <= 0) { + return check == 0; } -} -static void handle_2misc_narrow(DisasContext *s, bool scalar, - int opcode, bool u, bool is_q, - int size, int rn, int rd) -{ - /* Handle 2-reg-misc ops which are narrowing (so each 2*size element - * in the source becomes a size element in the destination). - */ - int pass; - TCGv_i32 tcg_res[2]; - int destelt = is_q ? 2 : 0; - int passes = scalar ? 1 : 2; + tcg_int = tcg_temp_new_i64(); + read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); + return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); +} - if (scalar) { - tcg_res[1] = tcg_constant_i32(0); - } +TRANS(SCVTF_f, do_cvtf_f, a, true) +TRANS(UCVTF_f, do_cvtf_f, a, false) - for (pass = 0; pass < passes; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - NeonGenNarrowFn *genfn = NULL; - NeonGenNarrowEnvFn *genenvfn = NULL; +static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, + TCGv_i64 tcg_out, int shift, int rn, + ARMFPRounding rmode) +{ + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_shift, tcg_rmode, tcg_single; - if (scalar) { - read_vec_element(s, tcg_op, rn, pass, size + 1); - } else { - read_vec_element(s, tcg_op, rn, pass, MO_64); - } - tcg_res[pass] = tcg_temp_new_i32(); + tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); + tcg_shift = tcg_constant_i32(shift); + tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); - switch (opcode) { - case 0x12: /* XTN, SQXTUN */ - { - static NeonGenNarrowFn * const xtnfns[3] = { - gen_helper_neon_narrow_u8, - gen_helper_neon_narrow_u16, - tcg_gen_extrl_i64_i32, - }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { - gen_helper_neon_unarrow_sat8, - gen_helper_neon_unarrow_sat16, - gen_helper_neon_unarrow_sat32, - }; - if (u) { - genenvfn = sqxtunfns[size]; - } else { - genfn = xtnfns[size]; - } - break; - } - case 0x14: /* SQXTN, UQXTN */ - { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, - }; - genenvfn = fns[size][u]; + switch (esz) { + case MO_64: + read_vec_element(s, tcg_out, rn, 0, MO_64); + switch (out) { + case MO_64 | MO_SIGN: + gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); break; - } - case 0x16: /* FCVTN, FCVTN2 */ - /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ - if (size == 2) { - gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env); - } else { - TCGv_i32 tcg_lo = tcg_temp_new_i32(); - TCGv_i32 tcg_hi = tcg_temp_new_i32(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - TCGv_i32 ahp = get_ahp_flag(); - - tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); - gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); - gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); - tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); - } + case MO_64: + gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); break; - case 0x36: /* BFCVTN, BFCVTN2 */ - { - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); - } + case MO_32 | MO_SIGN: + gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); break; - case 0x56: /* FCVTXN, FCVTXN2 */ - /* 64 bit to 32 bit float conversion - * with von Neumann rounding (round to odd) - */ - assert(size == 2); - gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env); + case MO_32: + gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); break; default: g_assert_not_reached(); } - - if (genfn) { - genfn(tcg_res[pass], tcg_op); - } else if (genenvfn) { - genenvfn(tcg_res[pass], tcg_env, tcg_op); - } - } - - for (pass = 0; pass < 2; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); - } - clear_vec_high(s, is_q, rd); -} - -/* AdvSIMD scalar two reg misc - * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - */ -static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 12, 5); - int size = extract32(insn, 22, 2); - bool u = extract32(insn, 29, 1); - bool is_fcvt = false; - int rmode; - TCGv_i32 tcg_rmode; - TCGv_ptr tcg_fpstatus; - - switch (opcode) { - case 0x7: /* SQABS / SQNEG */ - break; - case 0xa: /* CMLT */ - if (u) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xb: /* ABS, NEG */ - if (size != 3) { - unallocated_encoding(s); - return; - } 
break; - case 0x12: /* SQXTUN */ - if (!u) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x14: /* SQXTN, UQXTN */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); - return; - case 0xc ... 0xf: - case 0x16 ... 0x1d: - case 0x1f: - /* Floating point: U, size[1] and opcode indicate operation; - * size[0] indicates single or double precision. - */ - opcode |= (extract32(size, 1, 1) << 5) | (u << 6); - size = extract32(size, 0, 1) ? 3 : 2; - switch (opcode) { - case 0x2c: /* FCMGT (zero) */ - case 0x2d: /* FCMEQ (zero) */ - case 0x2e: /* FCMLT (zero) */ - case 0x6c: /* FCMGE (zero) */ - case 0x6d: /* FCMLE (zero) */ - handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); - return; - case 0x1d: /* SCVTF */ - case 0x5d: /* UCVTF */ - { - bool is_signed = (opcode == 0x1d); - if (!fp_access_check(s)) { - return; - } - handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); - return; - } - case 0x3d: /* FRECPE */ - case 0x3f: /* FRECPX */ - case 0x7d: /* FRSQRTE */ - if (!fp_access_check(s)) { - return; - } - handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); - return; - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - is_fcvt = true; - rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + + case MO_32: + tcg_single = read_fp_sreg(s, rn); + switch (out) { + case MO_64 | MO_SIGN: + gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); break; - case 0x1c: /* FCVTAS */ - case 0x5c: /* FCVTAU */ - /* TIEAWAY doesn't fit in the usual rounding mode encoding */ - is_fcvt = true; - rmode = FPROUNDING_TIEAWAY; + case MO_64: + gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); + break; + case MO_32 | MO_SIGN: + gen_helper_vfp_tosls(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); + break; + case MO_32: + gen_helper_vfp_touls(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); break; - case 0x56: /* FCVTXN, FCVTXN2 */ - if (size == 2) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); - return; default: - unallocated_encoding(s); - return; + g_assert_not_reached(); } break; - default: - case 0x3: /* USQADD / SUQADD */ - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - - if (is_fcvt) { - tcg_fpstatus = fpstatus_ptr(FPST_FPCR); - tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); - } else { - tcg_fpstatus = NULL; - tcg_rmode = NULL; - } - - if (size == 3) { - TCGv_i64 tcg_rn = read_fp_dreg(s, rn); - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - - handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); - write_fp_dreg(s, rd, tcg_rd); - } else { - TCGv_i32 tcg_rn = tcg_temp_new_i32(); - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_rn, rn, 0, size); - - switch (opcode) { - case 0x7: /* SQABS, SQNEG */ - { - NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, - { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, - }; - genfn = fns[size][u]; - 
genfn(tcg_rd, tcg_env, tcg_rn); + case MO_16: + tcg_single = read_fp_hreg(s, rn); + switch (out) { + case MO_64 | MO_SIGN: + gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); break; - } - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), - tcg_fpstatus); + case MO_64: + gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); + break; + case MO_32 | MO_SIGN: + gen_helper_vfp_toslh(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); + break; + case MO_32: + gen_helper_vfp_toulh(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); break; - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), - tcg_fpstatus); + case MO_16 | MO_SIGN: + gen_helper_vfp_toshh(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); + break; + case MO_16: + gen_helper_vfp_touhh(tcg_single, tcg_single, + tcg_shift, tcg_fpstatus); + tcg_gen_extu_i32_i64(tcg_out, tcg_single); break; default: g_assert_not_reached(); } + break; - write_fp_sreg(s, rd, tcg_rd); + default: + g_assert_not_reached(); } - if (is_fcvt) { - gen_restore_rmode(tcg_rmode, tcg_fpstatus); - } + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } -/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ -static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) +static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, + ARMFPRounding rmode, bool is_signed) { - int size = 32 - clz32(immh) - 1; - int immhb = immh << 3 | immb; - int shift = 2 * (8 << size) - immhb; - GVecGen2iFn *gvec_fn; + TCGv_i64 tcg_int; + int check = fp_access_check_scalar_hsd(s, a->esz); - if (extract32(immh, 3, 1) && !is_q) { - unallocated_encoding(s); - return; + if (check <= 0) { + return check == 0; } - tcg_debug_assert(size <= 3); - if (!fp_access_check(s)) { - return; - } + tcg_int = cpu_reg(s, a->rd); + do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0), + a->esz, tcg_int, a->shift, a->rn, rmode); - switch (opcode) { - case 0x02: /* SSRA / USRA (accumulate) */ - gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; - break; + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_int, tcg_int); + } + return true; +} - case 0x08: /* SRI */ - gvec_fn = gen_gvec_sri; - break; +TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) +TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) +TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) +TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) +TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) +TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) +TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) +TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) +TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) +TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) - case 0x00: /* SSHR / USHR */ - if (is_u) { - if (shift == 8 << size) { - /* Shift count the same size as element size produces zero. */ - tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), 0); - return; - } - gvec_fn = tcg_gen_gvec_shri; - } else { - /* Shift count the same size as element size produces all sign. 
*/ - if (shift == 8 << size) { - shift -= 1; - } - gvec_fn = tcg_gen_gvec_sari; - } - break; +/* + * FCVT* (vector), scalar version. + * Which sounds weird, but really just means output to fp register + * instead of output to general register. Input and output element + * size are always equal. + */ +static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, + ARMFPRounding rmode, bool is_signed) +{ + TCGv_i64 tcg_int; + int check = fp_access_check_scalar_hsd(s, a->esz); - case 0x04: /* SRSHR / URSHR (rounding) */ - gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; - break; + if (check <= 0) { + return check == 0; + } - case 0x06: /* SRSRA / URSRA (accum + rounding) */ - gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra; - break; + tcg_int = tcg_temp_new_i64(); + do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), + a->esz, tcg_int, a->shift, a->rn, rmode); - default: - g_assert_not_reached(); + if (!s->fpcr_nep) { + clear_vec(s, a->rd); } - - gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); + write_vec_element(s, tcg_int, a->rd, 0, a->esz); + return true; } -/* SHL/SLI - Vector shift left */ -static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, - int immh, int immb, int opcode, int rn, int rd) -{ - int size = 32 - clz32(immh) - 1; - int immhb = immh << 3 | immb; - int shift = immhb - (8 << size); - - /* Range of size is limited by decode: immh is a non-zero 4 bit field */ - assert(size >= 0 && size <= 3); +TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) +TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) +TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) +TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) +TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) +TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) +TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) +TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) +TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) +TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) - if (extract32(immh, 3, 1) && !is_q) { - unallocated_encoding(s); - return; +static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) +{ + if (!dc_isar_feature(aa64_jscvt, s)) { + return false; } + if (fp_access_check(s)) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); - if (!fp_access_check(s)) { - return; - } + gen_helper_fjcvtzs(t, t, fpstatus); - if (insert) { - gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); - } else { - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); + tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); + tcg_gen_extrh_i64_i32(cpu_ZF, t); + tcg_gen_movi_i32(cpu_CF, 0); + tcg_gen_movi_i32(cpu_NF, 0); + tcg_gen_movi_i32(cpu_VF, 0); } + return true; } -/* USHLL/SHLL - Vector shift left with widening */ -static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) +static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) { - int size = 32 - clz32(immh) - 1; - int immhb = immh << 3 | immb; - int shift = immhb - (8 << size); - int dsize = 64; - int esize = 8 << size; - int elements = dsize/esize; - TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - int i; - - if (size >= 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; + if (!dc_isar_feature(aa64_fp16, s)) { + return false; } - - /* For the LL variants the store is larger than the load, - * so if rd == rn we would overwrite parts of our input. 
- * So load everything right now and use shifts in the main loop. - */ - read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); - - for (i = 0; i < elements; i++) { - tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); - ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); - tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); - write_vec_element(s, tcg_rd, rd, i, size + 1); + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(tmp, tcg_rn); + write_fp_dreg(s, a->rd, tmp); } + return true; } -/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ -static void handle_vec_simd_shrn(DisasContext *s, bool is_q, - int immh, int immb, int opcode, int rn, int rd) +static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) { - int immhb = immh << 3 | immb; - int size = 32 - clz32(immh) - 1; - int dsize = 64; - int esize = 8 << size; - int elements = dsize/esize; - int shift = (2 * esize) - immhb; - bool round = extract32(opcode, 0, 1); - TCGv_i64 tcg_rn, tcg_rd, tcg_final; - TCGv_i64 tcg_round; - int i; - - if (extract32(immh, 3, 1)) { - unallocated_encoding(s); - return; + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(tmp, tcg_rn); + write_fp_dreg(s, a->rd, tmp); } + return true; +} - if (!fp_access_check(s)) { - return; +static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + write_fp_dreg(s, a->rd, tcg_rn); } + return true; +} - tcg_rn = tcg_temp_new_i64(); - tcg_rd = tcg_temp_new_i64(); - tcg_final = tcg_temp_new_i64(); - read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64); - - if (round) { - tcg_round = tcg_constant_i64(1ULL << (shift - 1)); - } else { - tcg_round = NULL; +static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); + clear_vec_high(s, true, a->rd); } + return true; +} - for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, size+1); - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - false, true, size+1, shift); - - tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); +static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) +{ + if (!dc_isar_feature(aa64_fp16, s)) { + return false; } - - if (!is_q) { - write_vec_element(s, tcg_final, rd, 0, MO_64); - } else { - write_vec_element(s, tcg_final, rd, 1, MO_64); + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); } - - clear_vec_high(s, is_q, rd); + return true; } - -/* AdvSIMD shift by immediate - * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 - * +---+---+---+-------------+------+------+--------+---+------+------+ - * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | - * +---+---+---+-------------+------+------+--------+---+------+------+ - */ -static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 5); - int immb = extract32(insn, 16, 3); - int immh = extract32(insn, 19, 4); - bool is_u = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - - /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. 
*/ - assert(immh != 0); - - switch (opcode) { - case 0x08: /* SRI */ - if (!is_u) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x00: /* SSHR / USHR */ - case 0x02: /* SSRA / USRA (accumulate) */ - case 0x04: /* SRSHR / URSHR (rounding) */ - case 0x06: /* SRSRA / URSRA (accum + rounding) */ - handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); - break; - case 0x0a: /* SHL / SLI */ - handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); - break; - case 0x10: /* SHRN */ - case 0x11: /* RSHRN / SQRSHRUN */ - if (is_u) { - handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, - opcode, rn, rd); - } else { - handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); - } - break; - case 0x12: /* SQSHRN / UQSHRN */ - case 0x13: /* SQRSHRN / UQRSHRN */ - handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, - opcode, rn, rd); - break; - case 0x14: /* SSHLL / USHLL */ - handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); - break; - case 0x1c: /* SCVTF / UCVTF */ - handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, - opcode, rn, rd); - break; - case 0xc: /* SQSHLU */ - if (!is_u) { - unallocated_encoding(s); - return; - } - handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); - break; - case 0xe: /* SQSHL, UQSHL */ - handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); - break; - case 0x1f: /* FCVTZS/ FCVTZU */ - handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); - return; - default: - unallocated_encoding(s); - return; +static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) +{ + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); } + return true; } -/* Generate code to do a "long" addition or subtraction, ie one done in - * TCGv_i64 on vector lanes twice the width specified by size. - */ -static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, - TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) { - static NeonGenTwo64OpFn * const fns[3][2] = { - { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, - { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, - { tcg_gen_add_i64, tcg_gen_sub_i64 }, - }; - NeonGenTwo64OpFn *genfn; - assert(size < 3); - - genfn = fns[size][is_sub]; - genfn(tcg_res, tcg_op1, tcg_op2); + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); + } + return true; } -static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, - int opcode, int rd, int rn, int rm) +static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) { - /* 3-reg-different widening insns: 64 x 64 -> 128 */ - TCGv_i64 tcg_res[2]; - int pass, accop; + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); + } + return true; +} - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); +typedef struct ENVScalar1 { + NeonGenOneOpEnvFn *gen_bhs[3]; + NeonGenOne64OpEnvFn *gen_d; +} ENVScalar1; - /* Does this op do an adding accumulate, a subtracting accumulate, - * or no accumulate at all? 
- */ - switch (opcode) { - case 5: - case 8: - case 9: - accop = 1; - break; - case 10: - case 11: - accop = -1; - break; - default: - accop = 0; - break; +static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) +{ + if (!fp_access_check(s)) { + return true; } + if (a->esz == MO_64) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + f->gen_d(t, tcg_env, t); + write_fp_dreg(s, a->rd, t); + } else { + TCGv_i32 t = tcg_temp_new_i32(); - if (accop != 0) { - read_vec_element(s, tcg_res[0], rd, 0, MO_64); - read_vec_element(s, tcg_res[1], rd, 1, MO_64); + read_vec_element_i32(s, t, a->rn, 0, a->esz); + f->gen_bhs[a->esz](t, tcg_env, t); + write_fp_sreg(s, a->rd, t); } + return true; +} - /* size == 2 means two 32x32->64 operations; this is worth special - * casing because we can generally handle it inline. - */ - if (size == 2) { - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_passres; - MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); - - int elt = pass + is_q * 2; - - read_vec_element(s, tcg_op1, rn, elt, memop); - read_vec_element(s, tcg_op2, rm, elt, memop); - - if (accop == 0) { - tcg_passres = tcg_res[pass]; - } else { - tcg_passres = tcg_temp_new_i64(); - } - - switch (opcode) { - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); - break; - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); - break; - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - { - TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); - TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); - - tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); - tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); - tcg_gen_movcond_i64(is_u ? 
TCG_COND_GEU : TCG_COND_GE, - tcg_passres, - tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); - break; - } - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ - tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); - break; - case 9: /* SQDMLAL, SQDMLAL2 */ - case 11: /* SQDMLSL, SQDMLSL2 */ - case 13: /* SQDMULL, SQDMULL2 */ - tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, - tcg_passres, tcg_passres); - break; - default: - g_assert_not_reached(); - } +static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) +{ + if (a->esz == MO_64 && !a->q) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + if (a->esz == MO_64) { + TCGv_i64 t = tcg_temp_new_i64(); - if (opcode == 9 || opcode == 11) { - /* saturating accumulate ops */ - if (accop < 0) { - tcg_gen_neg_i64(tcg_passres, tcg_passres); - } - gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, - tcg_res[pass], tcg_passres); - } else if (accop > 0) { - tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); - } else if (accop < 0) { - tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); - } + for (int i = 0; i < 2; ++i) { + read_vec_element(s, t, a->rn, i, MO_64); + f->gen_d(t, tcg_env, t); + write_vec_element(s, t, a->rd, i, MO_64); } } else { - /* size 0 or 1, generally helper functions */ - for (pass = 0; pass < 2; pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i64 tcg_passres; - int elt = pass + is_q * 2; - - read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); - read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); - - if (accop == 0) { - tcg_passres = tcg_res[pass]; - } else { - tcg_passres = tcg_temp_new_i64(); - } - - switch (opcode) { - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - { - TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); - static NeonGenWidenFn * const widenfns[2][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, - }; - NeonGenWidenFn *widenfn = widenfns[size][is_u]; - - widenfn(tcg_op2_64, tcg_op2); - widenfn(tcg_passres, tcg_op1); - gen_neon_addl(size, (opcode == 2), tcg_passres, - tcg_passres, tcg_op2_64); - break; - } - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - if (size == 0) { - if (is_u) { - gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); - } else { - gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); - } - } else { - if (is_u) { - gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); - } else { - gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); - } - } - break; - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ - if (size == 0) { - if (is_u) { - gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); - } else { - gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); - } - } else { - if (is_u) { - gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); - } else { - gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); - } - } - break; - case 9: /* SQDMLAL, SQDMLAL2 */ - case 11: /* SQDMLSL, SQDMLSL2 */ - case 13: /* SQDMULL, SQDMULL2 */ - assert(size == 1); - gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, - tcg_passres, tcg_passres); - break; - 
default: - g_assert_not_reached(); - } + TCGv_i32 t = tcg_temp_new_i32(); + int n = (a->q ? 16 : 8) >> a->esz; - if (accop != 0) { - if (opcode == 9 || opcode == 11) { - /* saturating accumulate ops */ - if (accop < 0) { - gen_helper_neon_negl_u32(tcg_passres, tcg_passres); - } - gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, - tcg_res[pass], - tcg_passres); - } else { - gen_neon_addl(size, (accop < 0), tcg_res[pass], - tcg_res[pass], tcg_passres); - } - } + for (int i = 0; i < n; ++i) { + read_vec_element_i32(s, t, a->rn, i, a->esz); + f->gen_bhs[a->esz](t, tcg_env, t); + write_vec_element_i32(s, t, a->rd, i, a->esz); } } - - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); + clear_vec_high(s, a->q, a->rd); + return true; } -static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, - int opcode, int rd, int rn, int rm) -{ - TCGv_i64 tcg_res[2]; - int part = is_q ? 2 : 0; - int pass; - - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); - static NeonGenWidenFn * const widenfns[3][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, - { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, - }; - NeonGenWidenFn *widenfn = widenfns[size][is_u]; - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); - widenfn(tcg_op2_wide, tcg_op2); - tcg_res[pass] = tcg_temp_new_i64(); - gen_neon_addl(size, (opcode == 3), - tcg_res[pass], tcg_op1, tcg_op2_wide); - } +static const ENVScalar1 f_scalar_sqabs = { + { gen_helper_neon_qabs_s8, + gen_helper_neon_qabs_s16, + gen_helper_neon_qabs_s32 }, + gen_helper_neon_qabs_s64, +}; +TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) +TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) + +static const ENVScalar1 f_scalar_sqneg = { + { gen_helper_neon_qneg_s8, + gen_helper_neon_qneg_s16, + gen_helper_neon_qneg_s32 }, + gen_helper_neon_qneg_s64, +}; +TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) +TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); +static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) +{ + if (fp_access_check(s)) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + f(t, t); + write_fp_dreg(s, a->rd, t); } + return true; } -static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) -{ - tcg_gen_addi_i64(in, in, 1U << 31); - tcg_gen_extrh_i64_i32(res, in); -} +TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) +TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) -static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, - int opcode, int rd, int rn, int rm) +static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) { - TCGv_i32 tcg_res[2]; - int part = is_q ? 
2 : 0; - int pass; - - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_wideres = tcg_temp_new_i64(); - static NeonGenNarrowFn * const narrowfns[3][2] = { - { gen_helper_neon_narrow_high_u8, - gen_helper_neon_narrow_round_high_u8 }, - { gen_helper_neon_narrow_high_u16, - gen_helper_neon_narrow_round_high_u16 }, - { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, - }; - NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); - - tcg_res[pass] = tcg_temp_new_i32(); - gennarrow(tcg_res[pass], tcg_wideres); - } - - for (pass = 0; pass < 2; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); + if (fp_access_check(s)) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); + write_fp_dreg(s, a->rd, t); } - clear_vec_high(s, is_q, rd); + return true; } -/* AdvSIMD three different - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - */ -static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) -{ - /* Instructions in this group fall into three basic classes - * (in each case with the operation working on each element in - * the input vectors): - * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra - * 128 bit input) - * (2) wide 64 x 128 -> 128 - * (3) narrowing 128 x 128 -> 64 - * Here we do initial decode, catch unallocated cases and - * dispatch to separate functions for each class. - */ - int is_q = extract32(insn, 30, 1); - int is_u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 4); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - switch (opcode) { - case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ - case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ - /* 64 x 128 -> 128 */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); - break; - case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ - case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ - /* 128 x 128 -> 64 */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); - break; - case 14: /* PMULL, PMULL2 */ - if (is_u) { - unallocated_encoding(s); - return; - } - switch (size) { - case 0: /* PMULL.P8 */ - if (!fp_access_check(s)) { - return; - } - /* The Q field specifies lo/hi half input for this insn. */ - gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, - gen_helper_neon_pmull_h); - break; +TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) +TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) +TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) +TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) +TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) - case 3: /* PMULL.P64 */ - if (!dc_isar_feature(aa64_pmull, s)) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - /* The Q field specifies lo/hi half input for this insn. 
*/ - gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, - gen_helper_gvec_pmull_q); - break; - - default: - unallocated_encoding(s); - break; - } - return; - case 9: /* SQDMLAL, SQDMLAL2 */ - case 11: /* SQDMLSL, SQDMLSL2 */ - case 13: /* SQDMULL, SQDMULL2 */ - if (is_u || size == 0) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ - /* 64 x 64 -> 128 */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - - handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); - break; - default: - /* opcode 15 not allocated */ - unallocated_encoding(s); - break; +static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, + ArithOneOp * const fn[3]) +{ + if (a->esz == MO_64) { + return false; } -} + if (fp_access_check(s)) { + TCGv_i64 t = tcg_temp_new_i64(); -/* AdvSIMD three same extra - * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ - * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | - * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ - */ -static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 4); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool u = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - bool feature; - int rot; - - switch (u * 16 + opcode) { - case 0x10: /* SQRDMLAH (vector) */ - case 0x11: /* SQRDMLSH (vector) */ - if (size != 1 && size != 2) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_rdm, s); - break; - case 0x02: /* SDOT (vector) */ - case 0x12: /* UDOT (vector) */ - if (size != MO_32) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_dp, s); - break; - case 0x03: /* USDOT */ - if (size != MO_32) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_i8mm, s); - break; - case 0x04: /* SMMLA */ - case 0x14: /* UMMLA */ - case 0x05: /* USMMLA */ - if (!is_q || size != MO_32) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_i8mm, s); - break; - case 0x18: /* FCMLA, #0 */ - case 0x19: /* FCMLA, #90 */ - case 0x1a: /* FCMLA, #180 */ - case 0x1b: /* FCMLA, #270 */ - case 0x1c: /* FCADD, #90 */ - case 0x1e: /* FCADD, #270 */ - if (size == 0 - || (size == 1 && !dc_isar_feature(aa64_fp16, s)) - || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_fcma, s); - break; - case 0x1d: /* BFMMLA */ - if (size != MO_16 || !is_q) { - unallocated_encoding(s); - return; - } - feature = dc_isar_feature(aa64_bf16, s); - break; - case 0x1f: - switch (size) { - case 1: /* BFDOT */ - case 3: /* BFMLAL{B,T} */ - feature = dc_isar_feature(aa64_bf16, s); - break; - default: - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; - } - if (!feature) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; + read_vec_element(s, t, a->rn, 0, a->esz + 1); + fn[a->esz](t, t); + clear_vec(s, a->rd); + write_vec_element(s, t, 
a->rd, 0, a->esz); } + return true; +} - switch (opcode) { - case 0x0: /* SQRDMLAH (vector) */ - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); - return; - - case 0x1: /* SQRDMLSH (vector) */ - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); - return; +#define WRAP_ENV(NAME) \ + static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ + { gen_helper_##NAME(d, tcg_env, n); } - case 0x2: /* SDOT / UDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, - u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); - return; +WRAP_ENV(neon_unarrow_sat8) +WRAP_ENV(neon_unarrow_sat16) +WRAP_ENV(neon_unarrow_sat32) - case 0x3: /* USDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); - return; +static ArithOneOp * const f_scalar_sqxtun[] = { + gen_neon_unarrow_sat8, + gen_neon_unarrow_sat16, + gen_neon_unarrow_sat32, +}; +TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) - case 0x04: /* SMMLA, UMMLA */ - gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, - u ? gen_helper_gvec_ummla_b - : gen_helper_gvec_smmla_b); - return; - case 0x05: /* USMMLA */ - gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); - return; +WRAP_ENV(neon_narrow_sat_s8) +WRAP_ENV(neon_narrow_sat_s16) +WRAP_ENV(neon_narrow_sat_s32) - case 0x8: /* FCMLA, #0 */ - case 0x9: /* FCMLA, #90 */ - case 0xa: /* FCMLA, #180 */ - case 0xb: /* FCMLA, #270 */ - rot = extract32(opcode, 0, 2); - switch (size) { - case 1: - gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, - gen_helper_gvec_fcmlah); - break; - case 2: - gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, - gen_helper_gvec_fcmlas); - break; - case 3: - gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, - gen_helper_gvec_fcmlad); - break; - default: - g_assert_not_reached(); - } - return; +static ArithOneOp * const f_scalar_sqxtn[] = { + gen_neon_narrow_sat_s8, + gen_neon_narrow_sat_s16, + gen_neon_narrow_sat_s32, +}; +TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) - case 0xc: /* FCADD, #90 */ - case 0xe: /* FCADD, #270 */ - rot = extract32(opcode, 1, 1); - switch (size) { - case 1: - gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, - gen_helper_gvec_fcaddh); - break; - case 2: - gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, - gen_helper_gvec_fcadds); - break; - case 3: - gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, - gen_helper_gvec_fcaddd); - break; - default: - g_assert_not_reached(); - } - return; +WRAP_ENV(neon_narrow_sat_u8) +WRAP_ENV(neon_narrow_sat_u16) +WRAP_ENV(neon_narrow_sat_u32) - case 0xd: /* BFMMLA */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); - return; - case 0xf: - switch (size) { - case 1: /* BFDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); - break; - case 3: /* BFMLAL{B,T} */ - gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, - gen_helper_gvec_bfmlal); - break; - default: - g_assert_not_reached(); - } - return; +static ArithOneOp * const f_scalar_uqxtn[] = { + gen_neon_narrow_sat_u8, + gen_neon_narrow_sat_u16, + gen_neon_narrow_sat_u32, +}; +TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) - default: - g_assert_not_reached(); +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) +{ + if (fp_access_check(s)) { + /* + * 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + TCGv_i64 src = read_fp_dreg(s, a->rn); + TCGv_i32 dst = tcg_temp_new_i32(); + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); + write_fp_sreg_merging(s, a->rd, 
a->rd, dst); } + return true; } -static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, - int size, int rn, int rd) -{ - /* Handle 2-reg-misc ops which are widening (so each size element - * in the source becomes a 2*size element in the destination. - * The only instruction like this is FCVTL. - */ - int pass; - - if (size == 3) { - /* 32 -> 64 bit fp conversion */ - TCGv_i64 tcg_res[2]; - int srcelt = is_q ? 2 : 0; - - for (pass = 0; pass < 2; pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - tcg_res[pass] = tcg_temp_new_i64(); +#undef WRAP_ENV - read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); - gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); - } - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - } - } else { - /* 16 -> 32 bit fp conversion */ - int srcelt = is_q ? 4 : 0; - TCGv_i32 tcg_res[4]; - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - TCGv_i32 ahp = get_ahp_flag(); +static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); + } + return true; +} - for (pass = 0; pass < 4; pass++) { - tcg_res[pass] = tcg_temp_new_i32(); +TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) +TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) +TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) +TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) +TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) +TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) +TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) +TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) +TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) +TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) +TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) +TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) +TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) +TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) - read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], - fpst, ahp); - } - for (pass = 0; pass < 4; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); - } +static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) +{ + if (a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); } + return true; } -static void handle_rev(DisasContext *s, int opcode, bool u, - bool is_q, int size, int rn, int rd) -{ - int op = (opcode << 1) | u; - int opsz = op + size; - int grp_size = 3 - opsz; - int dsize = is_q ? 128 : 64; - int i; +TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) +TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) +TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) +TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) +TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) +TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) +TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) - if (opsz >= 3) { - unallocated_encoding(s); - return; +static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, + ArithOneOp * const fn[3]) +{ + if (a->esz == MO_64) { + return false; } + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); - if (!fp_access_check(s)) { - return; + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rn, 1, MO_64); + fn[a->esz](t0, t0); + fn[a->esz](t1, t1); + write_vec_element(s, t0, a->rd, a->q ? 
2 : 0, MO_32); + write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32); + clear_vec_high(s, a->q, a->rd); } + return true; +} - if (size == 0) { - /* Special case bytes, use bswap op on each group of elements */ - int groups = dsize / (8 << grp_size); - - for (i = 0; i < groups; i++) { - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - - read_vec_element(s, tcg_tmp, rn, i, grp_size); - switch (grp_size) { - case MO_16: - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); - break; - case MO_32: - tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); - break; - case MO_64: - tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); - break; - default: - g_assert_not_reached(); - } - write_vec_element(s, tcg_tmp, rd, i, grp_size); - } - clear_vec_high(s, is_q, rd); - } else { - int revmask = (1 << grp_size) - 1; - int esize = 8 << size; - int elements = dsize / esize; - TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd[2]; +static ArithOneOp * const f_scalar_xtn[] = { + gen_helper_neon_narrow_u8, + gen_helper_neon_narrow_u16, + tcg_gen_ext32u_i64, +}; +TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) +TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) +TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) +TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) - for (i = 0; i < 2; i++) { - tcg_rd[i] = tcg_temp_new_i64(); - tcg_gen_movi_i64(tcg_rd[i], 0); - } +static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i32 tcg_lo = tcg_temp_new_i32(); + TCGv_i32 tcg_hi = tcg_temp_new_i32(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); + TCGv_i32 ahp = get_ahp_flag(); - for (i = 0; i < elements; i++) { - int e_rev = (i & 0xf) ^ revmask; - int w = (e_rev * esize) / 64; - int o = (e_rev * esize) % 64; + tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); + gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); + gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); + tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); + tcg_gen_extu_i32_i64(d, tcg_lo); +} - read_vec_element(s, tcg_rn, rn, i, size); - tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); - } +static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); - for (i = 0; i < 2; i++) { - write_vec_element(s, tcg_rd[i], rd, i, MO_64); - } - clear_vec_high(s, true, rd); - } + gen_helper_vfp_fcvtsd(tmp, n, fpst); + tcg_gen_extu_i32_i64(d, tmp); } -static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, - bool is_q, int size, int rn, int rd) +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) { - /* Implement the pairwise operations from 2-misc: - * SADDLP, UADDLP, SADALP, UADALP. - * These all add pairs of elements in the input to produce a - * double-width result element in the output (possibly accumulating). + /* + * 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) */ - bool accum = (opcode == 0x6); - int maxpass = is_q ? 2 : 1; - int pass; - TCGv_i64 tcg_res[2]; - - if (size == 2) { - /* 32 + 32 -> 64 op */ - MemOp memop = size + (u ? 
0 : MO_SIGN); + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); + tcg_gen_extu_i32_i64(d, tmp); +} - for (pass = 0; pass < maxpass; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); +static ArithOneOp * const f_vector_fcvtn[] = { + NULL, + gen_fcvtn_hs, + gen_fcvtn_sd, +}; +static ArithOneOp * const f_scalar_fcvtxn[] = { + NULL, + NULL, + gen_fcvtxn_sd, +}; +TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) +TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) - tcg_res[pass] = tcg_temp_new_i64(); +static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_ptr fpst = fpstatus_ptr(FPST_A64); + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_bfcvt_pair(tmp, n, fpst); + tcg_gen_extu_i32_i64(d, tmp); +} - read_vec_element(s, tcg_op1, rn, pass * 2, memop); - read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); - tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); - if (accum) { - read_vec_element(s, tcg_op1, rd, pass, MO_64); - tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - } - } else { - for (pass = 0; pass < maxpass; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - NeonGenOne64OpFn *genfn; - static NeonGenOne64OpFn * const fns[2][2] = { - { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, - { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, - }; +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_bfcvt_pair(tmp, n, fpst); + tcg_gen_extu_i32_i64(d, tmp); +} - genfn = fns[size][u]; +static ArithOneOp * const f_vector_bfcvtn[2][3] = { + { + NULL, + gen_bfcvtn_hs, + NULL, + }, { + NULL, + gen_bfcvtn_ah_hs, + NULL, + } +}; +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, + f_vector_bfcvtn[s->fpcr_ah]) - tcg_res[pass] = tcg_temp_new_i64(); +static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) +{ + static NeonGenWidenFn * const widenfns[3] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + NeonGenWidenFn *widenfn; + TCGv_i64 tcg_res[2]; + TCGv_i32 tcg_op; + int part, pass; - read_vec_element(s, tcg_op, rn, pass, MO_64); - genfn(tcg_res[pass], tcg_op); - - if (accum) { - read_vec_element(s, tcg_op, rd, pass, MO_64); - if (size == 0) { - gen_helper_neon_addl_u16(tcg_res[pass], - tcg_res[pass], tcg_op); - } else { - gen_helper_neon_addl_u32(tcg_res[pass], - tcg_res[pass], tcg_op); - } - } - } - } - if (!is_q) { - tcg_res[1] = tcg_constant_i64(0); + if (a->esz == MO_64) { + return false; } - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + if (!fp_access_check(s)) { + return true; } -} -static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) -{ - /* Implement SHLL and SHLL2 */ - int pass; - int part = is_q ? 2 : 0; - TCGv_i64 tcg_res[2]; + tcg_op = tcg_temp_new_i32(); + widenfn = widenfns[a->esz]; + part = a->q ? 
2 : 0; for (pass = 0; pass < 2; pass++) { - static NeonGenWidenFn * const widenfns[3] = { - gen_helper_neon_widen_u8, - gen_helper_neon_widen_u16, - tcg_gen_extu_i32_i64, - }; - NeonGenWidenFn *widenfn = widenfns[size]; - TCGv_i32 tcg_op = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); + read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); tcg_res[pass] = tcg_temp_new_i64(); widenfn(tcg_res[pass], tcg_op); - tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); + tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); } for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); } + return true; } -/* AdvSIMD two reg misc - * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+-----------+--------+-----+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | - * +---+---+---+-----------+------+-----------+--------+-----+------+------+ - */ -static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - bool u = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool need_fpstatus = false; - int rmode = -1; - TCGv_i32 tcg_rmode; - TCGv_ptr tcg_fpstatus; - - switch (opcode) { - case 0x0: /* REV64, REV32 */ - case 0x1: /* REV16 */ - handle_rev(s, opcode, u, is_q, size, rn, rd); - return; - case 0x5: /* CNT, NOT, RBIT */ - if (u && size == 0) { - /* NOT */ - break; - } else if (u && size == 1) { - /* RBIT */ - break; - } else if (!u && size == 0) { - /* CNT */ - break; - } - unallocated_encoding(s); - return; - case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ - case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - - handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); - return; - case 0x4: /* CLS, CLZ */ - if (size == 3) { - unallocated_encoding(s); - return; - } - break; - case 0x2: /* SADDLP, UADDLP */ - case 0x6: /* SADALP, UADALP */ - if (size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); - return; - case 0x13: /* SHLL, SHLL2 */ - if (u == 0 || size == 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_shll(s, is_q, size, rn, rd); - return; - case 0xa: /* CMLT */ - if (u == 1) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xb: /* ABS, NEG */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x7: /* SQABS, SQNEG */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0xc ... 0xf: - case 0x16 ... 0x1f: - { - /* Floating point: U, size[1] and opcode indicate operation; - * size[0] indicates single or double precision. - */ - int is_double = extract32(size, 0, 1); - opcode |= (extract32(size, 1, 1) << 5) | (u << 6); - size = is_double ? 3 : 2; - switch (opcode) { - case 0x2f: /* FABS */ - case 0x6f: /* FNEG */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x1d: /* SCVTF */ - case 0x5d: /* UCVTF */ - { - bool is_signed = (opcode == 0x1d) ? true : false; - int elements = is_double ? 2 : is_q ? 
4 : 2; - if (is_double && !is_q) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); - return; - } - case 0x2c: /* FCMGT (zero) */ - case 0x2d: /* FCMEQ (zero) */ - case 0x2e: /* FCMLT (zero) */ - case 0x6c: /* FCMGE (zero) */ - case 0x6d: /* FCMLE (zero) */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); - return; - case 0x7f: /* FSQRT */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - need_fpstatus = true; - rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x5c: /* FCVTAU */ - case 0x1c: /* FCVTAS */ - need_fpstatus = true; - rmode = FPROUNDING_TIEAWAY; - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x3c: /* URECPE */ - if (size == 3) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x3d: /* FRECPE */ - case 0x7d: /* FRSQRTE */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); - return; - case 0x56: /* FCVTXN, FCVTXN2 */ - if (size == 2) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x16: /* FCVTN, FCVTN2 */ - /* handle_2misc_narrow does a 2*size -> size operation, but these - * instructions encode the source size rather than dest size. 
- */ - if (!fp_access_check(s)) { - return; - } - handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); - return; - case 0x36: /* BFCVTN, BFCVTN2 */ - if (!dc_isar_feature(aa64_bf16, s) || size != 2) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); - return; - case 0x17: /* FCVTL, FCVTL2 */ - if (!fp_access_check(s)) { - return; - } - handle_2misc_widening(s, opcode, is_q, size, rn, rd); - return; - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ - rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); - /* fall through */ - case 0x59: /* FRINTX */ - case 0x79: /* FRINTI */ - need_fpstatus = true; - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x58: /* FRINTA */ - rmode = FPROUNDING_TIEAWAY; - need_fpstatus = true; - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - case 0x7c: /* URSQRTE */ - if (size == 3) { - unallocated_encoding(s); - return; - } - break; - case 0x1e: /* FRINT32Z */ - case 0x1f: /* FRINT64Z */ - rmode = FPROUNDING_ZERO; - /* fall through */ - case 0x5e: /* FRINT32X */ - case 0x5f: /* FRINT64X */ - need_fpstatus = true; - if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; - } - break; - } - default: - case 0x3: /* SUQADD, USQADD */ - unallocated_encoding(s); - return; - } +static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) +{ + int check = fp_access_check_vector_hsd(s, a->q, a->esz); - if (!fp_access_check(s)) { - return; + if (check <= 0) { + return check == 0; } - if (need_fpstatus || rmode >= 0) { - tcg_fpstatus = fpstatus_ptr(FPST_FPCR); - } else { - tcg_fpstatus = NULL; - } - if (rmode >= 0) { - tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); - } else { - tcg_rmode = NULL; - } + gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); + return true; +} - switch (opcode) { - case 0x5: - if (u && size == 0) { /* NOT */ - gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); - return; - } - break; - case 0x8: /* CMGT, CMGE */ - if (u) { - gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); - } else { - gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); - } - return; - case 0x9: /* CMEQ, CMLE */ - if (u) { - gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); - } else { - gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); - } - return; - case 0xa: /* CMLT */ - gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); - return; - case 0xb: - if (u) { /* ABS, NEG */ - gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); - } else { - gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); - } - return; - } +TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) +TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) - if (size == 3) { - /* All 64-bit element operations can be shared with scalar 2misc */ - int pass; +static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, + const FPScalar1 *f, int rmode) +{ + TCGv_i32 tcg_rmode = NULL; + TCGv_ptr fpst; + int check = fp_access_check_vector_hsd(s, a->q, a->esz); - /* Coverity claims (size == 3 && !is_q) has been eliminated - * from all paths leading to here. 
- */ - tcg_debug_assert(is_q); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); + if (check <= 0) { + return check == 0; + } - read_vec_element(s, tcg_op, rn, pass, MO_64); + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); + if (rmode >= 0) { + tcg_rmode = gen_set_rmode(rmode, fpst); + } - handle_2misc_64(s, opcode, u, tcg_res, tcg_op, - tcg_rmode, tcg_fpstatus); + if (a->esz == MO_64) { + TCGv_i64 t64 = tcg_temp_new_i64(); - write_vec_element(s, tcg_res, rd, pass, MO_64); + for (int pass = 0; pass < 2; ++pass) { + read_vec_element(s, t64, a->rn, pass, MO_64); + f->gen_d(t64, t64, fpst); + write_vec_element(s, t64, a->rd, pass, MO_64); } } else { - int pass; - - for (pass = 0; pass < (is_q ? 4 : 2); pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op, rn, pass, MO_32); - - if (size == 2) { - /* Special cases for 32 bit elements */ - switch (opcode) { - case 0x4: /* CLS */ - if (u) { - tcg_gen_clzi_i32(tcg_res, tcg_op, 32); - } else { - tcg_gen_clrsb_i32(tcg_res, tcg_op); - } - break; - case 0x7: /* SQABS, SQNEG */ - if (u) { - gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); - } else { - gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); - } - break; - case 0x2f: /* FABS */ - gen_vfp_abss(tcg_res, tcg_op); - break; - case 0x6f: /* FNEG */ - gen_vfp_negs(tcg_res, tcg_op); - break; - case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); - break; - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - gen_helper_vfp_tosls(tcg_res, tcg_op, - tcg_constant_i32(0), tcg_fpstatus); - break; - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - gen_helper_vfp_touls(tcg_res, tcg_op, - tcg_constant_i32(0), tcg_fpstatus); - break; - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ - case 0x58: /* FRINTA */ - case 0x79: /* FRINTI */ - gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x59: /* FRINTX */ - gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x7c: /* URSQRTE */ - gen_helper_rsqrte_u32(tcg_res, tcg_op); - break; - case 0x1e: /* FRINT32Z */ - case 0x5e: /* FRINT32X */ - gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x1f: /* FRINT64Z */ - case 0x5f: /* FRINT64X */ - gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); - break; - default: - g_assert_not_reached(); - } - } else { - /* Use helpers for 8 and 16 bit elements */ - switch (opcode) { - case 0x5: /* CNT, RBIT */ - /* For these two insns size is part of the opcode specifier - * (handled earlier); they always operate on byte elements. 
- */ - if (u) { - gen_helper_neon_rbit_u8(tcg_res, tcg_op); - } else { - gen_helper_neon_cnt_u8(tcg_res, tcg_op); - } - break; - case 0x7: /* SQABS, SQNEG */ - { - NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, - }; - genfn = fns[size][u]; - genfn(tcg_res, tcg_env, tcg_op); - break; - } - case 0x4: /* CLS, CLZ */ - if (u) { - if (size == 0) { - gen_helper_neon_clz_u8(tcg_res, tcg_op); - } else { - gen_helper_neon_clz_u16(tcg_res, tcg_op); - } - } else { - if (size == 0) { - gen_helper_neon_cls_s8(tcg_res, tcg_op); - } else { - gen_helper_neon_cls_s16(tcg_res, tcg_op); - } - } - break; - default: - g_assert_not_reached(); - } - } + TCGv_i32 t32 = tcg_temp_new_i32(); + void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) + = (a->esz == MO_16 ? f->gen_h : f->gen_s); - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { + read_vec_element_i32(s, t32, a->rn, pass, a->esz); + gen(t32, t32, fpst); + write_vec_element_i32(s, t32, a->rd, pass, a->esz); } } - clear_vec_high(s, is_q, rd); + clear_vec_high(s, a->q, a->rd); - if (tcg_rmode) { - gen_restore_rmode(tcg_rmode, tcg_fpstatus); + if (rmode >= 0) { + gen_restore_rmode(tcg_rmode, fpst); } + return true; } -/* AdvSIMD [scalar] two register miscellaneous (FP16) - * - * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ - * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | - * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ - * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 - * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 - * - * This actually covers two groups where scalar access is governed by - * bit 28. A bunch of the instructions (float to integral) only exist - * in the vector form and are un-allocated for the scalar decode. Also - * in the scalar decode Q is always 1. 
- */ -static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) -{ - int fpop, opcode, a, u; - int rn, rd; - bool is_q; - bool is_scalar; - bool only_in_vector = false; +TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) - int pass; - TCGv_i32 tcg_rmode = NULL; - TCGv_ptr tcg_fpstatus = NULL; - bool need_fpst = true; - int rmode = -1; - - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } +TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) +TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) +TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) +TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) +TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) +TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) +TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) - rd = extract32(insn, 0, 5); - rn = extract32(insn, 5, 5); +TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, + &f_scalar_frint32, FPROUNDING_ZERO) +TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) +TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, + &f_scalar_frint64, FPROUNDING_ZERO) +TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) - a = extract32(insn, 23, 1); - u = extract32(insn, 29, 1); - is_scalar = extract32(insn, 28, 1); - is_q = extract32(insn, 30, 1); +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, + bool is_q, int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3], + ARMFPStatusFlavour fpsttype) +{ + int check = fp_access_check_vector_hsd(s, is_q, esz); + TCGv_ptr fpst; - opcode = extract32(insn, 12, 5); - fpop = deposit32(opcode, 5, 1, a); - fpop = deposit32(fpop, 6, 1, u); + if (check <= 0) { + return check == 0; + } - switch (fpop) { - case 0x1d: /* SCVTF */ - case 0x5d: /* UCVTF */ - { - int elements; + fpst = fpstatus_ptr(fpsttype); + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), fpst, + is_q ? 16 : 8, vec_full_reg_size(s), + data, fns[esz - 1]); + return true; +} - if (is_scalar) { - elements = 1; - } else { - elements = (is_q ? 8 : 4); - } +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, + int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3]) +{ + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, + esz == MO_16 ? 
FPST_A64_F16 : + FPST_A64); +} - if (!fp_access_check(s)) { - return; - } - handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); - return; - } - break; - case 0x2c: /* FCMGT (zero) */ - case 0x2d: /* FCMEQ (zero) */ - case 0x2e: /* FCMLT (zero) */ - case 0x6c: /* FCMGE (zero) */ - case 0x6d: /* FCMLE (zero) */ - handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); - return; - case 0x3d: /* FRECPE */ - case 0x3f: /* FRECPX */ - break; - case 0x18: /* FRINTN */ - only_in_vector = true; - rmode = FPROUNDING_TIEEVEN; - break; - case 0x19: /* FRINTM */ - only_in_vector = true; - rmode = FPROUNDING_NEGINF; - break; - case 0x38: /* FRINTP */ - only_in_vector = true; - rmode = FPROUNDING_POSINF; - break; - case 0x39: /* FRINTZ */ - only_in_vector = true; - rmode = FPROUNDING_ZERO; - break; - case 0x58: /* FRINTA */ - only_in_vector = true; - rmode = FPROUNDING_TIEAWAY; - break; - case 0x59: /* FRINTX */ - case 0x79: /* FRINTI */ - only_in_vector = true; - /* current rounding mode */ - break; - case 0x1a: /* FCVTNS */ - rmode = FPROUNDING_TIEEVEN; - break; - case 0x1b: /* FCVTMS */ - rmode = FPROUNDING_NEGINF; - break; - case 0x1c: /* FCVTAS */ - rmode = FPROUNDING_TIEAWAY; - break; - case 0x3a: /* FCVTPS */ - rmode = FPROUNDING_POSINF; - break; - case 0x3b: /* FCVTZS */ - rmode = FPROUNDING_ZERO; - break; - case 0x5a: /* FCVTNU */ - rmode = FPROUNDING_TIEEVEN; - break; - case 0x5b: /* FCVTMU */ - rmode = FPROUNDING_NEGINF; - break; - case 0x5c: /* FCVTAU */ - rmode = FPROUNDING_TIEAWAY; - break; - case 0x7a: /* FCVTPU */ - rmode = FPROUNDING_POSINF; - break; - case 0x7b: /* FCVTZU */ - rmode = FPROUNDING_ZERO; - break; - case 0x2f: /* FABS */ - case 0x6f: /* FNEG */ - need_fpst = false; - break; - case 0x7d: /* FRSQRTE */ - case 0x7f: /* FSQRT (vector) */ - break; - default: - unallocated_encoding(s); - return; - } +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, + int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3]) +{ + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, + fns, select_ah_fpst(s, esz)); +} +static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { + gen_helper_gvec_vcvt_sh, + gen_helper_gvec_vcvt_sf, + gen_helper_gvec_vcvt_sd, +}; +TRANS(SCVTF_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) +TRANS(SCVTF_vf, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) + +static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { + gen_helper_gvec_vcvt_uh, + gen_helper_gvec_vcvt_uf, + gen_helper_gvec_vcvt_ud, +}; +TRANS(UCVTF_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) +TRANS(UCVTF_vf, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) + +static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { + gen_helper_gvec_vcvt_rz_hs, + gen_helper_gvec_vcvt_rz_fs, + gen_helper_gvec_vcvt_rz_ds, +}; +TRANS(FCVTZS_vf, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) - /* Check additional constraints for the scalar encoding */ - if (is_scalar) { - if (!is_q) { - unallocated_encoding(s); - return; - } - /* FRINTxx is only in the vector form */ - if (only_in_vector) { - unallocated_encoding(s); - return; - } - } +static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { + gen_helper_gvec_vcvt_rz_hu, + gen_helper_gvec_vcvt_rz_fu, + gen_helper_gvec_vcvt_rz_du, +}; +TRANS(FCVTZU_vf, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) - if (!fp_access_check(s)) { - return; - } +static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { + 
gen_helper_gvec_vcvt_rm_sh, + gen_helper_gvec_vcvt_rm_ss, + gen_helper_gvec_vcvt_rm_sd, +}; - if (rmode >= 0 || need_fpst) { - tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); - } +static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { + gen_helper_gvec_vcvt_rm_uh, + gen_helper_gvec_vcvt_rm_us, + gen_helper_gvec_vcvt_rm_ud, +}; - if (rmode >= 0) { - tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); - } +TRANS(FCVTNS_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) +TRANS(FCVTNU_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) +TRANS(FCVTPS_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) +TRANS(FCVTPU_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) +TRANS(FCVTMS_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) +TRANS(FCVTMU_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) +TRANS(FCVTZS_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) +TRANS(FCVTZU_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) +TRANS(FCVTAS_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) +TRANS(FCVTAU_vi, do_gvec_op2_fpst, + a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) + +static gen_helper_gvec_2_ptr * const f_fceq0[] = { + gen_helper_gvec_fceq0_h, + gen_helper_gvec_fceq0_s, + gen_helper_gvec_fceq0_d, +}; +TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) - if (is_scalar) { - TCGv_i32 tcg_op = read_fp_hreg(s, rn); - TCGv_i32 tcg_res = tcg_temp_new_i32(); +static gen_helper_gvec_2_ptr * const f_fcgt0[] = { + gen_helper_gvec_fcgt0_h, + gen_helper_gvec_fcgt0_s, + gen_helper_gvec_fcgt0_d, +}; +TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) - switch (fpop) { - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x3d: /* FRECPE */ - gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x3f: /* FRECPX */ - gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x6f: /* FNEG */ - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); - break; - case 0x7d: /* FRSQRTE */ - gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - default: - g_assert_not_reached(); - } +static gen_helper_gvec_2_ptr * const f_fcge0[] = { + gen_helper_gvec_fcge0_h, + gen_helper_gvec_fcge0_s, + gen_helper_gvec_fcge0_d, +}; +TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) - /* limit any sign extension going on */ - tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); - write_fp_sreg(s, rd, tcg_res); - } else { - for (pass = 0; pass < (is_q ? 
8 : 4); pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op, rn, pass, MO_16); - - switch (fpop) { - case 0x1a: /* FCVTNS */ - case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x3a: /* FCVTPS */ - case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x3d: /* FRECPE */ - gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x5a: /* FCVTNU */ - case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x7a: /* FCVTPU */ - case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ - case 0x58: /* FRINTA */ - case 0x79: /* FRINTI */ - gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x59: /* FRINTX */ - gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x2f: /* FABS */ - tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); - break; - case 0x6f: /* FNEG */ - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); - break; - case 0x7d: /* FRSQRTE */ - gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - case 0x7f: /* FSQRT */ - gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); - break; - default: - g_assert_not_reached(); - } +static gen_helper_gvec_2_ptr * const f_fclt0[] = { + gen_helper_gvec_fclt0_h, + gen_helper_gvec_fclt0_s, + gen_helper_gvec_fclt0_d, +}; +TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) - write_vec_element_i32(s, tcg_res, rd, pass, MO_16); - } +static gen_helper_gvec_2_ptr * const f_fcle0[] = { + gen_helper_gvec_fcle0_h, + gen_helper_gvec_fcle0_s, + gen_helper_gvec_fcle0_d, +}; +TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) - clear_vec_high(s, is_q, rd); - } +static gen_helper_gvec_2_ptr * const f_frecpe[] = { + gen_helper_gvec_frecpe_h, + gen_helper_gvec_frecpe_s, + gen_helper_gvec_frecpe_d, +}; +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { + gen_helper_gvec_frecpe_h, + gen_helper_gvec_frecpe_rpres_s, + gen_helper_gvec_frecpe_d, +}; +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) - if (tcg_rmode) { - gen_restore_rmode(tcg_rmode, tcg_fpstatus); - } -} +static gen_helper_gvec_2_ptr * const f_frsqrte[] = { + gen_helper_gvec_frsqrte_h, + gen_helper_gvec_frsqrte_s, + gen_helper_gvec_frsqrte_d, +}; +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { + gen_helper_gvec_frsqrte_h, + gen_helper_gvec_frsqrte_rpres_s, + gen_helper_gvec_frsqrte_d, +}; +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
f_frsqrte_rpres : f_frsqrte) -/* AdvSIMD scalar x indexed element - * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 - * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ - * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | - * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ - * AdvSIMD vector x indexed element - * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ - * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | - * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ - */ -static void disas_simd_indexed(DisasContext *s, uint32_t insn) -{ - /* This encoding has two kinds of instruction: - * normal, where we perform elt x idxelt => elt for each - * element in the vector - * long, where we perform elt x idxelt and generate a result of - * double the width of the input element - * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). +static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) +{ + /* Handle 2-reg-misc ops which are widening (so each size element + * in the source becomes a 2*size element in the destination. + * The only instruction like this is FCVTL. */ - bool is_scalar = extract32(insn, 28, 1); - bool is_q = extract32(insn, 30, 1); - bool u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int l = extract32(insn, 21, 1); - int m = extract32(insn, 20, 1); - /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ - int rm = extract32(insn, 16, 4); - int opcode = extract32(insn, 12, 4); - int h = extract32(insn, 11, 1); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool is_long = false; - int is_fp = 0; - bool is_fp16 = false; - int index; + int pass; TCGv_ptr fpst; - switch (16 * u + opcode) { - case 0x02: /* SMLAL, SMLAL2 */ - case 0x12: /* UMLAL, UMLAL2 */ - case 0x06: /* SMLSL, SMLSL2 */ - case 0x16: /* UMLSL, UMLSL2 */ - case 0x0a: /* SMULL, SMULL2 */ - case 0x1a: /* UMULL, UMULL2 */ - if (is_scalar) { - unallocated_encoding(s); - return; - } - is_long = true; - break; - case 0x03: /* SQDMLAL, SQDMLAL2 */ - case 0x07: /* SQDMLSL, SQDMLSL2 */ - case 0x0b: /* SQDMULL, SQDMULL2 */ - is_long = true; - break; - case 0x1d: /* SQRDMLAH */ - case 0x1f: /* SQRDMLSH */ - if (!dc_isar_feature(aa64_rdm, s)) { - unallocated_encoding(s); - return; - } - break; - case 0x0e: /* SDOT */ - case 0x1e: /* UDOT */ - if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { - unallocated_encoding(s); - return; - } - break; - case 0x0f: - switch (size) { - case 0: /* SUDOT */ - case 2: /* USDOT */ - if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { - unallocated_encoding(s); - return; - } - size = MO_32; - break; - case 1: /* BFDOT */ - if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { - unallocated_encoding(s); - return; - } - size = MO_32; - break; - case 3: /* BFMLAL{B,T} */ - if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { - unallocated_encoding(s); - return; - } - /* can't set is_fp without other incorrect size checks */ - size = MO_16; - break; - default: - unallocated_encoding(s); - return; - } - break; - case 0x11: /* FCMLA #0 */ - case 0x13: /* FCMLA #90 */ - case 0x15: /* FCMLA #180 */ - case 0x17: /* FCMLA #270 */ - if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { - unallocated_encoding(s); - return; - } - is_fp = 2; - break; - default: - case 0x00: /* FMLAL */ - case 0x01: /* FMLA */ - case 0x04: 
/* FMLSL */ - case 0x05: /* FMLS */ - case 0x08: /* MUL */ - case 0x09: /* FMUL */ - case 0x0c: /* SQDMULH */ - case 0x0d: /* SQRDMULH */ - case 0x10: /* MLA */ - case 0x14: /* MLS */ - case 0x18: /* FMLAL2 */ - case 0x19: /* FMULX */ - case 0x1c: /* FMLSL2 */ - unallocated_encoding(s); - return; - } - - switch (is_fp) { - case 1: /* normal fp */ - unallocated_encoding(s); /* in decodetree */ - return; - - case 2: /* complex fp */ - /* Each indexable element is a complex pair. */ - size += 1; - switch (size) { - case MO_32: - if (h && !is_q) { - unallocated_encoding(s); - return; - } - is_fp16 = true; - break; - case MO_64: - break; - default: - unallocated_encoding(s); - return; - } - break; - - default: /* integer */ - switch (size) { - case MO_8: - case MO_64: - unallocated_encoding(s); - return; - } - break; - } - if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - - /* Given MemOp size, adjust register and indexing. */ - switch (size) { - case MO_16: - index = h << 2 | l << 1 | m; - break; - case MO_32: - index = h << 1 | l; - rm |= m << 4; - break; - case MO_64: - if (l || !is_q) { - unallocated_encoding(s); - return; - } - index = h; - rm |= m << 4; - break; - default: - g_assert_not_reached(); - } - if (!fp_access_check(s)) { - return; - } - - if (is_fp) { - fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); - } else { - fpst = NULL; + return true; } - switch (16 * u + opcode) { - case 0x0e: /* SDOT */ - case 0x1e: /* UDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, - u ? gen_helper_gvec_udot_idx_b - : gen_helper_gvec_sdot_idx_b); - return; - case 0x0f: - switch (extract32(insn, 22, 2)) { - case 0: /* SUDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, - gen_helper_gvec_sudot_idx_b); - return; - case 1: /* BFDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, - gen_helper_gvec_bfdot_idx); - return; - case 2: /* USDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, - gen_helper_gvec_usdot_idx_b); - return; - case 3: /* BFMLAL{B,T} */ - gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, - gen_helper_gvec_bfmlal_idx); - return; - } - g_assert_not_reached(); - case 0x11: /* FCMLA #0 */ - case 0x13: /* FCMLA #90 */ - case 0x15: /* FCMLA #180 */ - case 0x17: /* FCMLA #270 */ - { - int rot = extract32(insn, 13, 2); - int data = (index << 2) | rot; - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, rd), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, - size == MO_64 - ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - } - return; - } + if (a->esz == MO_64) { + /* 32 -> 64 bit fp conversion */ + TCGv_i64 tcg_res[2]; + TCGv_i32 tcg_op = tcg_temp_new_i32(); + int srcelt = a->q ? 2 : 0; - if (size == 3) { - g_assert_not_reached(); - } else if (!is_long) { - /* 32 bit floating point, or 16 or 32 bit integer. - * For the 16 bit scalar case we use the usual Neon helpers and - * rely on the fact that 0 op 0 == 0 with no side effects. - */ - TCGv_i32 tcg_idx = tcg_temp_new_i32(); - int pass, maxpasses; + fpst = fpstatus_ptr(FPST_A64); - if (is_scalar) { - maxpasses = 1; - } else { - maxpasses = is_q ? 
4 : 2; + for (pass = 0; pass < 2; pass++) { + tcg_res[pass] = tcg_temp_new_i64(); + read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); + gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst); } - - read_vec_element_i32(s, tcg_idx, rm, index, size); - - if (size == 1 && !is_scalar) { - /* The simplest way to handle the 16x16 indexed ops is to duplicate - * the index into both halves of the 32 bit tcg_idx and then use - * the usual Neon helpers. - */ - tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); - } - - for (pass = 0; pass < maxpasses; pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); - - switch (16 * u + opcode) { - case 0x10: /* MLA */ - case 0x14: /* MLS */ - { - static NeonGenTwoOpFn * const fns[2][2] = { - { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, - { tcg_gen_add_i32, tcg_gen_sub_i32 }, - }; - NeonGenTwoOpFn *genfn; - bool is_sub = opcode == 0x4; - - if (size == 1) { - gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); - } else { - tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); - } - if (opcode == 0x8) { - break; - } - read_vec_element_i32(s, tcg_op, rd, pass, MO_32); - genfn = fns[size - 1][is_sub]; - genfn(tcg_res, tcg_op, tcg_res); - break; - } - case 0x0c: /* SQDMULH */ - if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, - tcg_op, tcg_idx); - } else { - gen_helper_neon_qdmulh_s32(tcg_res, tcg_env, - tcg_op, tcg_idx); - } - break; - case 0x0d: /* SQRDMULH */ - if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env, - tcg_op, tcg_idx); - } else { - gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env, - tcg_op, tcg_idx); - } - break; - case 0x1d: /* SQRDMLAH */ - read_vec_element_i32(s, tcg_res, rd, pass, - is_scalar ? size : MO_32); - if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env, - tcg_op, tcg_idx, tcg_res); - } else { - gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env, - tcg_op, tcg_idx, tcg_res); - } - break; - case 0x1f: /* SQRDMLSH */ - read_vec_element_i32(s, tcg_res, rd, pass, - is_scalar ? size : MO_32); - if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env, - tcg_op, tcg_idx, tcg_res); - } else { - gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env, - tcg_op, tcg_idx, tcg_res); - } - break; - default: - case 0x01: /* FMLA */ - case 0x05: /* FMLS */ - case 0x09: /* FMUL */ - case 0x19: /* FMULX */ - g_assert_not_reached(); - } - - if (is_scalar) { - write_fp_sreg(s, rd, tcg_res); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); } - - clear_vec_high(s, is_q, rd); } else { - /* long ops: 16x16->32 or 32x32->64 */ - TCGv_i64 tcg_res[2]; - int pass; - bool satop = extract32(opcode, 0, 1); - MemOp memop = MO_32; - - if (satop || !u) { - memop |= MO_SIGN; - } - - if (size == 2) { - TCGv_i64 tcg_idx = tcg_temp_new_i64(); - - read_vec_element(s, tcg_idx, rm, index, memop); - - for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_passres; - int passelt; - - if (is_scalar) { - passelt = 0; - } else { - passelt = pass + (is_q * 2); - } - - read_vec_element(s, tcg_op, rn, passelt, memop); - - tcg_res[pass] = tcg_temp_new_i64(); - - if (opcode == 0xa || opcode == 0xb) { - /* Non-accumulating ops */ - tcg_passres = tcg_res[pass]; - } else { - tcg_passres = tcg_temp_new_i64(); - } - - tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); - - if (satop) { - /* saturating, doubling */ - gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, - tcg_passres, tcg_passres); - } - - if (opcode == 0xa || opcode == 0xb) { - continue; - } - - /* Accumulating op: handle accumulate step */ - read_vec_element(s, tcg_res[pass], rd, pass, MO_64); - - switch (opcode) { - case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); - break; - case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); - break; - case 0x7: /* SQDMLSL, SQDMLSL2 */ - tcg_gen_neg_i64(tcg_passres, tcg_passres); - /* fall through */ - case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, - tcg_res[pass], - tcg_passres); - break; - default: - g_assert_not_reached(); - } - } - - clear_vec_high(s, !is_scalar, rd); - } else { - TCGv_i32 tcg_idx = tcg_temp_new_i32(); - - assert(size == 1); - read_vec_element_i32(s, tcg_idx, rm, index, size); - - if (!is_scalar) { - /* The simplest way to handle the 16x16 indexed ops is to - * duplicate the index into both halves of the 32 bit tcg_idx - * and then use the usual Neon helpers. - */ - tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); - } - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i64 tcg_passres; - - if (is_scalar) { - read_vec_element_i32(s, tcg_op, rn, pass, size); - } else { - read_vec_element_i32(s, tcg_op, rn, - pass + (is_q * 2), MO_32); - } - - tcg_res[pass] = tcg_temp_new_i64(); - - if (opcode == 0xa || opcode == 0xb) { - /* Non-accumulating ops */ - tcg_passres = tcg_res[pass]; - } else { - tcg_passres = tcg_temp_new_i64(); - } - - if (memop & MO_SIGN) { - gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); - } else { - gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); - } - if (satop) { - gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, - tcg_passres, tcg_passres); - } - - if (opcode == 0xa || opcode == 0xb) { - continue; - } - - /* Accumulating op: handle accumulate step */ - read_vec_element(s, tcg_res[pass], rd, pass, MO_64); - - switch (opcode) { - case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], - tcg_passres); - break; - case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], - tcg_passres); - break; - case 0x7: /* SQDMLSL, SQDMLSL2 */ - gen_helper_neon_negl_u32(tcg_passres, tcg_passres); - /* fall through */ - case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, - tcg_res[pass], - tcg_passres); - break; - default: - g_assert_not_reached(); - } - } + /* 16 -> 32 bit fp conversion */ + int srcelt = a->q ? 
4 : 0; + TCGv_i32 tcg_res[4]; + TCGv_i32 ahp = get_ahp_flag(); - if (is_scalar) { - tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); - } - } + fpst = fpstatus_ptr(FPST_A64_F16); - if (is_scalar) { - tcg_res[1] = tcg_constant_i64(0); + for (pass = 0; pass < 4; pass++) { + tcg_res[pass] = tcg_temp_new_i32(); + read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], + fpst, ahp); } - - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + for (pass = 0; pass < 4; pass++) { + write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); } } -} - -/* C3.6 Data processing - SIMD, inc Crypto - * - * As the decode gets a little complex we are using a table based - * approach for this part of the decode. - */ -static const AArch64DecodeTable data_proc_simd[] = { - /* pattern , mask , fn */ - { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, - { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, - { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, - { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ - /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ - { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, - { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, - { 0x0e000000, 0xbf208c00, disas_simd_tb }, - { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, - { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, - { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, - { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ - { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x00000000, 0x00000000, NULL } -}; - -static void disas_data_proc_simd(DisasContext *s, uint32_t insn) -{ - /* Note that this is called with all non-FP cases from - * table C3-6 so it must UNDEF for entries not specifically - * allocated to instructions in that table. - */ - AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); - if (fn) { - fn(s, insn); - } else { - unallocated_encoding(s); - } -} - -/* C3.6 Data processing - SIMD and floating point */ -static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) -{ - if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { - disas_data_proc_fp(s, insn); - } else { - /* SIMD, including crypto */ - disas_data_proc_simd(s, insn); - } + clear_vec_high(s, true, a->rd); + return true; } static bool trans_OK(DisasContext *s, arg_OK *a) @@ -12584,37 +10040,6 @@ static bool trans_FAIL(DisasContext *s, arg_OK *a) } /** - * is_guarded_page: - * @env: The cpu environment - * @s: The DisasContext - * - * Return true if the page is guarded. - */ -static bool is_guarded_page(CPUARMState *env, DisasContext *s) -{ - uint64_t addr = s->base.pc_first; -#ifdef CONFIG_USER_ONLY - return page_get_flags(addr) & PAGE_BTI; -#else - CPUTLBEntryFull *full; - void *host; - int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); - int flags; - - /* - * We test this immediately after reading an insn, which means - * that the TLB entry must be present and valid, and thus this - * access will never raise an exception. 
- */ - flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, - false, &host, &full, 0); - assert(!(flags & TLB_INVALID_MASK)); - - return full->extra.arm.guarded; -#endif -} - -/** * btype_destination_ok: * @insn: The instruction at the branch destination * @bt: SCTLR_ELx.BT @@ -12666,24 +10091,6 @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype) return false; } -/* C3.1 A64 instruction index by encoding */ -static void disas_a64_legacy(DisasContext *s, uint32_t insn) -{ - switch (extract32(insn, 25, 4)) { - case 0x5: - case 0xd: /* Data processing - register */ - disas_data_proc_reg(s, insn); - break; - case 0x7: - case 0xf: /* Data processing - SIMD and floating point */ - disas_data_proc_simd_fp(s, insn); - break; - default: - unallocated_encoding(s); - break; - } -} - static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) { @@ -12738,6 +10145,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -12831,7 +10240,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * start of the TB. */ assert(s->base.num_insns == 1); - gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc)); s->base.is_jmp = DISAS_NORETURN; s->base.pc_next = QEMU_ALIGN_UP(pc, 4); return; @@ -12842,8 +10251,8 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) s->insn = insn; s->base.pc_next = pc + 4; - s->fp_access_checked = false; - s->sve_access_checked = false; + s->fp_access_checked = 0; + s->sve_access_checked = 0; if (s->pstate_il) { /* @@ -12856,19 +10265,6 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (dc_isar_feature(aa64_bti, s)) { if (s->base.num_insns == 1) { - /* - * At the first insn of the TB, compute s->guarded_page. - * We delayed computing this until successfully reading - * the first insn of the TB, above. This (mostly) ensures - * that the softmmu tlb entry has been populated, and the - * page table GP bit is available. - * - * Note that we need to compute this even if btype == 0, - * because this value is used for BR instructions later - * where ENV is not available. - */ - s->guarded_page = is_guarded_page(env, s); - /* First insn can have btype set to non-zero. */ tcg_debug_assert(s->btype >= 0); @@ -12877,12 +10273,13 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * priority -- below debugging exceptions but above most * everything else. This allows us to handle this now * instead of waiting until the insn is otherwise decoded. + * + * We can check all but the guarded page check here; + * defer the latter to a helper. */ if (s->btype != 0 - && s->guarded_page && !btype_destination_ok(insn, s->bt, s->btype)) { - gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); - return; + gen_helper_guarded_page_check(tcg_env); } } else { /* Not the first insn: btype must be 0. 
*/ @@ -12898,7 +10295,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (!disas_a64(s, insn) && !disas_sme(s, insn) && !disas_sve(s, insn)) { - disas_a64_legacy(s, insn); + unallocated_encoding(s); } /* diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 0fcf7cb..b2420f5 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -65,7 +65,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, static inline void assert_fp_access_checked(DisasContext *s) { #ifdef CONFIG_DEBUG_TCG - if (unlikely(!s->fp_access_checked || s->fp_excp_el)) { + if (unlikely(s->fp_access_checked <= 0)) { fprintf(stderr, "target-arm: FP access check missing for " "instruction 0x%08x\n", s->insn); abort(); @@ -185,6 +185,19 @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) return ret; } +/* + * Return the ARMFPStatusFlavour to use based on element size and + * whether FPCR.AH is set. + */ +static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz) +{ + if (s->fpcr_ah) { + return esz == MO_16 ? FPST_AH_F16 : FPST_AH; + } else { + return esz == MO_16 ? FPST_A64_F16 : FPST_A64; + } +} + bool disas_sve(DisasContext *, uint32_t); bool disas_sme(DisasContext *, uint32_t); diff --git a/target/arm/tcg/translate-m-nocp.c b/target/arm/tcg/translate-m-nocp.c index f564d06..b92773b 100644 --- a/target/arm/tcg/translate-m-nocp.c +++ b/target/arm/tcg/translate-m-nocp.c @@ -332,7 +332,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, if (dc_isar_feature(aa32_mve, s)) { /* QC is only present for MVE; otherwise RES0 */ TCGv_i32 qc = tcg_temp_new_i32(); - tcg_gen_andi_i32(qc, tmp, FPCR_QC); + tcg_gen_andi_i32(qc, tmp, FPSR_QC); /* * The 4 vfp.qc[] fields need only be "zero" vs "non-zero"; * here writing the same value into all elements is simplest. @@ -340,11 +340,11 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc), 16, 16, qc); } - tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); - fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); - tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK); + tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK); + fpscr = load_cpu_field_low32(vfp.fpsr); + tcg_gen_andi_i32(fpscr, fpscr, ~FPSR_NZCV_MASK); tcg_gen_or_i32(fpscr, fpscr, tmp); - store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]); + store_cpu_field_low32(fpscr, vfp.fpsr); break; } case ARM_VFP_FPCXT_NS: @@ -390,7 +390,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, tcg_gen_deposit_i32(control, control, sfpa, R_V7M_CONTROL_SFPA_SHIFT, 1); store_cpu_field(control, v7m.control[M_REG_S]); - tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK); + tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK); gen_helper_vfp_set_fpscr(tcg_env, tmp); s->base.is_jmp = DISAS_UPDATE_NOCHAIN; break; @@ -457,7 +457,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, case ARM_VFP_FPSCR_NZCVQC: tmp = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(tmp, tcg_env); - tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK); + tcg_gen_andi_i32(tmp, tmp, FPSR_NZCVQC_MASK); storefn(s, opaque, tmp, true); break; case QEMU_VFP_FPSCR_NZCV: @@ -465,8 +465,8 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, * Read just NZCV; this is a special case to avoid the * helper call for the "VMRS to CPSR.NZCV" insn. 
*/ - tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); - tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); + tmp = load_cpu_field_low32(vfp.fpsr); + tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK); storefn(s, opaque, tmp, true); break; case ARM_VFP_FPCXT_S: @@ -476,7 +476,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, tmp = tcg_temp_new_i32(); sfpa = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(tmp, tcg_env); - tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK); + tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK); control = load_cpu_field(v7m.control[M_REG_S]); tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK); tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT); @@ -529,7 +529,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, sfpa = tcg_temp_new_i32(); fpscr = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(fpscr, tcg_env); - tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK); + tcg_gen_andi_i32(tmp, fpscr, ~FPSR_NZCV_MASK); control = load_cpu_field(v7m.control[M_REG_S]); tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK); tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT); diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c index 915c9e5..c4fecb8 100644 --- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -148,6 +148,37 @@ static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm, return true; } +static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm, + int data, gen_helper_gvec_4_ptr *fn_gvec) +{ + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) { + return false; + } + + /* + * UNDEF accesses to odd registers for each bit of Q. + * Q will be 0b111 for all Q-reg instructions, otherwise + * when we have mixed Q- and D-reg inputs. + */ + if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + int opr_sz = q ? 
16 : 8; + tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd), + vfp_reg_offset(1, vn), + vfp_reg_offset(1, vm), + vfp_reg_offset(1, vd), + tcg_env, + opr_sz, opr_sz, data, fn_gvec); + return true; +} + static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm, int data, ARMFPStatusFlavour fp_flavour, gen_helper_gvec_4_ptr *fn_gvec_ptr) @@ -266,8 +297,8 @@ static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) if (!dc_isar_feature(aa32_bf16, s)) { return false; } - return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_bfdot); + return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0, + gen_helper_gvec_bfdot); } static bool trans_VFML(DisasContext *s, arg_VFML *a) @@ -360,8 +391,8 @@ static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) if (!dc_isar_feature(aa32_bf16, s)) { return false; } - return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_bfdot_idx); + return do_neon_ddda_env(s, a->q * 6, a->vd, a->vn, a->vm, a->index, + gen_helper_gvec_bfdot_idx); } static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) @@ -1068,144 +1099,18 @@ DO_2SH(VRSHR_S, gen_gvec_srshr) DO_2SH(VRSHR_U, gen_gvec_urshr) DO_2SH(VRSRA_S, gen_gvec_srsra) DO_2SH(VRSRA_U, gen_gvec_ursra) - -static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) -{ - /* Signed shift out of range results in all-sign-bits */ - a->shift = MIN(a->shift, (8 << a->size) - 1); - return do_vector_2sh(s, a, tcg_gen_gvec_sari); -} - -static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t oprsz, uint32_t maxsz) -{ - tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); -} - -static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) -{ - /* Shift out of range is architecturally valid and results in zero. */ - if (a->shift >= (8 << a->size)) { - return do_vector_2sh(s, a, gen_zero_rd_2sh); - } else { - return do_vector_2sh(s, a, tcg_gen_gvec_shri); - } -} - -static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, - NeonGenTwo64OpEnvFn *fn) -{ - /* - * 2-reg-and-shift operations, size == 3 case, where the - * function needs to be passed tcg_env. - */ - TCGv_i64 constimm; - int pass; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if ((a->vm | a->vd) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - /* - * To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. - */ - constimm = tcg_constant_i64(dup_const(a->size, a->shift)); - - for (pass = 0; pass < a->q + 1; pass++) { - TCGv_i64 tmp = tcg_temp_new_i64(); - - read_neon_element64(tmp, a->vm, pass, MO_64); - fn(tmp, tcg_env, tmp, constimm); - write_neon_element64(tmp, a->vd, pass, MO_64); - } - return true; -} - -static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, - NeonGenTwoOpEnvFn *fn) -{ - /* - * 2-reg-and-shift operations, size < 3 case, where the - * helper needs to be passed tcg_env. - */ - TCGv_i32 constimm, tmp; - int pass; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if ((a->vm | a->vd) & a->q) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - /* - * To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. - */ - constimm = tcg_constant_i32(dup_const(a->size, a->shift)); - tmp = tcg_temp_new_i32(); - - for (pass = 0; pass < (a->q ? 4 : 2); pass++) { - read_neon_element32(tmp, a->vm, pass, MO_32); - fn(tmp, tcg_env, tmp, constimm); - write_neon_element32(tmp, a->vd, pass, MO_32); - } - return true; -} - -#define DO_2SHIFT_ENV(INSN, FUNC) \ - static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ - { \ - return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ - } \ - static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ - { \ - static NeonGenTwoOpEnvFn * const fns[] = { \ - gen_helper_neon_##FUNC##8, \ - gen_helper_neon_##FUNC##16, \ - gen_helper_neon_##FUNC##32, \ - }; \ - assert(a->size < ARRAY_SIZE(fns)); \ - return do_2shift_env_32(s, a, fns[a->size]); \ - } - -DO_2SHIFT_ENV(VQSHLU, qshlu_s) -DO_2SHIFT_ENV(VQSHL_U, qshl_u) -DO_2SHIFT_ENV(VQSHL_S, qshl_s) +DO_2SH(VSHR_S, gen_gvec_sshr) +DO_2SH(VSHR_U, gen_gvec_ushr) +DO_2SH(VQSHLU, gen_neon_sqshlui) +DO_2SH(VQSHL_U, gen_neon_uqshli) +DO_2SH(VQSHL_S, gen_neon_sqshli) static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, NeonGenTwo64OpFn *shiftfn, - NeonGenNarrowEnvFn *narrowfn) + NeonGenOne64OpEnvFn *narrowfn) { /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ - TCGv_i64 constimm, rm1, rm2; - TCGv_i32 rd; + TCGv_i64 constimm, rm1, rm2, rd; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -1232,7 +1137,7 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, constimm = tcg_constant_i64(-a->shift); rm1 = tcg_temp_new_i64(); rm2 = tcg_temp_new_i64(); - rd = tcg_temp_new_i32(); + rd = tcg_temp_new_i64(); /* Load both inputs first to avoid potential overwrite if rm == rd */ read_neon_element64(rm1, a->vm, 0, MO_64); @@ -1240,18 +1145,18 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, shiftfn(rm1, rm1, constimm); narrowfn(rd, tcg_env, rm1); - write_neon_element32(rd, a->vd, 0, MO_32); + write_neon_element64(rd, a->vd, 0, MO_32); shiftfn(rm2, rm2, constimm); narrowfn(rd, tcg_env, rm2); - write_neon_element32(rd, a->vd, 1, MO_32); + write_neon_element64(rd, a->vd, 1, MO_32); return true; } static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, NeonGenTwoOpFn *shiftfn, - NeonGenNarrowEnvFn *narrowfn) + NeonGenOne64OpEnvFn *narrowfn) { /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ TCGv_i32 constimm, rm1, rm2, rm3, rm4; @@ -1306,16 +1211,16 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, tcg_gen_concat_i32_i64(rtmp, rm1, rm2); - narrowfn(rm1, tcg_env, rtmp); - write_neon_element32(rm1, a->vd, 0, MO_32); + narrowfn(rtmp, tcg_env, rtmp); + write_neon_element64(rtmp, a->vd, 0, MO_32); shiftfn(rm3, rm3, constimm); shiftfn(rm4, rm4, constimm); tcg_gen_concat_i32_i64(rtmp, rm3, rm4); - narrowfn(rm3, tcg_env, rtmp); - write_neon_element32(rm3, a->vd, 1, MO_32); + narrowfn(rtmp, tcg_env, rtmp); + write_neon_element64(rtmp, a->vd, 1, MO_32); return true; } @@ -1330,17 +1235,17 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ } -static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 
src) +static void gen_neon_narrow_u32(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src) { - tcg_gen_extrl_i64_i32(dest, src); + tcg_gen_ext32u_i64(dest, src); } -static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +static void gen_neon_narrow_u16(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src) { gen_helper_neon_narrow_u16(dest, src); } -static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +static void gen_neon_narrow_u8(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src) { gen_helper_neon_narrow_u8(dest, src); } @@ -1504,13 +1409,13 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf) DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf) -DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs) -DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu) +DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_rz_fs) +DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_rz_fu) DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh) DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh) -DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs) -DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu) +DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_rz_hs) +DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_rz_hu) static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, GVecGen2iFn *fn) @@ -1655,8 +1560,8 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, NULL, NULL, \ }; \ static NeonGenTwo64OpFn * const addfn[] = { \ - gen_helper_neon_##OP##l_u16, \ - gen_helper_neon_##OP##l_u32, \ + tcg_gen_vec_##OP##16_i64, \ + tcg_gen_vec_##OP##32_i64, \ tcg_gen_##OP##_i64, \ NULL, \ }; \ @@ -1734,8 +1639,8 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a, static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ { \ static NeonGenTwo64OpFn * const addfn[] = { \ - gen_helper_neon_##OP##l_u16, \ - gen_helper_neon_##OP##l_u32, \ + tcg_gen_vec_##OP##16_i64, \ + tcg_gen_vec_##OP##32_i64, \ tcg_gen_##OP##_i64, \ NULL, \ }; \ @@ -1856,8 +1761,8 @@ static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) NULL, }; static NeonGenTwo64OpFn * const addfn[] = { - gen_helper_neon_addl_u16, - gen_helper_neon_addl_u32, + tcg_gen_vec_add16_i64, + tcg_gen_vec_add32_i64, tcg_gen_add_i64, NULL, }; @@ -1874,8 +1779,8 @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) NULL, }; static NeonGenTwo64OpFn * const addfn[] = { - gen_helper_neon_addl_u16, - gen_helper_neon_addl_u32, + tcg_gen_vec_add16_i64, + tcg_gen_vec_add32_i64, tcg_gen_add_i64, NULL, }; @@ -1935,8 +1840,8 @@ static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) NULL, \ }; \ static NeonGenTwo64OpFn * const accfn[] = { \ - gen_helper_neon_##ACC##l_u16, \ - gen_helper_neon_##ACC##l_u32, \ + tcg_gen_vec_##ACC##16_i64, \ + tcg_gen_vec_##ACC##32_i64, \ tcg_gen_##ACC##_i64, \ NULL, \ }; \ @@ -2466,7 +2371,7 @@ static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) }; \ static NeonGenTwo64OpFn * const accfn[] = { \ NULL, \ - gen_helper_neon_##ACC##l_u32, \ + tcg_gen_vec_##ACC##32_i64, \ tcg_gen_##ACC##_i64, \ NULL, \ }; \ @@ -2660,204 +2565,6 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) return true; } -static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) -{ - int pass, half; - TCGv_i32 tmp[2]; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if ((a->vd | a->vm) & a->q) { - return false; - } - - if (a->size == 3) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - tmp[0] = tcg_temp_new_i32(); - tmp[1] = tcg_temp_new_i32(); - - for (pass = 0; pass < (a->q ? 2 : 1); pass++) { - for (half = 0; half < 2; half++) { - read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); - switch (a->size) { - case 0: - tcg_gen_bswap32_i32(tmp[half], tmp[half]); - break; - case 1: - gen_swap_half(tmp[half], tmp[half]); - break; - case 2: - break; - default: - g_assert_not_reached(); - } - } - write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); - write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); - } - return true; -} - -static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, - NeonGenWidenFn *widenfn, - NeonGenTwo64OpFn *opfn, - NeonGenTwo64OpFn *accfn) -{ - /* - * Pairwise long operations: widen both halves of the pair, - * combine the pairs with the opfn, and then possibly accumulate - * into the destination with the accfn. - */ - int pass; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vm) & 0x10)) { - return false; - } - - if ((a->vd | a->vm) & a->q) { - return false; - } - - if (!widenfn) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - for (pass = 0; pass < a->q + 1; pass++) { - TCGv_i32 tmp; - TCGv_i64 rm0_64, rm1_64, rd_64; - - rm0_64 = tcg_temp_new_i64(); - rm1_64 = tcg_temp_new_i64(); - rd_64 = tcg_temp_new_i64(); - - tmp = tcg_temp_new_i32(); - read_neon_element32(tmp, a->vm, pass * 2, MO_32); - widenfn(rm0_64, tmp); - read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); - widenfn(rm1_64, tmp); - - opfn(rd_64, rm0_64, rm1_64); - - if (accfn) { - TCGv_i64 tmp64 = tcg_temp_new_i64(); - read_neon_element64(tmp64, a->vd, pass, MO_64); - accfn(rd_64, tmp64, rd_64); - } - write_neon_element64(rd_64, a->vd, pass, MO_64); - } - return true; -} - -static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) -{ - static NeonGenWidenFn * const widenfn[] = { - gen_helper_neon_widen_s8, - gen_helper_neon_widen_s16, - tcg_gen_ext_i32_i64, - NULL, - }; - static NeonGenTwo64OpFn * const opfn[] = { - gen_helper_neon_paddl_u16, - gen_helper_neon_paddl_u32, - tcg_gen_add_i64, - NULL, - }; - - return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); -} - -static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) -{ - static NeonGenWidenFn * const widenfn[] = { - gen_helper_neon_widen_u8, - gen_helper_neon_widen_u16, - tcg_gen_extu_i32_i64, - NULL, - }; - static NeonGenTwo64OpFn * const opfn[] = { - gen_helper_neon_paddl_u16, - gen_helper_neon_paddl_u32, - tcg_gen_add_i64, - NULL, - }; - - return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); -} - -static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) -{ - static NeonGenWidenFn * const widenfn[] = { - gen_helper_neon_widen_s8, - gen_helper_neon_widen_s16, - tcg_gen_ext_i32_i64, - NULL, - }; - static NeonGenTwo64OpFn * const opfn[] = { - gen_helper_neon_paddl_u16, - gen_helper_neon_paddl_u32, - tcg_gen_add_i64, - NULL, - }; - static NeonGenTwo64OpFn * const accfn[] = { - gen_helper_neon_addl_u16, - gen_helper_neon_addl_u32, - tcg_gen_add_i64, - NULL, - }; - - return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], - accfn[a->size]); -} - -static bool 
trans_VPADAL_U(DisasContext *s, arg_2misc *a) -{ - static NeonGenWidenFn * const widenfn[] = { - gen_helper_neon_widen_u8, - gen_helper_neon_widen_u16, - tcg_gen_extu_i32_i64, - NULL, - }; - static NeonGenTwo64OpFn * const opfn[] = { - gen_helper_neon_paddl_u16, - gen_helper_neon_paddl_u32, - tcg_gen_add_i64, - NULL, - }; - static NeonGenTwo64OpFn * const accfn[] = { - gen_helper_neon_addl_u16, - gen_helper_neon_addl_u32, - tcg_gen_add_i64, - NULL, - }; - - return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], - accfn[a->size]); -} - typedef void ZipFn(TCGv_ptr, TCGv_ptr); static bool do_zip_uzp(DisasContext *s, arg_2misc *a, @@ -2931,10 +2638,9 @@ static bool trans_VZIP(DisasContext *s, arg_2misc *a) } static bool do_vmovn(DisasContext *s, arg_2misc *a, - NeonGenNarrowEnvFn *narrowfn) + NeonGenOne64OpEnvFn *narrowfn) { - TCGv_i64 rm; - TCGv_i32 rd0, rd1; + TCGv_i64 rm, rd0, rd1; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -2959,22 +2665,22 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a, } rm = tcg_temp_new_i64(); - rd0 = tcg_temp_new_i32(); - rd1 = tcg_temp_new_i32(); + rd0 = tcg_temp_new_i64(); + rd1 = tcg_temp_new_i64(); read_neon_element64(rm, a->vm, 0, MO_64); narrowfn(rd0, tcg_env, rm); read_neon_element64(rm, a->vm, 1, MO_64); narrowfn(rd1, tcg_env, rm); - write_neon_element32(rd0, a->vd, 0, MO_32); - write_neon_element32(rd1, a->vd, 1, MO_32); + write_neon_element64(rd0, a->vd, 0, MO_32); + write_neon_element64(rd1, a->vd, 1, MO_32); return true; } #define DO_VMOVN(INSN, FUNC) \ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ { \ - static NeonGenNarrowEnvFn * const narrowfn[] = { \ + static NeonGenOne64OpEnvFn * const narrowfn[] = { \ FUNC##8, \ FUNC##16, \ FUNC##32, \ @@ -3216,6 +2922,13 @@ DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) DO_2MISC_VEC(VCLE0, gen_gvec_cle0) DO_2MISC_VEC(VCGE0, gen_gvec_cge0) DO_2MISC_VEC(VCLT0, gen_gvec_clt0) +DO_2MISC_VEC(VCLS, gen_gvec_cls) +DO_2MISC_VEC(VCLZ, gen_gvec_clz) +DO_2MISC_VEC(VREV64, gen_gvec_rev64) +DO_2MISC_VEC(VPADDL_S, gen_gvec_saddlp) +DO_2MISC_VEC(VPADDL_U, gen_gvec_uaddlp) +DO_2MISC_VEC(VPADAL_S, gen_gvec_sadalp) +DO_2MISC_VEC(VPADAL_U, gen_gvec_uadalp) static bool trans_VMVN(DisasContext *s, arg_2misc *a) { @@ -3225,6 +2938,30 @@ static bool trans_VMVN(DisasContext *s, arg_2misc *a) return do_2misc_vec(s, a, tcg_gen_gvec_not); } +static bool trans_VCNT(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc_vec(s, a, gen_gvec_cnt); +} + +static bool trans_VREV16(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc_vec(s, a, gen_gvec_rev16); +} + +static bool trans_VREV32(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0 && a->size != 1) { + return false; + } + return do_2misc_vec(s, a, gen_gvec_rev32); +} + #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ uint32_t rm_ofs, uint32_t oprsz, \ @@ -3304,68 +3041,6 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) return true; } -static bool trans_VREV32(DisasContext *s, arg_2misc *a) -{ - static NeonGenOneOpFn * const fn[] = { - tcg_gen_bswap32_i32, - gen_swap_half, - NULL, - NULL, - }; - return do_2misc(s, a, fn[a->size]); -} - -static bool trans_VREV16(DisasContext *s, arg_2misc *a) -{ - if (a->size != 0) { - return false; - } - return do_2misc(s, a, gen_rev16); -} - -static bool trans_VCLS(DisasContext *s, arg_2misc *a) -{ - static NeonGenOneOpFn * const fn[] = { - gen_helper_neon_cls_s8, - 
gen_helper_neon_cls_s16, - gen_helper_neon_cls_s32, - NULL, - }; - return do_2misc(s, a, fn[a->size]); -} - -static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) -{ - tcg_gen_clzi_i32(rd, rm, 32); -} - -static bool trans_VCLZ(DisasContext *s, arg_2misc *a) -{ - static NeonGenOneOpFn * const fn[] = { - gen_helper_neon_clz_u8, - gen_helper_neon_clz_u16, - do_VCLZ_32, - NULL, - }; - return do_2misc(s, a, fn[a->size]); -} - -static bool trans_VCNT(DisasContext *s, arg_2misc *a) -{ - if (a->size != 0) { - return false; - } - return do_2misc(s, a, gen_helper_neon_cnt_u8); -} - -static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - uint32_t oprsz, uint32_t maxsz) -{ - tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, - vece == MO_16 ? 0x7fff : 0x7fffffff, - oprsz, maxsz); -} - static bool trans_VABS_F(DisasContext *s, arg_2misc *a) { if (a->size == MO_16) { @@ -3375,15 +3050,7 @@ static bool trans_VABS_F(DisasContext *s, arg_2misc *a) } else if (a->size != MO_32) { return false; } - return do_2misc_vec(s, a, gen_VABS_F); -} - -static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - uint32_t oprsz, uint32_t maxsz) -{ - tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, - vece == MO_16 ? 0x8000 : 0x80000000, - oprsz, maxsz); + return do_2misc_vec(s, a, gen_gvec_fabs); } static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) @@ -3395,7 +3062,7 @@ static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) } else if (a->size != MO_32) { return false; } - return do_2misc_vec(s, a, gen_VNEG_F); + return do_2misc_vec(s, a, gen_gvec_fneg); } static bool trans_VRECPE(DisasContext *s, arg_2misc *a) @@ -3403,7 +3070,7 @@ static bool trans_VRECPE(DisasContext *s, arg_2misc *a) if (a->size != 2) { return false; } - return do_2misc(s, a, gen_helper_recpe_u32); + return do_2misc_vec(s, a, gen_gvec_urecpe); } static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) @@ -3411,7 +3078,7 @@ static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) if (a->size != 2) { return false; } - return do_2misc(s, a, gen_helper_rsqrte_u32); + return do_2misc_vec(s, a, gen_gvec_ursqrte); } #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ @@ -3699,8 +3366,8 @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) if (!dc_isar_feature(aa32_bf16, s)) { return false; } - return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_bfmmla); + return do_neon_ddda_env(s, 7, a->vd, a->vn, a->vm, 0, + gen_helper_gvec_bfmmla); } static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index 46c7fce..fcbb350 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, /* Prepare a power-of-two modulo via extraction of @len bits. */ len = ctz32(streaming_vec_reg_size(s)) - esz; - if (vertical) { + if (!len) { + /* + * SVL is 128 and the element size is 128. There is exactly + * one 128x128 tile in the ZA storage, and so we calculate + * (Rs + imm) MOD 1, which is always 0. We need to special case + * this because TCG doesn't allow deposit ops with len 0. 
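(Aside, illustrating the comment above: the row/column index is reduced modulo svl/esize, which is the power of two 1 << len. A minimal sketch of that reduction using only generic TCG ops -- the real get_tile_rowcol() builds the offset with deposit/shift sequences that differ for the horizontal and vertical cases, and the names rs/imm here stand in for the actual parameters:

    /* tmp = (Rs + imm) MOD (svl / esize), where svl / esize == 1 << len */
    tcg_gen_addi_i32(tmp, cpu_reg(s, rs), imm);
    if (len == 0) {
        tcg_gen_movi_i32(tmp, 0);                     /* x MOD 1 is always 0 */
    } else {
        tcg_gen_andi_i32(tmp, tmp, (1u << len) - 1);  /* mask = 2^len - 1 */
    }

TCG's deposit op requires a field length of at least 1, hence the explicit special case in the patch; with the AND-mask form above the len == 0 case would fall out naturally as 0.)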
+ */ + tcg_gen_movi_i32(tmp, 0); + } else if (vertical) { /* * Compute the byte offset of the index within the tile: * (index % (svl / size)) * size @@ -304,6 +312,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, } static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, + ARMFPStatusFlavour e_fpst, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); @@ -319,18 +328,41 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); pm = pred_full_reg_ptr(s, a->pm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(e_fpst); fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); return true; } -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) -TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) -TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) +static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, + gen_helper_gvec_5_ptr *fn) +{ + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->sub); + TCGv_ptr za, zn, zm, pn, pm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + za = get_tile(s, esz, a->zad); + zn = vec_full_reg_ptr(s, a->zn); + zm = vec_full_reg_ptr(s, a->zm); + pn = pred_full_reg_ptr(s, a->pn); + pm = pred_full_reg_ptr(s, a->pm); + + fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc)); + return true; +} + +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, + MO_32, gen_helper_sme_fmopa_h) +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, + MO_32, FPST_A64, gen_helper_sme_fmopa_s) +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, + MO_64, FPST_A64, gen_helper_sme_fmopa_d) -/* TODO: FEAT_EBF16 */ -TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) +TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa) TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 798ab2b..f3cf028 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x) static int tszimm_shr(DisasContext *s, int x) { - return (16 << tszimm_esz(s, x)) - x; + /* + * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the + * trans function will check for esz < 0), so we can return any + * value we like from here in that case as long as we avoid UB. + */ + int esz = tszimm_esz(s, x); + if (esz < 0) { + return esz; + } + return (16 << esz) - x; } /* See e.g. LSL (immediate, predicated). */ static int tszimm_shl(DisasContext *s, int x) { - return x - (8 << tszimm_esz(s, x)); + /* As with tszimm_shr(), value will be unused if esz < 0 */ + int esz = tszimm_esz(s, x); + if (esz < 0) { + return esz; + } + return x - (8 << esz); } /* The SH bit is in bit 8. Extract the low 8 and shift. */ @@ -123,11 +137,11 @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, return true; } -static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, - arg_rr_esz *a, int data) +static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, + arg_rr_esz *a, int data) { return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, - a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); + select_ah_fpst(s, a->esz)); } /* Invoke an out-of-line helper on 3 Zregs. */ @@ -177,7 +191,14 @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, arg_rrr_esz *a, int data) { return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + +static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, + arg_rrr_esz *a, int data) +{ + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, + select_ah_fpst(s, a->esz)); } /* Invoke an out-of-line helper on 4 Zregs. */ @@ -238,6 +259,25 @@ static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, return ret; } +static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + int rd, int rn, int rm, int ra, + int data) +{ + return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env); +} + +static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + arg_rrrr_esz *a, int data) +{ + return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); +} + +static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + arg_rrxr_esz *a) +{ + return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); +} + /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, int rd, int rn, int rm, int ra, int pg, @@ -364,7 +404,7 @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, arg_rprr_esz *a) { return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } /* Invoke a vector expander on two Zregs and an immediate. 
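(Aside on the gen_gvec_fpst_ah_arg_zz()/gen_gvec_fpst_ah_arg_zzz() wrappers above: they defer the float_status flavour choice to select_ah_fpst(), whose definition is not part of this hunk. Based on how the rest of the patch pairs FPST_A64/FPST_A64_F16 with FPST_AH when s->fpcr_ah is set, a plausible sketch is the following; the FPST_AH_F16 name in particular is an assumption, not taken from this diff:

    static ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz)
    {
        if (s->fpcr_ah) {
            /* FPCR.AH == 1: use the alternate-handling status */
            return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
        }
        return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
    }

Used this way, FRECPE/FRSQRTE/FRECPX and the other AH-aware insns keep a single TRANS_FEAT() line and the flavour decision lives in one place.)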
*/ @@ -563,14 +603,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m, TCGv_vec k) { - if (TCG_TARGET_HAS_bitsel_vec) { - tcg_gen_not_vec(vece, n, n); - tcg_gen_bitsel_vec(vece, d, k, n, m); - } else { - tcg_gen_andc_vec(vece, n, k, n); - tcg_gen_andc_vec(vece, m, m, k); - tcg_gen_or_vec(vece, d, n, m); - } + tcg_gen_not_vec(vece, n, n); + tcg_gen_bitsel_vec(vece, d, k, n, m); } static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, @@ -595,7 +629,7 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) * = | ~(m | k) */ tcg_gen_and_i64(n, n, k); - if (TCG_TARGET_HAS_orc_i64) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { tcg_gen_or_i64(m, m, k); tcg_gen_orc_i64(d, n, m); } else { @@ -607,14 +641,8 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m, TCGv_vec k) { - if (TCG_TARGET_HAS_bitsel_vec) { - tcg_gen_not_vec(vece, m, m); - tcg_gen_bitsel_vec(vece, d, k, n, m); - } else { - tcg_gen_and_vec(vece, n, n, k); - tcg_gen_or_vec(vece, m, m, k); - tcg_gen_orc_vec(vece, d, n, m); - } + tcg_gen_not_vec(vece, m, m); + tcg_gen_bitsel_vec(vece, d, k, n, m); } static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, @@ -755,13 +783,23 @@ static gen_helper_gvec_3 * const fabs_fns[4] = { NULL, gen_helper_sve_fabs_h, gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, }; -TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) +static gen_helper_gvec_3 * const fabs_ah_fns[4] = { + NULL, gen_helper_sve_ah_fabs_h, + gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d, +}; +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, + s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0) static gen_helper_gvec_3 * const fneg_fns[4] = { NULL, gen_helper_sve_fneg_h, gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, }; -TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) +static gen_helper_gvec_3 * const fneg_ah_fns[4] = { + NULL, gen_helper_sve_ah_fneg_h, + gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d, +}; +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, + s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0) static gen_helper_gvec_3 * const sxtb_fns[4] = { NULL, gen_helper_sve_sxtb_h, @@ -1200,14 +1238,14 @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, }; TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, - fexpa_fns[a->esz], a->rd, a->rn, 0) + fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah) static gen_helper_gvec_3 * const ftssel_fns[4] = { NULL, gen_helper_sve_ftssel_h, gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, }; TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, - ftssel_fns[a->esz], a, 0) + ftssel_fns[a->esz], a, s->fpcr_ah) /* *** SVE Predicate Logical Operations Group @@ -3486,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) *** SVE Floating Point Multiply-Add Indexed Group */ -static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) -{ - static gen_helper_gvec_4_ptr * const fns[4] = { - NULL, - gen_helper_gvec_fmla_idx_h, - gen_helper_gvec_fmla_idx_s, - gen_helper_gvec_fmla_idx_d, - }; - return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, - (a->index << 1) | sub, - a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); -} +static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { + NULL, gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d +}; +TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, + fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) -TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) -TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) +static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { + { NULL, NULL }, + { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, + { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, + { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, +}; +TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, + fmls_idx_fns[a->esz][s->fpcr_ah], + a->rd, a->rn, a->rm, a->ra, a->index, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) /* *** SVE Floating Point Multiply Indexed Group @@ -3512,7 +3553,7 @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { }; TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) /* *** SVE Floating Point Fast Reduction Group @@ -3545,7 +3586,7 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); fn(temp, t_zn, t_pg, status, t_desc); @@ -3560,11 +3601,23 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a, }; \ TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) +#define DO_VPZ_AH(NAME, name) \ + static gen_helper_fp_reduce * const name##_fns[4] = { \ + NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ + }; \ + static gen_helper_fp_reduce * const name##_ah_fns[4] = { \ + NULL, gen_helper_sve_ah_##name##_h, \ + gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \ + }; \ + TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) + DO_VPZ(FADDV, faddv) DO_VPZ(FMINNMV, fminnmv) DO_VPZ(FMAXNMV, fmaxnmv) -DO_VPZ(FMINV, fminv) -DO_VPZ(FMAXV, fmaxv) +DO_VPZ_AH(FMINV, fminv) +DO_VPZ_AH(FMAXV, fmaxv) #undef DO_VPZ @@ -3576,13 +3629,25 @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = { NULL, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, }; -TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) +static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = { + NULL, gen_helper_gvec_frecpe_h, + gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d, +}; +TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? + frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0) static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { NULL, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, }; -TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) +static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = { + NULL, gen_helper_gvec_frsqrte_h, + gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d, +}; +TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
+ frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0) /* *** SVE Floating Point Compare with Zero Group @@ -3597,7 +3662,7 @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); TCGv_ptr status = - fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), @@ -3632,8 +3697,9 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, }; TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + ftmad_fns[a->esz], a->rd, a->rn, a->rm, + a->imm | (s->fpcr_ah << 3), + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) /* *** SVE Floating Point Accumulating Reduction Group @@ -3666,7 +3732,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm)); tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); - t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); @@ -3686,11 +3752,23 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) }; \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) +#define DO_FP3_AH(NAME, name) \ + static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ + NULL, gen_helper_gvec_##name##_h, \ + gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ + }; \ + static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \ + NULL, gen_helper_gvec_ah_##name##_h, \ + gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \ + }; \ + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0) + DO_FP3(FADD_zzz, fadd) DO_FP3(FSUB_zzz, fsub) DO_FP3(FMUL_zzz, fmul) -DO_FP3(FRECPS, recps) -DO_FP3(FRSQRTS, rsqrts) +DO_FP3_AH(FRECPS, recps) +DO_FP3_AH(FRSQRTS, rsqrts) #undef DO_FP3 @@ -3712,14 +3790,27 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, }; \ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) +#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + NULL, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ + NULL, gen_helper_##ah_name##_h, \ + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ + s->fpcr_ah ? 
name##_ah_zpzz_fns[a->esz] : \ + name##_zpzz_fns[a->esz], a) + DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) -DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) -DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) +DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) +DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) -DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) +DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) @@ -3741,7 +3832,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); - status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); + status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); fn(t_zd, t_zn, t_pg, scalar, status, desc); } @@ -3774,14 +3865,35 @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ name##_const[a->esz][a->imm], name##_fns[a->esz]) +#define DO_FP_AH_IMM(NAME, name, const0, const1) \ + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ + NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_s, \ + gen_helper_sve_##name##_d \ + }; \ + static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \ + NULL, gen_helper_sve_ah_##name##_h, \ + gen_helper_sve_ah_##name##_s, \ + gen_helper_sve_ah_##name##_d \ + }; \ + static uint64_t const name##_const[4][2] = { \ + { -1, -1 }, \ + { float16_##const0, float16_##const1 }, \ + { float32_##const0, float32_##const1 }, \ + { float64_##const0, float64_##const1 }, \ + }; \ + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ + name##_const[a->esz][a->imm], \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) + DO_FP_IMM(FADD, fadds, half, one) DO_FP_IMM(FSUB, fsubs, half, one) DO_FP_IMM(FMUL, fmuls, half, two) DO_FP_IMM(FSUBR, fsubrs, half, one) DO_FP_IMM(FMAXNM, fmaxnms, zero, one) DO_FP_IMM(FMINNM, fminnms, zero, one) -DO_FP_IMM(FMAX, fmaxs, zero, one) -DO_FP_IMM(FMIN, fmins, zero, one) +DO_FP_AH_IMM(FMAX, fmaxs, zero, one) +DO_FP_AH_IMM(FMIN, fmins, zero, one) #undef DO_FP_IMM @@ -3793,7 +3905,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), vec_full_reg_offset(s, a->rm), @@ -3825,22 +3937,28 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = { gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, }; TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], - a->rd, a->rn, a->rm, a->pg, a->rot, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), + a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64) -#define DO_FMLA(NAME, name) \ +#define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ + static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ + NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) -DO_FMLA(FMLA_zpzzz, fmla_zpzzz) -DO_FMLA(FMLS_zpzzz, fmls_zpzzz) -DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) -DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) +/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ +DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) +DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz) +DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz) +DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz) #undef DO_FMLA @@ -3849,67 +3967,68 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, }; TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], - a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2), + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL }; TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) /* *** SVE Floating Point Unary Operations Predicated Group */ TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_sh, a, 0, FPST_A64) TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16) TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, - gen_helper_sve_bfcvt, a, 0, FPST_FPCR) + gen_helper_sve_bfcvt, a, 0, + s->fpcr_ah ? 
FPST_AH : FPST_A64) TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_dh, a, 0, FPST_A64) TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16) TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_ds, a, 0, FPST_A64) TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) + gen_helper_sve_fcvt_sd, a, 0, FPST_A64) TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) + gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16) TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64) TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64) TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64) TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64) TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64) TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64) TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64) TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) + gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64) static gen_helper_gvec_3_ptr * const frint_fns[] = { NULL, @@ -3918,7 +4037,7 @@ static gen_helper_gvec_3_ptr * const frint_fns[] = { gen_helper_sve_frint_d }; TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], - a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) static gen_helper_gvec_3_ptr * const frintx_fns[] = { NULL, @@ -3927,7 +4046,7 @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = { gen_helper_sve_frintx_d }; TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], - a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + a, 0, a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) @@ -3944,7 +4063,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, } vsz = vec_full_reg_size(s); - status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); tmode = gen_set_rmode(mode, status); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), @@ -3972,48 +4091,48 @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = { gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, }; TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], - a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a, 0, select_ah_fpst(s, a->esz)) static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { NULL, gen_helper_sve_fsqrt_h, gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, }; TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], - a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) + gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16) TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) + gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16) TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) + gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16) TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) + gen_helper_sve_scvt_ss, a, 0, FPST_A64) TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) + gen_helper_sve_scvt_ds, a, 0, FPST_A64) TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) + gen_helper_sve_scvt_sd, a, 0, FPST_A64) TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) + gen_helper_sve_scvt_dd, a, 0, FPST_A64) TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) + gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16) TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) + gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16) TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) + gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16) TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) + gen_helper_sve_ucvt_ss, a, 0, FPST_A64) TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) + gen_helper_sve_ucvt_ds, a, 0, FPST_A64) TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) + gen_helper_sve_ucvt_sd, a, 0, FPST_A64) TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, - gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) + gen_helper_sve_ucvt_dd, a, 0, FPST_A64) /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group @@ -6048,9 +6167,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) if (top) { if (shl == halfbits) { - TCGv_vec t = tcg_temp_new_vec_matching(d); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); - tcg_gen_and_vec(vece, d, n, t); + tcg_gen_and_vec(vece, d, n, + tcg_constant_vec_matching(d, vece, + MAKE_64BIT_MASK(halfbits, halfbits))); } else { tcg_gen_sari_vec(vece, d, n, 
halfbits); tcg_gen_shli_vec(vece, d, d, shl); @@ -6105,18 +6224,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) if (top) { if (shl == halfbits) { - TCGv_vec t = tcg_temp_new_vec_matching(d); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); - tcg_gen_and_vec(vece, d, n, t); + tcg_gen_and_vec(vece, d, n, + tcg_constant_vec_matching(d, vece, + MAKE_64BIT_MASK(halfbits, halfbits))); } else { tcg_gen_shri_vec(vece, d, n, halfbits); tcg_gen_shli_vec(vece, d, d, shl); } } else { if (shl == 0) { - TCGv_vec t = tcg_temp_new_vec_matching(d); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_and_vec(vece, d, n, t); + tcg_gen_and_vec(vece, d, n, + tcg_constant_vec_matching(d, vece, + MAKE_64BIT_MASK(0, halfbits))); } else { tcg_gen_shli_vec(vece, d, n, halfbits); tcg_gen_shri_vec(vece, d, d, halfbits - shl); @@ -6284,18 +6403,14 @@ static const TCGOpcode sqxtn_list[] = { static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t mask = (1ull << halfbits) - 1; int64_t min = -1ull << (halfbits - 1); int64_t max = -min - 1; - tcg_gen_dupi_vec(vece, t, min); - tcg_gen_smax_vec(vece, d, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_smin_vec(vece, d, d, t); - tcg_gen_dupi_vec(vece, t, mask); - tcg_gen_and_vec(vece, d, d, t); + tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min)); + tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); + tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask)); } static const GVecGen2 sqxtnb_ops[3] = { @@ -6316,19 +6431,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t mask = (1ull << halfbits) - 1; int64_t min = -1ull << (halfbits - 1); int64_t max = -min - 1; - tcg_gen_dupi_vec(vece, t, min); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_smin_vec(vece, n, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); + tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_dupi_vec(vece, t, mask); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); } static const GVecGen2 sqxtnt_ops[3] = { @@ -6356,12 +6467,10 @@ static const TCGOpcode uqxtn_list[] = { static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = (1ull << halfbits) - 1; - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_umin_vec(vece, d, n, t); + tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); } static const GVecGen2 uqxtnb_ops[3] = { @@ -6382,14 +6491,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = (1ull << halfbits) - 1; + TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_umin_vec(vece, n, n, t); + tcg_gen_umin_vec(vece, n, n, maxv); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, maxv, d, n); } static const GVecGen2 uqxtnt_ops[3] = { @@ -6417,14 +6525,11 @@ static 
const TCGOpcode sqxtun_list[] = { static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = (1ull << halfbits) - 1; - tcg_gen_dupi_vec(vece, t, 0); - tcg_gen_smax_vec(vece, d, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_umin_vec(vece, d, d, t); + tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0)); + tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max)); } static const GVecGen2 sqxtunb_ops[3] = { @@ -6445,16 +6550,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = (1ull << halfbits) - 1; + TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); - tcg_gen_dupi_vec(vece, t, 0); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_umin_vec(vece, n, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); + tcg_gen_umin_vec(vece, n, n, maxv); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, maxv, d, n); } static const GVecGen2 sqxtunt_ops[3] = { @@ -6518,13 +6621,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; uint64_t mask = MAKE_64BIT_MASK(0, halfbits); tcg_gen_shri_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, mask); - tcg_gen_and_vec(vece, d, n, t); + tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); } static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; @@ -6576,13 +6677,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; uint64_t mask = MAKE_64BIT_MASK(0, halfbits); tcg_gen_shli_vec(vece, n, n, halfbits - shr); - tcg_gen_dupi_vec(vece, t, mask); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); } static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; @@ -6625,14 +6724,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; + uint64_t max = MAKE_64BIT_MASK(0, halfbits); tcg_gen_sari_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, 0); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_umin_vec(vece, d, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); + tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); } static const TCGOpcode sqshrunb_vec_list[] = { @@ -6657,16 +6754,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; + uint64_t max = MAKE_64BIT_MASK(0, halfbits); + TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); tcg_gen_sari_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, 0); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - 
tcg_gen_umin_vec(vece, n, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0)); + tcg_gen_umin_vec(vece, n, n, maxv); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, maxv, d, n); } static const TCGOpcode sqshrunt_vec_list[] = { @@ -6709,18 +6805,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); int64_t min = -max - 1; + int64_t mask = MAKE_64BIT_MASK(0, halfbits); tcg_gen_sari_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, min); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_smin_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_and_vec(vece, d, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); + tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); + tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask)); } static const TCGOpcode sqshrnb_vec_list[] = { @@ -6745,19 +6838,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); int64_t min = -max - 1; + int64_t mask = MAKE_64BIT_MASK(0, halfbits); tcg_gen_sari_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, min); - tcg_gen_smax_vec(vece, n, n, t); - tcg_gen_dupi_vec(vece, t, max); - tcg_gen_smin_vec(vece, n, n, t); + tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min)); + tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max)); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n); } static const TCGOpcode sqshrnt_vec_list[] = { @@ -6800,12 +6890,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; + int64_t max = MAKE_64BIT_MASK(0, halfbits); tcg_gen_shri_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_umin_vec(vece, d, n, t); + tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max)); } static const TCGOpcode uqshrnb_vec_list[] = { @@ -6830,14 +6919,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) { - TCGv_vec t = tcg_temp_new_vec_matching(d); int halfbits = 4 << vece; + int64_t max = MAKE_64BIT_MASK(0, halfbits); + TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max); tcg_gen_shri_vec(vece, n, n, shr); - tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); - tcg_gen_umin_vec(vece, n, n, t); + tcg_gen_umin_vec(vece, n, n, maxv); tcg_gen_shli_vec(vece, n, n, halfbits); - tcg_gen_bitsel_vec(vece, d, t, d, n); + tcg_gen_bitsel_vec(vece, d, maxv, d, n); } static const TCGOpcode uqshrnt_vec_list[] = { @@ -6925,10 +7014,10 @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s, a->rd, a->rn, a->rm, 
a->ra, - 0, FPST_FPCR) + 0, FPST_A64) TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, - 0, FPST_FPCR) + 0, FPST_A64) static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { NULL, gen_helper_sve2_sqdmlal_zzzw_h, @@ -7044,17 +7133,18 @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a) TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, - gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) + gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64) TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, - gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) + gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64) TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, - gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) + gen_helper_sve_bfcvtnt, a, 0, + s->fpcr_ah ? FPST_AH : FPST_A64) TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, - gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) + gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64) TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, - gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) + gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64) TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, FPROUNDING_ODD, gen_helper_sve_fcvt_ds) @@ -7066,7 +7156,7 @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = { gen_helper_flogb_s, gen_helper_flogb_d }; TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], - a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) { @@ -7099,18 +7189,19 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, gen_helper_gvec_ummla_b, a, 0) -TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, +TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz, gen_helper_gvec_bfdot, a, 0) -TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, +TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz, gen_helper_gvec_bfdot_idx, a) -TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, +TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz, gen_helper_gvec_bfmmla, a, 0) static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) { return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, - a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); + a->rd, a->rn, a->rm, a->ra, sel, + s->fpcr_ah ? FPST_AH : FPST_A64); } TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) @@ -7120,7 +7211,8 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) { return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, a->rd, a->rn, a->rm, a->ra, - (a->index << 1) | sel, FPST_FPCR); + (a->index << 1) | sel, + s->fpcr_ah ? 
FPST_AH : FPST_A64); } TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c index 39ec971..8d9d1ab 100644 --- a/target/arm/tcg/translate-vfp.c +++ b/target/arm/tcg/translate-vfp.c @@ -460,9 +460,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) } if (sz == 1) { - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); } else { - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); } tcg_rmode = gen_set_rmode(rounding, fpst); @@ -527,9 +527,9 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) } if (sz == 1) { - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); } else { - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); } tcg_shift = tcg_constant_i32(0); @@ -833,8 +833,8 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) break; case ARM_VFP_FPSCR: if (a->rt == 15) { - tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); - tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); + tmp = load_cpu_field_low32(vfp.fpsr); + tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK); } else { tmp = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(tmp, tcg_env); @@ -1398,7 +1398,7 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, f0 = tcg_temp_new_i32(); f1 = tcg_temp_new_i32(); fd = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); vfp_load_reg32(f0, vn); vfp_load_reg32(f1, vm); @@ -1433,7 +1433,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, /* * Do a half-precision operation. Functionally this is * the same as do_vfp_3op_sp(), except: - * - it uses the FPST_FPCR_F16 + * - it uses the FPST_A32_F16 * - it doesn't need the VFP vector handling (fp16 is a * v8 feature, and in v8 VFP vectors don't exist) * - it does the aa32_fp16_arith feature test @@ -1456,7 +1456,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, f0 = tcg_temp_new_i32(); f1 = tcg_temp_new_i32(); fd = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); vfp_load_reg16(f0, vn); vfp_load_reg16(f1, vm); @@ -1517,7 +1517,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, f0 = tcg_temp_new_i64(); f1 = tcg_temp_new_i64(); fd = tcg_temp_new_i64(); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); vfp_load_reg64(f0, vn); vfp_load_reg64(f1, vm); @@ -2122,7 +2122,7 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) /* VFNMA, VFNMS */ gen_vfp_negh(vd, vd); } - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); vfp_store_reg32(vd, a->vd); return true; @@ -2181,7 +2181,7 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) /* VFNMA, VFNMS */ gen_vfp_negs(vd, vd); } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); vfp_store_reg32(vd, a->vd); return true; @@ -2190,8 +2190,8 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) { /* - * VFNMA : fd = muladd(-fd, fn, fm) - * VFNMS : fd = muladd(-fd, -fn, fm) + * VFNMA : fd = muladd(-fd, -fn, fm) + * VFNMS : fd = muladd(-fd, fn, fm) * VFMA : fd = muladd( fd, fn, fm) * VFMS : fd = muladd( fd, -fn, fm) * @@ -2246,7 +2246,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) 
/* VFNMA, VFNMS */ gen_vfp_negd(vd, vd); } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); vfp_store_reg64(vd, a->vd); return true; @@ -2262,8 +2262,8 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) #define MAKE_VFM_TRANS_FNS(PREC) \ MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \ MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \ - MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ - MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) + MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, false, true) \ + MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, true, true) MAKE_VFM_TRANS_FNS(hp) MAKE_VFM_TRANS_FNS(sp) @@ -2424,17 +2424,17 @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2) static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { - gen_helper_vfp_sqrth(vd, vm, tcg_env); + gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16)); } static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) { - gen_helper_vfp_sqrts(vd, vm, tcg_env); + gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32)); } static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) { - gen_helper_vfp_sqrtd(vd, vm, tcg_env); + gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32)); } DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) @@ -2565,7 +2565,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ @@ -2599,7 +2599,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ @@ -2623,7 +2623,7 @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); @@ -2646,7 +2646,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); @@ -2680,7 +2680,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); @@ -2706,7 +2706,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg16(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); gen_helper_rinth(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); return true; @@ -2727,7 +2727,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_rints(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); return true; @@ -2757,7 +2757,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) tmp = tcg_temp_new_i64(); vfp_load_reg64(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_rintd(tmp, tmp, fpst); vfp_store_reg64(tmp, a->vd); return true; @@ -2779,7 +2779,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg16(tmp, a->vm); - fpst 
= fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rinth(tmp, tmp, fpst); gen_restore_rmode(tcg_rmode, fpst); @@ -2803,7 +2803,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rints(tmp, tmp, fpst); gen_restore_rmode(tcg_rmode, fpst); @@ -2836,7 +2836,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) tmp = tcg_temp_new_i64(); vfp_load_reg64(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rintd(tmp, tmp, fpst); gen_restore_rmode(tcg_rmode, fpst); @@ -2859,7 +2859,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg16(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); gen_helper_rinth_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); return true; @@ -2880,7 +2880,7 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_rints_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); return true; @@ -2910,7 +2910,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) tmp = tcg_temp_new_i64(); vfp_load_reg64(tmp, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); gen_helper_rintd_exact(tmp, tmp, fpst); vfp_store_reg64(tmp, a->vd); return true; @@ -2937,7 +2937,7 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); vfp_load_reg32(vm, a->vm); - gen_helper_vfp_fcvtds(vd, vm, tcg_env); + gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32)); vfp_store_reg64(vd, a->vd); return true; } @@ -2963,7 +2963,7 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); vfp_load_reg64(vm, a->vm); - gen_helper_vfp_fcvtsd(vd, vm, tcg_env); + gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32)); vfp_store_reg32(vd, a->vd); return true; } @@ -2983,7 +2983,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) vm = tcg_temp_new_i32(); vfp_load_reg32(vm, a->vm); - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); if (a->s) { /* i32 -> f16 */ gen_helper_vfp_sitoh(vm, vm, fpst); @@ -3010,7 +3010,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) vm = tcg_temp_new_i32(); vfp_load_reg32(vm, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); if (a->s) { /* i32 -> f32 */ gen_helper_vfp_sitos(vm, vm, fpst); @@ -3044,7 +3044,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); vfp_load_reg32(vm, a->vm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); if (a->s) { /* i32 -> f64 */ gen_helper_vfp_sitod(vd, vm, fpst); @@ -3105,7 +3105,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) vd = tcg_temp_new_i32(); vfp_load_reg32(vd, a->vd); - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ @@ -3161,7 +3161,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) vd = 
tcg_temp_new_i32(); vfp_load_reg32(vd, a->vd); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ @@ -3223,7 +3223,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) vd = tcg_temp_new_i64(); vfp_load_reg64(vd, a->vd); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ @@ -3273,7 +3273,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR_F16); + fpst = fpstatus_ptr(FPST_A32_F16); vm = tcg_temp_new_i32(); vfp_load_reg16(vm, a->vm); @@ -3307,7 +3307,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); vm = tcg_temp_new_i32(); vfp_load_reg32(vm, a->vm); @@ -3347,7 +3347,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) return true; } - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(FPST_A32); vm = tcg_temp_new_i64(); vd = tcg_temp_new_i32(); vfp_load_reg64(vm, a->vm); diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index c5bc691..f7d6d8c 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -27,6 +27,7 @@ #include "semihosting/semihost.h" #include "cpregs.h" #include "exec/helper-proto.h" +#include "exec/target_page.h" #define HELPER_H "helper.h" #include "exec/helper-info.c.inc" @@ -228,6 +229,9 @@ static inline int get_a32_user_mem_index(DisasContext *s) */ switch (s->mmu_idx) { case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: + return arm_to_core_mmu_idx(ARMMMUIdx_E30_0); case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */ case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: @@ -368,7 +372,7 @@ static void gen_rebuild_hflags(DisasContext *s, bool new_el) } } -static void gen_exception_internal(int excp) +void gen_exception_internal(int excp) { assert(excp_is_internal(excp)); gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); @@ -490,20 +494,9 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp = tcg_temp_new_i32(); - if (TCG_TARGET_HAS_add2_i32) { - tcg_gen_movi_i32(tmp, 0); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp); - } else { - TCGv_i64 q0 = tcg_temp_new_i64(); - TCGv_i64 q1 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(q0, t0); - tcg_gen_extu_i32_i64(q1, t1); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extu_i32_i64(q1, cpu_CF); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); - } + + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0, t1, cpu_CF); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); @@ -3507,7 +3500,7 @@ static int t32_expandimm_rot(DisasContext *s, int x) /* Return the unrotated immediate from T32ExpandImm. 
*/ static int t32_expandimm_imm(DisasContext *s, int x) { - int imm = extract32(x, 0, 8); + uint32_t imm = extract32(x, 0, 8); switch (extract32(x, 8, 4)) { case 0: /* XY */ @@ -4938,7 +4931,7 @@ static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a) } static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a, - TCGv_i32 addr, int address_offset) + TCGv_i32 addr) { if (!a->p) { TCGv_i32 ofs = load_reg(s, a->rm); @@ -4951,7 +4944,6 @@ static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a, } else if (!a->w) { return; } - tcg_gen_addi_i32(addr, addr, address_offset); store_reg(s, a->rn, addr); } @@ -4971,7 +4963,7 @@ static bool op_load_rr(DisasContext *s, arg_ldst_rr *a, * Perform base writeback before the loaded value to * ensure correct behavior with overlapping index registers. */ - op_addr_rr_post(s, a, addr, 0); + op_addr_rr_post(s, a, addr); store_reg_from_load(s, a->rt, tmp); return true; } @@ -4996,14 +4988,53 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a, gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); - op_addr_rr_post(s, a, addr, 0); + op_addr_rr_post(s, a, addr); return true; } -static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) +static void do_ldrd_load(DisasContext *s, TCGv_i32 addr, int rt, int rt2) { + /* + * LDRD is required to be an atomic 64-bit access if the + * address is 8-aligned, two atomic 32-bit accesses if + * it's only 4-aligned, and to give an alignment fault + * if it's not 4-aligned. This is MO_ALIGN_4 | MO_ATOM_SUBALIGN. + * Rt is always the word from the lower address, and Rt2 the + * data from the higher address, regardless of endianness. + * So (like gen_load_exclusive) we avoid gen_aa32_ld_i64() + * so we don't get its SCTLR_B check, and instead do a 64-bit access + * using MO_BE if appropriate and then split the two halves. + * + * For M-profile, and for A-profile before LPAE, the 64-bit + * atomicity is not required. We could model that using + * the looser MO_ATOM_IFALIGN_PAIR, but providing a higher + * level of atomicity than required is harmless (we would not + * currently generate better code for IFALIGN_PAIR here). + * + * This also gives us the correct behaviour of not updating + * rt if the load of rt2 faults; this is required for cases + * like "ldrd r2, r3, [r2]" where rt is also the base register. + */ int mem_idx = get_mem_index(s); - TCGv_i32 addr, tmp; + MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data; + TCGv taddr = gen_aa32_addr(s, addr, opc); + TCGv_i64 t64 = tcg_temp_new_i64(); + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_i32 tmp2 = tcg_temp_new_i32(); + + tcg_gen_qemu_ld_i64(t64, taddr, mem_idx, opc); + if (s->be_data == MO_BE) { + tcg_gen_extr_i64_i32(tmp2, tmp, t64); + } else { + tcg_gen_extr_i64_i32(tmp, tmp2, t64); + } + store_reg(s, rt, tmp); + store_reg(s, rt2, tmp2); +} + +static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) +{ + TCGv_i32 addr; if (!ENABLE_ARCH_5TE) { return false; @@ -5014,25 +5045,49 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a) } addr = op_addr_rr_pre(s, a); - tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - store_reg(s, a->rt, tmp); - - tcg_gen_addi_i32(addr, addr, 4); - - tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - store_reg(s, a->rt + 1, tmp); + do_ldrd_load(s, addr, a->rt, a->rt + 1); /* LDRD w/ base writeback is undefined if the registers overlap. 
*/ - op_addr_rr_post(s, a, addr, -4); + op_addr_rr_post(s, a, addr); return true; } -static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) +static void do_strd_store(DisasContext *s, TCGv_i32 addr, int rt, int rt2) { + /* + * STRD is required to be an atomic 64-bit access if the + * address is 8-aligned, two atomic 32-bit accesses if + * it's only 4-aligned, and to give an alignment fault + * if it's not 4-aligned. + * Rt is always the word from the lower address, and Rt2 the + * data from the higher address, regardless of endianness. + * So (like gen_store_exclusive) we avoid gen_aa32_ld_i64() + * so we don't get its SCTLR_B check, and instead do a 64-bit access + * using MO_BE if appropriate, using a value constructed + * by putting the two halves together in the right order. + * + * As with LDRD, the 64-bit atomicity is not required for + * M-profile, or for A-profile before LPAE, and we provide + * the higher guarantee always for simplicity. + */ int mem_idx = get_mem_index(s); - TCGv_i32 addr, tmp; + MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data; + TCGv taddr = gen_aa32_addr(s, addr, opc); + TCGv_i32 t1 = load_reg(s, rt); + TCGv_i32 t2 = load_reg(s, rt2); + TCGv_i64 t64 = tcg_temp_new_i64(); + + if (s->be_data == MO_BE) { + tcg_gen_concat_i32_i64(t64, t2, t1); + } else { + tcg_gen_concat_i32_i64(t64, t1, t2); + } + tcg_gen_qemu_st_i64(t64, taddr, mem_idx, opc); +} + +static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) +{ + TCGv_i32 addr; if (!ENABLE_ARCH_5TE) { return false; @@ -5043,15 +5098,9 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) } addr = op_addr_rr_pre(s, a); - tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); + do_strd_store(s, addr, a->rt, a->rt + 1); - tcg_gen_addi_i32(addr, addr, 4); - - tmp = load_reg(s, a->rt + 1); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - - op_addr_rr_post(s, a, addr, -4); + op_addr_rr_post(s, a, addr); return true; } @@ -5087,13 +5136,14 @@ static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a) } static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a, - TCGv_i32 addr, int address_offset) + TCGv_i32 addr) { + int address_offset = 0; if (!a->p) { if (a->u) { - address_offset += a->imm; + address_offset = a->imm; } else { - address_offset -= a->imm; + address_offset = -a->imm; } } else if (!a->w) { return; @@ -5118,7 +5168,7 @@ static bool op_load_ri(DisasContext *s, arg_ldst_ri *a, * Perform base writeback before the loaded value to * ensure correct behavior with overlapping index registers. */ - op_addr_ri_post(s, a, addr, 0); + op_addr_ri_post(s, a, addr); store_reg_from_load(s, a->rt, tmp); return true; } @@ -5143,29 +5193,20 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a, gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); - op_addr_ri_post(s, a, addr, 0); + op_addr_ri_post(s, a, addr); return true; } static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) { - int mem_idx = get_mem_index(s); - TCGv_i32 addr, tmp; + TCGv_i32 addr; addr = op_addr_ri_pre(s, a); - tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - store_reg(s, a->rt, tmp); - - tcg_gen_addi_i32(addr, addr, 4); - - tmp = tcg_temp_new_i32(); - gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - store_reg(s, rt2, tmp); + do_ldrd_load(s, addr, a->rt, rt2); /* LDRD w/ base writeback is undefined if the registers overlap. 
*/ - op_addr_ri_post(s, a, addr, -4); + op_addr_ri_post(s, a, addr); return true; } @@ -5188,20 +5229,13 @@ static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a) static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) { - int mem_idx = get_mem_index(s); - TCGv_i32 addr, tmp; + TCGv_i32 addr; addr = op_addr_ri_pre(s, a); - tmp = load_reg(s, a->rt); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - - tcg_gen_addi_i32(addr, addr, 4); - - tmp = load_reg(s, rt2); - gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); + do_strd_store(s, addr, a->rt, rt2); - op_addr_ri_post(s, a, addr, -4); + op_addr_ri_post(s, a, addr); return true; } @@ -7726,7 +7760,8 @@ static bool arm_check_ss_active(DisasContext *dc) static void arm_post_translate_insn(DisasContext *dc) { - if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) { + if (dc->condjmp && + (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY)) { if (dc->pc_save != dc->condlabel.pc_save) { gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save); } @@ -7756,7 +7791,7 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * be possible after an indirect branch, at the start of the TB. */ assert(dc->base.num_insns == 1); - gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc)); dc->base.is_jmp = DISAS_NORETURN; dc->base.pc_next = QEMU_ALIGN_UP(pc, 4); return; @@ -8090,9 +8125,8 @@ static const TranslatorOps thumb_translator_ops = { .tb_stop = arm_tr_tb_stop, }; -/* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, - vaddr pc, void *host_pc) +void arm_translate_code(CPUState *cpu, TranslationBlock *tb, + int *max_insns, vaddr pc, void *host_pc) { DisasContext dc = { }; const TranslatorOps *ops = &arm_translator_ops; diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index aba21f7..0004a97 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -4,8 +4,8 @@ #include "cpu.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" -#include "exec/exec-all.h" #include "exec/translator.h" +#include "exec/translation-block.h" #include "exec/helper-gen.h" #include "internals.h" #include "cpu-features.h" @@ -91,15 +91,19 @@ typedef struct DisasContext { bool aarch64; bool thumb; bool lse2; - /* Because unallocated encodings generate different exception syndrome + /* + * Because unallocated encodings generate different exception syndrome * information from traps due to FP being disabled, we can't do a single * "is fp access disabled" check at a high level in the decode tree. * To help in catching bugs where the access check was forgotten in some * code path, we set this flag when the access check is done, and assert * that it is set at the point where we actually touch the FP regs. + * 0: not checked, + * 1: checked, access ok + * -1: checked, access denied */ - bool fp_access_checked; - bool sve_access_checked; + int8_t fp_access_checked; + int8_t sve_access_checked; /* ARMv8 single-step state (this is distinct from the QEMU gdbstub * single-step support). 
*/ @@ -154,6 +158,10 @@ typedef struct DisasContext { bool nv2_mem_e20; /* True if NV2 enabled and NV2 RAM accesses are big-endian */ bool nv2_mem_be; + /* True if FPCR.AH is 1 (alternate floating point handling) */ + bool fpcr_ah; + /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ + bool fpcr_nep; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. @@ -163,8 +171,6 @@ typedef struct DisasContext { uint8_t dcz_blocksize; /* A copy of cpu->gm_blocksize. */ uint8_t gm_blocksize; - /* True if this page is guarded. */ - bool guarded_page; /* True if the current insn_start has been updated. */ bool insn_start_updated; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ @@ -341,6 +347,7 @@ void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); MemOp pow2_align(unsigned i); void unallocated_encoding(DisasContext *s); +void gen_exception_internal(int excp); void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp, uint32_t syn, uint32_t target_el); void gen_exception_insn(DisasContext *s, target_long pc_diff, @@ -351,8 +358,7 @@ static inline TCGv_i32 get_ahp_flag(void) { TCGv_i32 ret = tcg_temp_new_i32(); - tcg_gen_ld_i32(ret, tcg_env, - offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR])); + tcg_gen_ld_i32(ret, tcg_env, offsetoflow32(CPUARMState, vfp.fpcr)); tcg_gen_extract_i32(ret, ret, 26, 1); return ret; @@ -472,6 +478,13 @@ void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + int64_t c, uint32_t opr_sz, uint32_t max_sz); + void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -515,6 +528,11 @@ void gen_sqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -569,6 +587,41 @@ void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_rev16(unsigned 
vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); + +/* These exclusively manipulate the sign bit. */ +void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); +void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); + +void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t opr_sz, uint32_t max_sz); + /* * Forward to the isar_feature_* tests given a DisasContext pointer. */ @@ -594,13 +647,13 @@ typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32, typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); -typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32); typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64); +typedef void NeonGenOne64OpEnvFn(TCGv_i64, TCGv_env, TCGv_i64); typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); @@ -621,54 +674,18 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb) return (CPUARMTBFlags){ tb->flags, tb->cs_base }; } -/* - * Enum for argument to fpstatus_ptr(). - */ -typedef enum ARMFPStatusFlavour { - FPST_FPCR, - FPST_FPCR_F16, - FPST_STD, - FPST_STD_F16, -} ARMFPStatusFlavour; - /** * fpstatus_ptr: return TCGv_ptr to the specified fp_status field * * We have multiple softfloat float_status fields in the Arm CPU state struct * (see the comment in cpu.h for details). Return a TCGv_ptr which has * been set up to point to the requested field in the CPU state struct. 
- * The options are: - * - * FPST_FPCR - * for non-FP16 operations controlled by the FPCR - * FPST_FPCR_F16 - * for operations controlled by the FPCR where FPCR.FZ16 is to be used - * FPST_STD - * for A32/T32 Neon operations using the "standard FPSCR value" - * FPST_STD_F16 - * as FPST_STD, but where FPCR.FZ16 is to be used */ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) { TCGv_ptr statusptr = tcg_temp_new_ptr(); - int offset; - - switch (flavour) { - case FPST_FPCR: - offset = offsetof(CPUARMState, vfp.fp_status); - break; - case FPST_FPCR_F16: - offset = offsetof(CPUARMState, vfp.fp_status_f16); - break; - case FPST_STD: - offset = offsetof(CPUARMState, vfp.standard_fp_status); - break; - case FPST_STD_F16: - offset = offsetof(CPUARMState, vfp.standard_fp_status_f16); - break; - default: - g_assert_not_reached(); - } + int offset = offsetof(CPUARMState, vfp.fp_status[flavour]); + tcg_gen_addi_ptr(statusptr, tcg_env, offset); return statusptr; } diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index b05922b..986eaf8 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -317,10 +317,12 @@ void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); - for (i = 0; i < opr_sz / 2; i += 16 / 2) { + for (i = 0; i < elements; i += 16 / 2) { int16_t mm = m[i]; - for (j = 0; j < 16 / 2; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); } } @@ -333,16 +335,54 @@ void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); - for (i = 0; i < opr_sz / 2; i += 16 / 2) { + for (i = 0; i < elements; i += 16 / 2) { int16_t mm = m[i]; - for (j = 0; j < 16 / 2; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqrdmlah_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); + + for (i = 0; i < elements; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmlsh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); + + for (i = 0; i < elements; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], true, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -512,10 +552,12 @@ void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = 
simd_oprsz(desc); int idx = simd_data(desc); int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); - for (i = 0; i < opr_sz / 4; i += 16 / 4) { + for (i = 0; i < elements; i += 16 / 4) { int32_t mm = m[i]; - for (j = 0; j < 16 / 4; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); } } @@ -528,16 +570,54 @@ void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); - for (i = 0; i < opr_sz / 4; i += 16 / 4) { + for (i = 0; i < elements; i += 16 / 4) { int32_t mm = m[i]; - for (j = 0; j < 16 / 4; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqrdmlah_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + + for (i = 0; i < elements; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmlsh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + + for (i = 0; i < elements; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], true, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -756,6 +836,13 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ { \ intptr_t i = 0, opr_sz = simd_oprsz(desc); \ intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ + /* \ + * Special case: opr_sz == 8 from AA64/AA32 advsimd means the \ + * first iteration might not be a full 16 byte segment. But \ + * for vector lengths beyond that this must be SVE and we know \ + * opr_sz is a multiple of 16, so we need not clamp segend \ + * to opr_sz_n when we advance it at the end of the loop. 
\ + */ \ intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ intptr_t index = simd_data(desc); \ TYPED *d = vd, *a = va; \ @@ -773,7 +860,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ n[i * 4 + 2] * m2 + \ n[i * 4 + 3] * m3); \ } while (++i < segend); \ - segend = i + 4; \ + segend = i + (16 / sizeof(TYPED)); \ } while (i < opr_sz_n); \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } @@ -786,26 +873,27 @@ DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8) DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8) void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float16 *d = vd; float16 *n = vn; float16 *m = vm; - float_status *fpst = vfpst; - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = neg_real ^ 1; + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 15; - neg_imag <<= 15; - for (i = 0; i < opr_sz / 2; i += 2) { float16 e0 = n[H2(i)]; - float16 e1 = m[H2(i + 1)] ^ neg_imag; + float16 e1 = m[H2(i + 1)]; float16 e2 = n[H2(i + 1)]; - float16 e3 = m[H2(i)] ^ neg_real; + float16 e3 = m[H2(i)]; + + if (rot) { + e3 = float16_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float16_maybe_ah_chs(e1, fpcr_ah); + } d[H2(i)] = float16_add(e0, e1, fpst); d[H2(i + 1)] = float16_add(e2, e3, fpst); @@ -814,26 +902,27 @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, } void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float32 *d = vd; float32 *n = vn; float32 *m = vm; - float_status *fpst = vfpst; - uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = neg_real ^ 1; + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 31; - neg_imag <<= 31; - for (i = 0; i < opr_sz / 4; i += 2) { float32 e0 = n[H4(i)]; - float32 e1 = m[H4(i + 1)] ^ neg_imag; + float32 e1 = m[H4(i + 1)]; float32 e2 = n[H4(i + 1)]; - float32 e3 = m[H4(i)] ^ neg_real; + float32 e3 = m[H4(i)]; + + if (rot) { + e3 = float32_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float32_maybe_ah_chs(e1, fpcr_ah); + } d[H4(i)] = float32_add(e0, e1, fpst); d[H4(i + 1)] = float32_add(e2, e3, fpst); @@ -842,26 +931,27 @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm, } void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float64 *d = vd; float64 *n = vn; float64 *m = vm; - float_status *fpst = vfpst; - uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1); - uint64_t neg_imag = neg_real ^ 1; + bool rot = extract32(desc, SIMD_DATA_SHIFT, 1); + bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1); uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. 
*/ - neg_real <<= 63; - neg_imag <<= 63; - for (i = 0; i < opr_sz / 8; i += 2) { float64 e0 = n[i]; - float64 e1 = m[i + 1] ^ neg_imag; + float64 e1 = m[i + 1]; float64 e2 = n[i + 1]; - float64 e3 = m[i] ^ neg_real; + float64 e3 = m[i]; + + if (rot) { + e3 = float64_maybe_ah_chs(e3, fpcr_ah); + } else { + e1 = float64_maybe_ah_chs(e1, fpcr_ah); + } d[i] = float64_add(e0, e1, fpst); d[i + 1] = float64_add(e2, e3, fpst); @@ -870,152 +960,167 @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm, } void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float16 *d = vd, *n = vn, *m = vm, *a = va; - float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); - uint32_t neg_real = flip ^ neg_imag; + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float16 negx_imag, negx_real; uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 15; - neg_imag <<= 15; + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 15; + negx_imag = (negf_imag & ~fpcr_ah) << 15; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); for (i = 0; i < opr_sz / 2; i += 2) { float16 e2 = n[H2(i + flip)]; - float16 e1 = m[H2(i + flip)] ^ neg_real; + float16 e1 = m[H2(i + flip)] ^ negx_real; float16 e4 = e2; - float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag; + float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; - d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst); - d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst); + d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); + d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float16 *d = vd, *n = vn, *m = vm, *a = va; - float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); - uint32_t neg_real = flip ^ neg_imag; + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); + uint32_t negf_real = flip ^ negf_imag; intptr_t elements = opr_sz / sizeof(float16); - intptr_t eltspersegment = 16 / sizeof(float16); + intptr_t eltspersegment = MIN(16 / sizeof(float16), elements); + float16 negx_imag, negx_real; intptr_t i, j; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 15; - neg_imag <<= 15; + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 15; + negx_imag = (negf_imag & ~fpcr_ah) << 15; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); for (i = 0; i < elements; i += eltspersegment) { float16 mr = m[H2(i + 2 * index + 0)]; float16 mi = m[H2(i + 2 * index + 1)]; - float16 e1 = neg_real ^ (flip ? mi : mr); - float16 e3 = neg_imag ^ (flip ? mr : mi); + float16 e1 = negx_real ^ (flip ? 
mi : mr); + float16 e3 = negx_imag ^ (flip ? mr : mi); for (j = i; j < i + eltspersegment; j += 2) { float16 e2 = n[H2(j + flip)]; float16 e4 = e2; - d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst); - d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst); + d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst); + d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float32 *d = vd, *n = vn, *m = vm, *a = va; - float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); - uint32_t neg_real = flip ^ neg_imag; + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float32 negx_imag, negx_real; uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 31; - neg_imag <<= 31; + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 31; + negx_imag = (negf_imag & ~fpcr_ah) << 31; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); for (i = 0; i < opr_sz / 4; i += 2) { float32 e2 = n[H4(i + flip)]; - float32 e1 = m[H4(i + flip)] ^ neg_real; + float32 e1 = m[H4(i + flip)] ^ negx_real; float32 e4 = e2; - float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag; + float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; - d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst); - d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst); + d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); + d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float32 *d = vd, *n = vn, *m = vm, *a = va; - float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); - uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); - uint32_t neg_real = flip ^ neg_imag; + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1); + uint32_t negf_real = flip ^ negf_imag; intptr_t elements = opr_sz / sizeof(float32); - intptr_t eltspersegment = 16 / sizeof(float32); + intptr_t eltspersegment = MIN(16 / sizeof(float32), elements); + float32 negx_imag, negx_real; intptr_t i, j; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 31; - neg_imag <<= 31; + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (negf_real & ~fpcr_ah) << 31; + negx_imag = (negf_imag & ~fpcr_ah) << 31; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); for (i = 0; i < elements; i += eltspersegment) { float32 mr = m[H4(i + 2 * index + 0)]; float32 mi = m[H4(i + 2 * index + 1)]; - float32 e1 = neg_real ^ (flip ? mi : mr); - float32 e3 = neg_imag ^ (flip ? mr : mi); + float32 e1 = negx_real ^ (flip ? mi : mr); + float32 e3 = negx_imag ^ (flip ? 
mr : mi); for (j = i; j < i + eltspersegment; j += 2) { float32 e2 = n[H4(j + flip)]; float32 e4 = e2; - d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst); - d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst); + d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst); + d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va, - void *vfpst, uint32_t desc) + float_status *fpst, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); float64 *d = vd, *n = vn, *m = vm, *a = va; - float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); - uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); - uint64_t neg_real = flip ^ neg_imag; + uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + uint32_t negf_real = flip ^ negf_imag; + float64 negx_real, negx_imag; uintptr_t i; - /* Shift boolean to the sign bit so we can xor to negate. */ - neg_real <<= 63; - neg_imag <<= 63; + /* With AH=0, use negx; with AH=1 use negf. */ + negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63; + negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63; + negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0); + negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0); for (i = 0; i < opr_sz / 8; i += 2) { float64 e2 = n[i + flip]; - float64 e1 = m[i + flip] ^ neg_real; + float64 e1 = m[i + flip] ^ negx_real; float64 e4 = e2; - float64 e3 = m[i + 1 - flip] ^ neg_imag; + float64 e3 = m[i + 1 - flip] ^ negx_imag; - d[i] = float64_muladd(e2, e1, a[i], 0, fpst); - d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst); + d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); + d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -1100,9 +1205,8 @@ static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat) return -float64_lt(float64_abs(op2), float64_abs(op1), stat); } -static int16_t vfp_tosszh(float16 x, void *fpstp) +static int16_t vfp_tosszh(float16 x, float_status *fpst) { - float_status *fpst = fpstp; if (float16_is_any_nan(x)) { float_raise(float_flag_invalid, fpst); return 0; @@ -1110,9 +1214,8 @@ static int16_t vfp_tosszh(float16 x, void *fpstp) return float16_to_int16_round_to_zero(x, fpst); } -static uint16_t vfp_touszh(float16 x, void *fpstp) +static uint16_t vfp_touszh(float16 x, float_status *fpst) { - float_status *fpst = fpstp; if (float16_is_any_nan(x)) { float_raise(float_flag_invalid, fpst); return 0; @@ -1121,7 +1224,7 @@ static uint16_t vfp_touszh(float16 x, void *fpstp) } #define DO_2OP(NAME, FUNC, TYPE) \ -void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ { \ intptr_t i, oprsz = simd_oprsz(desc); \ TYPE *d = vd, *n = vn; \ @@ -1133,10 +1236,12 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32) +DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32) DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64) DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16) DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32) +DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32) DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64) DO_2OP(gvec_vrintx_h, float16_round_to_int, float16) @@ 
-1166,8 +1271,10 @@ DO_2OP(gvec_touszh, vfp_touszh, float16) #define DO_2OP_CMP0(FN, CMPOP, DIRN) \ WRAP_CMP0_##DIRN(FN, CMPOP, float16) \ WRAP_CMP0_##DIRN(FN, CMPOP, float32) \ + WRAP_CMP0_##DIRN(FN, CMPOP, float64) \ DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \ - DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32) + DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32) \ + DO_2OP(gvec_f##FN##0_d, float64_##FN##0, float64) DO_2OP_CMP0(cgt, cgt, FWD) DO_2OP_CMP0(cge, cge, FWD) @@ -1223,6 +1330,25 @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat) return float64_abs(float64_sub(op1, op2, stat)); } +/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */ +static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat) +{ + float16 r = float16_sub(op1, op2, stat); + return float16_is_any_nan(r) ? r : float16_abs(r); +} + +static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat) +{ + float32 r = float32_sub(op1, op2, stat); + return float32_is_any_nan(r) ? r : float32_abs(r); +} + +static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat) +{ + float64 r = float64_sub(op1, op2, stat); + return float64_is_any_nan(r) ? r : float64_abs(r); +} + /* * Reciprocal step. These are the AArch32 version which uses a * non-fused multiply-and-subtract. @@ -1279,7 +1405,8 @@ static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat) } #define DO_3OP(NAME, FUNC, TYPE) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *stat, uint32_t desc) \ { \ intptr_t i, oprsz = simd_oprsz(desc); \ TYPE *d = vd, *n = vn, *m = vm; \ @@ -1309,6 +1436,10 @@ DO_3OP(gvec_fabd_h, float16_abd, float16) DO_3OP(gvec_fabd_s, float32_abd, float32) DO_3OP(gvec_fabd_d, float64_abd, float64) +DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16) +DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32) +DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64) + DO_3OP(gvec_fceq_h, float16_ceq, float16) DO_3OP(gvec_fceq_s, float32_ceq, float32) DO_3OP(gvec_fceq_d, float64_ceq, float64) @@ -1368,6 +1499,22 @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16) DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32) DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64) +DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16) +DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32) +DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64) + +DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16) +DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32) +DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64) + +DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16) +DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32) +DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64) + +DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) +DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) +DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) + #endif #undef DO_3OP @@ -1433,8 +1580,27 @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, return float64_muladd(float64_chs(op1), op2, dest, 0, stat); } -#define DO_MULADD(NAME, FUNC, TYPE) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, + float_status *stat) +{ + return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + +static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, + 
float_status *stat) +{ + return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + +static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2, + float_status *stat) +{ + return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + +#define DO_MULADD(NAME, FUNC, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *stat, uint32_t desc) \ { \ intptr_t i, oprsz = simd_oprsz(desc); \ TYPE *d = vd, *n = vn, *m = vm; \ @@ -1458,6 +1624,10 @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) +DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) +DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) +DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) + /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. */ @@ -1511,7 +1681,8 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8) #undef DO_MLA_IDX #define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *stat, uint32_t desc) \ { \ intptr_t i, j, oprsz = simd_oprsz(desc); \ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ @@ -1553,29 +1724,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) #undef DO_FMUL_IDX -#define DO_FMLA_IDX(NAME, TYPE, H) \ +#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ - void *stat, uint32_t desc) \ + float_status *stat, uint32_t desc) \ { \ intptr_t i, j, oprsz = simd_oprsz(desc); \ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \ - TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ - intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ + intptr_t idx = simd_data(desc); \ TYPE *d = vd, *n = vn, *m = vm, *a = va; \ - op1_neg <<= (8 * sizeof(TYPE) - 1); \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ TYPE mm = m[H(i + idx)]; \ for (j = 0; j < segment; j++) { \ - d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \ - mm, a[i + j], 0, stat); \ + d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \ + a[i + j], NEGF, stat); \ } \ } \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2) -DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) -DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8) +DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) +DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) +DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) + +DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) +DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) +DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) + +DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) +DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) +DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) #undef DO_FMLA_IDX @@ -1948,135 +2125,171 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2) * as there is not yet SVE versions that might use blocking. 
*/ -static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst, - uint32_t desc, bool fz16) +static void do_fmlal(float32 *d, void *vn, void *vm, + CPUARMState *env, uint32_t desc, + ARMFPStatusFlavour fpst_idx, + uint64_t negx, int negf) { + float_status *fpst = &env->vfp.fp_status[fpst_idx]; + bool fz16 = env->vfp.fpcr & FPCR_FZ16; intptr_t i, oprsz = simd_oprsz(desc); - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); int is_q = oprsz == 16; uint64_t n_4, m_4; - /* Pre-load all of the f16 data, avoiding overlap issues. */ - n_4 = load4_f16(vn, is_q, is_2); + /* + * Pre-load all of the f16 data, avoiding overlap issues. + * Negate all inputs for AH=0 FMLSL at once. + */ + n_4 = load4_f16(vn, is_q, is_2) ^ negx; m_4 = load4_f16(vm, is_q, is_2); - /* Negate all inputs for FMLSL at once. */ - if (is_s) { - n_4 ^= 0x8000800080008000ull; - } - for (i = 0; i < oprsz / 4; i++) { float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); } clear_tail(d, oprsz, simd_maxsz(desc)); } void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { - CPUARMState *env = venv; - do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc, - get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + uint64_t negx = is_s ? 0x8000800080008000ull : 0; + + do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0); } void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { - CPUARMState *env = venv; - do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc, - get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + uint64_t negx = 0; + int negf = 0; + + if (is_s) { + if (env->vfp.fpcr & FPCR_AH) { + negf = float_muladd_negate_product; + } else { + negx = 0x8000800080008000ull; + } + } + do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf); } void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { intptr_t i, oprsz = simd_oprsz(desc); - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); - CPUARMState *env = venv; - float_status *status = &env->vfp.fp_status; - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16); + float_status *status = &env->vfp.fp_status[FPST_A64]; + bool fz16 = env->vfp.fpcr & FPCR_FZ16; + int negx = 0, negf = 0; + + if (is_s) { + if (env->vfp.fpcr & FPCR_AH) { + negf = float_muladd_negate_product; + } else { + negx = 0x8000; + } + } for (i = 0; i < oprsz; i += sizeof(float32)) { - float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn; + float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); float32 nn = float16_to_float32_by_bits(nn_16, fz16); float32 mm = float16_to_float32_by_bits(mm_16, fz16); float32 aa = *(float32 *)(va + H1_4(i)); - *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status); + *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); } } -static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst, - uint32_t 
desc, bool fz16) +static void do_fmlal_idx(float32 *d, void *vn, void *vm, + CPUARMState *env, uint32_t desc, + ARMFPStatusFlavour fpst_idx, + uint64_t negx, int negf) { + float_status *fpst = &env->vfp.fp_status[fpst_idx]; + bool fz16 = env->vfp.fpcr & FPCR_FZ16; intptr_t i, oprsz = simd_oprsz(desc); - int is_s = extract32(desc, SIMD_DATA_SHIFT, 1); int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1); int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3); int is_q = oprsz == 16; uint64_t n_4; float32 m_1; - /* Pre-load all of the f16 data, avoiding overlap issues. */ - n_4 = load4_f16(vn, is_q, is_2); - - /* Negate all inputs for FMLSL at once. */ - if (is_s) { - n_4 ^= 0x8000800080008000ull; - } - + /* + * Pre-load all of the f16 data, avoiding overlap issues. + * Negate all inputs for AH=0 FMLSL at once. + */ + n_4 = load4_f16(vn, is_q, is_2) ^ negx; m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16); for (i = 0; i < oprsz / 4; i++) { float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); - d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst); + d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); } clear_tail(d, oprsz, simd_maxsz(desc)); } void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { - CPUARMState *env = venv; - do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc, - get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + uint64_t negx = is_s ? 0x8000800080008000ull : 0; + + do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0); } void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { - CPUARMState *env = venv; - do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc, - get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); + uint64_t negx = 0; + int negf = 0; + + if (is_s) { + if (env->vfp.fpcr & FPCR_AH) { + negf = float_muladd_negate_product; + } else { + negx = 0x8000800080008000ull; + } + } + do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf); } void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, - void *venv, uint32_t desc) + CPUARMState *env, uint32_t desc) { intptr_t i, j, oprsz = simd_oprsz(desc); - uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15; + bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); - CPUARMState *env = venv; - float_status *status = &env->vfp.fp_status; - bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16); + float_status *status = &env->vfp.fp_status[FPST_A64]; + bool fz16 = env->vfp.fpcr & FPCR_FZ16; + int negx = 0, negf = 0; + if (is_s) { + if (env->vfp.fpcr & FPCR_AH) { + negf = float_muladd_negate_product; + } else { + negx = 0x8000; + } + } for (i = 0; i < oprsz; i += 16) { float16 mm_16 = *(float16 *)(vm + i + idx); float32 mm = float16_to_float32_by_bits(mm_16, fz16); for (j = 0; j < 16; j += sizeof(float32)) { - float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn; + float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; float32 nn = float16_to_float32_by_bits(nn_16, fz16); float32 aa = *(float32 *)(va + H1_4(i + j)); *(float32 *)(vd + H1_4(i + j)) = - float32_muladd(nn, mm, aa, 0, status); + float32_muladd(nn, mm, aa, negf, status); } } } @@ -2321,7 +2534,8 @@ 
DO_ABA(gvec_uaba_d, uint64_t) #undef DO_ABA #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *stat, uint32_t desc) \ { \ ARMVectorReg scratch; \ intptr_t oprsz = simd_oprsz(desc); \ @@ -2359,6 +2573,16 @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) +#ifdef TARGET_AARCH64 +DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2) +DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4) +DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, ) + +DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2) +DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4) +DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, ) +#endif + #undef DO_3OP_PAIR #define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ @@ -2406,7 +2630,7 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4) #undef DO_3OP_PAIR #define DO_VCVT_FIXED(NAME, FUNC, TYPE) \ - void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \ { \ intptr_t i, oprsz = simd_oprsz(desc); \ int shift = simd_data(desc); \ @@ -2418,21 +2642,25 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4) clear_tail(d, oprsz, simd_maxsz(desc)); \ } +DO_VCVT_FIXED(gvec_vcvt_sd, helper_vfp_sqtod, uint64_t) +DO_VCVT_FIXED(gvec_vcvt_ud, helper_vfp_uqtod, uint64_t) DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t) DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t) -DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t) -DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t) DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t) DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t) -DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t) -DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t) + +DO_VCVT_FIXED(gvec_vcvt_rz_ds, helper_vfp_tosqd_round_to_zero, uint64_t) +DO_VCVT_FIXED(gvec_vcvt_rz_du, helper_vfp_touqd_round_to_zero, uint64_t) +DO_VCVT_FIXED(gvec_vcvt_rz_fs, helper_vfp_tosls_round_to_zero, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_rz_fu, helper_vfp_touls_round_to_zero, uint32_t) +DO_VCVT_FIXED(gvec_vcvt_rz_hs, helper_vfp_toshh_round_to_zero, uint16_t) +DO_VCVT_FIXED(gvec_vcvt_rz_hu, helper_vfp_touhh_round_to_zero, uint16_t) #undef DO_VCVT_FIXED #define DO_VCVT_RMODE(NAME, FUNC, TYPE) \ - void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + void HELPER(NAME)(void *vd, void *vn, float_status *fpst, uint32_t desc) \ { \ - float_status *fpst = stat; \ intptr_t i, oprsz = simd_oprsz(desc); \ uint32_t rmode = simd_data(desc); \ uint32_t prev_rmode = get_float_rounding_mode(fpst); \ @@ -2445,6 +2673,8 @@ DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t) clear_tail(d, oprsz, simd_maxsz(desc)); \ } +DO_VCVT_RMODE(gvec_vcvt_rm_sd, helper_vfp_tosqd, uint64_t) +DO_VCVT_RMODE(gvec_vcvt_rm_ud, helper_vfp_touqd, uint64_t) DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t) DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t) DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t) @@ -2453,9 +2683,8 @@ DO_VCVT_RMODE(gvec_vcvt_rm_uh, helper_vfp_touhh, uint16_t) #undef DO_VCVT_RMODE #define DO_VRINT_RMODE(NAME, FUNC, TYPE) \ - void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ + void 
HELPER(NAME)(void *vd, void *vn, float_status *fpst, uint32_t desc) \ { \ - float_status *fpst = stat; \ intptr_t i, oprsz = simd_oprsz(desc); \ uint32_t rmode = simd_data(desc); \ uint32_t prev_rmode = get_float_rounding_mode(fpst); \ @@ -2474,10 +2703,9 @@ DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t) #undef DO_VRINT_RMODE #ifdef TARGET_AARCH64 -void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc) +void HELPER(simd_tblx)(void *vd, void *vm, CPUARMState *env, uint32_t desc) { const uint8_t *indices = vm; - CPUARMState *env = venv; size_t oprsz = simd_oprsz(desc); uint32_t rn = extract32(desc, SIMD_DATA_SHIFT, 5); bool is_tbx = extract32(desc, SIMD_DATA_SHIFT + 5, 1); @@ -2710,44 +2938,109 @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b) * BFloat16 Dot Product */ -float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) +bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp) +{ + /* + * For BFDOT, BFMMLA, etc, the behaviour depends on FPCR.EBF. + * For EBF = 0, we ignore the FPCR bits which determine rounding + * mode and denormal-flushing, and we do unfused multiplies and + * additions with intermediate rounding of all products and sums. + * For EBF = 1, we honour FPCR rounding mode and denormal-flushing bits, + * and we perform a fused two-way sum-of-products without intermediate + * rounding of the products. + * In either case, we don't set fp exception flags. + * + * EBF is AArch64 only, so even if it's set in the FPCR it has + * no effect on AArch32 instructions. + */ + bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF; + + *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32]; + set_default_nan_mode(true, statusp); + + if (ebf) { + /* EBF=1 needs to do a step with round-to-odd semantics */ + *oddstatusp = *statusp; + set_float_rounding_mode(float_round_to_odd, oddstatusp); + } else { + set_flush_to_zero(true, statusp); + set_flush_inputs_to_zero(true, statusp); + set_float_rounding_mode(float_round_to_odd_inf, statusp); + } + return ebf; +} + +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) { - /* FPCR is ignored for BFDOT and BFMMLA. */ - float_status bf_status = { - .tininess_before_rounding = float_tininess_before_rounding, - .float_rounding_mode = float_round_to_odd_inf, - .flush_to_zero = true, - .flush_inputs_to_zero = true, - .default_nan_mode = true, - }; float32 t1, t2; /* * Extract each BFloat16 from the element pair, and shift * them such that they become float32. */ - t1 = float32_mul(e1 << 16, e2 << 16, &bf_status); - t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status); - t1 = float32_add(t1, t2, &bf_status); - t1 = float32_add(sum, t1, &bf_status); + t1 = float32_mul(e1 << 16, e2 << 16, fpst); + t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, fpst); + t1 = float32_add(t1, t2, fpst); + t1 = float32_add(sum, t1, fpst); return t1; } -void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc) +float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, + float_status *fpst, float_status *fpst_odd) +{ + /* + * Compare f16_dotadd() in sme_helper.c, but here we have + * bfloat16 inputs. In particular that means that we do not + * want the FPCR.FZ16 flush semantics, so we use the normal + * float_status for the input handling here. 
+ */ + float64 e1r = float32_to_float64(e1 << 16, fpst); + float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst); + float64 e2r = float32_to_float64(e2 << 16, fpst); + float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst); + float64 t64; + float32 t32; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, fpst_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); + + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, fpst); + + /* The final accumulation step is not fused. */ + return float32_add(sum, t32, fpst); +} + +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; + float_status fpst, fpst_odd; - for (i = 0; i < opr_sz / 4; ++i) { - d[i] = bfdotadd(a[i], n[i], m[i]); + if (is_ebf(env, &fpst, &fpst_odd)) { + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd); + } + } else { + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = bfdotadd(a[i], n[i], m[i], &fpst); + } } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, - void *va, uint32_t desc) + void *va, CPUARMState *env, uint32_t desc) { intptr_t i, j, opr_sz = simd_oprsz(desc); intptr_t index = simd_data(desc); @@ -2755,59 +3048,106 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, intptr_t eltspersegment = MIN(16 / 4, elements); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; + float_status fpst, fpst_odd; - for (i = 0; i < elements; i += eltspersegment) { - uint32_t m_idx = m[i + H4(index)]; + if (is_ebf(env, &fpst, &fpst_odd)) { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(index)]; - for (j = i; j < i + eltspersegment; j++) { - d[j] = bfdotadd(a[j], n[j], m_idx); + for (j = i; j < i + eltspersegment; j++) { + d[j] = bfdotadd_ebf(a[j], n[j], m_idx, &fpst, &fpst_odd); + } + } + } else { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(index)]; + + for (j = i; j < i + eltspersegment; j++) { + d[j] = bfdotadd(a[j], n[j], m_idx, &fpst); + } } } clear_tail(d, opr_sz, simd_maxsz(desc)); } -void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc) +void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) { intptr_t s, opr_sz = simd_oprsz(desc); float32 *d = vd, *a = va; uint32_t *n = vn, *m = vm; + float_status fpst, fpst_odd; - for (s = 0; s < opr_sz / 4; s += 4) { - float32 sum00, sum01, sum10, sum11; - - /* - * Process the entire segment at once, writing back the - * results only after we've consumed all of the inputs. 
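A minimal host-float sketch of the lane layout bfdotadd() works on: each 32-bit element packs two bfloat16 values, and a bfloat16 widens to float32 by occupying the top 16 bits, which is why the helpers use e1 << 16 and e1 & 0xffff0000u. This uses ordinary host floats rather than QEMU's float_status machinery, so it ignores the round-to-odd-inf and flush-to-zero configuration that is_ebf() establishes; all names here (f32_from_bits, bf16_lo, bfdot_step) are invented for illustration.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Reinterpret a 32-bit pattern as a host float. */
static float f32_from_bits(uint32_t bits)
{
    float f;
    memcpy(&f, &bits, sizeof(f));
    return f;
}

/*
 * Widen the two bfloat16 lanes packed in one 32-bit element:
 * bfloat16 is the top half of a float32, so the low lane is e << 16
 * and the high lane is e & 0xffff0000u.
 */
static float bf16_lo(uint32_t e) { return f32_from_bits(e << 16); }
static float bf16_hi(uint32_t e) { return f32_from_bits(e & 0xffff0000u); }

/* Unfused two-way sum of products, the shape bfdotadd() follows. */
static float bfdot_step(float sum, uint32_t e1, uint32_t e2)
{
    return sum + (bf16_lo(e1) * bf16_lo(e2) + bf16_hi(e1) * bf16_hi(e2));
}

int main(void)
{
    /* In bfloat16, 0x3f80 is 1.0 and 0x4000 is 2.0. */
    uint32_t n = 0x40003f80u;  /* lanes: lo = 1.0, hi = 2.0 */
    uint32_t m = 0x3f804000u;  /* lanes: lo = 2.0, hi = 1.0 */

    printf("%f\n", bfdot_step(0.0f, n, m)); /* 1*2 + 2*1 = 4.0 */
    return 0;
}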
- * - * Key to indices by column: - * i j i k j k - */ - sum00 = a[s + H4(0 + 0)]; - sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]); - sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]); - - sum01 = a[s + H4(0 + 1)]; - sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]); - sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]); - - sum10 = a[s + H4(2 + 0)]; - sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]); - sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]); + if (is_ebf(env, &fpst, &fpst_odd)) { + for (s = 0; s < opr_sz / 4; s += 4) { + float32 sum00, sum01, sum10, sum11; - sum11 = a[s + H4(2 + 1)]; - sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]); - sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]); + /* + * Process the entire segment at once, writing back the + * results only after we've consumed all of the inputs. + * + * Key to indices by column: + * i j i k j k + */ + sum00 = a[s + H4(0 + 0)]; + sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd); + sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd); + + sum01 = a[s + H4(0 + 1)]; + sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd); + sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd); + + sum10 = a[s + H4(2 + 0)]; + sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd); + sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd); + + sum11 = a[s + H4(2 + 1)]; + sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd); + sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd); + + d[s + H4(0 + 0)] = sum00; + d[s + H4(0 + 1)] = sum01; + d[s + H4(2 + 0)] = sum10; + d[s + H4(2 + 1)] = sum11; + } + } else { + for (s = 0; s < opr_sz / 4; s += 4) { + float32 sum00, sum01, sum10, sum11; - d[s + H4(0 + 0)] = sum00; - d[s + H4(0 + 1)] = sum01; - d[s + H4(2 + 0)] = sum10; - d[s + H4(2 + 1)] = sum11; + /* + * Process the entire segment at once, writing back the + * results only after we've consumed all of the inputs. 
+ * + * Key to indices by column: + * i j i k j k + */ + sum00 = a[s + H4(0 + 0)]; + sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst); + sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst); + + sum01 = a[s + H4(0 + 1)]; + sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst); + sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst); + + sum10 = a[s + H4(2 + 0)]; + sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst); + sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst); + + sum11 = a[s + H4(2 + 1)]; + sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst); + sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst); + + d[s + H4(0 + 0)] = sum00; + d[s + H4(0 + 1)] = sum01; + d[s + H4(2 + 0)] = sum10; + d[s + H4(2 + 1)] = sum11; + } } clear_tail(d, opr_sz, simd_maxsz(desc)); } void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, - void *stat, uint32_t desc) + float_status *stat, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); intptr_t sel = simd_data(desc); @@ -2823,7 +3163,7 @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, } void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, - void *va, void *stat, uint32_t desc) + void *va, float_status *stat, uint32_t desc) { intptr_t i, j, opr_sz = simd_oprsz(desc); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); @@ -2867,3 +3207,49 @@ DO_CLAMP(gvec_uclamp_b, uint8_t) DO_CLAMP(gvec_uclamp_h, uint16_t) DO_CLAMP(gvec_uclamp_s, uint32_t) DO_CLAMP(gvec_uclamp_d, uint64_t) + +/* Bit count in each 8-bit word. */ +void HELPER(gvec_cnt_b)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; ++i) { + d[i] = ctpop8(n[i]); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +/* Reverse bits in each 8 bit word */ +void HELPER(gvec_rbit_b)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = revbit64(bswap64(n[i])); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_urecpe_s)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = helper_recpe_u32(n[i]); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = helper_rsqrte_u32(n[i]); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index 3ca1b94..c02f9c3 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -20,6 +20,10 @@ #ifndef TARGET_ARM_VEC_INTERNAL_H #define TARGET_ARM_VEC_INTERNAL_H +#include "fpu/softfloat.h" + +typedef struct CPUArchState CPUARMState; + /* * Note that vector data is stored in host-endian 64-bit chunks, * so addressing units smaller than that needs a host-endian fixup. @@ -223,13 +227,79 @@ int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool); * bfdotadd: * @sum: addend * @e1, @e2: multiplicand vectors + * @fpst: floating-point status to use + * + * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. + * The @e1 and @e2 operands correspond to the 32-bit source vector + * slots and contain two Bfloat16 values each. 
+ * + * Corresponds to the ARM pseudocode function BFDotAdd, specialized + * for the FPCR.EBF == 0 case. + */ +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst); +/** + * bfdotadd_ebf: + * @sum: addend + * @e1, @e2: multiplicand vectors + * @fpst: floating-point status to use + * @fpst_odd: floating-point status to use for round-to-odd operations * * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. * The @e1 and @e2 operands correspond to the 32-bit source vector * slots and contain two Bfloat16 values each. * - * Corresponds to the ARM pseudocode function BFDotAdd. + * Corresponds to the ARM pseudocode function BFDotAdd, specialized + * for the FPCR.EBF == 1 case. + */ +float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, + float_status *fpst, float_status *fpst_odd); + +/** + * is_ebf: + * @env: CPU state + * @statusp: pointer to floating point status to fill in + * @oddstatusp: pointer to floating point status to fill in for round-to-odd + * + * Determine whether a BFDotAdd operation should use FPCR.EBF = 0 + * or FPCR.EBF = 1 semantics. On return, has initialized *statusp + * and *oddstatusp to suitable float_status arguments to use with either + * bfdotadd() or bfdotadd_ebf(). + * Returns true for EBF = 1, false for EBF = 0. (The caller should use this + * to decide whether to call bfdotadd() or bfdotadd_ebf().) + */ +bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); + +/* + * Negate as for FPCR.AH=1 -- do not negate NaNs. */ -float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2); +static inline float16 float16_ah_chs(float16 a) +{ + return float16_is_any_nan(a) ? a : float16_chs(a); +} + +static inline float32 float32_ah_chs(float32 a) +{ + return float32_is_any_nan(a) ? a : float32_chs(a); +} + +static inline float64 float64_ah_chs(float64 a) +{ + return float64_is_any_nan(a) ? a : float64_chs(a); +} + +static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) +{ + return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a); +} + +static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah) +{ + return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a); +} + +static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) +{ + return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); +} #endif /* TARGET_ARM_VEC_INTERNAL_H */ diff --git a/target/arm/tcg/vfp.decode b/target/arm/tcg/vfp.decode index 5405e80..2dd87a2 100644 --- a/target/arm/tcg/vfp.decode +++ b/target/arm/tcg/vfp.decode @@ -141,18 +141,18 @@ VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s -VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s -VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s +VFNMS_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s +VFNMA_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s -VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s -VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s +VFNMS_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s +VFNMA_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d -VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... 
@vfp_dnm_d -VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d +VFNMS_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d +VFNMA_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \ vd=%vd_sp imm=%vmov_imm diff --git a/target/arm/vfp_helper.c b/target/arm/tcg/vfp_helper.c index ce26b8a..b1324c5 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/tcg/vfp_helper.c @@ -19,94 +19,165 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/helper-proto.h" #include "internals.h" #include "cpu-features.h" -#ifdef CONFIG_TCG -#include "qemu/log.h" #include "fpu/softfloat.h" -#endif +#include "qemu/log.h" + +#define HELPER_H "tcg/helper.h" +#include "exec/helper-proto.h.inc" -/* VFP support. We follow the convention used for VFP instructions: - Single precision routines have a "s" suffix, double precision a - "d" suffix. */ +/* + * Set the float_status behaviour to match the Arm defaults: + * * tininess-before-rounding + * * 2-input NaN propagation prefers SNaN over QNaN, and then + * operand A over operand B (see FPProcessNaNs() pseudocode) + * * 3-input NaN propagation prefers SNaN over QNaN, and then + * operand C over A over B (see FPProcessNaNs3() pseudocode, + * but note that for QEMU muladd is a * b + c, whereas for + * the pseudocode function the arguments are in the order c, a, b. + * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet, + * and the input NaN if it is signalling + * * Default NaN has sign bit clear, msb frac bit set + */ +void arm_set_default_fp_behaviours(float_status *s) +{ + set_float_detect_tininess(float_tininess_before_rounding, s); + set_float_ftz_detection(float_ftz_before_rounding, s); + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); + set_float_3nan_prop_rule(float_3nan_prop_s_cab, s); + set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s); + set_float_default_nan_pattern(0b01000000, s); +} -#ifdef CONFIG_TCG +/* + * Set the float_status behaviour to match the FEAT_AFP + * FPCR.AH=1 requirements: + * * tininess-after-rounding + * * 2-input NaN propagation prefers the first NaN + * * 3-input NaN propagation prefers a over b over c + * * 0 * Inf + NaN always returns the input NaN and doesn't + * set Invalid for a QNaN + * * default NaN has sign bit set, msb frac bit set + */ +void arm_set_ah_fp_behaviours(float_status *s) +{ + set_float_detect_tininess(float_tininess_after_rounding, s); + set_float_ftz_detection(float_ftz_after_rounding, s); + set_float_2nan_prop_rule(float_2nan_prop_ab, s); + set_float_3nan_prop_rule(float_3nan_prop_abc, s); + set_float_infzeronan_rule(float_infzeronan_dnan_never | + float_infzeronan_suppress_invalid, s); + set_float_default_nan_pattern(0b11000000, s); +} /* Convert host exception flags to vfp form. 
*/ -static inline int vfp_exceptbits_from_host(int host_bits) +static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah) { - int target_bits = 0; + uint32_t target_bits = 0; if (host_bits & float_flag_invalid) { - target_bits |= 1; + target_bits |= FPSR_IOC; } if (host_bits & float_flag_divbyzero) { - target_bits |= 2; + target_bits |= FPSR_DZC; } if (host_bits & float_flag_overflow) { - target_bits |= 4; + target_bits |= FPSR_OFC; } - if (host_bits & (float_flag_underflow | float_flag_output_denormal)) { - target_bits |= 8; + if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) { + target_bits |= FPSR_UFC; } if (host_bits & float_flag_inexact) { - target_bits |= 0x10; + target_bits |= FPSR_IXC; + } + if (host_bits & float_flag_input_denormal_flushed) { + target_bits |= FPSR_IDC; + } + /* + * With FPCR.AH, IDC is set when an input denormal is used, + * and flushing an output denormal to zero sets both IXC and UFC. + */ + if (ah && (host_bits & float_flag_input_denormal_used)) { + target_bits |= FPSR_IDC; } - if (host_bits & float_flag_input_denormal) { - target_bits |= 0x80; + if (ah && (host_bits & float_flag_output_denormal_flushed)) { + target_bits |= FPSR_IXC; } return target_bits; } -/* Convert vfp exception flags to target form. */ -static inline int vfp_exceptbits_to_host(int target_bits) +uint32_t vfp_get_fpsr_from_host(CPUARMState *env) { - int host_bits = 0; + uint32_t a32_flags = 0, a64_flags = 0; - if (target_bits & 1) { - host_bits |= float_flag_invalid; - } - if (target_bits & 2) { - host_bits |= float_flag_divbyzero; - } - if (target_bits & 4) { - host_bits |= float_flag_overflow; - } - if (target_bits & 8) { - host_bits |= float_flag_underflow; - } - if (target_bits & 0x10) { - host_bits |= float_flag_inexact; - } - if (target_bits & 0x80) { - host_bits |= float_flag_input_denormal; + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]); + a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]); + /* FZ16 does not generate an input denormal exception. */ + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16]) + & ~float_flag_input_denormal_flushed); + a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16]) + & ~float_flag_input_denormal_flushed); + + a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]); + a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) + & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); + /* + * We do not merge in flags from FPST_AH or FPST_AH_F16, because + * they are used for insns that must not set the cumulative exception bits. + */ + + /* + * Flushing an input denormal *only* because FPCR.FIZ == 1 does + * not set FPSR.IDC; if FPCR.FZ is also set then this takes + * precedence and IDC is set (see the FPUnpackBase pseudocode). + * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1). + * We only do this for the a64 flags because FIZ has no effect + * on AArch32 even if it is set. + */ + if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) { + a64_flags &= ~float_flag_input_denormal_flushed; } - return host_bits; + return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) | + vfp_exceptbits_from_host(a32_flags, false); } -static uint32_t vfp_get_fpscr_from_host(CPUARMState *env) +void vfp_clear_float_status_exc_flags(CPUARMState *env) { - uint32_t i; + /* + * Clear out all the exception-flag information in the float_status + * values. 
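For reference when reading vfp_exceptbits_from_host() above: the cumulative exception flags sit at fixed positions in FPSR. A minimal sketch, assuming the standard Arm ARM FPSR layout for the FPSR_* bit positions; the HOST_* values are invented stand-ins for softfloat's float_flag_* constants and do not match softfloat's real encoding.

#include <stdint.h>

/* Cumulative exception flag positions in FPSR (Arm ARM register layout). */
#define FPSR_IOC (1u << 0)   /* Invalid Operation */
#define FPSR_DZC (1u << 1)   /* Divide by Zero */
#define FPSR_OFC (1u << 2)   /* Overflow */
#define FPSR_UFC (1u << 3)   /* Underflow */
#define FPSR_IXC (1u << 4)   /* Inexact */
#define FPSR_IDC (1u << 7)   /* Input Denormal */

/* Invented stand-ins for softfloat's float_flag_* values. */
enum {
    HOST_INVALID   = 1 << 0,
    HOST_DIVBYZERO = 1 << 1,
    HOST_OVERFLOW  = 1 << 2,
    HOST_UNDERFLOW = 1 << 3,
    HOST_INEXACT   = 1 << 4,
    HOST_IDENORMAL = 1 << 5,
};

/* Same mapping shape as vfp_exceptbits_from_host(), without the AH cases. */
static uint32_t exceptbits_from_host(int host_bits)
{
    uint32_t r = 0;

    if (host_bits & HOST_INVALID)   { r |= FPSR_IOC; }
    if (host_bits & HOST_DIVBYZERO) { r |= FPSR_DZC; }
    if (host_bits & HOST_OVERFLOW)  { r |= FPSR_OFC; }
    if (host_bits & HOST_UNDERFLOW) { r |= FPSR_UFC; }
    if (host_bits & HOST_INEXACT)   { r |= FPSR_IXC; }
    if (host_bits & HOST_IDENORMAL) { r |= FPSR_IDC; }
    return r;
}

int main(void)
{
    /* Invalid + inexact maps to IOC | IXC, i.e. FPSR bits 0 and 4. */
    return exceptbits_from_host(HOST_INVALID | HOST_INEXACT)
           == (FPSR_IOC | FPSR_IXC) ? 0 : 1;
}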
The caller should have arranged for env->vfp.fpsr to + * be the architecturally up-to-date exception flag information first. + */ + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]); + set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]); +} - i = get_float_exception_flags(&env->vfp.fp_status); - i |= get_float_exception_flags(&env->vfp.standard_fp_status); - /* FZ16 does not generate an input denormal exception. */ - i |= (get_float_exception_flags(&env->vfp.fp_status_f16) - & ~float_flag_input_denormal); - i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16) - & ~float_flag_input_denormal); - return vfp_exceptbits_from_host(i); +static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env) +{ + /* + * Synchronize any pending exception-flag information in the + * float_status values into env->vfp.fpsr, and then clear out + * the float_status data. + */ + env->vfp.fpsr |= vfp_get_fpsr_from_host(env); + vfp_clear_float_status_exc_flags(env); } -static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val) +void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) { - int i; - uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR]; + uint64_t changed = env->vfp.fpcr; changed ^= val; + changed &= mask; if (changed & (3 << 22)) { - i = (val >> 22) & 3; + int i = (val >> 22) & 3; switch (i) { case FPROUNDING_TIEEVEN: i = float_round_nearest_even; @@ -121,154 +192,88 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val) i = float_round_to_zero; break; } - set_float_rounding_mode(i, &env->vfp.fp_status); - set_float_rounding_mode(i, &env->vfp.fp_status_f16); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); } if (changed & FPCR_FZ16) { bool ftz_enabled = val & FPCR_FZ16; - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16); - set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16); - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); } if (changed & FPCR_FZ) { bool ftz_enabled = val & FPCR_FZ; - set_flush_to_zero(ftz_enabled, &env->vfp.fp_status); - set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status); - } - if (changed & FPCR_DN) { - bool dnan_enabled = val & FPCR_DN; - set_default_nan_mode(dnan_enabled, &env->vfp.fp_status); - set_default_nan_mode(dnan_enabled, 
&env->vfp.fp_status_f16); - } - - /* - * The exception flags are ORed together when we read fpscr so we - * only need to preserve the current state in one of our - * float_status values. - */ - i = vfp_exceptbits_to_host(val); - set_float_exception_flags(i, &env->vfp.fp_status); - set_float_exception_flags(0, &env->vfp.fp_status_f16); - set_float_exception_flags(0, &env->vfp.standard_fp_status); - set_float_exception_flags(0, &env->vfp.standard_fp_status_f16); -} - -#else - -static uint32_t vfp_get_fpscr_from_host(CPUARMState *env) -{ - return 0; -} - -static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val) -{ -} - -#endif - -uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) -{ - uint32_t i, fpscr; - - fpscr = env->vfp.xregs[ARM_VFP_FPSCR] - | (env->vfp.vec_len << 16) - | (env->vfp.vec_stride << 20); - - /* - * M-profile LTPSIZE overlaps A-profile Stride; whichever of the - * two is not applicable to this CPU will always be zero. - */ - fpscr |= env->v7m.ltpsize << 16; - - fpscr |= vfp_get_fpscr_from_host(env); - - i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; - fpscr |= i ? FPCR_QC : 0; - - return fpscr; -} - -uint32_t vfp_get_fpscr(CPUARMState *env) -{ - return HELPER(vfp_get_fpscr)(env); -} - -void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) -{ - ARMCPU *cpu = env_archcpu(env); - - /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ - if (!cpu_isar_feature(any_fp16, cpu)) { - val &= ~FPCR_FZ16; + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); + /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); } - - vfp_set_fpscr_to_host(env, val); - - if (!arm_feature(env, ARM_FEATURE_M)) { + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { /* - * Short-vector length and stride; on M-profile these bits - * are used for different purposes. - * We can't make this conditional be "if MVFR0.FPShVec != 0", - * because in v7A no-short-vector-support cores still had to - * allow Stride/Len to be written with the only effect that - * some insns are required to UNDEF if the guest sets them. + * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or + * both FPCR.AH = 0 and FPCR.FZ = 1. */ - env->vfp.vec_len = extract32(val, 16, 3); - env->vfp.vec_stride = extract32(val, 20, 2); - } else if (cpu_isar_feature(aa32_mve, cpu)) { - env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT, - FPCR_LTPSIZE_LENGTH); + bool fitz_enabled = (val & FPCR_FIZ) || + (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); } - - if (arm_feature(env, ARM_FEATURE_NEON) || - cpu_isar_feature(aa32_mve, cpu)) { - /* - * The bit we set within fpscr_q is arbitrary; the register as a - * whole being zero/non-zero is what counts. - * TODO: M-profile MVE also has a QC bit. 
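The condition choosing fitz_enabled in the hunk above is compact; written as plain booleans it is FIZ || (FZ && !AH). A minimal sketch that simply enumerates the eight FPCR.{FZ,AH,FIZ} combinations and prints whether denormal inputs to the A64 float_status get flushed, assuming fz/ah/fiz stand for those three FPCR bits.

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    /* Enumerate FPCR.FZ, FPCR.AH and FPCR.FIZ. */
    for (int fz = 0; fz <= 1; fz++) {
        for (int ah = 0; ah <= 1; ah++) {
            for (int fiz = 0; fiz <= 1; fiz++) {
                /*
                 * Mirrors the hunk above:
                 * (val & FPCR_FIZ) || (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ
                 */
                bool flush_a64_inputs = fiz || (fz && !ah);

                printf("FZ=%d AH=%d FIZ=%d -> flush A64 inputs: %d\n",
                       fz, ah, fiz, (int)flush_a64_inputs);
            }
        }
    }
    return 0;
}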
- */ - env->vfp.qc[0] = val & FPCR_QC; - env->vfp.qc[1] = 0; - env->vfp.qc[2] = 0; - env->vfp.qc[3] = 0; + if (changed & FPCR_DN) { + bool dnan_enabled = val & FPCR_DN; + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]); + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]); + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]); + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]); + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]); + set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]); } + if (changed & FPCR_AH) { + bool ah_enabled = val & FPCR_AH; + if (ah_enabled) { + /* Change behaviours for A64 FP operations */ + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + } else { + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + } + } /* - * We don't implement trapped exception handling, so the - * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) - * - * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in - * fp_status; QC, Len and Stride are stored separately earlier. - * Clear out all of those and the RES0 bits: only NZCV, AHP, DN, - * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR]. + * If any bits changed that we look at in vfp_get_fpsr_from_host(), + * we must sync the float_status flags into vfp.fpsr now (under the + * old regime) before we update vfp.fpcr. */ - env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000; -} - -void vfp_set_fpscr(CPUARMState *env, uint32_t val) -{ - HELPER(vfp_set_fpscr)(env, val); + if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) { + vfp_sync_and_clear_float_status_exc_flags(env); + } } -#ifdef CONFIG_TCG +/* + * VFP support. We follow the convention used for VFP instructions: + * Single precision routines have a "s" suffix, double precision a + * "d" suffix. 
+ */ #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p)) #define VFP_BINOP(name) \ -dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \ +dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ return float16_ ## name(a, b, fpst); \ } \ -float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \ +float32 VFP_HELPER(name, s)(float32 a, float32 b, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ return float32_ ## name(a, b, fpst); \ } \ -float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \ +float64 VFP_HELPER(name, d)(float64 a, float64 b, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ return float64_ ## name(a, b, fpst); \ } VFP_BINOP(add) @@ -281,19 +286,19 @@ VFP_BINOP(minnum) VFP_BINOP(maxnum) #undef VFP_BINOP -dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env) +dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, float_status *fpst) { - return float16_sqrt(a, &env->vfp.fp_status_f16); + return float16_sqrt(a, fpst); } -float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env) +float32 VFP_HELPER(sqrt, s)(float32 a, float_status *fpst) { - return float32_sqrt(a, &env->vfp.fp_status); + return float32_sqrt(a, fpst); } -float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) +float64 VFP_HELPER(sqrt, d)(float64 a, float_status *fpst) { - return float64_sqrt(a, &env->vfp.fp_status); + return float64_sqrt(a, fpst); } static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) @@ -315,8 +320,7 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) default: g_assert_not_reached(); } - env->vfp.xregs[ARM_VFP_FPSCR] = - deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags); + env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */ } /* XXX: check quiet/signaling case */ @@ -324,31 +328,29 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ { \ softfloat_to_vfp_compare(env, \ - FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \ + FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \ } \ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \ { \ softfloat_to_vfp_compare(env, \ - FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \ + FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \ } -DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16) -DO_VFP_cmp(s, float32, float32, fp_status) -DO_VFP_cmp(d, float64, float64, fp_status) +DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16) +DO_VFP_cmp(s, float32, float32, FPST_A32) +DO_VFP_cmp(d, float64, float64, FPST_A32) #undef DO_VFP_cmp /* Integer to float and float to integer conversions */ #define CONV_ITOF(name, ftype, fsz, sign) \ -ftype HELPER(name)(uint32_t x, void *fpstp) \ +ftype HELPER(name)(uint32_t x, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \ } #define CONV_FTOI(name, ftype, fsz, sign, round) \ -sign##int32_t HELPER(name)(ftype x, void *fpstp) \ +sign##int32_t HELPER(name)(ftype x, float_status *fpst) \ { \ - float_status *fpst = fpstp; \ if (float##fsz##_is_any_nan(x)) { \ float_raise(float_flag_invalid, fpst); \ return 0; \ @@ -373,22 +375,22 @@ FLOAT_CONVS(ui, d, float64, 64, u) #undef FLOAT_CONVS /* floating point conversion */ -float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env) +float64 VFP_HELPER(fcvtd, s)(float32 x, float_status *status) { - 
return float32_to_float64(x, &env->vfp.fp_status); + return float32_to_float64(x, status); } -float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) +float32 VFP_HELPER(fcvts, d)(float64 x, float_status *status) { - return float64_to_float32(x, &env->vfp.fp_status); + return float64_to_float32(x, status); } -uint32_t HELPER(bfcvt)(float32 x, void *status) +uint32_t HELPER(bfcvt)(float32 x, float_status *status) { return float32_to_bfloat16(x, status); } -uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status) +uint32_t HELPER(bfcvt_pair)(uint64_t pair, float_status *status) { bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status); bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status); @@ -404,26 +406,25 @@ uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status) */ #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ - void *fpstp) \ -{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); } + float_status *fpst) \ +{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpst); } #define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \ uint32_t shift, \ - void *fpstp) \ + float_status *fpst) \ { \ ftype ret; \ - float_status *fpst = fpstp; \ FloatRoundMode oldmode = fpst->float_rounding_mode; \ fpst->float_rounding_mode = float_round_nearest_even; \ - ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \ + ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpst); \ fpst->float_rounding_mode = oldmode; \ return ret; \ } #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \ uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ - void *fpst) \ + float_status *fpst) \ { \ if (unlikely(float##fsz##_is_any_nan(x))) { \ float_raise(float_flag_invalid, fpst); \ @@ -463,6 +464,10 @@ VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64) VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16) VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32) VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64) +VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64, + float_round_to_zero, _round_to_zero) +VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64, + float_round_to_zero, _round_to_zero) #undef VFP_CONV_FIX #undef VFP_CONV_FIX_FLOAT @@ -472,10 +477,8 @@ VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64) /* Set the current fp rounding mode and return the old one. * The argument is a softfloat float_round_ value. */ -uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp) +uint32_t HELPER(set_rmode)(uint32_t rmode, float_status *fp_status) { - float_status *fp_status = fpstp; - uint32_t prev_rmode = get_float_rounding_mode(fp_status); set_float_rounding_mode(rmode, fp_status); @@ -483,12 +486,12 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp) } /* Half precision conversions. */ -float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, float_status *fpst, + uint32_t ahp_mode) { /* Squash FZ16 to 0 for the duration of conversion. In this case, * it would affect flushing input denormals. 
*/ - float_status *fpst = fpstp; bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float32 r = float16_to_float32(a, !ahp_mode, fpst); @@ -496,12 +499,12 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) return r; } -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode) +uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, float_status *fpst, + uint32_t ahp_mode) { /* Squash FZ16 to 0 for the duration of conversion. In this case, * it would affect flushing output denormals. */ - float_status *fpst = fpstp; bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float32_to_float16(a, !ahp_mode, fpst); @@ -509,12 +512,12 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode) return r; } -float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode) +float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, float_status *fpst, + uint32_t ahp_mode) { /* Squash FZ16 to 0 for the duration of conversion. In this case, * it would affect flushing input denormals. */ - float_status *fpst = fpstp; bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float64 r = float16_to_float64(a, !ahp_mode, fpst); @@ -522,12 +525,12 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode) return r; } -uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) +uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, float_status *fpst, + uint32_t ahp_mode) { /* Squash FZ16 to 0 for the duration of conversion. In this case, * it would affect flushing output denormals. */ - float_status *fpst = fpstp; bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float64_to_float16(a, !ahp_mode, fpst); @@ -570,6 +573,33 @@ static int recip_estimate(int input) } /* + * Increased precision version: + * input is a 13 bit fixed point number + * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0. + * result range 4096 .. 8191 for a number from 1.0 to 2.0 + */ +static int recip_estimate_incprec(int input) +{ + int a, b, r; + assert(2048 <= input && input < 4096); + a = (input * 2) + 1; + /* + * The pseudocode expresses this as an operation on infinite + * precision reals where it calculates 2^25 / a and then looks + * at the error between that and the rounded-down-to-integer + * value to see if it should instead round up. We instead + * follow the same approach as the pseudocode for the 8-bit + * precision version, and calculate (2 * (2^25 / a)) as an + * integer so we can do the "add one and halve" to round it. + * So the 1 << 26 here is correct. + */ + b = (1 << 26) / a; + r = (b + 1) >> 1; + assert(4096 <= r && r < 8192); + return r; +} + +/* * Common wrapper to call recip_estimate * * The parameters are exponent and 64 bit fraction (without implicit @@ -578,7 +608,8 @@ static int recip_estimate(int input) * callee. 
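The comment in recip_estimate_incprec() above argues that taking b = 2^26 / a as an integer and then doing "add one and halve" reproduces rounding 2^25 / a to the nearest integer (ties cannot occur because a is odd). A small standalone check of that equivalence, reusing the function body from the hunk; the round-to-nearest reference is computed as (2^26 + a) / (2a) in integer arithmetic, which is an illustration only and not part of the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The 13-bit increased-precision estimate, as in the hunk above. */
static int recip_estimate_incprec(int input)
{
    int a, b, r;

    assert(2048 <= input && input < 4096);
    a = (input * 2) + 1;
    b = (1 << 26) / a;
    r = (b + 1) >> 1;
    assert(4096 <= r && r < 8192);
    return r;
}

int main(void)
{
    for (int input = 2048; input < 4096; input++) {
        int64_t a = 2 * (int64_t)input + 1;
        /* Round 2^25 / a to the nearest integer directly (a is odd, no ties). */
        int64_t nearest = ((INT64_C(1) << 26) + a) / (2 * a);

        assert(recip_estimate_incprec(input) == nearest);
    }
    printf("add-one-and-halve matches round-to-nearest for all inputs\n");
    return 0;
}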
*/ -static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) +static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac, + bool increasedprecision) { uint32_t scaled, estimate; uint64_t result_frac; @@ -594,12 +625,22 @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac) } } - /* scaled = UInt('1':fraction<51:44>) */ - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); - estimate = recip_estimate(scaled); + if (increasedprecision) { + /* scaled = UInt('1':fraction<51:41>) */ + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); + estimate = recip_estimate_incprec(scaled); + } else { + /* scaled = UInt('1':fraction<51:44>) */ + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); + estimate = recip_estimate(scaled); + } result_exp = exp_off - *exp; - result_frac = deposit64(0, 44, 8, estimate); + if (increasedprecision) { + result_frac = deposit64(0, 40, 12, estimate); + } else { + result_frac = deposit64(0, 44, 8, estimate); + } if (result_exp == 0) { result_frac = deposit64(result_frac >> 1, 51, 1, 1); } else if (result_exp == -1) { @@ -628,9 +669,8 @@ static bool round_to_inf(float_status *fpst, bool sign_bit) } } -uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) +uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst) { - float_status *fpst = fpstp; float16 f16 = float16_squash_input_denormal(input, fpst); uint32_t f16_val = float16_val(f16); uint32_t f16_sign = float16_is_neg(f16); @@ -669,7 +709,7 @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) } f64_frac = call_recip_estimate(&f16_exp, 29, - ((uint64_t) f16_frac) << (52 - 10)); + ((uint64_t) f16_frac) << (52 - 10), false); /* result = sign : result_exp<4:0> : fraction<51:42> */ f16_val = deposit32(0, 15, 1, f16_sign); @@ -678,9 +718,12 @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) return make_float16(f16_val); } -float32 HELPER(recpe_f32)(float32 input, void *fpstp) +/* + * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant + * which is used when FPCR.AH == 1. 
+ */ +static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres) { - float_status *fpst = fpstp; float32 f32 = float32_squash_input_denormal(input, fpst); uint32_t f32_val = float32_val(f32); bool f32_sign = float32_is_neg(f32); @@ -719,7 +762,7 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp) } f64_frac = call_recip_estimate(&f32_exp, 253, - ((uint64_t) f32_frac) << (52 - 23)); + ((uint64_t) f32_frac) << (52 - 23), rpres); /* result = sign : result_exp<7:0> : fraction<51:29> */ f32_val = deposit32(0, 31, 1, f32_sign); @@ -728,9 +771,18 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp) return make_float32(f32_val); } -float64 HELPER(recpe_f64)(float64 input, void *fpstp) +float32 HELPER(recpe_f32)(float32 input, float_status *fpst) +{ + return do_recpe_f32(input, fpst, false); +} + +float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst) +{ + return do_recpe_f32(input, fpst, true); +} + +float64 HELPER(recpe_f64)(float64 input, float_status *fpst) { - float_status *fpst = fpstp; float64 f64 = float64_squash_input_denormal(input, fpst); uint64_t f64_val = float64_val(f64); bool f64_sign = float64_is_neg(f64); @@ -768,7 +820,7 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp) return float64_set_sign(float64_zero, float64_is_neg(f64)); } - f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac); + f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false); /* result = sign : result_exp<10:0> : fraction<51:0>; */ f64_val = deposit64(0, 63, 1, f64_sign); @@ -802,8 +854,36 @@ static int do_recip_sqrt_estimate(int a) return estimate; } +static int do_recip_sqrt_estimate_incprec(int a) +{ + /* + * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate + * in terms of an infinite-precision floating point calculation of a + * square root. We implement this using the same kind of pure integer + * algorithm as the 8-bit mantissa, to get the same bit-for-bit result. 
+ */ + int64_t b, estimate; + + assert(1024 <= a && a < 4096); + if (a < 2048) { + a = a * 2 + 1; + } else { + a = (a >> 1) << 1; + a = (a + 1) * 2; + } + b = 8192; + while (a * (b + 1) * (b + 1) < (1ULL << 39)) { + b += 1; + } + estimate = (b + 1) / 2; + + assert(4096 <= estimate && estimate < 8192); -static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) + return estimate; +} + +static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac, + bool increasedprecision) { int estimate; uint32_t scaled; @@ -816,22 +896,36 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac) frac = extract64(frac, 0, 51) << 1; } - if (*exp & 1) { - /* scaled = UInt('01':fraction<51:45>) */ - scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); + if (increasedprecision) { + if (*exp & 1) { + /* scaled = UInt('01':fraction<51:42>) */ + scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10)); + } else { + /* scaled = UInt('1':fraction<51:41>) */ + scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11)); + } + estimate = do_recip_sqrt_estimate_incprec(scaled); } else { - /* scaled = UInt('1':fraction<51:44>) */ - scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); + if (*exp & 1) { + /* scaled = UInt('01':fraction<51:45>) */ + scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7)); + } else { + /* scaled = UInt('1':fraction<51:44>) */ + scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8)); + } + estimate = do_recip_sqrt_estimate(scaled); } - estimate = do_recip_sqrt_estimate(scaled); *exp = (exp_off - *exp) / 2; - return extract64(estimate, 0, 8) << 44; + if (increasedprecision) { + return extract64(estimate, 0, 12) << 40; + } else { + return extract64(estimate, 0, 8) << 44; + } } -uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp) +uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s) { - float_status *s = fpstp; float16 f16 = float16_squash_input_denormal(input, s); uint16_t val = float16_val(f16); bool f16_sign = float16_is_neg(f16); @@ -844,7 +938,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp) if (float16_is_signaling_nan(f16, s)) { float_raise(float_flag_invalid, s); if (!s->default_nan_mode) { - nan = float16_silence_nan(f16, fpstp); + nan = float16_silence_nan(f16, s); } } if (s->default_nan_mode) { @@ -866,7 +960,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp) f64_frac = ((uint64_t) f16_frac) << (52 - 10); - f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac); + f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false); /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */ val = deposit32(0, 15, 1, f16_sign); @@ -875,9 +969,12 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp) return make_float16(val); } -float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) +/* + * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant + * which is used when FPCR.AH == 1. 
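The search loop in do_recip_sqrt_estimate_incprec() is an integer square-root bound: over the valid input range it exits with the largest b such that a * b^2 < 2^39, and the estimate is (b + 1) / 2. A standalone sketch cross-checking that reading against a binary search over the same predicate; everything outside the copied function body is illustration only.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The 12-bit increased-precision estimate, body copied from the hunk above. */
static int do_recip_sqrt_estimate_incprec(int a)
{
    int64_t b, estimate;

    assert(1024 <= a && a < 4096);
    if (a < 2048) {
        a = a * 2 + 1;
    } else {
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }
    b = 8192;
    while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
        b += 1;
    }
    estimate = (b + 1) / 2;
    assert(4096 <= estimate && estimate < 8192);
    return estimate;
}

int main(void)
{
    for (int a = 1024; a < 4096; a++) {
        int64_t scaled;

        /* Same input massaging as the function above. */
        if (a < 2048) {
            scaled = 2 * (int64_t)a + 1;
        } else {
            scaled = (((a >> 1) << 1) + 1) * 2;
        }

        /*
         * Largest b with scaled * b^2 < 2^39, found by binary search.
         * Over this input range the predicate always holds at b = 8192,
         * so the linear search in the function above exits with the
         * same maximal b.
         */
        int64_t lo = 8192, hi = 1 << 17;
        while (lo < hi) {
            int64_t mid = (lo + hi + 1) / 2;
            if (scaled * mid * mid < (INT64_C(1) << 39)) {
                lo = mid;
            } else {
                hi = mid - 1;
            }
        }
        assert(do_recip_sqrt_estimate_incprec(a) == (lo + 1) / 2);
    }
    printf("linear search matches binary search for all inputs\n");
    return 0;
}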
+ */ +static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres) { - float_status *s = fpstp; float32 f32 = float32_squash_input_denormal(input, s); uint32_t val = float32_val(f32); uint32_t f32_sign = float32_is_neg(f32); @@ -890,7 +987,7 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) if (float32_is_signaling_nan(f32, s)) { float_raise(float_flag_invalid, s); if (!s->default_nan_mode) { - nan = float32_silence_nan(f32, fpstp); + nan = float32_silence_nan(f32, s); } } if (s->default_nan_mode) { @@ -912,18 +1009,35 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) f64_frac = ((uint64_t) f32_frac) << 29; - f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac); + f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres); - /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */ + /* + * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) + * or for increased precision + * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11) + */ val = deposit32(0, 31, 1, f32_sign); val = deposit32(val, 23, 8, f32_exp); - val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); + if (rpres) { + val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12)); + } else { + val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8)); + } return make_float32(val); } -float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) +float32 HELPER(rsqrte_f32)(float32 input, float_status *s) +{ + return do_rsqrte_f32(input, s, false); +} + +float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s) +{ + return do_rsqrte_f32(input, s, true); +} + +float64 HELPER(rsqrte_f64)(float64 input, float_status *s) { - float_status *s = fpstp; float64 f64 = float64_squash_input_denormal(input, s); uint64_t val = float64_val(f64); bool f64_sign = float64_is_neg(f64); @@ -935,7 +1049,7 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) if (float64_is_signaling_nan(f64, s)) { float_raise(float_flag_invalid, s); if (!s->default_nan_mode) { - nan = float64_silence_nan(f64, fpstp); + nan = float64_silence_nan(f64, s); } } if (s->default_nan_mode) { @@ -952,7 +1066,7 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) return float64_zero; } - f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac); + f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false); /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */ val = deposit64(0, 61, 1, f64_sign); @@ -990,41 +1104,40 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a) /* VFPv4 fused multiply-accumulate */ dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b, - dh_ctype_f16 c, void *fpstp) + dh_ctype_f16 c, float_status *fpst) { - float_status *fpst = fpstp; return float16_muladd(a, b, c, 0, fpst); } -float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp) +float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, + float_status *fpst) { - float_status *fpst = fpstp; return float32_muladd(a, b, c, 0, fpst); } -float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp) +float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, + float_status *fpst) { - float_status *fpst = fpstp; return float64_muladd(a, b, c, 0, fpst); } /* ARMv8 round to integral */ -dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status) +dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, float_status *fp_status) { return float16_round_to_int(x, fp_status); } -float32 HELPER(rints_exact)(float32 x, void *fp_status) +float32 HELPER(rints_exact)(float32 x, 
float_status *fp_status) { return float32_round_to_int(x, fp_status); } -float64 HELPER(rintd_exact)(float64 x, void *fp_status) +float64 HELPER(rintd_exact)(float64 x, float_status *fp_status) { return float64_round_to_int(x, fp_status); } -dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status) +dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, float_status *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; float16 ret; @@ -1040,7 +1153,7 @@ dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status) return ret; } -float32 HELPER(rints)(float32 x, void *fp_status) +float32 HELPER(rints)(float32 x, float_status *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; float32 ret; @@ -1056,15 +1169,13 @@ float32 HELPER(rints)(float32 x, void *fp_status) return ret; } -float64 HELPER(rintd)(float64 x, void *fp_status) +float64 HELPER(rintd)(float64 x, float_status *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; float64 ret; ret = float64_round_to_int(x, fp_status); - new_flags = get_float_exception_flags(fp_status); - /* Suppress any inexact exceptions the conversion produced */ if (!(old_flags & float_flag_inexact)) { new_flags = get_float_exception_flags(fp_status); @@ -1088,11 +1199,10 @@ const FloatRoundMode arm_rmode_to_sf_map[] = { * Implement float64 to int32_t conversion without saturation; * the result is supplied modulo 2^32. */ -uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) +uint64_t HELPER(fjcvtzs)(float64 value, float_status *status) { - float_status *status = vstatus; - uint32_t inexact, frac; - uint32_t e_old, e_new; + uint32_t frac, e_old, e_new; + bool inexact; e_old = get_float_exception_flags(status); set_float_exception_flags(0, status); @@ -1100,13 +1210,13 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) e_new = get_float_exception_flags(status); set_float_exception_flags(e_old | e_new, status); - if (value == float64_chs(float64_zero)) { - /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */ - inexact = 1; - } else { - /* Normal inexact or overflow or NaN */ - inexact = e_new & (float_flag_inexact | float_flag_invalid); - } + /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */ + inexact = e_new & (float_flag_inexact | + float_flag_input_denormal_flushed | + float_flag_invalid); + + /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */ + inexact |= value == float64_chs(float64_zero); /* Pack the result and the env->ZF representation of Z together. */ return deposit64(frac, 32, 32, inexact); @@ -1114,13 +1224,12 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env) { - uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status); + uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]); uint32_t result = pair; uint32_t z = (pair >> 32) == 0; /* Store Z, clear NCV, in FPSCR.NZCV. 
*/ - env->vfp.xregs[ARM_VFP_FPSCR] - = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z); + env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z); return result; } @@ -1163,12 +1272,12 @@ static float32 frint_s(float32 f, float_status *fpst, int intsize) return (0x100u + 126u + intsize) << 23; } -float32 HELPER(frint32_s)(float32 f, void *fpst) +float32 HELPER(frint32_s)(float32 f, float_status *fpst) { return frint_s(f, fpst, 32); } -float32 HELPER(frint64_s)(float32 f, void *fpst) +float32 HELPER(frint64_s)(float32 f, float_status *fpst) { return frint_s(f, fpst, 64); } @@ -1211,12 +1320,12 @@ static float64 frint_d(float64 f, float_status *fpst, int intsize) return (uint64_t)(0x800 + 1022 + intsize) << 52; } -float64 HELPER(frint32_d)(float64 f, void *fpst) +float64 HELPER(frint32_d)(float64 f, float_status *fpst) { return frint_d(f, fpst, 32); } -float64 HELPER(frint64_d)(float64 f, void *fpst) +float64 HELPER(frint64_d)(float64 f, float_status *fpst) { return frint_d(f, fpst, 64); } @@ -1250,4 +1359,12 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg) raise_exception(env, EXCP_HYP_TRAP, syndrome, 2); } -#endif +uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) +{ + return vfp_get_fpscr(env); +} + +void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) +{ + vfp_set_fpscr(env, val); +} diff --git a/target/arm/vfp_fpscr.c b/target/arm/vfp_fpscr.c new file mode 100644 index 0000000..92ea60e --- /dev/null +++ b/target/arm/vfp_fpscr.c @@ -0,0 +1,155 @@ +/* + * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-features.h" + +uint32_t vfp_get_fpcr(CPUARMState *env) +{ + uint32_t fpcr = env->vfp.fpcr + | (env->vfp.vec_len << 16) + | (env->vfp.vec_stride << 20); + + /* + * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever + * of the two is not applicable to this CPU will always be zero. + */ + fpcr |= env->v7m.ltpsize << 16; + + return fpcr; +} + +uint32_t vfp_get_fpsr(CPUARMState *env) +{ + uint32_t fpsr = env->vfp.fpsr; + uint32_t i; + + fpsr |= vfp_get_fpsr_from_host(env); + + i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; + fpsr |= i ? FPSR_QC : 0; + return fpsr; +} + +uint32_t vfp_get_fpscr(CPUARMState *env) +{ + return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) | + (vfp_get_fpsr(env) & FPSCR_FPSR_MASK); +} + +void vfp_set_fpsr(CPUARMState *env, uint32_t val) +{ + ARMCPU *cpu = env_archcpu(env); + + if (arm_feature(env, ARM_FEATURE_NEON) || + cpu_isar_feature(aa32_mve, cpu)) { + /* + * The bit we set within vfp.qc[] is arbitrary; the array as a + * whole being zero/non-zero is what counts. 
+ */ + env->vfp.qc[0] = val & FPSR_QC; + env->vfp.qc[1] = 0; + env->vfp.qc[2] = 0; + env->vfp.qc[3] = 0; + } + + /* + * NZCV lives only in env->vfp.fpsr. The cumulative exception flags + * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible + * extra pending exception information that hasn't yet been folded in + * living in the float_status values (for TCG). + * Since this FPSR write gives us the up to date values of the exception + * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing + * anything else. We also need to clear out the float_status exception + * information so that the next vfp_get_fpsr does not fold in stale data. + */ + val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK; + env->vfp.fpsr = val; + vfp_clear_float_status_exc_flags(env); +} + +static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask) +{ + /* + * We only set FPCR bits defined by mask, and leave the others alone. + * We assume the mask is sensible (e.g. doesn't try to set only + * part of a field) + */ + ARMCPU *cpu = env_archcpu(env); + + /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ + if (!cpu_isar_feature(any_fp16, cpu)) { + val &= ~FPCR_FZ16; + } + if (!cpu_isar_feature(aa64_afp, cpu)) { + val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP); + } + + if (!cpu_isar_feature(aa64_ebf16, cpu)) { + val &= ~FPCR_EBF; + } + + vfp_set_fpcr_to_host(env, val, mask); + + if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) { + if (!arm_feature(env, ARM_FEATURE_M)) { + /* + * Short-vector length and stride; on M-profile these bits + * are used for different purposes. + * We can't make this conditional be "if MVFR0.FPShVec != 0", + * because in v7A no-short-vector-support cores still had to + * allow Stride/Len to be written with the only effect that + * some insns are required to UNDEF if the guest sets them. + */ + env->vfp.vec_len = extract32(val, 16, 3); + env->vfp.vec_stride = extract32(val, 20, 2); + } else if (cpu_isar_feature(aa32_mve, cpu)) { + env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT, + FPCR_LTPSIZE_LENGTH); + } + } + + /* + * We don't implement trapped exception handling, so the + * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) + * + * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16, + * FIZ, AH, and NEP. + * Len, Stride and LTPSIZE we just handled. Store those bits + * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI + * bits. + */ + val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | + FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP; + env->vfp.fpcr &= ~mask; + env->vfp.fpcr |= val; +} + +void vfp_set_fpcr(CPUARMState *env, uint32_t val) +{ + vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32)); +} + +void vfp_set_fpscr(CPUARMState *env, uint32_t val) +{ + vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK); + vfp_set_fpsr(env, val & FPSCR_FPSR_MASK); +} |
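
As a quick sanity check of the increased-precision FRSQRTE path earlier in this diff, the following standalone sketch (not part of the patch) re-derives the search loop used by do_recip_sqrt_estimate_incprec(): for a scaled 12-bit input in [1024, 4096) it finds the largest b with a' * (b + 1)^2 < 2^39 and returns (b + 1) / 2, a 12-bit estimate in [4096, 8192). The interval bounds and the rounding of the scaled input come from the patch; the driver main() is purely illustrative.

/* Standalone sketch; mirrors the loop in do_recip_sqrt_estimate_incprec(). */
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

static int64_t rsqrt_estimate_incprec_sketch(int64_t a)
{
    int64_t b = 8192;

    assert(a >= 1024 && a < 4096);
    /* Round the scaled input the same way the patch's helper does. */
    if (a < 2048) {
        a = a * 2 + 1;
    } else {
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }
    /* Search for the largest b with a * (b + 1)^2 < 2^39. */
    while (a * (b + 1) * (b + 1) < (INT64_C(1) << 39)) {
        b += 1;
    }
    return (b + 1) / 2;   /* 12-bit estimate in [4096, 8192) */
}

int main(void)
{
    /* e.g. a scaled input of 1024 (fraction near 1.0) yields 8190 */
    for (int64_t a = 1024; a < 4096; a += 1024) {
        printf("scaled=%4" PRId64 " -> estimate=%" PRId64 "\n",
               a, rsqrt_estimate_incprec_sketch(a));
    }
    return 0;
}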
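
The new vfp_fpscr.c keeps the architectural FPSCR as two separate fields, vfp.fpcr (control) and vfp.fpsr (status), and composes or splits the 32-bit view through FPSCR_FPCR_MASK and FPSCR_FPSR_MASK. The standalone sketch below (again not part of the patch) illustrates only that get/set split; the mask value is an illustrative placeholder rather than QEMU's real constants, and the sketch omits the parts the real code also does, namely folding pending float_status exception flags into FPSR on read and clearing them on write.

/* Minimal sketch of the FPSCR = FPCR | FPSR split; placeholder masks only. */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_FPSR_MASK 0xf800009fu        /* assumed: NZCV, QC, cumulative flags */
#define SKETCH_FPCR_MASK (~SKETCH_FPSR_MASK) /* everything else, for this sketch */

struct sketch_vfp {
    uint32_t fpcr;   /* control: rounding mode, DN, FZ, ... */
    uint32_t fpsr;   /* status: NZCV, QC, cumulative exception flags */
};

static uint32_t sketch_get_fpscr(const struct sketch_vfp *v)
{
    return (v->fpcr & SKETCH_FPCR_MASK) | (v->fpsr & SKETCH_FPSR_MASK);
}

static void sketch_set_fpscr(struct sketch_vfp *v, uint32_t val)
{
    /* A write to FPSCR updates both halves, each through its own mask. */
    v->fpcr = val & SKETCH_FPCR_MASK;
    v->fpsr = val & SKETCH_FPSR_MASK;
}

int main(void)
{
    struct sketch_vfp v = { 0, 0 };

    /* FPSR.N plus two cumulative exception bits; FPCR half stays zero. */
    sketch_set_fpscr(&v, 0x80000003u);
    printf("fpcr=%08x fpsr=%08x fpscr=%08x\n",
           (unsigned)v.fpcr, (unsigned)v.fpsr, (unsigned)sketch_get_fpscr(&v));
    return 0;
}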