Diffstat (limited to 'target/arm')
-rw-r--r--  target/arm/arch_dump.c | 9
-rw-r--r--  target/arm/arm-powerctl.c | 2
-rw-r--r--  target/arm/arm-qmp-cmds.c | 11
-rw-r--r--  target/arm/cpregs.h | 124
-rw-r--r--  target/arm/cpu-features.h | 383
-rw-r--r--  target/arm/cpu-param.h | 18
-rw-r--r--  target/arm/cpu-qom.h | 5
-rw-r--r--  target/arm/cpu-sysregs.h | 42
-rw-r--r--  target/arm/cpu-sysregs.h.inc | 36
-rw-r--r--  target/arm/cpu.c | 411
-rw-r--r--  target/arm/cpu.h | 598
-rw-r--r--  target/arm/cpu32-stubs.c | 26
-rw-r--r--  target/arm/cpu64.c | 293
-rw-r--r--  target/arm/debug_helper.c | 22
-rw-r--r--  target/arm/gdbstub.c | 54
-rw-r--r--  target/arm/gdbstub64.c | 241
-rw-r--r--  target/arm/gtimer.h | 14
-rw-r--r--  target/arm/helper.c | 2210
-rw-r--r--  target/arm/helper.h | 1091
-rw-r--r--  target/arm/hvf-stub.c | 20
-rw-r--r--  target/arm/hvf/hvf.c | 487
-rw-r--r--  target/arm/hvf/trace-events | 5
-rw-r--r--  target/arm/hvf/trace.h | 1
-rw-r--r--  target/arm/hvf_arm.h | 5
-rw-r--r--  target/arm/hyp_gdbstub.c | 15
-rw-r--r--  target/arm/internals.h | 278
-rw-r--r--  target/arm/kvm-stub.c | 97
-rw-r--r--  target/arm/kvm.c | 342
-rw-r--r--  target/arm/kvm_arm.h | 110
-rw-r--r--  target/arm/machine.c | 154
-rw-r--r--  target/arm/meson.build | 45
-rw-r--r--  target/arm/ptw.c | 357
-rw-r--r--  target/arm/tcg-stubs.c | 27
-rw-r--r--  target/arm/tcg/a64.decode | 884
-rw-r--r--  target/arm/tcg/arith_helper.c | 297
-rw-r--r--  target/arm/tcg/cpu-v7m.c | 194
-rw-r--r--  target/arm/tcg/cpu32.c | 401
-rw-r--r--  target/arm/tcg/cpu64.c | 583
-rw-r--r--  target/arm/tcg/crypto_helper.c | 6
-rw-r--r--  target/arm/tcg/gengvec.c | 490
-rw-r--r--  target/arm/tcg/helper-a64.c | 463
-rw-r--r--  target/arm/tcg/helper-a64.h | 131
-rw-r--r--  target/arm/tcg/helper-sme.h | 10
-rw-r--r--  target/arm/tcg/helper-sve.h | 546
-rw-r--r--  target/arm/tcg/helper.h | 1153
-rw-r--r--  target/arm/tcg/hflags.c | 147
-rw-r--r--  target/arm/tcg/iwmmxt_helper.c | 4
-rw-r--r--  target/arm/tcg/m_helper.c | 11
-rw-r--r--  target/arm/tcg/meson.build | 28
-rw-r--r--  target/arm/tcg/mte_helper.c | 66
-rw-r--r--  target/arm/tcg/mte_helper.h | 66
-rw-r--r--  target/arm/tcg/mve_helper.c | 59
-rw-r--r--  target/arm/tcg/neon-dp.decode | 6
-rw-r--r--  target/arm/tcg/neon_helper.c | 209
-rw-r--r--  target/arm/tcg/op_addsub.c.inc (renamed from target/arm/op_addsub.h) | 0
-rw-r--r--  target/arm/tcg/op_helper.c | 111
-rw-r--r--  target/arm/tcg/pauth_helper.c | 3
-rw-r--r--  target/arm/tcg/psci.c | 2
-rw-r--r--  target/arm/tcg/sme_helper.c | 155
-rw-r--r--  target/arm/tcg/sve_helper.c | 520
-rw-r--r--  target/arm/tcg/sve_ldst_internal.h | 2
-rw-r--r--  target/arm/tcg/tlb-insns.c | 1306
-rw-r--r--  target/arm/tcg/tlb_helper.c | 53
-rw-r--r--  target/arm/tcg/translate-a32.h | 7
-rw-r--r--  target/arm/tcg/translate-a64.c | 9731
-rw-r--r--  target/arm/tcg/translate-a64.h | 15
-rw-r--r--  target/arm/tcg/translate-m-nocp.c | 22
-rw-r--r--  target/arm/tcg/translate-neon.c | 559
-rw-r--r--  target/arm/tcg/translate-sme.c | 46
-rw-r--r--  target/arm/tcg/translate-sve.c | 502
-rw-r--r--  target/arm/tcg/translate-vfp.c | 90
-rw-r--r--  target/arm/tcg/translate.c | 182
-rw-r--r--  target/arm/tcg/translate.h | 111
-rw-r--r--  target/arm/tcg/vec_helper.c | 852
-rw-r--r--  target/arm/tcg/vec_internal.h | 74
-rw-r--r--  target/arm/tcg/vfp.decode | 12
-rw-r--r--  target/arm/tcg/vfp_helper.c (renamed from target/arm/vfp_helper.c) | 683
-rw-r--r--  target/arm/vfp_fpscr.c | 155
78 files changed, 14886 insertions(+), 13594 deletions(-)
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 06cdf4b..1dd7984 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -21,8 +21,9 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "elf.h"
-#include "sysemu/dump.h"
+#include "system/dump.h"
#include "cpu-features.h"
+#include "internals.h"
/* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
struct aarch64_user_regs {
@@ -142,7 +143,6 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
return 0;
}
-#ifdef TARGET_AARCH64
static off_t sve_zreg_offset(uint32_t vq, int n)
{
off_t off = sizeof(struct aarch64_user_sve_header);
@@ -230,7 +230,6 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
return 0;
}
-#endif
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, DumpState *s)
@@ -272,11 +271,9 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return ret;
}
-#ifdef TARGET_AARCH64
if (cpu_isar_feature(aa64_sve, cpu)) {
ret = aarch64_write_elf64_sve(f, env, cpuid, s);
}
-#endif
return ret;
}
@@ -450,11 +447,9 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
if (class == ELFCLASS64) {
note_size = AARCH64_PRSTATUS_NOTE_SIZE;
note_size += AARCH64_PRFPREG_NOTE_SIZE;
-#ifdef TARGET_AARCH64
if (cpu_isar_feature(aa64_sve, cpu)) {
note_size += AARCH64_SVE_NOTE_SIZE(&cpu->env);
}
-#endif
} else {
note_size = ARM_PRSTATUS_NOTE_SIZE;
if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 2b2055c..20c70c7 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,7 +15,7 @@
#include "arm-powerctl.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
-#include "sysemu/tcg.h"
+#include "system/tcg.h"
#include "target/arm/multiprocessing.h"
#ifndef DEBUG_ARM_POWERCTL
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index 3cc8cc7..cefd235 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -26,10 +26,11 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "qapi/qobject-input-visitor.h"
-#include "qapi/qapi-commands-machine-target.h"
-#include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-misc-arm.h"
+#include "qobject/qdict.h"
#include "qom/qom-qobject.h"
+#include "cpu.h"
static GICCapability *gic_cap_new(int version)
{
@@ -46,7 +47,7 @@ static inline void gic_cap_kvm_probe(GICCapability *v2, GICCapability *v3)
#ifdef CONFIG_KVM
int fdarray[3];
- if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, NULL)) {
+ if (!kvm_arm_create_scratch_host_vcpu(fdarray, NULL)) {
return;
}
@@ -94,7 +95,7 @@ static const char *cpu_model_advertised_features[] = {
"sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
"sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
"kvm-no-adjvtime", "kvm-steal-time",
- "pauth", "pauth-impdef", "pauth-qarma3",
+ "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5",
NULL
};
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index cc7c543..c1a7ae3 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -23,6 +23,7 @@
#include "hw/registerfields.h"
#include "target/arm/kvm-consts.h"
+#include "cpu.h"
/*
* ARMCPRegInfo type field bits:
@@ -126,6 +127,14 @@ enum {
* equivalent EL1 register when FEAT_NV2 is enabled.
*/
ARM_CP_NV2_REDIRECT = 1 << 20,
+ /*
+ * Flag: this is a TLBI insn which (when FEAT_XS is present) also has
+ * an NXS variant at the same encoding except that crn is 1 greater,
+ * so when registering this cpreg automatically also register one
+ * for the TLBI NXS variant. (For QEMU the NXS variant behaves
+ * identically to the normal one, other than FGT trapping handling.)
+ */
+ ARM_CP_ADD_TLBI_NXS = 1 << 21,
};
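[A sketch of how this flag could be acted on at registration time; the helper below is hypothetical (the actual registration change is elsewhere in this series), but define_one_arm_cp_reg() and the ARMCPRegInfo fields are the existing QEMU API, and R_FGT_NXS_MASK is added further down in this patch:

    static void add_tlbi_nxs_variant(ARMCPU *cpu, const ARMCPRegInfo *ri)
    {
        /*
         * Clone the normal TLBI insn at crn + 1 and mark its FGT bit as
         * the nXS variant; the registration core copies the struct and
         * name, so the temporary name buffer can be freed afterwards.
         */
        ARMCPRegInfo nxs_ri = *ri;
        g_autofree char *name = g_strdup_printf("%sNXS", ri->name);

        nxs_ri.name = name;
        nxs_ri.crn++;                          /* the nXS encoding */
        if (nxs_ri.fgt) {
            nxs_ri.fgt |= R_FGT_NXS_MASK;      /* honour HCR_EL2.FGTnXS */
        }
        define_one_arm_cp_reg(cpu, &nxs_ri);
    }
]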
/*
@@ -320,20 +329,23 @@ typedef enum CPAccessResult {
* Access fails due to a configurable trap or enable which would
* result in a categorized exception syndrome giving information about
* the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
- * 0xc or 0x18).
+ * 0xc or 0x18). These traps are always to a specified target EL,
+ * never to the usual target EL.
*/
- CP_ACCESS_TRAP = (1 << 2),
- CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP | 2,
- CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP | 3,
+ CP_ACCESS_TRAP_BIT = (1 << 2),
+ CP_ACCESS_TRAP_EL1 = CP_ACCESS_TRAP_BIT | 1,
+ CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP_BIT | 2,
+ CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP_BIT | 3,
/*
- * Access fails and results in an exception syndrome 0x0 ("uncategorized").
+ * Access fails with UNDEFINED, i.e. an exception syndrome 0x0
+ * ("uncategorized"), which is what an undefined insn produces.
* Note that this is not a catch-all case -- the set of cases which may
* result in this failure is specifically defined by the architecture.
* This trap is always to the usual target EL, never directly to a
* specified target EL.
*/
- CP_ACCESS_TRAP_UNCATEGORIZED = (2 << 2),
+ CP_ACCESS_UNDEFINED = (2 << 2),
} CPAccessResult;
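[For context, these values are what a cpreg's .accessfn returns. A minimal sketch using the renamed constants; the register being guarded is hypothetical, but arm_current_el(), arm_hcr_el2_eff() and HCR_TVM are existing helpers/masks:

    static CPAccessResult access_demo(CPUARMState *env, const ARMCPRegInfo *ri,
                                      bool isread)
    {
        if (arm_current_el(env) == 0) {
            /* UNDEFINED at EL0: uncategorized (0x0) syndrome. */
            return CP_ACCESS_UNDEFINED;
        }
        if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TVM)) {
            /* Configurable trap with a categorized syndrome, to EL2. */
            return CP_ACCESS_TRAP_EL2;
        }
        return CP_ACCESS_OK;
    }
]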
/* Indexes into fgt_read[] */
@@ -621,6 +633,7 @@ FIELD(HDFGWTR_EL2, NBRBCTL, 60, 1)
FIELD(HDFGWTR_EL2, NBRBDATA, 61, 1)
FIELD(HDFGWTR_EL2, NPMSNEVFR_EL1, 62, 1)
+FIELD(FGT, NXS, 13, 1) /* Honour HCR_EL2.FGTnXS to suppress FGT */
/* Which fine-grained trap bit register to check, if any */
FIELD(FGT, TYPE, 10, 3)
FIELD(FGT, REV, 9, 1) /* Is bit sense reversed? */
@@ -639,6 +652,17 @@ FIELD(FGT, BITPOS, 0, 6) /* Bit position within the uint64_t */
#define DO_REV_BIT(REG, BITNAME) \
FGT_##BITNAME = FGT_##REG | FGT_REV | R_##REG##_EL2_##BITNAME##_SHIFT
+/*
+ * The FGT bits for TLBI maintenance instructions accessible at EL1 always
+ * affect the "normal" TLBI insns; they affect the corresponding TLBI insns
+ * with the nXS qualifier only if HCRX_EL2.FGTnXS is 0. We define e.g.
+ * FGT_TLBIVAE1 to use for the normal insn, and FGT_TLBIVAE1NXS to use
+ * for the nXS qualified insn.
+ */
+#define DO_TLBINXS_BIT(REG, BITNAME) \
+ FGT_##BITNAME = FGT_##REG | R_##REG##_EL2_##BITNAME##_SHIFT, \
+ FGT_##BITNAME##NXS = FGT_##BITNAME | R_FGT_NXS_MASK
+
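[For a single invocation, DO_TLBINXS_BIT(HFGITR, TLBIVAE1) expands mechanically to the pair of enumerators:

    FGT_TLBIVAE1 = FGT_HFGITR | R_HFGITR_EL2_TLBIVAE1_SHIFT,
    FGT_TLBIVAE1NXS = FGT_TLBIVAE1 | R_FGT_NXS_MASK,
]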
typedef enum FGTBit {
/*
* These bits tell us which register arrays to use:
@@ -772,36 +796,36 @@ typedef enum FGTBit {
DO_BIT(HFGITR, ATS1E0W),
DO_BIT(HFGITR, ATS1E1RP),
DO_BIT(HFGITR, ATS1E1WP),
- DO_BIT(HFGITR, TLBIVMALLE1OS),
- DO_BIT(HFGITR, TLBIVAE1OS),
- DO_BIT(HFGITR, TLBIASIDE1OS),
- DO_BIT(HFGITR, TLBIVAAE1OS),
- DO_BIT(HFGITR, TLBIVALE1OS),
- DO_BIT(HFGITR, TLBIVAALE1OS),
- DO_BIT(HFGITR, TLBIRVAE1OS),
- DO_BIT(HFGITR, TLBIRVAAE1OS),
- DO_BIT(HFGITR, TLBIRVALE1OS),
- DO_BIT(HFGITR, TLBIRVAALE1OS),
- DO_BIT(HFGITR, TLBIVMALLE1IS),
- DO_BIT(HFGITR, TLBIVAE1IS),
- DO_BIT(HFGITR, TLBIASIDE1IS),
- DO_BIT(HFGITR, TLBIVAAE1IS),
- DO_BIT(HFGITR, TLBIVALE1IS),
- DO_BIT(HFGITR, TLBIVAALE1IS),
- DO_BIT(HFGITR, TLBIRVAE1IS),
- DO_BIT(HFGITR, TLBIRVAAE1IS),
- DO_BIT(HFGITR, TLBIRVALE1IS),
- DO_BIT(HFGITR, TLBIRVAALE1IS),
- DO_BIT(HFGITR, TLBIRVAE1),
- DO_BIT(HFGITR, TLBIRVAAE1),
- DO_BIT(HFGITR, TLBIRVALE1),
- DO_BIT(HFGITR, TLBIRVAALE1),
- DO_BIT(HFGITR, TLBIVMALLE1),
- DO_BIT(HFGITR, TLBIVAE1),
- DO_BIT(HFGITR, TLBIASIDE1),
- DO_BIT(HFGITR, TLBIVAAE1),
- DO_BIT(HFGITR, TLBIVALE1),
- DO_BIT(HFGITR, TLBIVAALE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIASIDE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAAE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVALE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAALE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVALE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1OS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIASIDE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAAE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVALE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAALE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVALE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1IS),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAAE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVALE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIRVAALE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVMALLE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIASIDE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAAE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVALE1),
+ DO_TLBINXS_BIT(HFGITR, TLBIVAALE1),
DO_BIT(HFGITR, CFPRCTX),
DO_BIT(HFGITR, DVPRCTX),
DO_BIT(HFGITR, CPPRCTX),
@@ -1134,4 +1158,32 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
return ri->opc1 == 4 || ri->opc1 == 5;
}
+/* Macros for accessing a specified CP register bank */
+#define A32_BANKED_REG_GET(_env, _regname, _secure) \
+ ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
+
+#define A32_BANKED_REG_SET(_env, _regname, _secure, _val) \
+ do { \
+ if (_secure) { \
+ (_env)->cp15._regname##_s = (_val); \
+ } else { \
+ (_env)->cp15._regname##_ns = (_val); \
+ } \
+ } while (0)
+
+/*
+ * Macros for automatically accessing a specific CP register bank depending on
+ * the current secure state of the system. These macros are not intended for
+ * supporting instruction translation reads/writes as these are dependent
+ * solely on the SCR.NS bit and not the mode.
+ */
+#define A32_BANKED_CURRENT_REG_GET(_env, _regname) \
+ A32_BANKED_REG_GET((_env), _regname, \
+ (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
+
+#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val) \
+ A32_BANKED_REG_SET((_env), _regname, \
+ (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
+ (_val))
+
#endif /* TARGET_ARM_CPREGS_H */
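[Usage sketch for the accessors moved into this header; the call sites are hypothetical, but fcseidr is one of the existing banked cp15 fields with _s/_ns variants:

    /* Read whichever bank the current secure state selects. */
    uint32_t fcse = A32_BANKED_CURRENT_REG_GET(env, fcseidr);

    /* Explicitly write the Secure bank. */
    A32_BANKED_REG_SET(env, fcseidr, true, 0);
]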
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index c59ca10..5d8adfb 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -21,6 +21,9 @@
#define TARGET_ARM_FEATURES_H
#include "hw/registerfields.h"
+#include "qemu/host-utils.h"
+#include "cpu.h"
+#include "cpu-sysregs.h"
/*
* Naming convention for isar_feature functions:
@@ -43,103 +46,103 @@
*/
static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) != 0;
}
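[The FIELD_EX32_IDREG/FIELD_EX64_IDREG macros are introduced elsewhere in this series and are not shown in this hunk; presumably they wrap the existing FIELD_EX32/FIELD_EX64 extractors around a lookup in the new idregs[] array via the *_IDX enumerators from cpu-sysregs.h, roughly:

    #define GET_IDREG(isar, name)  ((isar)->idregs[name ## _EL1_IDX])
    #define FIELD_EX32_IDREG(isar, reg, field) \
        FIELD_EX32(GET_IDREG(isar, reg), reg, field)
    #define FIELD_EX64_IDREG(isar, reg, field) \
        FIELD_EX64(GET_IDREG(isar, reg), reg, field)
]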
static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
+ return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) > 1;
}
static inline bool isar_feature_aa32_lob(const ARMISARegisters *id)
{
/* (M-profile) low-overhead loops and branch future */
- return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3;
+ return FIELD_EX32_IDREG(id, ID_ISAR0, CMPBRANCH) >= 3;
}
static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR1, JAZELLE) != 0;
}
static inline bool isar_feature_aa32_aes(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, AES) != 0;
}
static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, AES) > 1;
}
static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, SHA1) != 0;
}
static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, SHA2) != 0;
}
static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, CRC32) != 0;
}
static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, RDM) != 0;
}
static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR5, VCMA) != 0;
}
static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, JSCVT) != 0;
}
static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, DP) != 0;
}
static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, FHM) != 0;
}
static inline bool isar_feature_aa32_sb(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, SB) != 0;
}
static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, SPECRES) != 0;
}
static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, BF16) != 0;
}
static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0;
+ return FIELD_EX32_IDREG(id, ID_ISAR6, I8MM) != 0;
}
static inline bool isar_feature_aa32_ras(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0;
+ return FIELD_EX32_IDREG(id, ID_PFR0, RAS) != 0;
}
static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0;
+ return FIELD_EX32_IDREG(id, ID_PFR1, MPROGMOD) != 0;
}
static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id)
@@ -148,7 +151,7 @@ static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id)
* Return true if M-profile state handling insns
* (VSCCLRM, CLRM, FPCTX access insns) are implemented
*/
- return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3;
+ return FIELD_EX32_IDREG(id, ID_PFR1, SECURITY) >= 3;
}
static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
@@ -281,88 +284,88 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id)
static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4;
+ return FIELD_EX32_IDREG(id, ID_MMFR0, VMSA) >= 4;
}
static inline bool isar_feature_aa32_pan(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0;
+ return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) != 0;
}
static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2;
+ return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) >= 2;
}
static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id)
{
/* 0xf means "non-standard IMPDEF PMU" */
- return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 &&
- FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+ return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 4 &&
+ FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf;
}
static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id)
{
/* 0xf means "non-standard IMPDEF PMU" */
- return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 &&
- FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+ return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 5 &&
+ FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf;
}
static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id)
{
/* 0xf means "non-standard IMPDEF PMU" */
- return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 &&
- FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+ return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 6 &&
+ FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf;
}
static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, HPDS) != 0;
}
static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, AC2) != 0;
}
static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, CCIDX) != 0;
}
static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, XNX) != 0;
}
static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 1;
}
static inline bool isar_feature_aa32_evt(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2;
+ return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 2;
}
static inline bool isar_feature_aa32_dit(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0;
+ return FIELD_EX32_IDREG(id, ID_PFR0, DIT) != 0;
}
static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0;
+ return FIELD_EX32_IDREG(id, ID_PFR2, SSBS) != 0;
}
static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5;
+ return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 5;
}
static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id)
{
- return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8;
+ return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 8;
}
static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id)
@@ -375,102 +378,107 @@ static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id)
*/
static inline bool isar_feature_aa64_aes(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) != 0;
}
static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) > 1;
}
static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA1) != 0;
}
static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) != 0;
}
static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) > 1;
}
static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, CRC32) != 0;
}
static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) != 0;
}
static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RDM) != 0;
}
static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA3) != 0;
}
static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM3) != 0;
}
static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM4) != 0;
}
static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, DP) != 0;
}
static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, FHM) != 0;
}
static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) != 0;
}
static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) >= 2;
}
static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RNDR) != 0;
}
static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) == 2;
}
static inline bool isar_feature_aa64_tlbios(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) != 0;
}
static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, JSCVT) != 0;
}
static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FCMA) != 0;
+}
+
+static inline bool isar_feature_aa64_xs(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, XS) != 0;
}
/*
@@ -494,9 +502,9 @@ isar_feature_pauth_feature(const ARMISARegisters *id)
* Architecturally, only one of {APA,API,APA3} may be active (non-zero)
* and the other two must be zero. Thus we may avoid conditionals.
*/
- return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) |
- FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) |
- FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3));
+ return (FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) |
+ FIELD_EX64_IDREG(id, ID_AA64ISAR1, API) |
+ FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3));
}
static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id)
@@ -514,7 +522,7 @@ static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id)
* Return true if pauth is enabled with the architected QARMA5 algorithm.
* QEMU will always enable or disable both APA and GPA.
*/
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) != 0;
}
static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id)
@@ -523,134 +531,144 @@ static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id)
* Return true if pauth is enabled with the architected QARMA3 algorithm.
* QEMU will always enable or disable both APA3 and GPA3.
*/
- return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3) != 0;
}
static inline bool isar_feature_aa64_sb(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SB) != 0;
}
static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SPECRES) != 0;
}
static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FRINTTS) != 0;
}
static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) != 0;
}
static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) >= 2;
}
static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) != 0;
+}
+
+static inline bool isar_feature_aa64_ebf16(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) > 1;
}
static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) != 0;
}
static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) >= 2;
}
static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR1, I8MM) != 0;
}
static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR2, WFXT) >= 2;
}
static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR2, BC) != 0;
}
static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR2, MOPS);
+}
+
+static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES);
}
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
{
/* We always set the AdvSIMD and FP fields identically. */
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) != 0xf;
}
static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
{
/* We always set the AdvSIMD and FP fields identically wrt FP16. */
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) == 1;
}
static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL0) >= 2;
}
static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL1) >= 2;
}
static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL2) >= 2;
}
static inline bool isar_feature_aa64_ras(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) != 0;
}
static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) >= 2;
}
static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, SVE) != 0;
}
static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, SEL2) != 0;
}
static inline bool isar_feature_aa64_rme(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) != 0;
}
static inline bool isar_feature_aa64_dit(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR0, DIT) != 0;
}
static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id)
{
- int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2);
+ int key = FIELD_EX64_IDREG(id, ID_AA64PFR0, CSV2);
if (key >= 2) {
return true; /* FEAT_CSV2_2 */
}
if (key == 1) {
- key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC);
+ key = FIELD_EX64_IDREG(id, ID_AA64PFR1, CSV2_FRAC);
return key >= 2; /* FEAT_CSV2_1p2 */
}
return false;
@@ -658,310 +676,320 @@ static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id)
static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, SSBS) != 0;
}
static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, BT) != 0;
}
static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) != 0;
}
static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 2;
}
static inline bool isar_feature_aa64_mte3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 3;
}
static inline bool isar_feature_aa64_sme(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, SME) != 0;
}
static inline bool isar_feature_aa64_nmi(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, NMI) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64PFR1, NMI) != 0;
}
static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
{
- return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1;
+ return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 1;
}
static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id)
{
- unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2);
+ unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2);
return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id));
}
static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 2;
}
static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id)
{
- unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2);
+ unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2);
return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id));
}
static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id)
{
- return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0;
+ return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 0;
}
static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 1;
}
static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id)
{
- return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0;
+ return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN64) >= 0;
}
static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id)
{
- unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2);
+ unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2);
return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id));
}
static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id)
{
- unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2);
+ unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2);
return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id));
}
static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id)
{
- unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2);
+ unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN64_2);
return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id));
}
static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR0, FGT) != 0;
}
static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 0;
}
static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 1;
}
static inline bool isar_feature_aa64_vh(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, VH) != 0;
}
static inline bool isar_feature_aa64_lor(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, LO) != 0;
}
static inline bool isar_feature_aa64_pan(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) != 0;
}
static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 2;
}
static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 3;
}
static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HCX) != 0;
+}
+
+static inline bool isar_feature_aa64_afp(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, AFP) != 0;
}
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, TIDCP1) != 0;
+}
+
+static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id)
+{
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, CMOW) != 0;
}
static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) != 0;
}
static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) >= 2;
}
static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR1, XNX) != 0;
}
static inline bool isar_feature_aa64_uao(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, UAO) != 0;
}
static inline bool isar_feature_aa64_st(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, ST) != 0;
}
static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, AT) != 0;
}
static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, FWB) != 0;
}
static inline bool isar_feature_aa64_ids(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, IDS) != 0;
}
static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 1;
}
static inline bool isar_feature_aa64_evt(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 2;
}
static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, CCIDX) != 0;
}
static inline bool isar_feature_aa64_lva(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, VARANGE) != 0;
}
static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, E0PD) != 0;
}
static inline bool isar_feature_aa64_nv(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) != 0;
}
static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) >= 2;
}
static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
- FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+ return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 4 &&
+ FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf;
}
static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 &&
- FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+ return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 5 &&
+ FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf;
}
static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 &&
- FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+ return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 6 &&
+ FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf;
}
static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8;
+ return FIELD_EX64_IDREG(id, ID_AA64DFR0, DEBUGVER) >= 8;
}
static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id)
{
- return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0;
+ return FIELD_SEX64_IDREG(id, ID_AA64DFR0, DOUBLELOCK) >= 0;
}
static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) != 0;
}
static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) != 0;
}
static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) >= 2;
}
static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BITPERM) != 0;
}
static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BFLOAT16) != 0;
}
static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SHA3) != 0;
}
static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SM4) != 0;
}
static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, I8MM) != 0;
}
static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F32MM) != 0;
}
static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0;
+ return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F64MM) != 0;
}
static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64);
+ return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F64F64);
}
static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf;
+ return FIELD_EX64_IDREG(id, ID_AA64SMFR0, I16I64) == 0xf;
}
static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id)
{
- return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64);
+ return FIELD_EX64_IDREG(id, ID_AA64SMFR0, FA64);
}
/*
@@ -1022,6 +1050,55 @@ static inline bool isar_feature_any_evt(const ARMISARegisters *id)
return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
}
+typedef enum {
+ CCSIDR_FORMAT_LEGACY,
+ CCSIDR_FORMAT_CCIDX,
+} CCSIDRFormat;
+
+static inline uint64_t make_ccsidr(CCSIDRFormat format, unsigned assoc,
+ unsigned linesize, unsigned cachesize,
+ uint8_t flags)
+{
+ unsigned lg_linesize = ctz32(linesize);
+ unsigned sets;
+ uint64_t ccsidr = 0;
+
+ assert(assoc != 0);
+ assert(is_power_of_2(linesize));
+ assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
+
+ /* sets * associativity * linesize == cachesize. */
+ sets = cachesize / (assoc * linesize);
+ assert(cachesize % (assoc * linesize) == 0);
+
+ if (format == CCSIDR_FORMAT_LEGACY) {
+ /*
+ * The 32-bit CCSIDR format is:
+ * [27:13] number of sets - 1
+ * [12:3] associativity - 1
+ * [2:0] log2(linesize) - 4
+ * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
+ */
+ ccsidr = deposit32(ccsidr, 28, 4, flags);
+ ccsidr = deposit32(ccsidr, 13, 15, sets - 1);
+ ccsidr = deposit32(ccsidr, 3, 10, assoc - 1);
+ ccsidr = deposit32(ccsidr, 0, 3, lg_linesize - 4);
+ } else {
+ /*
+ * The 64-bit CCSIDR_EL1 format is:
+ * [55:32] number of sets - 1
+ * [23:3] associativity - 1
+ * [2:0] log2(linesize) - 4
+ * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
+ */
+ ccsidr = deposit64(ccsidr, 32, 24, sets - 1);
+ ccsidr = deposit64(ccsidr, 3, 21, assoc - 1);
+ ccsidr = deposit64(ccsidr, 0, 3, lg_linesize - 4);
+ }
+
+ return ccsidr;
+}
+
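[Worked example with made-up cache geometry: a 32 KiB, 4-way cache with 64-byte lines gives sets = 32768 / (4 * 64) = 128 and a linesize code of log2(64) - 4 = 2:

    uint64_t ccsidr = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * 1024, 0);
    /* Legacy format: sets-1 = 127 in [27:13], assoc-1 = 3 in [12:3],
     * linesize code 2 in [2:0]. */
]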
/*
* Forward to the above feature tests given an ARMCPU pointer.
*/
diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
index 2d5f3aa..8b46c7c 100644
--- a/target/arm/cpu-param.h
+++ b/target/arm/cpu-param.h
@@ -2,28 +2,24 @@
* ARM cpu parameters for qemu.
*
* Copyright (c) 2003 Fabrice Bellard
- * SPDX-License-Identifier: LGPL-2.0+
+ * SPDX-License-Identifier: LGPL-2.0-or-later
*/
#ifndef ARM_CPU_PARAM_H
#define ARM_CPU_PARAM_H
#ifdef TARGET_AARCH64
-# define TARGET_LONG_BITS 64
# define TARGET_PHYS_ADDR_SPACE_BITS 52
# define TARGET_VIRT_ADDR_SPACE_BITS 52
#else
-# define TARGET_LONG_BITS 32
# define TARGET_PHYS_ADDR_SPACE_BITS 40
# define TARGET_VIRT_ADDR_SPACE_BITS 32
#endif
#ifdef CONFIG_USER_ONLY
-# ifdef TARGET_AARCH64
-# define TARGET_TAGGED_ADDRESSES
+# if defined(TARGET_AARCH64) && defined(CONFIG_LINUX)
/* Allow user-only to vary page size from 4k */
# define TARGET_PAGE_BITS_VARY
-# define TARGET_PAGE_BITS_MIN 12
# else
# define TARGET_PAGE_BITS 12
# endif
@@ -33,10 +29,14 @@
* have to support 1K tiny pages.
*/
# define TARGET_PAGE_BITS_VARY
-# define TARGET_PAGE_BITS_MIN 10
+# define TARGET_PAGE_BITS_LEGACY 10
#endif /* !CONFIG_USER_ONLY */
-/* ARM processors have a weak memory model */
-#define TCG_GUEST_DEFAULT_MO (0)
+/*
+ * ARM-specific extra insn start words:
+ * 1: Conditional execution bits
+ * 2: Partial exception syndrome for data aborts
+ */
+#define TARGET_INSN_START_EXTRA_WORDS 2
#endif
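[These two extra words are what the translators record at each insn start and what arm_restore_state_to_opc() reads back in cpu.c above; as an abridged sketch of the A32 insn-start hook in translate.c:

    /* data[1] = condexec bits, data[2] = 0 (default exception syndrome,
     * overwritten per-insn only when a data abort needs it). */
    condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
    tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0);
]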
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index b497667..2fcb0e1 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -28,11 +28,6 @@ OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU)
#define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU
-#define TYPE_AARCH64_CPU "aarch64-cpu"
-typedef struct AArch64CPUClass AArch64CPUClass;
-DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
- TYPE_AARCH64_CPU)
-
#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
diff --git a/target/arm/cpu-sysregs.h b/target/arm/cpu-sysregs.h
new file mode 100644
index 0000000..7877a3b
--- /dev/null
+++ b/target/arm/cpu-sysregs.h
@@ -0,0 +1,42 @@
+/*
+ * Definitions for Arm ID system registers
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ARM_CPU_SYSREGS_H
+#define ARM_CPU_SYSREGS_H
+
+/*
+ * Following is similar to the coprocessor regs encodings, but with an argument
+ * ordering that matches the ARM ARM. We also reuse the various CP_REG_ defines
+ * that actually are the same as the equivalent KVM_REG_ values.
+ */
+#define ENCODE_ID_REG(op0, op1, crn, crm, op2) \
+ (((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
+ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
+ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
+ ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
+ ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) NAME##_IDX,
+
+typedef enum ARMIDRegisterIdx {
+#include "cpu-sysregs.h.inc"
+ NUM_ID_IDX,
+} ARMIDRegisterIdx;
+
+#undef DEF
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
+ SYS_##NAME = ENCODE_ID_REG(OP0, OP1, CRN, CRM, OP2),
+
+typedef enum ARMSysRegs {
+#include "cpu-sysregs.h.inc"
+} ARMSysRegs;
+
+#undef DEF
+
+extern const uint32_t id_register_sysreg[NUM_ID_IDX];
+
+int get_sysreg_idx(ARMSysRegs sysreg);
+
+#endif /* ARM_CPU_SYSREGS_H */
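[Taking the first entry of cpu-sysregs.h.inc (added below), the two DEF passes expand to roughly:

    /* pass 1: DEF(NAME, ...) -> NAME##_IDX */
    typedef enum ARMIDRegisterIdx {
        ID_AA64PFR0_EL1_IDX,
        /* ... one index per DEF() ... */
        NUM_ID_IDX,
    } ARMIDRegisterIdx;

    /* pass 2: DEF(NAME, op0, op1, crn, crm, op2) -> SYS_NAME = encoding */
    typedef enum ARMSysRegs {
        SYS_ID_AA64PFR0_EL1 = ENCODE_ID_REG(3, 0, 0, 4, 0),
        /* ... */
    } ARMSysRegs;
]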
diff --git a/target/arm/cpu-sysregs.h.inc b/target/arm/cpu-sysregs.h.inc
new file mode 100644
index 0000000..cb99286
--- /dev/null
+++ b/target/arm/cpu-sysregs.h.inc
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+DEF(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0)
+DEF(ID_AA64PFR1_EL1, 3, 0, 0, 4, 1)
+DEF(ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5)
+DEF(ID_AA64DFR0_EL1, 3, 0, 0, 5, 0)
+DEF(ID_AA64DFR1_EL1, 3, 0, 0, 5, 1)
+DEF(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0)
+DEF(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1)
+DEF(ID_AA64ISAR2_EL1, 3, 0, 0, 6, 2)
+DEF(ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0)
+DEF(ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1)
+DEF(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2)
+DEF(ID_AA64MMFR3_EL1, 3, 0, 0, 7, 3)
+DEF(ID_PFR0_EL1, 3, 0, 0, 1, 0)
+DEF(ID_PFR1_EL1, 3, 0, 0, 1, 1)
+DEF(ID_DFR0_EL1, 3, 0, 0, 1, 2)
+DEF(ID_MMFR0_EL1, 3, 0, 0, 1, 4)
+DEF(ID_MMFR1_EL1, 3, 0, 0, 1, 5)
+DEF(ID_MMFR2_EL1, 3, 0, 0, 1, 6)
+DEF(ID_MMFR3_EL1, 3, 0, 0, 1, 7)
+DEF(ID_ISAR0_EL1, 3, 0, 0, 2, 0)
+DEF(ID_ISAR1_EL1, 3, 0, 0, 2, 1)
+DEF(ID_ISAR2_EL1, 3, 0, 0, 2, 2)
+DEF(ID_ISAR3_EL1, 3, 0, 0, 2, 3)
+DEF(ID_ISAR4_EL1, 3, 0, 0, 2, 4)
+DEF(ID_ISAR5_EL1, 3, 0, 0, 2, 5)
+DEF(ID_MMFR4_EL1, 3, 0, 0, 2, 6)
+DEF(ID_ISAR6_EL1, 3, 0, 0, 2, 7)
+DEF(MVFR0_EL1, 3, 0, 0, 3, 0)
+DEF(MVFR1_EL1, 3, 0, 0, 3, 1)
+DEF(MVFR2_EL1, 3, 0, 0, 3, 2)
+DEF(ID_PFR2_EL1, 3, 0, 0, 3, 4)
+DEF(ID_DFR1_EL1, 3, 0, 0, 3, 5)
+DEF(ID_MMFR5_EL1, 3, 0, 0, 3, 6)
+DEF(ID_AA64ZFR0_EL1, 3, 0, 0, 4, 4)
+DEF(CTR_EL0, 3, 3, 0, 0, 1)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 35fa281..a59a5b5 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -23,16 +23,18 @@
#include "qemu/timer.h"
#include "qemu/log.h"
#include "exec/page-vary.h"
+#include "exec/tswap.h"
#include "target/arm/idau.h"
#include "qemu/module.h"
#include "qapi/error.h"
#include "cpu.h"
#ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "exec/translation-block.h"
+#include "accel/tcg/cpu-ops.h"
#endif /* CONFIG_TCG */
#include "internals.h"
#include "cpu-features.h"
-#include "exec/exec-all.h"
+#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/loader.h"
@@ -41,9 +43,9 @@
#include "hw/intc/armv7m_nvic.h"
#endif /* CONFIG_TCG */
#endif /* !CONFIG_USER_ONLY */
-#include "sysemu/tcg.h"
-#include "sysemu/qtest.h"
-#include "sysemu/hw_accel.h"
+#include "system/tcg.h"
+#include "system/qtest.h"
+#include "system/hw_accel.h"
#include "kvm_arm.h"
#include "disas/capstone.h"
#include "fpu/softfloat.h"
@@ -120,8 +122,15 @@ void arm_restore_state_to_opc(CPUState *cs,
env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
}
}
+
+int arm_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+ return arm_env_mmu_index(cpu_env(cs));
+}
+
#endif /* CONFIG_TCG */
+#ifndef CONFIG_USER_ONLY
/*
* With SCTLR_ELx.NMI == 0, IRQ with Superpriority is masked identically with
* IRQ without Superpriority. Moreover, if the GIC is configured so that
@@ -140,11 +149,7 @@ static bool arm_cpu_has_work(CPUState *cs)
| CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR
| CPU_INTERRUPT_EXITTB);
}
-
-static int arm_cpu_mmu_index(CPUState *cs, bool ifetch)
-{
- return arm_env_mmu_index(cpu_env(cs));
-}
+#endif /* !CONFIG_USER_ONLY */
void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
void *opaque)
@@ -545,18 +550,21 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
env->sau.ctrl = 0;
}
- set_flush_to_zero(1, &env->vfp.standard_fp_status);
- set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
- set_default_nan_mode(1, &env->vfp.standard_fp_status);
- set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.fp_status);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.standard_fp_status);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.fp_status_f16);
- set_float_detect_tininess(float_tininess_before_rounding,
- &env->vfp.standard_fp_status_f16);
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]);
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
+
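[The FPST_* indices are defined elsewhere in this series; presumably the previously named float_status fields became a single array in CPUARMState.vfp indexed by an enum along these lines (sketch, not the exact upstream definition):

    typedef enum ARMFPStatusFlavour {
        FPST_A32,       /* AArch32 VFP/Neon */
        FPST_A64,       /* AArch64 FP/SIMD */
        FPST_A32_F16,   /* AArch32 half-precision */
        FPST_A64_F16,   /* AArch64 half-precision */
        FPST_AH,        /* FPCR.AH == 1 behaviour */
        FPST_AH_F16,
        FPST_STD,       /* Neon "standard FP" */
        FPST_STD_F16,
        FPST_COUNT,
    } ARMFPStatusFlavour;

    /* float_status fp_status[FPST_COUNT]; */
]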
#ifndef CONFIG_USER_ONLY
if (kvm_enabled()) {
kvm_arm_reset_vcpu(cpu);
@@ -826,7 +834,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
- CPUClass *cc = CPU_GET_CLASS(cs);
CPUARMState *env = cpu_env(cs);
uint32_t cur_el = arm_current_el(env);
bool secure = arm_is_secure(env);
@@ -926,7 +933,7 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
found:
cs->exception_index = excp_idx;
env->exception.target_el = target_el;
- cc->tcg_ops->do_interrupt(cs);
+ cs->cc->tcg_ops->do_interrupt(cs);
return true;
}
@@ -1092,37 +1099,6 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level)
}
}
-static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level)
-{
-#ifdef CONFIG_KVM
- ARMCPU *cpu = opaque;
- CPUARMState *env = &cpu->env;
- CPUState *cs = CPU(cpu);
- uint32_t linestate_bit;
- int irq_id;
-
- switch (irq) {
- case ARM_CPU_IRQ:
- irq_id = KVM_ARM_IRQ_CPU_IRQ;
- linestate_bit = CPU_INTERRUPT_HARD;
- break;
- case ARM_CPU_FIQ:
- irq_id = KVM_ARM_IRQ_CPU_FIQ;
- linestate_bit = CPU_INTERRUPT_FIQ;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (level) {
- env->irq_line_state |= linestate_bit;
- } else {
- env->irq_line_state &= ~linestate_bit;
- }
- kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level);
-#endif
-}
-
static bool arm_cpu_virtio_is_big_endian(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
@@ -1133,7 +1109,7 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs)
}
#ifdef CONFIG_TCG
-static bool arm_cpu_exec_halt(CPUState *cs)
+bool arm_cpu_exec_halt(CPUState *cs)
{
bool leave_halt = cpu_has_work(cs);
@@ -1167,7 +1143,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
{
ARMCPU *ac = ARM_CPU(cpu);
CPUARMState *env = &ac->env;
- bool sctlr_b;
+ bool sctlr_b = arm_sctlr_b(env);
if (is_a64(env)) {
info->cap_arch = CS_ARCH_ARM64;
@@ -1194,13 +1170,9 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
info->cap_mode = cap_mode;
}
- sctlr_b = arm_sctlr_b(env);
+ info->endian = BFD_ENDIAN_LITTLE;
if (bswap_code(sctlr_b)) {
-#if TARGET_BIG_ENDIAN
- info->endian = BFD_ENDIAN_LITTLE;
-#else
- info->endian = BFD_ENDIAN_BIG;
-#endif
+ info->endian = target_big_endian() ? BFD_ENDIAN_LITTLE : BFD_ENDIAN_BIG;
}
info->flags &= ~INSN_ARM_BE32;
#ifndef CONFIG_USER_ONLY
@@ -1210,8 +1182,6 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
#endif
}
-#ifdef TARGET_AARCH64
-
static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
ARMCPU *cpu = ARM_CPU(cs);
@@ -1369,15 +1339,6 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
}
}
-#else
-
-static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
-{
- g_assert_not_reached();
-}
-
-#endif
-
static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
ARMCPU *cpu = ARM_CPU(cs);
@@ -1539,39 +1500,40 @@ static void arm_cpu_initfn(Object *obj)
* 0 means "unset, use the default value". That default might vary depending
* on the CPU type, and is set in the realize fn.
*/
-static Property arm_cpu_gt_cntfrq_property =
+#ifndef CONFIG_USER_ONLY
+static const Property arm_cpu_gt_cntfrq_property =
DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0);
-static Property arm_cpu_reset_cbar_property =
+static const Property arm_cpu_reset_cbar_property =
DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0);
-static Property arm_cpu_reset_hivecs_property =
+static const Property arm_cpu_reset_hivecs_property =
DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false);
-#ifndef CONFIG_USER_ONLY
-static Property arm_cpu_has_el2_property =
+static const Property arm_cpu_has_el2_property =
DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true);
-static Property arm_cpu_has_el3_property =
+static const Property arm_cpu_has_el3_property =
DEFINE_PROP_BOOL("has_el3", ARMCPU, has_el3, true);
#endif
-static Property arm_cpu_cfgend_property =
+static const Property arm_cpu_cfgend_property =
DEFINE_PROP_BOOL("cfgend", ARMCPU, cfgend, false);
-static Property arm_cpu_has_vfp_property =
+static const Property arm_cpu_has_vfp_property =
DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);
-static Property arm_cpu_has_vfp_d32_property =
+static const Property arm_cpu_has_vfp_d32_property =
DEFINE_PROP_BOOL("vfp-d32", ARMCPU, has_vfp_d32, true);
-static Property arm_cpu_has_neon_property =
+static const Property arm_cpu_has_neon_property =
DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true);
-static Property arm_cpu_has_dsp_property =
+static const Property arm_cpu_has_dsp_property =
DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true);
-static Property arm_cpu_has_mpu_property =
+#ifndef CONFIG_USER_ONLY
+static const Property arm_cpu_has_mpu_property =
DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true);
/* This is like DEFINE_PROP_UINT32 but it doesn't set the default value,
@@ -1579,10 +1541,11 @@ static Property arm_cpu_has_mpu_property =
* the right value for that particular CPU type, and we don't want
* to override that with an incorrect constant value.
*/
-static Property arm_cpu_pmsav7_dregion_property =
+static const Property arm_cpu_pmsav7_dregion_property =
DEFINE_PROP_UNSIGNED_NODEFAULT("pmsav7-dregion", ARMCPU,
pmsav7_dregion,
qdev_prop_uint32, uint32_t);
+#endif
static bool arm_get_pmu(Object *obj, Error **errp)
{
@@ -1607,6 +1570,35 @@ static void arm_set_pmu(Object *obj, bool value, Error **errp)
cpu->has_pmu = value;
}
+static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ return arm_feature(&cpu->env, ARM_FEATURE_AARCH64);
+}
+
+static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ /*
+ * At this time, this property is only allowed if KVM is enabled. This
+ * restriction allows us to avoid fixing up functionality that assumes a
+ * uniform execution state like do_interrupt.
+ */
+ if (value == false) {
+ if (!kvm_enabled() || !kvm_arm_aarch32_supported()) {
+ error_setg(errp, "'aarch64' feature cannot be disabled "
+ "unless KVM is enabled and 32-bit EL1 "
+ "is supported");
+ return;
+ }
+ unset_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ } else {
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ }
+}
+
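(In practice this is the property behind "-cpu host,aarch64=off", which
asks KVM to run the vCPU with a 32-bit EL1; as the setter above enforces,
it is rejected under TCG or on hosts whose KVM cannot provide 32-bit EL1.)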
unsigned int gt_cntfrq_period_ns(ARMCPU *cpu)
{
/*
@@ -1723,7 +1715,7 @@ static void arm_cpu_propagate_feature_implications(ARMCPU *cpu)
}
}
-void arm_cpu_post_init(Object *obj)
+static void arm_cpu_post_init(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1734,6 +1726,14 @@ void arm_cpu_post_init(Object *obj)
*/
arm_cpu_propagate_feature_implications(cpu);
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ object_property_add_bool(obj, "aarch64", aarch64_cpu_get_aarch64,
+ aarch64_cpu_set_aarch64);
+ object_property_set_description(obj, "aarch64",
+ "Set on/off to enable/disable aarch64 "
+ "execution state ");
+ }
+#ifndef CONFIG_USER_ONLY
if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property);
@@ -1749,7 +1749,6 @@ void arm_cpu_post_init(Object *obj)
OBJ_PROP_FLAG_READWRITE);
}
-#ifndef CONFIG_USER_ONLY
if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
/* Add the has_el3 state CPU property only if EL3 is allowed. This will
* prevent "has_el3" from existing on CPUs which cannot support EL3.
@@ -1821,6 +1820,7 @@ void arm_cpu_post_init(Object *obj)
qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property);
}
+#ifndef CONFIG_USER_ONLY
if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) {
qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property);
if (arm_feature(&cpu->env, ARM_FEATURE_V7)) {
@@ -1857,8 +1857,6 @@ void arm_cpu_post_init(Object *obj)
&cpu->psci_conduit,
OBJ_PROP_FLAG_READWRITE);
- qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property);
-
if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
}
@@ -1867,7 +1865,6 @@ void arm_cpu_post_init(Object *obj)
kvm_arm_add_vcpu_properties(cpu);
}
-#ifndef CONFIG_USER_ONLY
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) &&
cpu_isar_feature(aa64_mte, cpu)) {
object_property_add_link(obj, "tag-memory",
@@ -1885,6 +1882,7 @@ void arm_cpu_post_init(Object *obj)
}
}
#endif
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property);
}
static void arm_cpu_finalizefn(Object *obj)
@@ -1916,7 +1914,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
{
Error *local_err = NULL;
-#ifdef TARGET_AARCH64
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
arm_cpu_sve_finalize(cpu, &local_err);
if (local_err != NULL) {
@@ -1952,7 +1949,6 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
return;
}
}
-#endif
if (kvm_enabled()) {
kvm_arm_steal_time_finalize(cpu, &local_err);
@@ -1967,6 +1963,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
{
CPUState *cs = CPU(dev);
ARMCPU *cpu = ARM_CPU(dev);
+ ARMISARegisters *isar = &cpu->isar;
ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
CPUARMState *env = &cpu->env;
Error *local_err = NULL;
@@ -2069,6 +2066,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
arm_gt_stimer_cb, cpu);
cpu->gt_timer[GTIMER_HYPVIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
arm_gt_hvtimer_cb, cpu);
+ cpu->gt_timer[GTIMER_S_EL2_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_sel2timer_cb, cpu);
+ cpu->gt_timer[GTIMER_S_EL2_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_sel2vtimer_cb, cpu);
}
#endif
@@ -2120,21 +2121,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (!cpu->has_vfp) {
- uint64_t t;
uint32_t u;
- t = cpu->isar.id_aa64isar1;
- t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0);
- cpu->isar.id_aa64isar1 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64ISAR1, JSCVT, 0);
- t = cpu->isar.id_aa64pfr0;
- t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf);
- cpu->isar.id_aa64pfr0 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, FP, 0xf);
- u = cpu->isar.id_isar6;
+ u = GET_IDREG(isar, ID_ISAR6);
u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0);
u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
- cpu->isar.id_isar6 = u;
+ SET_IDREG(isar, ID_ISAR6, u);
u = cpu->isar.mvfr0;
u = FIELD_DP32(u, MVFR0, FPSP, 0);
@@ -2168,7 +2164,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
unset_feature(env, ARM_FEATURE_NEON);
- t = cpu->isar.id_aa64isar0;
+ t = GET_IDREG(isar, ID_AA64ISAR0);
t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0);
t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0);
t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0);
@@ -2176,32 +2172,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0);
t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0);
t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0);
- cpu->isar.id_aa64isar0 = t;
+ SET_IDREG(isar, ID_AA64ISAR0, t);
- t = cpu->isar.id_aa64isar1;
+ t = GET_IDREG(isar, ID_AA64ISAR1);
t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0);
t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0);
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0);
- cpu->isar.id_aa64isar1 = t;
+ SET_IDREG(isar, ID_AA64ISAR1, t);
- t = cpu->isar.id_aa64pfr0;
- t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf);
- cpu->isar.id_aa64pfr0 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, ADVSIMD, 0xf);
- u = cpu->isar.id_isar5;
+ u = GET_IDREG(isar, ID_ISAR5);
u = FIELD_DP32(u, ID_ISAR5, AES, 0);
u = FIELD_DP32(u, ID_ISAR5, SHA1, 0);
u = FIELD_DP32(u, ID_ISAR5, SHA2, 0);
u = FIELD_DP32(u, ID_ISAR5, RDM, 0);
u = FIELD_DP32(u, ID_ISAR5, VCMA, 0);
- cpu->isar.id_isar5 = u;
+ SET_IDREG(isar, ID_ISAR5, u);
- u = cpu->isar.id_isar6;
+ u = GET_IDREG(isar, ID_ISAR6);
u = FIELD_DP32(u, ID_ISAR6, DP, 0);
u = FIELD_DP32(u, ID_ISAR6, FHM, 0);
u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
u = FIELD_DP32(u, ID_ISAR6, I8MM, 0);
- cpu->isar.id_isar6 = u;
+ SET_IDREG(isar, ID_ISAR6, u);
if (!arm_feature(env, ARM_FEATURE_M)) {
u = cpu->isar.mvfr1;
@@ -2218,16 +2212,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (!cpu->has_neon && !cpu->has_vfp) {
- uint64_t t;
uint32_t u;
- t = cpu->isar.id_aa64isar0;
- t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0);
- cpu->isar.id_aa64isar0 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64ISAR0, FHM, 0);
- t = cpu->isar.id_aa64isar1;
- t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0);
- cpu->isar.id_aa64isar1 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64ISAR1, FRINTTS, 0);
u = cpu->isar.mvfr0;
u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
@@ -2244,19 +2233,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
unset_feature(env, ARM_FEATURE_THUMB_DSP);
- u = cpu->isar.id_isar1;
- u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1);
- cpu->isar.id_isar1 = u;
+ FIELD_DP32_IDREG(isar, ID_ISAR1, EXTEND, 1);
- u = cpu->isar.id_isar2;
+ u = GET_IDREG(isar, ID_ISAR2);
u = FIELD_DP32(u, ID_ISAR2, MULTU, 1);
u = FIELD_DP32(u, ID_ISAR2, MULTS, 1);
- cpu->isar.id_isar2 = u;
+ SET_IDREG(isar, ID_ISAR2, u);
- u = cpu->isar.id_isar3;
+ u = GET_IDREG(isar, ID_ISAR3);
u = FIELD_DP32(u, ID_ISAR3, SIMD, 1);
u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0);
- cpu->isar.id_isar3 = u;
+ SET_IDREG(isar, ID_ISAR3, u);
}
@@ -2331,14 +2318,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* Disable the security extension feature bits in the processor
* feature registers as well.
*/
- cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0);
- cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0);
- cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
- ID_AA64PFR0, EL3, 0);
+ FIELD_DP32_IDREG(isar, ID_PFR1, SECURITY, 0);
+ FIELD_DP32_IDREG(isar, ID_DFR0, COPSDBG, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL3, 0);
/* Disable the realm management extension, which requires EL3. */
- cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
- ID_AA64PFR0, RME, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, RME, 0);
}
if (!cpu->has_el2) {
@@ -2361,9 +2346,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
cpu);
#endif
} else {
- cpu->isar.id_aa64dfr0 =
- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0);
- cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMUVER, 0);
+ FIELD_DP32_IDREG(isar, ID_DFR0, PERFMON, 0);
cpu->pmceid0 = 0;
cpu->pmceid1 = 0;
}
@@ -2373,10 +2357,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* Disable the hypervisor feature bits in the processor feature
* registers if we don't have EL2.
*/
- cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
- ID_AA64PFR0, EL2, 0);
- cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1,
- ID_PFR1, VIRTUALIZATION, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL2, 0);
+ FIELD_DP32_IDREG(isar, ID_PFR1, VIRTUALIZATION, 0);
}
if (cpu_isar_feature(aa64_mte, cpu)) {
@@ -2390,13 +2372,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
#ifndef CONFIG_USER_ONLY
/*
- * If we do not have tag-memory provided by the machine,
- * reduce MTE support to instructions enabled at EL0.
+ * If we run with TCG and do not have tag-memory provided by
+ * the machine, then reduce MTE support to instructions enabled at EL0.
* This matches Cortex-A710 BROADCASTMTE input being LOW.
*/
- if (cpu->tag_memory == NULL) {
- cpu->isar.id_aa64pfr1 =
- FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+ if (tcg_enabled() && cpu->tag_memory == NULL) {
+ FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 1);
+ }
+
+ /*
+ * If MTE is supported by the host but should not be enabled on the
+ * guest (i.e. mte=off), clear the guest's MTE bits.
+ */
+ if (kvm_enabled() && !cpu->kvm_mte) {
+ FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 0);
}
#endif
}
@@ -2416,32 +2405,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* try to access the non-existent system registers for them.
*/
/* FEAT_SPE (Statistical Profiling Extension) */
- cpu->isar.id_aa64dfr0 =
- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMSVER, 0);
/* FEAT_TRBE (Trace Buffer Extension) */
- cpu->isar.id_aa64dfr0 =
- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEBUFFER, 0);
/* FEAT_TRF (Self-hosted Trace Extension) */
- cpu->isar.id_aa64dfr0 =
- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0);
- cpu->isar.id_dfr0 =
- FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEFILT, 0);
+ FIELD_DP32_IDREG(isar, ID_DFR0, TRACEFILT, 0);
/* Trace Macrocell system register access */
- cpu->isar.id_aa64dfr0 =
- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0);
- cpu->isar.id_dfr0 =
- FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEVER, 0);
+ FIELD_DP32_IDREG(isar, ID_DFR0, COPTRC, 0);
/* Memory mapped trace */
- cpu->isar.id_dfr0 =
- FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0);
+ FIELD_DP32_IDREG(isar, ID_DFR0, MMAPTRC, 0);
/* FEAT_AMU (Activity Monitors Extension) */
- cpu->isar.id_aa64pfr0 =
- FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0);
- cpu->isar.id_pfr0 =
- FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, AMU, 0);
+ FIELD_DP32_IDREG(isar, ID_PFR0, AMU, 0);
/* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
- cpu->isar.id_aa64pfr0 =
- FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
+ FIELD_DP64_IDREG(isar, ID_AA64PFR0, MPAM, 0);
}
/* MPU can be configured out of a PMSA CPU either by setting has-mpu
@@ -2518,6 +2497,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
register_cp_regs_for_features(cpu);
arm_cpu_register_gdb_regs_for_features(cpu);
+ arm_cpu_register_gdb_commands(cpu);
init_cpreg_list(cpu);
@@ -2616,7 +2596,7 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
return oc;
}
-static Property arm_cpu_properties[] = {
+static const Property arm_cpu_properties[] = {
DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
@@ -2624,7 +2604,8 @@ static Property arm_cpu_properties[] = {
DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1),
/* True to default to the backward-compat old CNTFRQ rather than 1Ghz */
DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false),
- DEFINE_PROP_END_OF_LIST()
+ DEFINE_PROP_BOOL("backcompat-pauth-default-use-qarma5", ARMCPU,
+ backcompat_pauth_default_use_qarma5, false),
};
static const gchar *arm_gdb_arch_name(CPUState *cs)
@@ -2632,16 +2613,58 @@ static const gchar *arm_gdb_arch_name(CPUState *cs)
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
+ if (arm_gdbstub_is_aarch64(cpu)) {
+ return "aarch64";
+ }
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
return "iwmmxt";
}
return "arm";
}
-#ifndef CONFIG_USER_ONLY
+static const char *arm_gdb_get_core_xml_file(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ if (arm_gdbstub_is_aarch64(cpu)) {
+ return "aarch64-core.xml";
+ }
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return "arm-m-profile.xml";
+ }
+ return "arm-core.xml";
+}
+
+#ifdef CONFIG_USER_ONLY
+/**
+ * aarch64_untagged_addr:
+ *
+ * Remove any address tag from @x. This is explicitly related to the
+ * linux syscall TIF_TAGGED_ADDR setting, not TBI in general.
+ *
+ * There should be a better place to put this, but we need this in
+ * include/exec/cpu_ldst.h, and not some place linux-user specific.
+ *
+ * Note that arm-*-user will never set tagged_addr_enable.
+ */
+static vaddr aarch64_untagged_addr(CPUState *cs, vaddr x)
+{
+ CPUARMState *env = cpu_env(cs);
+ if (env->tagged_addr_enable) {
+ /*
+ * TBI is enabled for userspace but not kernelspace addresses.
+ * Only clear the tag if bit 55 is clear.
+ */
+ x &= sextract64(x, 0, 56);
+ }
+ return x;
+}
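As a standalone sketch of the masking step (re-implementing
sextract64(x, 0, 56) inline, with made-up example addresses):

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend from bit 55: user-half addresses (bit 55 clear) get
     * bits 63:56 cleared; kernel-half addresses pass through untouched. */
    static uint64_t untag(uint64_t x)
    {
        uint64_t low56 = (uint64_t)((int64_t)(x << 8) >> 8);
        return x & low56;
    }

    int main(void)
    {
        uint64_t user = 0x3a00007fdeadbee0ull;  /* tag 0x3a, bit 55 clear */
        uint64_t kern = 0xff80000000001000ull;  /* bit 55 set */
        printf("%#llx -> %#llx\n", (unsigned long long)user,
               (unsigned long long)untag(user));
        printf("%#llx -> %#llx\n", (unsigned long long)kern,
               (unsigned long long)untag(kern));
        return 0;
    }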
+#else
#include "hw/core/sysemu-cpu-ops.h"
static const struct SysemuCPUOps arm_sysemu_ops = {
+ .has_work = arm_cpu_has_work,
.get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug,
.asidx_from_attrs = arm_asidx_from_attrs,
.write_elf32_note = arm_cpu_write_elf32_note,
@@ -2652,19 +2675,52 @@ static const struct SysemuCPUOps arm_sysemu_ops = {
#endif
#ifdef CONFIG_TCG
+#ifndef CONFIG_USER_ONLY
+static vaddr aprofile_pointer_wrap(CPUState *cs, int mmu_idx,
+ vaddr result, vaddr base)
+{
+ /*
+ * The Stage2 and Phys indexes are only used for ptw on arm32,
+ * and all pte's are aligned, so we never produce a wrap for these.
+ * Double check that we're not truncating a 40-bit physical address.
+ */
+ assert((unsigned)mmu_idx < (ARMMMUIdx_Stage2_S & ARM_MMU_IDX_COREIDX_MASK));
+
+ if (!is_a64(cpu_env(cs))) {
+ return (uint32_t)result;
+ }
+
+ /*
+ * TODO: For FEAT_CPA2, decide how we want to resolve
+ * Unpredictable_CPACHECK in AddressIncrement.
+ */
+ return result;
+}
+#endif /* !CONFIG_USER_ONLY */
+
static const TCGCPUOps arm_tcg_ops = {
+ .mttcg_supported = true,
+ /* ARM processors have a weak memory model */
+ .guest_default_memory_order = 0,
+
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
+ .get_tb_cpu_state = arm_get_tb_cpu_state,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
+ .mmu_index = arm_cpu_mmu_index,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = arm_cpu_record_sigsegv,
.record_sigbus = arm_cpu_record_sigbus,
+ .untagged_addr = aarch64_untagged_addr,
#else
- .tlb_fill = arm_cpu_tlb_fill,
+ .tlb_fill_align = arm_cpu_tlb_fill_align,
+ .pointer_wrap = aprofile_pointer_wrap,
.cpu_exec_interrupt = arm_cpu_exec_interrupt,
.cpu_exec_halt = arm_cpu_exec_halt,
+ .cpu_exec_reset = cpu_reset,
.do_interrupt = arm_cpu_do_interrupt,
.do_transaction_failed = arm_cpu_do_transaction_failed,
.do_unaligned_access = arm_cpu_do_unaligned_access,
@@ -2675,7 +2731,7 @@ static const TCGCPUOps arm_tcg_ops = {
};
#endif /* CONFIG_TCG */
-static void arm_cpu_class_init(ObjectClass *oc, void *data)
+static void arm_cpu_class_init(ObjectClass *oc, const void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
CPUClass *cc = CPU_CLASS(acc);
@@ -2691,8 +2747,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
&acc->parent_phases);
cc->class_by_name = arm_cpu_class_by_name;
- cc->has_work = arm_cpu_has_work;
- cc->mmu_index = arm_cpu_mmu_index;
cc->dump_state = arm_cpu_dump_state;
cc->set_pc = arm_cpu_set_pc;
cc->get_pc = arm_cpu_get_pc;
@@ -2702,6 +2756,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
cc->sysemu_ops = &arm_sysemu_ops;
#endif
cc->gdb_arch_name = arm_gdb_arch_name;
+ cc->gdb_get_core_xml_file = arm_gdb_get_core_xml_file;
cc->gdb_stop_before_watchpoint = true;
cc->disas_set_info = arm_disas_set_info;
@@ -2718,13 +2773,15 @@ static void arm_cpu_instance_init(Object *obj)
arm_cpu_post_init(obj);
}
-static void cpu_register_class_init(ObjectClass *oc, void *data)
+static void cpu_register_class_init(ObjectClass *oc, const void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
CPUClass *cc = CPU_CLASS(acc);
acc->info = data;
- cc->gdb_core_xml_file = "arm-core.xml";
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void arm_cpu_register(const ARMCPUInfo *info)
@@ -2733,11 +2790,11 @@ void arm_cpu_register(const ARMCPUInfo *info)
.parent = TYPE_ARM_CPU,
.instance_init = arm_cpu_instance_init,
.class_init = info->class_init ?: cpu_register_class_init,
- .class_data = (void *)info,
+ .class_data = info,
};
type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name);
- type_register(&type_info);
+ type_register_static(&type_info);
g_free((void *)type_info.name);
}
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 3841359..0338153 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -24,16 +24,15 @@
#include "qemu/cpu-float.h"
#include "hw/registerfields.h"
#include "cpu-qom.h"
+#include "exec/cpu-common.h"
#include "exec/cpu-defs.h"
+#include "exec/cpu-interrupt.h"
#include "exec/gdbstub.h"
#include "exec/page-protection.h"
#include "qapi/qapi-types-common.h"
#include "target/arm/multiprocessing.h"
#include "target/arm/gtimer.h"
-
-#ifdef TARGET_AARCH64
-#define KVM_HAVE_MCE_INJECTION 1
-#endif
+#include "target/arm/cpu-sysregs.h"
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
@@ -62,6 +61,7 @@
#define EXCP_NMI 26
#define EXCP_VINMI 27
#define EXCP_VFNMI 28
+#define EXCP_MON_TRAP 29 /* AArch32 trap to Monitor mode */
/* NB: add new EXCP_ defines to the array in arm_log_exception() too */
#define ARMV7M_EXCP_RESET 1
@@ -99,12 +99,6 @@
#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
#endif
-/* ARM-specific extra insn start words:
- * 1: Conditional execution bits
- * 2: Partial exception syndrome for data aborts
- */
-#define TARGET_INSN_START_EXTRA_WORDS 2
-
/* The 2nd extra word holding syndrome info for data aborts does not use
* the upper 6 bits nor the lower 13 bits. We mask and shift it down to
* help the sleb128 encoder do a better job.
@@ -170,17 +164,12 @@ typedef struct ARMGenericTimer {
* Align the data for use with TCG host vector operations.
*/
-#ifdef TARGET_AARCH64
-# define ARM_MAX_VQ 16
-#else
-# define ARM_MAX_VQ 1
-#endif
+#define ARM_MAX_VQ 16
typedef struct ARMVectorReg {
uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
} ARMVectorReg;
-#ifdef TARGET_AARCH64
/* In AArch32 mode, predicate registers do not exist at all. */
typedef struct ARMPredicateReg {
uint64_t p[DIV_ROUND_UP(2 * ARM_MAX_VQ, 8)] QEMU_ALIGNED(16);
@@ -190,18 +179,72 @@ typedef struct ARMPredicateReg {
typedef struct ARMPACKey {
uint64_t lo, hi;
} ARMPACKey;
-#endif
/* See the commentary above the TBFLAG field definitions. */
typedef struct CPUARMTBFlags {
uint32_t flags;
- target_ulong flags2;
+ uint64_t flags2;
} CPUARMTBFlags;
typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
typedef struct NVICState NVICState;
+/*
+ * Enum for indexing vfp.fp_status[].
+ *
+ * FPST_A32: is the "normal" fp status for AArch32 insns
+ * FPST_A64: is the "normal" fp status for AArch64 insns
+ * FPST_A32_F16: used for AArch32 half-precision calculations
+ * FPST_A64_F16: used for AArch64 half-precision calculations
+ * FPST_STD: the ARM "Standard FPSCR Value"
+ * FPST_STD_F16: used for half-precision
+ * calculations with the ARM "Standard FPSCR Value"
+ * FPST_AH: used for the A64 insns which change behaviour
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
+ * and the reciprocal and square root estimate/step insns)
+ * FPST_AH_F16: used for the A64 insns which change behaviour
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
+ * and the reciprocal and square root estimate/step insns);
+ * for half-precision
+ *
+ * Half-precision operations are governed by a separate
+ * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
+ * status structure to control this.
+ *
+ * The "Standard FPSCR", ie default-NaN, flush-to-zero,
+ * round-to-nearest and is used by any operations (generally
+ * Neon) which the architecture defines as controlled by the
+ * standard FPSCR value rather than the FPSCR.
+ *
+ * The "standard FPSCR but for fp16 ops" is needed because
+ * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
+ * using a fixed value for it.
+ *
+ * FPST_AH is needed because some insns have different
+ * behaviour when FPCR.AH == 1: they don't update cumulative
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
+ * which means we need an FPST_AH_F16 as well.
+ *
+ * To avoid having to transfer exception bits around, we simply
+ * say that the FPSCR cumulative exception flags are the logical
+ * OR of the flags in the four fp statuses. This relies on the
+ * only thing which needs to read the exception flags being
+ * an explicit FPSCR read.
+ */
+typedef enum ARMFPStatusFlavour {
+ FPST_A32,
+ FPST_A64,
+ FPST_A32_F16,
+ FPST_A64_F16,
+ FPST_AH,
+ FPST_AH_F16,
+ FPST_STD,
+ FPST_STD_F16,
+} ARMFPStatusFlavour;
+#define FPST_COUNT 8
+
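Selecting a flavour is then an array index rather than a struct field. A
minimal standalone model of the lookup (float_status here is a stand-in
struct, not softfloat's):

    #include <stdio.h>

    typedef struct { int default_nan, flush_to_zero; } float_status;

    enum { FPST_A32, FPST_A64, FPST_A32_F16, FPST_A64_F16,
           FPST_AH, FPST_AH_F16, FPST_STD, FPST_STD_F16, FPST_COUNT };

    int main(void)
    {
        /* Model of env->vfp.fp_status[]: one slot per flavour. */
        float_status fp_status[FPST_COUNT] = { { 0 } };

        /* The ARM "Standard FPSCR value" is default-NaN, flush-to-zero. */
        fp_status[FPST_STD].default_nan = 1;
        fp_status[FPST_STD].flush_to_zero = 1;

        /* A helper picks its flavour by index, e.g. FPST_STD. */
        float_status *fpst = &fp_status[FPST_STD];
        printf("DN=%d FZ=%d\n", fpst->default_nan, fpst->flush_to_zero);
        return 0;
    }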
typedef struct CPUArchState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -606,55 +649,31 @@ typedef struct CPUArchState {
struct {
ARMVectorReg zregs[32];
-#ifdef TARGET_AARCH64
/* Store FFR as pregs[16] to make it easier to treat as any other. */
#define FFR_PRED_NUM 16
ARMPredicateReg pregs[17];
/* Scratch space for aa64 sve predicate temporary. */
ARMPredicateReg preg_tmp;
-#endif
/* We store these fpcsr fields separately for convenience. */
uint32_t qc[4] QEMU_ALIGNED(16);
int vec_len;
int vec_stride;
+ /*
+ * Floating point status and control registers. Some bits are
+ * stored separately in other fields or in the float_status below.
+ */
+ uint64_t fpsr;
+ uint64_t fpcr;
+
uint32_t xregs[16];
/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];
- /* There are a number of distinct float control structures:
- *
- * fp_status: is the "normal" fp status.
- * fp_status_fp16: used for half-precision calculations
- * standard_fp_status : the ARM "Standard FPSCR Value"
- * standard_fp_status_fp16 : used for half-precision
- * calculations with the ARM "Standard FPSCR Value"
- *
- * Half-precision operations are governed by a separate
- * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
- * status structure to control this.
- *
- * The "Standard FPSCR", ie default-NaN, flush-to-zero,
- * round-to-nearest and is used by any operations (generally
- * Neon) which the architecture defines as controlled by the
- * standard FPSCR value rather than the FPSCR.
- *
- * The "standard FPSCR but for fp16 ops" is needed because
- * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
- * using a fixed value for it.
- *
- * To avoid having to transfer exception bits around, we simply
- * say that the FPSCR cumulative exception flags are the logical
- * OR of the flags in the four fp statuses. This relies on the
- * only thing which needs to read the exception flags being
- * an explicit FPSCR read.
- */
- float_status fp_status;
- float_status fp_status_f16;
- float_status standard_fp_status;
- float_status standard_fp_status_f16;
+ /* There are a number of distinct float control structures. */
+ float_status fp_status[FPST_COUNT];
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
@@ -679,7 +698,6 @@ typedef struct CPUArchState {
uint32_t cregs[16];
} iwmmxt;
-#ifdef TARGET_AARCH64
struct {
ARMPACKey apia;
ARMPACKey apib;
@@ -711,7 +729,6 @@ typedef struct CPUArchState {
* to keep the offsets into the rest of the structure smaller.
*/
ARMVectorReg zarray[ARM_MAX_VQ * 16];
-#endif
struct CPUBreakpoint *cpu_breakpoint[16];
struct CPUWatchpoint *cpu_watchpoint[16];
@@ -767,12 +784,9 @@ typedef struct CPUArchState {
#else /* CONFIG_USER_ONLY */
/* For usermode syscall translation. */
bool eabi;
-#endif /* CONFIG_USER_ONLY */
-
-#ifdef TARGET_TAGGED_ADDRESSES
/* Linux syscall tagged address support */
bool tagged_addr_enable;
-#endif
+#endif /* CONFIG_USER_ONLY */
} CPUARMState;
static inline void set_feature(CPUARMState *env, int feature)
@@ -821,6 +835,53 @@ typedef struct {
uint32_t map, init, supported;
} ARMVQMap;
+/* REG is ID_XXX */
+#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \
+ regval = FIELD_DP64(regval, REG, FIELD, VALUE); \
+ i_->idregs[REG ## _EL1_IDX] = regval; \
+ })
+
+#define FIELD_DP32_IDREG(ISAR, REG, FIELD, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \
+ regval = FIELD_DP32(regval, REG, FIELD, VALUE); \
+ i_->idregs[REG ## _EL1_IDX] = regval; \
+ })
+
+#define FIELD_EX64_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_EX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define FIELD_EX32_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_EX32(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define FIELD_SEX64_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_SEX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define SET_IDREG(ISAR, REG, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ i_->idregs[REG ## _EL1_IDX] = VALUE; \
+ })
+
+#define GET_IDREG(ISAR, REG) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ i_->idregs[REG ## _EL1_IDX]; \
+ })
+
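The token-pasting these helpers rely on can be shown standalone; FOO and
BAR below are hypothetical register names, and the GNU C statement
expressions match the SET_IDREG/GET_IDREG shape above:

    #include <stdint.h>
    #include <stdio.h>

    enum { FOO_EL1_IDX, BAR_EL1_IDX, NUM_ID_IDX };
    typedef struct { uint64_t idregs[NUM_ID_IDX]; } ARMISARegisters;

    /* REG ## _EL1_IDX turns a register name into its array slot. */
    #define SET_IDREG(ISAR, REG, VALUE) \
        ({ (ISAR)->idregs[REG ## _EL1_IDX] = (VALUE); })
    #define GET_IDREG(ISAR, REG) \
        ({ (ISAR)->idregs[REG ## _EL1_IDX]; })

    int main(void)
    {
        ARMISARegisters isar = { .idregs = { 0 } };
        SET_IDREG(&isar, FOO, 0x1122334455667788ull);
        printf("FOO = %#llx\n", (unsigned long long)GET_IDREG(&isar, FOO));
        return 0;
    }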
/**
* ARMCPU:
* @env: #CPUARMState
@@ -915,6 +976,8 @@ struct ArchCPU {
/* CPU has memory protection unit */
bool has_mpu;
+ /* CPU has MTE enabled in KVM mode */
+ bool kvm_mte;
/* PMSAv7 MPU number of supported regions */
uint32_t pmsav7_dregion;
/* PMSAv8 MPU number of supported hyp regions */
@@ -937,7 +1000,6 @@ struct ArchCPU {
*/
uint32_t kvm_target;
-#ifdef CONFIG_KVM
/* KVM init features for this CPU */
uint32_t kvm_init_features[7];
@@ -950,7 +1012,6 @@ struct ArchCPU {
/* KVM steal time */
OnOffAuto kvm_steal_time;
-#endif /* CONFIG_KVM */
/* Uniprocessor system with MP extensions */
bool mp_is_up;
@@ -963,6 +1024,9 @@ struct ArchCPU {
/* QOM property to indicate we should use the back-compat CNTFRQ default */
bool backcompat_cntfrq;
+ /* QOM property to indicate we should use the back-compat QARMA5 default */
+ bool backcompat_pauth_default_use_qarma5;
+
/* Specify the number of cores in this CPU cluster. Used for the L2CTLR
* register.
*/
@@ -986,44 +1050,14 @@ struct ArchCPU {
* field by reading the value from the KVM vCPU.
*/
struct ARMISARegisters {
- uint32_t id_isar0;
- uint32_t id_isar1;
- uint32_t id_isar2;
- uint32_t id_isar3;
- uint32_t id_isar4;
- uint32_t id_isar5;
- uint32_t id_isar6;
- uint32_t id_mmfr0;
- uint32_t id_mmfr1;
- uint32_t id_mmfr2;
- uint32_t id_mmfr3;
- uint32_t id_mmfr4;
- uint32_t id_mmfr5;
- uint32_t id_pfr0;
- uint32_t id_pfr1;
- uint32_t id_pfr2;
uint32_t mvfr0;
uint32_t mvfr1;
uint32_t mvfr2;
- uint32_t id_dfr0;
- uint32_t id_dfr1;
uint32_t dbgdidr;
uint32_t dbgdevid;
uint32_t dbgdevid1;
- uint64_t id_aa64isar0;
- uint64_t id_aa64isar1;
- uint64_t id_aa64isar2;
- uint64_t id_aa64pfr0;
- uint64_t id_aa64pfr1;
- uint64_t id_aa64mmfr0;
- uint64_t id_aa64mmfr1;
- uint64_t id_aa64mmfr2;
- uint64_t id_aa64mmfr3;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
- uint64_t id_aa64zfr0;
- uint64_t id_aa64smfr0;
uint64_t reset_pmcr_el0;
+ uint64_t idregs[NUM_ID_IDX];
} isar;
uint64_t midr;
uint32_t revidr;
@@ -1053,6 +1087,7 @@ struct ArchCPU {
bool prop_pauth;
bool prop_pauth_impdef;
bool prop_pauth_qarma3;
+ bool prop_pauth_qarma5;
bool prop_lpa2;
/* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
@@ -1101,8 +1136,9 @@ struct ArchCPU {
typedef struct ARMCPUInfo {
const char *name;
+ const char *deprecation_note;
void (*initfn)(Object *obj);
- void (*class_init)(ObjectClass *oc, void *data);
+ void (*class_init)(ObjectClass *oc, const void *data);
} ARMCPUInfo;
/**
@@ -1120,22 +1156,18 @@ struct ARMCPUClass {
ResettablePhases parent_phases;
};
-struct AArch64CPUClass {
- ARMCPUClass parent_class;
-};
-
/* Callback functions for the generic timer's timers. */
void arm_gt_ptimer_cb(void *opaque);
void arm_gt_vtimer_cb(void *opaque);
void arm_gt_htimer_cb(void *opaque);
void arm_gt_stimer_cb(void *opaque);
void arm_gt_hvtimer_cb(void *opaque);
+void arm_gt_sel2timer_cb(void *opaque);
+void arm_gt_sel2vtimer_cb(void *opaque);
unsigned int gt_cntfrq_period_ns(ARMCPU *cpu);
void gt_rme_post_el_change(ARMCPU *cpu, void *opaque);
-void arm_cpu_post_init(Object *obj);
-
#define ARM_AFF0_SHIFT 0
#define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT)
#define ARM_AFF1_SHIFT 8
@@ -1193,7 +1225,6 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
*/
void arm_emulate_firmware_reset(CPUState *cpustate, int target_el);
-#ifdef TARGET_AARCH64
int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
@@ -1225,13 +1256,6 @@ static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
#endif
}
-#else
-static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
-static inline void aarch64_sve_change_el(CPUARMState *env, int o,
- int n, bool a)
-{ }
-#endif
-
void aarch64_sync_32_to_64(CPUARMState *env);
void aarch64_sync_64_to_32(CPUARMState *env);
@@ -1358,6 +1382,7 @@ void pmu_init(ARMCPU *cpu);
#define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */
#define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */
#define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
+#define SCTLR_CMOW (1ULL << 32) /* FEAT_CMOW */
#define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */
#define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */
#define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */
@@ -1680,61 +1705,103 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
uint32_t vfp_get_fpscr(CPUARMState *env);
void vfp_set_fpscr(CPUARMState *env, uint32_t val);
-/* FPCR, Floating Point Control Register
- * FPSR, Floating Poiht Status Register
+/*
+ * FPCR, Floating Point Control Register
+ * FPSR, Floating Point Status Register
*
- * For A64 the FPSCR is split into two logically distinct registers,
- * FPCR and FPSR. However since they still use non-overlapping bits
- * we store the underlying state in fpscr and just mask on read/write.
+ * For A64 floating point control and status bits are stored in
+ * two logically distinct registers, FPCR and FPSR. We store these
+ * in QEMU in vfp.fpcr and vfp.fpsr.
+ * For A32 there was only one register, FPSCR. The bits are arranged
+ * such that FPSCR bits map to FPCR or FPSR bits in the same bit positions,
+ * so we can use appropriate masking to handle FPSCR reads and writes.
+ * Note that the FPCR has some bits which are not visible in the
+ * AArch32 view (for FEAT_AFP). Writing the FPSCR leaves these unchanged.
*/
-#define FPSR_MASK 0xf800009f
-#define FPCR_MASK 0x07ff9f00
+/* FPCR bits */
+#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */
+#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */
+#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
#define FPCR_UFE (1 << 11) /* Underflow exception trap enable */
#define FPCR_IXE (1 << 12) /* Inexact exception trap enable */
+#define FPCR_EBF (1 << 13) /* Extended BFloat16 behaviors */
#define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */
+#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
+#define FPCR_STRIDE_MASK (3 << 20) /* Stride */
#define FPCR_RMODE_MASK (3 << 22) /* Rounding mode */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
#define FPCR_AHP (1 << 26) /* Alternative half-precision */
-#define FPCR_QC (1 << 27) /* Cumulative saturation bit */
-#define FPCR_V (1 << 28) /* FP overflow flag */
-#define FPCR_C (1 << 29) /* FP carry flag */
-#define FPCR_Z (1 << 30) /* FP zero flag */
-#define FPCR_N (1 << 31) /* FP negative flag */
#define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */
#define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT)
#define FPCR_LTPSIZE_LENGTH 3
-#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V)
-#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC)
+/* Cumulative exception trap enable bits */
+#define FPCR_EEXC_MASK (FPCR_IOE | FPCR_DZE | FPCR_OFE | FPCR_UFE | FPCR_IXE | FPCR_IDE)
+
+/* FPSR bits */
+#define FPSR_IOC (1 << 0) /* Invalid Operation cumulative exception */
+#define FPSR_DZC (1 << 1) /* Divide by Zero cumulative exception */
+#define FPSR_OFC (1 << 2) /* Overflow cumulative exception */
+#define FPSR_UFC (1 << 3) /* Underflow cumulative exception */
+#define FPSR_IXC (1 << 4) /* Inexact cumulative exception */
+#define FPSR_IDC (1 << 7) /* Input Denormal cumulative exception */
+#define FPSR_QC (1 << 27) /* Cumulative saturation bit */
+#define FPSR_V (1 << 28) /* FP overflow flag */
+#define FPSR_C (1 << 29) /* FP carry flag */
+#define FPSR_Z (1 << 30) /* FP zero flag */
+#define FPSR_N (1 << 31) /* FP negative flag */
+
+/* Cumulative exception status bits */
+#define FPSR_CEXC_MASK (FPSR_IOC | FPSR_DZC | FPSR_OFC | FPSR_UFC | FPSR_IXC | FPSR_IDC)
+
+#define FPSR_NZCV_MASK (FPSR_N | FPSR_Z | FPSR_C | FPSR_V)
+#define FPSR_NZCVQC_MASK (FPSR_NZCV_MASK | FPSR_QC)
+
+/* A32 FPSCR bits which architecturally map to FPSR bits */
+#define FPSCR_FPSR_MASK (FPSR_NZCVQC_MASK | FPSR_CEXC_MASK)
+/* A32 FPSCR bits which architecturally map to FPCR bits */
+#define FPSCR_FPCR_MASK (FPCR_EEXC_MASK | FPCR_LEN_MASK | FPCR_FZ16 | \
+ FPCR_STRIDE_MASK | FPCR_RMODE_MASK | \
+ FPCR_FZ | FPCR_DN | FPCR_AHP)
+/* These masks don't overlap: each bit lives in only one place */
+QEMU_BUILD_BUG_ON(FPSCR_FPSR_MASK & FPSCR_FPCR_MASK);
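Because the two masks cannot overlap, the A32 FPSCR view can be rebuilt
by masking and OR-ing. A minimal standalone sketch (mask values copied
from the definitions above; QEMU's real vfp_get_fpscr also folds in QC
and the accumulated softfloat exception flags):

    #include <stdint.h>
    #include <stdio.h>

    #define FPSCR_FPSR_MASK 0xf800009fu  /* NZCV, QC, cumulative exceptions */
    #define FPSCR_FPCR_MASK 0x07ff9f00u  /* traps, LEN/STRIDE, RMode, FZ/DN/AHP */

    static uint32_t fpscr_read(uint32_t fpcr, uint32_t fpsr)
    {
        return (fpsr & FPSCR_FPSR_MASK) | (fpcr & FPSCR_FPCR_MASK);
    }

    int main(void)
    {
        uint32_t fpcr = 1u << 24;         /* FPCR.FZ */
        uint32_t fpsr = (1u << 27) | 1u;  /* FPSR.QC | FPSR.IOC */
        printf("FPSCR = %#x\n", fpscr_read(fpcr, fpsr)); /* 0x9000001 */
        return 0;
    }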
-static inline uint32_t vfp_get_fpsr(CPUARMState *env)
-{
- return vfp_get_fpscr(env) & FPSR_MASK;
-}
+/**
+ * vfp_get_fpsr: read the AArch64 FPSR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPSR value
+ */
+uint32_t vfp_get_fpsr(CPUARMState *env);
-static inline void vfp_set_fpsr(CPUARMState *env, uint32_t val)
-{
- uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPSR_MASK) | (val & FPSR_MASK);
- vfp_set_fpscr(env, new_fpscr);
-}
+/**
+ * vfp_get_fpcr: read the AArch64 FPCR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPCR value
+ */
+uint32_t vfp_get_fpcr(CPUARMState *env);
-static inline uint32_t vfp_get_fpcr(CPUARMState *env)
-{
- return vfp_get_fpscr(env) & FPCR_MASK;
-}
+/**
+ * vfp_set_fpsr: write the AArch64 FPSR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpsr(CPUARMState *env, uint32_t value);
-static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val)
-{
- uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPCR_MASK) | (val & FPCR_MASK);
- vfp_set_fpscr(env, new_fpscr);
-}
+/**
+ * vfp_set_fpcr: write the AArch64 FPCR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpcr(CPUARMState *env, uint32_t value);
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
@@ -2299,6 +2366,8 @@ FIELD(DBGDEVID, DOUBLELOCK, 20, 4)
FIELD(DBGDEVID, AUXREGS, 24, 4)
FIELD(DBGDEVID, CIDMASK, 28, 4)
+FIELD(DBGDEVID1, PCSROFFSET, 0, 4)
+
FIELD(MVFR0, SIMDREG, 0, 4)
FIELD(MVFR0, FPSP, 4, 4)
FIELD(MVFR0, FPDP, 8, 4)
@@ -2510,6 +2579,11 @@ static inline bool arm_is_secure_below_el3(CPUARMState *env)
return false;
}
+static inline bool arm_is_el3_or_mon(CPUARMState *env)
+{
+ return false;
+}
+
static inline ARMSecuritySpace arm_security_space(CPUARMState *env)
{
return ARMSS_NonSecure;
@@ -2542,81 +2616,15 @@ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space);
uint64_t arm_hcr_el2_eff(CPUARMState *env);
uint64_t arm_hcrx_el2_eff(CPUARMState *env);
-/* Return true if the specified exception level is running in AArch64 state. */
-static inline bool arm_el_is_aa64(CPUARMState *env, int el)
-{
- /* This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
- * and if we're not in EL0 then the state of EL0 isn't well defined.)
- */
- assert(el >= 1 && el <= 3);
- bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
-
- /* The highest exception level is always at the maximum supported
- * register width, and then lower levels have a register width controlled
- * by bits in the SCR or HCR registers.
- */
- if (el == 3) {
- return aa64;
- }
-
- if (arm_feature(env, ARM_FEATURE_EL3) &&
- ((env->cp15.scr_el3 & SCR_NS) || !(env->cp15.scr_el3 & SCR_EEL2))) {
- aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
- }
-
- if (el == 2) {
- return aa64;
- }
-
- if (arm_is_el2_enabled(env)) {
- aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
- }
-
- return aa64;
-}
-
-/* Function for determining whether guest cp register reads and writes should
+/*
+ * Function for determining whether guest cp register reads and writes should
* access the secure or non-secure bank of a cp register. When EL3 is
* operating in AArch32 state, the NS-bit determines whether the secure
* instance of a cp register should be used. When EL3 is AArch64 (or if
* it doesn't exist at all) then there is no register banking, and all
* accesses are to the non-secure version.
*/
-static inline bool access_secure_reg(CPUARMState *env)
-{
- bool ret = (arm_feature(env, ARM_FEATURE_EL3) &&
- !arm_el_is_aa64(env, 3) &&
- !(env->cp15.scr_el3 & SCR_NS));
-
- return ret;
-}
-
-/* Macros for accessing a specified CP register bank */
-#define A32_BANKED_REG_GET(_env, _regname, _secure) \
- ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns)
-
-#define A32_BANKED_REG_SET(_env, _regname, _secure, _val) \
- do { \
- if (_secure) { \
- (_env)->cp15._regname##_s = (_val); \
- } else { \
- (_env)->cp15._regname##_ns = (_val); \
- } \
- } while (0)
-
-/* Macros for automatically accessing a specific CP register bank depending on
- * the current secure state of the system. These macros are not intended for
- * supporting instruction translation reads/writes as these are dependent
- * solely on the SCR.NS bit and not the mode.
- */
-#define A32_BANKED_CURRENT_REG_GET(_env, _regname) \
- A32_BANKED_REG_GET((_env), _regname, \
- (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)))
-
-#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val) \
- A32_BANKED_REG_SET((_env), _regname, \
- (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
- (_val))
+bool access_secure_reg(CPUARMState *env);
uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
uint32_t cur_el, bool secure);
@@ -2639,39 +2647,6 @@ static inline bool arm_v7m_is_handler_mode(CPUARMState *env)
return env->v7m.exception != 0;
}
-/* Return the current Exception Level (as per ARMv8; note that this differs
- * from the ARMv7 Privilege Level).
- */
-static inline int arm_current_el(CPUARMState *env)
-{
- if (arm_feature(env, ARM_FEATURE_M)) {
- return arm_v7m_is_handler_mode(env) ||
- !(env->v7m.control[env->v7m.secure] & 1);
- }
-
- if (is_a64(env)) {
- return extract32(env->pstate, 2, 2);
- }
-
- switch (env->uncached_cpsr & 0x1f) {
- case ARM_CPU_MODE_USR:
- return 0;
- case ARM_CPU_MODE_HYP:
- return 2;
- case ARM_CPU_MODE_MON:
- return 3;
- default:
- if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
- /* If EL3 is 32-bit then all secure privileged modes run in
- * EL3
- */
- return 3;
- }
-
- return 1;
- }
-}
-
/**
* write_list_to_cpustate
* @cpu: ARMCPU
@@ -2725,14 +2700,19 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* + NonSecure EL1 & 0 stage 2
* + NonSecure EL2
* + NonSecure EL2 & 0 (ARMv8.1-VHE)
- * + Secure EL1 & 0
- * + Secure EL3
+ * + Secure EL1 & 0 stage 1
+ * + Secure EL1 & 0 stage 2 (FEAT_SEL2)
+ * + Secure EL2 (FEAT_SEL2)
+ * + Secure EL2 & 0 (FEAT_SEL2)
+ * + Realm EL1 & 0 stage 1 (FEAT_RME)
+ * + Realm EL1 & 0 stage 2 (FEAT_RME)
+ * + Realm EL2 (FEAT_RME)
+ * + EL3
* If EL3 is 32-bit:
* + NonSecure PL1 & 0 stage 1
* + NonSecure PL1 & 0 stage 2
* + NonSecure PL2
- * + Secure PL0
- * + Secure PL1
+ * + Secure PL1 & 0
* (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.)
*
* For QEMU, an mmu_idx is not quite the same as a translation regime because:
@@ -2758,29 +2738,34 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
* table over and over.
* 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access
* Never (PAN) bit within PSTATE.
- * 7. we fold together the secure and non-secure regimes for A-profile,
+ * 7. we fold together most secure and non-secure regimes for A-profile,
* because there are no banked system registers for aarch64, so the
* process of switching between secure and non-secure is
* already heavyweight.
+ * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure,
+ * because both are in use simultaneously for Secure EL2.
*
* This gives us the following list of cases:
*
- * EL0 EL1&0 stage 1+2 (aka NS PL0)
- * EL1 EL1&0 stage 1+2 (aka NS PL1)
- * EL1 EL1&0 stage 1+2 +PAN
+ * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2)
+ * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2)
+ * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN)
* EL0 EL2&0
* EL2 EL2&0
* EL2 EL2&0 +PAN
* EL2 (aka NS PL2)
- * EL3 (aka S PL1)
- * Physical (NS & S)
- * Stage2 (NS & S)
+ * EL3 (aka AArch32 S PL1 PL1&0)
+ * AArch32 S PL0 PL1&0 (we call this EL30_0)
+ * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN)
+ * Stage2 Secure
+ * Stage2 NonSecure
+ * plus one TLB per Physical address space: S, NS, Realm, Root
*
- * for a total of 12 different mmu_idx.
+ * for a total of 16 different mmu_idx.
*
* R profile CPUs have an MPU, but can use the same set of MMU indexes
* as A profile. They only need to distinguish EL0 and EL1 (and
- * EL2 if we ever model a Cortex-R52).
+ * EL2 for cores like the Cortex-R52).
*
* M profile CPUs are rather different as they do not have a true MMU.
* They have the following different MMU indexes:
@@ -2840,6 +2825,8 @@ typedef enum ARMMMUIdx {
ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A,
ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A,
ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A,
/*
* Used for second stage of an S12 page table walk, or for descriptor
@@ -2847,14 +2834,14 @@ typedef enum ARMMMUIdx {
* are in use simultaneously for SecureEL2: the security state for
* the S2 ptw is selected by the NS bit from the S1 ptw.
*/
- ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A,
- ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A,
/* TLBs with 1-1 mapping to the physical address spaces. */
- ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A,
- ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A,
/*
* These are not allocated TLBs and are used only for AT system
@@ -2893,6 +2880,8 @@ typedef enum ARMMMUIdxBit {
TO_CORE_BIT(E20_2),
TO_CORE_BIT(E20_2_PAN),
TO_CORE_BIT(E3),
+ TO_CORE_BIT(E30_0),
+ TO_CORE_BIT(E30_3_PAN),
TO_CORE_BIT(Stage2),
TO_CORE_BIT(Stage2_S),
@@ -2958,60 +2947,15 @@ static inline bool arm_sctlr_b(CPUARMState *env)
uint64_t arm_sctlr(CPUARMState *env, int el);
-static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
- bool sctlr_b)
-{
-#ifdef CONFIG_USER_ONLY
- /*
- * In system mode, BE32 is modelled in line with the
- * architecture (as word-invariant big-endianness), where loads
- * and stores are done little endian but from addresses which
- * are adjusted by XORing with the appropriate constant. So the
- * endianness to use for the raw data access is not affected by
- * SCTLR.B.
- * In user mode, however, we model BE32 as byte-invariant
- * big-endianness (because user-only code cannot tell the
- * difference), and so we need to use a data access endianness
- * that depends on SCTLR.B.
- */
- if (sctlr_b) {
- return true;
- }
-#endif
- /* In 32bit endianness is determined by looking at CPSR's E bit */
- return env->uncached_cpsr & CPSR_E;
-}
-
-static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
-{
- return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
-}
-
-/* Return true if the processor is in big-endian mode. */
-static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
-{
- if (!is_a64(env)) {
- return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
- } else {
- int cur_el = arm_current_el(env);
- uint64_t sctlr = arm_sctlr(env, cur_el);
- return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
- }
-}
-
-#include "exec/cpu-all.h"
-
/*
* We have more than 32-bits worth of state per TB, so we split the data
* between tb->flags and tb->cs_base, which is otherwise unused for ARM.
* We collect these two parts in CPUARMTBFlags where they are named
* flags and flags2 respectively.
*
- * The flags that are shared between all execution modes, TBFLAG_ANY,
- * are stored in flags. The flags that are specific to a given mode
- * are stores in flags2. Since cs_base is sized on the configured
- * address size, flags2 always has 64-bits for A64, and a minimum of
- * 32-bits for A32 and M32.
+ * The flags that are shared between all execution modes, TBFLAG_ANY, are stored
+ * in flags. The flags that are specific to a given mode are stored in flags2.
+ * flags2 always has 64 bits, even though only 32 bits are used for A32 and M32.
*
* The bits for 32-bit A-profile and M-profile partially overlap:
*
@@ -3121,6 +3065,8 @@ FIELD(TBFLAG_A64, NV2, 34, 1)
FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
/* Set if FEAT_NV2 RAM accesses are big-endian */
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
+FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
+FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
/*
* Helpers for using the above. Note that only the A64 accessors use
@@ -3182,16 +3128,6 @@ static inline bool bswap_code(bool sctlr_b)
#endif
}
-#ifdef CONFIG_USER_ONLY
-static inline bool arm_cpu_bswap_data(CPUARMState *env)
-{
- return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
-}
-#endif
-
-void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
- uint64_t *cs_base, uint32_t *flags);
-
enum {
QEMU_PSCI_CONDUIT_DISABLED = 0,
QEMU_PSCI_CONDUIT_SMC = 1,
@@ -3289,34 +3225,4 @@ extern const uint64_t pred_esz_masks[5];
#define LOG2_TAG_GRANULE 4
#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
-#ifdef CONFIG_USER_ONLY
-#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1))
-#endif
-
-#ifdef TARGET_TAGGED_ADDRESSES
-/**
- * cpu_untagged_addr:
- * @cs: CPU context
- * @x: tagged address
- *
- * Remove any address tag from @x. This is explicitly related to the
- * linux syscall TIF_TAGGED_ADDR setting, not TBI in general.
- *
- * There should be a better place to put this, but we need this in
- * include/exec/cpu_ldst.h, and not some place linux-user specific.
- */
-static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- if (cpu->env.tagged_addr_enable) {
- /*
- * TBI is enabled for userspace but not kernelspace addresses.
- * Only clear the tag if bit 55 is clear.
- */
- x &= sextract64(x, 0, 56);
- }
- return x;
-}
-#endif
-
#endif
diff --git a/target/arm/cpu32-stubs.c b/target/arm/cpu32-stubs.c
new file mode 100644
index 0000000..81be44d
--- /dev/null
+++ b/target/arm/cpu32-stubs.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+#include "target/arm/cpu.h"
+#include "target/arm/internals.h"
+#include <glib.h>
+
+void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
+
+void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
+
+void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
+
+void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 262a1d6..1f34067 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -23,10 +23,11 @@
#include "cpu.h"
#include "cpregs.h"
#include "qemu/module.h"
-#include "sysemu/kvm.h"
-#include "sysemu/hvf.h"
-#include "sysemu/qtest.h"
-#include "sysemu/tcg.h"
+#include "qemu/units.h"
+#include "system/kvm.h"
+#include "system/hvf.h"
+#include "system/qtest.h"
+#include "system/tcg.h"
#include "kvm_arm.h"
#include "hvf_arm.h"
#include "qapi/visitor.h"
@@ -35,6 +36,28 @@
#include "cpu-features.h"
#include "cpregs.h"
+/* convert between <register>_IDX and SYS_<register> */
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
+ [NAME##_IDX] = SYS_##NAME,
+
+const uint32_t id_register_sysreg[NUM_ID_IDX] = {
+#include "cpu-sysregs.h.inc"
+};
+
+#undef DEF
+#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
+ case SYS_##NAME: return NAME##_IDX;
+
+int get_sysreg_idx(ARMSysRegs sysreg)
+{
+ switch (sysreg) {
+#include "cpu-sysregs.h.inc"
+ }
+ g_assert_not_reached();
+}
+
+#undef DEF
+
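This is the classic X-macro idiom: one description list (here the DEF()
entries in cpu-sysregs.h.inc) is expanded twice under different DEF
definitions to generate both directions of the mapping. A standalone
sketch with a hypothetical three-entry list:

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for cpu-sysregs.h.inc: each entry is X(name, encoding). */
    #define SYSREG_LIST(X) X(FOO, 0x10) X(BAR, 0x20) X(BAZ, 0x30)

    #define DEF(NAME, ENC) NAME##_IDX,
    enum { SYSREG_LIST(DEF) NUM_ID_IDX };
    #undef DEF

    /* Expansion 1: index -> encoding array. */
    #define DEF(NAME, ENC) [NAME##_IDX] = ENC,
    static const int id_register_sysreg[NUM_ID_IDX] = { SYSREG_LIST(DEF) };
    #undef DEF

    /* Expansion 2: encoding -> index switch. */
    #define DEF(NAME, ENC) case ENC: return NAME##_IDX;
    static int get_sysreg_idx(int enc)
    {
        switch (enc) {
        SYSREG_LIST(DEF)
        }
        abort();
    }
    #undef DEF

    int main(void)
    {
        printf("BAR: enc=%#x idx=%d\n", id_register_sysreg[BAR_IDX],
               get_sysreg_idx(0x20));
        return 0;
    }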
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
{
/*
@@ -113,7 +136,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* SVE is disabled and so are all vector lengths. Good.
* Disable all SVE extensions as well.
*/
- cpu->isar.id_aa64zfr0 = 0;
+ SET_IDREG(&cpu->isar, ID_AA64ZFR0, 0);
return;
}
@@ -287,16 +310,13 @@ static bool cpu_arm_get_sve(Object *obj, Error **errp)
static void cpu_arm_set_sve(Object *obj, bool value, Error **errp)
{
ARMCPU *cpu = ARM_CPU(obj);
- uint64_t t;
if (value && kvm_enabled() && !kvm_arm_sve_supported()) {
error_setg(errp, "'sve' feature not supported by KVM on this host");
return;
}
- t = cpu->isar.id_aa64pfr0;
- t = FIELD_DP64(t, ID_AA64PFR0, SVE, value);
- cpu->isar.id_aa64pfr0 = t;
+ FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, SVE, value);
}
void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp)
@@ -308,7 +328,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp)
if (vq_map == 0) {
if (!cpu_isar_feature(aa64_sme, cpu)) {
- cpu->isar.id_aa64smfr0 = 0;
+ SET_IDREG(&cpu->isar, ID_AA64SMFR0, 0);
return;
}
@@ -347,11 +367,8 @@ static bool cpu_arm_get_sme(Object *obj, Error **errp)
static void cpu_arm_set_sme(Object *obj, bool value, Error **errp)
{
ARMCPU *cpu = ARM_CPU(obj);
- uint64_t t;
- t = cpu->isar.id_aa64pfr1;
- t = FIELD_DP64(t, ID_AA64PFR1, SME, value);
- cpu->isar.id_aa64pfr1 = t;
+ FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR1, SME, value);
}
static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp)
@@ -364,11 +381,8 @@ static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp)
static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp)
{
ARMCPU *cpu = ARM_CPU(obj);
- uint64_t t;
- t = cpu->isar.id_aa64smfr0;
- t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value);
- cpu->isar.id_aa64smfr0 = t;
+ FIELD_DP64_IDREG(&cpu->isar, ID_AA64SMFR0, FA64, value);
}
#ifdef CONFIG_USER_ONLY
@@ -479,6 +493,7 @@ void aarch64_add_sme_properties(Object *obj)
void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
{
ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu);
+ ARMISARegisters *isar = &cpu->isar;
uint64_t isar1, isar2;
/*
@@ -489,13 +504,13 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
*
* Begin by disabling all fields.
*/
- isar1 = cpu->isar.id_aa64isar1;
+ isar1 = GET_IDREG(isar, ID_AA64ISAR1);
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0);
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0);
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0);
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0);
- isar2 = cpu->isar.id_aa64isar2;
+ isar2 = GET_IDREG(isar, ID_AA64ISAR2);
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0);
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0);
@@ -519,39 +534,56 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
}
if (cpu->prop_pauth) {
- if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) {
+ if ((cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) ||
+ (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma5) ||
+ (cpu->prop_pauth_qarma3 && cpu->prop_pauth_qarma5)) {
error_setg(errp,
- "cannot enable both pauth-impdef and pauth-qarma3");
+ "cannot enable pauth-impdef, pauth-qarma3 and "
+ "pauth-qarma5 at the same time");
return;
}
- if (cpu->prop_pauth_impdef) {
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
+ bool use_default = !cpu->prop_pauth_qarma5 &&
+ !cpu->prop_pauth_qarma3 &&
+ !cpu->prop_pauth_impdef;
+
+ if (cpu->prop_pauth_qarma5 ||
+ (use_default &&
+ cpu->backcompat_pauth_default_use_qarma5)) {
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
} else if (cpu->prop_pauth_qarma3) {
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features);
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1);
+ } else if (cpu->prop_pauth_impdef ||
+ (use_default &&
+ !cpu->backcompat_pauth_default_use_qarma5)) {
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
} else {
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
+ g_assert_not_reached();
}
- } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) {
- error_setg(errp, "cannot enable pauth-impdef or "
- "pauth-qarma3 without pauth");
+ } else if (cpu->prop_pauth_impdef ||
+ cpu->prop_pauth_qarma3 ||
+ cpu->prop_pauth_qarma5) {
+ error_setg(errp, "cannot enable pauth-impdef, pauth-qarma3 or "
+ "pauth-qarma5 without pauth");
error_append_hint(errp, "Add pauth=on to the CPU property list.\n");
}
}
- cpu->isar.id_aa64isar1 = isar1;
- cpu->isar.id_aa64isar2 = isar2;
+ SET_IDREG(isar, ID_AA64ISAR1, isar1);
+ SET_IDREG(isar, ID_AA64ISAR2, isar2);
}
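
[Editor's note] A minimal sketch of the algorithm selection the finalize logic above implements, with illustrative names (not QEMU API); the three booleans mirror the prop_pauth_* fields and backcompat_pauth_default_use_qarma5:

    #include <stdbool.h>

    typedef enum { PAUTH_QARMA5, PAUTH_QARMA3, PAUTH_IMPDEF } PauthAlgo;

    /* An explicit property wins; with none set, the back-compat flag
     * keeps QARMA5 for older machine types, else impdef is chosen. */
    static PauthAlgo pauth_algo(bool qarma5, bool qarma3, bool impdef,
                                bool backcompat_qarma5)
    {
        bool use_default = !qarma5 && !qarma3 && !impdef;

        if (qarma5 || (use_default && backcompat_qarma5)) {
            return PAUTH_QARMA5;    /* ID_AA64ISAR1.{APA,GPA} */
        }
        if (qarma3) {
            return PAUTH_QARMA3;    /* ID_AA64ISAR2.{APA3,GPA3} */
        }
        return PAUTH_IMPDEF;        /* ID_AA64ISAR1.{API,GPI} */
    }

On the command line this corresponds to e.g. "-cpu max,pauth=on,pauth-qarma3=on", which lands in the QARMA3 branch.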
-static Property arm_cpu_pauth_property =
+static const Property arm_cpu_pauth_property =
DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true);
-static Property arm_cpu_pauth_impdef_property =
+static const Property arm_cpu_pauth_impdef_property =
DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false);
-static Property arm_cpu_pauth_qarma3_property =
+static const Property arm_cpu_pauth_qarma3_property =
DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false);
+static const Property arm_cpu_pauth_qarma5_property =
+ DEFINE_PROP_BOOL("pauth-qarma5", ARMCPU, prop_pauth_qarma5, false);
void aarch64_add_pauth_properties(Object *obj)
{
@@ -572,6 +604,7 @@ void aarch64_add_pauth_properties(Object *obj)
} else {
qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property);
qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma5_property);
}
}
@@ -587,17 +620,18 @@ void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp)
return;
}
- t = cpu->isar.id_aa64mmfr0;
+ t = GET_IDREG(&cpu->isar, ID_AA64MMFR0);
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */
- cpu->isar.id_aa64mmfr0 = t;
+ SET_IDREG(&cpu->isar, ID_AA64MMFR0, t);
}
static void aarch64_a57_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a57";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -618,33 +652,36 @@ static void aarch64_a57_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x8444c004;
cpu->reset_sctlr = 0x00c50838;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x03010066;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x03010066);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10101105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x00011121;
- cpu->isar.id_isar6 = 0;
- cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->isar.id_aa64dfr0 = 0x10305106;
- cpu->isar.id_aa64isar0 = 0x00011120;
- cpu->isar.id_aa64mmfr0 = 0x00001124;
+ SET_IDREG(isar, ID_MMFR0, 0x10101105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x00011121);
+ SET_IDREG(isar, ID_ISAR6, 0);
+ SET_IDREG(isar, ID_AA64PFR0, 0x00002222);
+ SET_IDREG(isar, ID_AA64DFR0, 0x10305106);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x00011120);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x00001124);
cpu->isar.dbgdidr = 0x3516d000;
cpu->isar.dbgdevid = 0x01110f13;
cpu->isar.dbgdevid1 = 0x2;
cpu->isar.reset_pmcr_el0 = 0x41013000;
cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
- cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
+ /* 32KB L1 dcache */
+    cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 2, 64, 32 * KiB, 7);
+ /* 48KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 3, 64, 48 * KiB, 2);
+ /* 2048KB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 2 * MiB, 7);
cpu->dcz_blocksize = 4; /* 64 bytes */
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
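
[Editor's note] For reference, a sketch of the legacy-format packing that make_ccsidr() performs here, reconstructed from the CCSIDR field layout (flags [31:28], NumSets-1 [27:13], Associativity-1 [12:3], log2(words per line)-2 [2:0]); the helper body is assumed, but it reproduces the three A57 constants this hunk replaces:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t legacy_ccsidr(unsigned assoc, unsigned linesize,
                                  unsigned cachesize, unsigned flags)
    {
        unsigned sets = cachesize / (assoc * linesize);

        assert(cachesize % (assoc * linesize) == 0);
        return ((uint32_t)flags << 28) | ((sets - 1) << 13) |
               ((assoc - 1) << 3) | (__builtin_ctz(linesize) - 4);
    }

    int main(void)
    {
        assert(legacy_ccsidr(2, 64, 32 << 10, 7) == 0x701fe00a);  /* L1 D$ */
        assert(legacy_ccsidr(3, 64, 48 << 10, 2) == 0x201fe012);  /* L1 I$ */
        assert(legacy_ccsidr(16, 64, 2 << 20, 7) == 0x70ffe07a);  /* L2 */
        return 0;
    }

Note that preserving the old encoded values requires a 2-way L1 dcache for the A57 (and a 2-way L1 icache for the A53 below), matching the respective TRMs.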
@@ -656,6 +693,7 @@ static void aarch64_a57_initfn(Object *obj)
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a53";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -676,33 +714,36 @@ static void aarch64_a53_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x84448004; /* L1Ip = VIPT */
cpu->reset_sctlr = 0x00c50838;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x03010066;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x03010066);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10101105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x00011121;
- cpu->isar.id_isar6 = 0;
- cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->isar.id_aa64dfr0 = 0x10305106;
- cpu->isar.id_aa64isar0 = 0x00011120;
- cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
+ SET_IDREG(isar, ID_MMFR0, 0x10101105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x00011121);
+ SET_IDREG(isar, ID_ISAR6, 0);
+ SET_IDREG(isar, ID_AA64PFR0, 0x00002222);
+ SET_IDREG(isar, ID_AA64DFR0, 0x10305106);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x00011120);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x00001122); /* 40 bit physical addr */
cpu->isar.dbgdidr = 0x3516d000;
cpu->isar.dbgdevid = 0x00110f13;
cpu->isar.dbgdevid1 = 0x1;
cpu->isar.reset_pmcr_el0 = 0x41033000;
cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
- cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */
+ /* 32KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7);
+ /* 32KB L1 icache */
+    cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 2, 64, 32 * KiB, 2);
+ /* 1024KB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 1 * MiB, 7);
cpu->dcz_blocksize = 4; /* 64 bytes */
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
@@ -756,104 +797,12 @@ static const ARMCPUInfo aarch64_cpus[] = {
#endif
};
-static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- return arm_feature(&cpu->env, ARM_FEATURE_AARCH64);
-}
-
-static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- /* At this time, this property is only allowed if KVM is enabled. This
- * restriction allows us to avoid fixing up functionality that assumes a
- * uniform execution state like do_interrupt.
- */
- if (value == false) {
- if (!kvm_enabled() || !kvm_arm_aarch32_supported()) {
- error_setg(errp, "'aarch64' feature cannot be disabled "
- "unless KVM is enabled and 32-bit EL1 "
- "is supported");
- return;
- }
- unset_feature(&cpu->env, ARM_FEATURE_AARCH64);
- } else {
- set_feature(&cpu->env, ARM_FEATURE_AARCH64);
- }
-}
-
-static void aarch64_cpu_finalizefn(Object *obj)
-{
-}
-
-static const gchar *aarch64_gdb_arch_name(CPUState *cs)
-{
- return "aarch64";
-}
-
-static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
-{
- CPUClass *cc = CPU_CLASS(oc);
-
- cc->gdb_read_register = aarch64_cpu_gdb_read_register;
- cc->gdb_write_register = aarch64_cpu_gdb_write_register;
- cc->gdb_core_xml_file = "aarch64-core.xml";
- cc->gdb_arch_name = aarch64_gdb_arch_name;
-
- object_class_property_add_bool(oc, "aarch64", aarch64_cpu_get_aarch64,
- aarch64_cpu_set_aarch64);
- object_class_property_set_description(oc, "aarch64",
- "Set on/off to enable/disable aarch64 "
- "execution state ");
-}
-
-static void aarch64_cpu_instance_init(Object *obj)
-{
- ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
-
- acc->info->initfn(obj);
- arm_cpu_post_init(obj);
-}
-
-static void cpu_register_class_init(ObjectClass *oc, void *data)
-{
- ARMCPUClass *acc = ARM_CPU_CLASS(oc);
-
- acc->info = data;
-}
-
-void aarch64_cpu_register(const ARMCPUInfo *info)
-{
- TypeInfo type_info = {
- .parent = TYPE_AARCH64_CPU,
- .instance_init = aarch64_cpu_instance_init,
- .class_init = info->class_init ?: cpu_register_class_init,
- .class_data = (void *)info,
- };
-
- type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name);
- type_register(&type_info);
- g_free((void *)type_info.name);
-}
-
-static const TypeInfo aarch64_cpu_type_info = {
- .name = TYPE_AARCH64_CPU,
- .parent = TYPE_ARM_CPU,
- .instance_finalize = aarch64_cpu_finalizefn,
- .abstract = true,
- .class_init = aarch64_cpu_class_init,
-};
-
static void aarch64_cpu_register_types(void)
{
size_t i;
- type_register_static(&aarch64_cpu_type_info);
-
for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) {
- aarch64_cpu_register(&aarch64_cpus[i]);
+ arm_cpu_register(&aarch64_cpus[i]);
}
}
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 7d856ac..69fb1d0 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -11,9 +11,11 @@
#include "internals.h"
#include "cpu-features.h"
#include "cpregs.h"
-#include "exec/exec-all.h"
-#include "exec/helper-proto.h"
-#include "sysemu/tcg.h"
+#include "exec/watchpoint.h"
+#include "system/tcg.h"
+
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
#ifdef CONFIG_TCG
/* Return the Exception Level targeted by debug exceptions. */
@@ -378,7 +380,7 @@ bool arm_debug_check_breakpoint(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
- target_ulong pc;
+ vaddr pc;
int n;
/*
@@ -875,12 +877,13 @@ static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri,
(env->cp15.mdcr_el3 & MDCR_TDCC);
if (el < 1 && mdscr_el1_tdcc) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
+ if (!arm_is_el3_or_mon(env) &&
+ ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
return CP_ACCESS_TRAP_EL3;
}
return CP_ACCESS_OK;
@@ -1036,7 +1039,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
{ .name = "DBGVCR",
.cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tda,
- .type = ARM_CP_NOP },
+ .type = ARM_CP_CONST, .resetvalue = 0 },
/*
* Dummy MDCCINT_EL1, since we don't implement the Debug Communications
* Channel but Linux may try to access this register. The 32-bit
@@ -1045,7 +1048,7 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
{ .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
.cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tdcc,
- .type = ARM_CP_NOP },
+ .type = ARM_CP_CONST, .resetvalue = 0 },
/*
* Dummy DBGCLAIM registers.
* "The architecture does not define any functionality for the CLAIM tag bits.",
@@ -1074,7 +1077,8 @@ static const ARMCPRegInfo debug_aa32_el1_reginfo[] = {
{ .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0,
.access = PL2_RW, .accessfn = access_dbgvcr32,
- .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP },
+ .type = ARM_CP_CONST | ARM_CP_EL3_NO_EL2_KEEP,
+ .resetvalue = 0 },
};
static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index a3bb73c..ce4497a 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -21,7 +21,8 @@
#include "cpu.h"
#include "exec/gdbstub.h"
#include "gdbstub/helpers.h"
-#include "sysemu/tcg.h"
+#include "gdbstub/commands.h"
+#include "system/tcg.h"
#include "internals.h"
#include "cpu-features.h"
#include "cpregs.h"
@@ -43,6 +44,12 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
+#ifdef TARGET_AARCH64
+ if (arm_gdbstub_is_aarch64(cpu)) {
+ return aarch64_cpu_gdb_read_register(cs, mem_buf, n);
+ }
+#endif
+
if (n < 16) {
/* Core integer register. */
return gdb_get_reg32(mem_buf, env->regs[n]);
@@ -65,6 +72,12 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
CPUARMState *env = &cpu->env;
uint32_t tmp;
+#ifdef TARGET_AARCH64
+ if (arm_gdbstub_is_aarch64(cpu)) {
+ return aarch64_cpu_gdb_write_register(cs, mem_buf, n);
+ }
+#endif
+
tmp = ldl_p(mem_buf);
/*
@@ -474,6 +487,35 @@ static GDBFeature *arm_gen_dynamic_m_secextreg_feature(CPUState *cs,
#endif
#endif /* CONFIG_TCG */
+void arm_cpu_register_gdb_commands(ARMCPU *cpu)
+{
+ g_autoptr(GPtrArray) query_table = g_ptr_array_new();
+ g_autoptr(GPtrArray) set_table = g_ptr_array_new();
+ g_autoptr(GString) qsupported_features = g_string_new(NULL);
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ #ifdef TARGET_AARCH64
+ aarch64_cpu_register_gdb_commands(cpu, qsupported_features, query_table,
+ set_table);
+ #endif
+ }
+
+ /* Set arch-specific handlers for 'q' commands. */
+ if (query_table->len) {
+ gdb_extend_query_table(query_table);
+ }
+
+ /* Set arch-specific handlers for 'Q' commands. */
+ if (set_table->len) {
+ gdb_extend_set_table(set_table);
+ }
+
+ /* Set arch-specific qSupported feature. */
+ if (qsupported_features->len) {
+ gdb_extend_qsupported_features(qsupported_features->str);
+ }
+}
+
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
{
CPUState *cs = CPU(cpu);
@@ -507,6 +549,16 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
gdb_find_static_feature("aarch64-pauth.xml"),
0);
}
+
+#ifdef CONFIG_USER_ONLY
+ /* Memory Tagging Extension (MTE) 'tag_ctl' pseudo-register. */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ gdb_register_coprocessor(cs, aarch64_gdb_get_tag_ctl_reg,
+ aarch64_gdb_set_tag_ctl_reg,
+ gdb_find_static_feature("aarch64-mte.xml"),
+ 0);
+ }
+#endif
#endif
} else {
if (arm_feature(env, ARM_FEATURE_NEON)) {
diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
index caa31ff..64ee9b3 100644
--- a/target/arm/gdbstub64.c
+++ b/target/arm/gdbstub64.c
@@ -21,6 +21,16 @@
#include "cpu.h"
#include "internals.h"
#include "gdbstub/helpers.h"
+#include "gdbstub/commands.h"
+#include "tcg/mte_helper.h"
+#if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX)
+#include <sys/prctl.h>
+#include "mte_user_helper.h"
+#endif
+#ifdef CONFIG_TCG
+#include "accel/tcg/cpu-mmu-index.h"
+#include "exec/target_page.h"
+#endif
int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
@@ -381,3 +391,234 @@ GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg)
return &cpu->dyn_svereg_feature.desc;
}
+
+#ifdef CONFIG_USER_ONLY
+int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint64_t tcf0;
+
+ assert(reg == 0);
+
+ tcf0 = extract64(env->cp15.sctlr_el[1], 38, 2);
+
+ return gdb_get_reg64(buf, tcf0);
+}
+
+int aarch64_gdb_set_tag_ctl_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+#if defined(CONFIG_LINUX)
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ uint8_t tcf;
+
+ assert(reg == 0);
+
+ tcf = *buf << PR_MTE_TCF_SHIFT;
+
+ if (!tcf) {
+ return 0;
+ }
+
+ /*
+     * The 'tag_ctl' register is actually a "pseudo-register" provided
+     * by GDB so that the type of MTE fault raised can be controlled at
+     * runtime.
+ */
+ arm_set_mte_tcf0(env, tcf);
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+#endif /* CONFIG_USER_ONLY */
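
[Editor's note] A small sketch of the mapping the two handlers above implement, assuming the Linux UAPI constant PR_MTE_TCF_SHIFT == 1 (tag_ctl values 0/1/2 select none/sync/async tag-check faults; TCF0 is SCTLR_EL1[39:38]):

    #include <stdint.h>

    /* What the getter computes via extract64(sctlr_el[1], 38, 2). */
    static uint64_t tag_ctl_from_sctlr(uint64_t sctlr_el1)
    {
        return (sctlr_el1 >> 38) & 3;
    }

    /* What the setter hands to arm_set_mte_tcf0(): the GDB value
     * shifted into prctl(PR_SET_TAGGED_ADDR_CTRL) bit position. */
    static uint64_t tcf_bits_from_tag_ctl(uint8_t tag_ctl)
    {
        return (uint64_t)tag_ctl << 1;   /* PR_MTE_TCF_SHIFT */
    }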
+
+#ifdef CONFIG_TCG
+static void handle_q_memtag(GArray *params, void *user_ctx)
+{
+ ARMCPU *cpu = ARM_CPU(user_ctx);
+ CPUARMState *env = &cpu->env;
+ uint32_t mmu_index;
+
+ uint64_t addr = gdb_get_cmd_param(params, 0)->val_ull;
+ uint64_t len = gdb_get_cmd_param(params, 1)->val_ul;
+ int type = gdb_get_cmd_param(params, 2)->val_ul;
+
+ uint8_t *tags;
+ uint8_t addr_tag;
+
+ g_autoptr(GString) str_buf = g_string_new(NULL);
+
+ /*
+     * GDB does not query multiple tags for a memory range on remote
+     * targets, so gdbstub does not support that either.
+     */
+    if (len != 1) {
+        gdb_put_packet("E02");
+        return;
+    }
+
+    /* GDB never queries a tag different from an allocation tag (type 1). */
+    if (type != 1) {
+        gdb_put_packet("E03");
+        return;
+    }
+
+ /* Find out the current translation regime for probe. */
+ mmu_index = cpu_mmu_index(env_cpu(env), false);
+ /* Note that tags are packed here (2 tags packed in one byte). */
+ tags = allocation_tag_mem_probe(env, mmu_index, addr, MMU_DATA_LOAD, 1,
+ MMU_DATA_LOAD, true, 0);
+ if (!tags) {
+ /* Address is not in a tagged region. */
+ gdb_put_packet("E04");
+ return;
+ }
+
+ /* Unpack tag from byte. */
+ addr_tag = load_tag1(addr, tags);
+ g_string_printf(str_buf, "m%.2x", addr_tag);
+
+ gdb_put_packet(str_buf->str);
+}
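
[Editor's note] The probe returns packed tag memory: one 4-bit allocation tag per 16-byte TAG_GRANULE, two tags per byte. A sketch of the nibble selection that load_tag1() performs above (helper body assumed; bit 4 of the tagged address picks the nibble):

    #include <stdint.h>

    #define TAG_GRANULE 16

    static uint8_t unpack_tag(uint64_t addr, uint8_t packed)
    {
        return (packed >> ((addr & TAG_GRANULE) ? 4 : 0)) & 0xf;
    }
    /* unpack_tag(0x1000, 0x3a) == 0xa; unpack_tag(0x1010, 0x3a) == 0x3 */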
+
+static void handle_q_isaddresstagged(GArray *params, void *user_ctx)
+{
+ ARMCPU *cpu = ARM_CPU(user_ctx);
+ CPUARMState *env = &cpu->env;
+ uint32_t mmu_index;
+
+ uint64_t addr = gdb_get_cmd_param(params, 0)->val_ull;
+
+ uint8_t *tags;
+ const char *reply;
+
+ /* Find out the current translation regime for probe. */
+ mmu_index = cpu_mmu_index(env_cpu(env), false);
+ tags = allocation_tag_mem_probe(env, mmu_index, addr, MMU_DATA_LOAD, 1,
+ MMU_DATA_LOAD, true, 0);
+ reply = tags ? "01" : "00";
+
+ gdb_put_packet(reply);
+}
+
+static void handle_Q_memtag(GArray *params, void *user_ctx)
+{
+ ARMCPU *cpu = ARM_CPU(user_ctx);
+ CPUARMState *env = &cpu->env;
+ uint32_t mmu_index;
+
+ uint64_t start_addr = gdb_get_cmd_param(params, 0)->val_ull;
+ uint64_t len = gdb_get_cmd_param(params, 1)->val_ul;
+ int type = gdb_get_cmd_param(params, 2)->val_ul;
+ char const *new_tags_str = gdb_get_cmd_param(params, 3)->data;
+
+ uint64_t end_addr;
+
+ int num_new_tags;
+ uint8_t *tags;
+
+ g_autoptr(GByteArray) new_tags = g_byte_array_new();
+
+ /*
+ * Only the allocation tag (i.e. type 1) can be set at the stub side.
+ */
+ if (type != 1) {
+ gdb_put_packet("E02");
+ return;
+ }
+
+ end_addr = start_addr + (len - 1); /* 'len' is always >= 1 */
+ /* Check if request's memory range does not cross page boundaries. */
+ if ((start_addr ^ end_addr) & TARGET_PAGE_MASK) {
+ gdb_put_packet("E03");
+ return;
+ }
+
+ /*
+ * Get all tags in the page starting from the tag of the start address.
+ * Note that there are two tags packed into a single byte here.
+ */
+ /* Find out the current translation regime for probe. */
+ mmu_index = cpu_mmu_index(env_cpu(env), false);
+ tags = allocation_tag_mem_probe(env, mmu_index, start_addr, MMU_DATA_STORE,
+ 1, MMU_DATA_STORE, true, 0);
+ if (!tags) {
+ /* Address is not in a tagged region. */
+ gdb_put_packet("E04");
+ return;
+ }
+
+ /* Convert tags provided by GDB, 2 hex digits per tag. */
+ num_new_tags = strlen(new_tags_str) / 2;
+ gdb_hextomem(new_tags, new_tags_str, num_new_tags);
+
+ uint64_t address = start_addr;
+ int new_tag_index = 0;
+ while (address <= end_addr) {
+ uint8_t new_tag;
+ int packed_index;
+
+ /*
+         * Find the packed tag index from the unpacked tag index. There
+         * are two tags per packed byte (one tag per nibble).
+ */
+ packed_index = new_tag_index / 2;
+
+ new_tag = new_tags->data[new_tag_index % num_new_tags];
+ store_tag1(address, tags + packed_index, new_tag);
+
+ address += TAG_GRANULE;
+ new_tag_index++;
+ }
+
+ gdb_put_packet("OK");
+}
+
+enum Command {
+ qMemTags,
+ qIsAddressTagged,
+ QMemTags,
+ NUM_CMDS
+};
+
+static const GdbCmdParseEntry cmd_handler_table[NUM_CMDS] = {
+ [qMemTags] = {
+ .handler = handle_q_memtag,
+ .cmd_startswith = true,
+ .cmd = "MemTags:",
+ .schema = "L,l:l0",
+ .need_cpu_context = true
+ },
+ [qIsAddressTagged] = {
+ .handler = handle_q_isaddresstagged,
+ .cmd_startswith = true,
+ .cmd = "IsAddressTagged:",
+ .schema = "L0",
+ .need_cpu_context = true
+ },
+ [QMemTags] = {
+ .handler = handle_Q_memtag,
+ .cmd_startswith = true,
+ .cmd = "MemTags:",
+ .schema = "L,l:l:s0",
+ .need_cpu_context = true
+ },
+};
+#endif /* CONFIG_TCG */
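
[Editor's note] For reference, the wire traffic these table entries decode, following GDB's documented memory-tagging packets (addresses and tag values illustrative; reading the schema strings as "'L' = 64-bit hex field, 'l' = long hex field, 's' = string, each followed by its separator, '0' = end" is an assumption):

    -> qMemTags:ffff80001000,1:1
    <- m0a                               (allocation tag 0xa)
    -> QMemTags:ffff80001000,10:1:05
    <- OK                                (tag 0x5 on one 16-byte granule)
    -> qIsAddressTagged:ffff80001000
    <- 01                                (address is in a tagged region)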
+
+void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *qsupported,
+ GPtrArray *qtable, GPtrArray *stable)
+{
+ /* MTE */
+#ifdef CONFIG_TCG
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ g_string_append(qsupported, ";memory-tagging+");
+
+ g_ptr_array_add(qtable, (gpointer) &cmd_handler_table[qMemTags]);
+ g_ptr_array_add(qtable, (gpointer) &cmd_handler_table[qIsAddressTagged]);
+ g_ptr_array_add(stable, (gpointer) &cmd_handler_table[QMemTags]);
+ }
+#endif
+}
diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h
index b992941..d49c63c 100644
--- a/target/arm/gtimer.h
+++ b/target/arm/gtimer.h
@@ -10,12 +10,14 @@
#define TARGET_ARM_GTIMER_H
enum {
- GTIMER_PHYS = 0,
- GTIMER_VIRT = 1,
- GTIMER_HYP = 2,
- GTIMER_SEC = 3,
- GTIMER_HYPVIRT = 4,
-#define NUM_GTIMERS 5
+ GTIMER_PHYS = 0, /* CNTP_* ; EL1 physical timer */
+ GTIMER_VIRT = 1, /* CNTV_* ; EL1 virtual timer */
+ GTIMER_HYP = 2, /* CNTHP_* ; EL2 physical timer */
+ GTIMER_SEC = 3, /* CNTPS_* ; EL3 physical timer */
+ GTIMER_HYPVIRT = 4, /* CNTHV_* ; EL2 virtual timer ; only if FEAT_VHE */
+ GTIMER_S_EL2_PHYS = 5, /* CNTHPS_* ; only if FEAT_SEL2 */
+ GTIMER_S_EL2_VIRT = 6, /* CNTHVS_* ; only if FEAT_SEL2 */
+#define NUM_GTIMERS 7
};
#endif
diff --git a/target/arm/helper.c b/target/arm/helper.c
index ce31957..c311d2d 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -12,26 +12,32 @@
#include "cpu.h"
#include "internals.h"
#include "cpu-features.h"
-#include "exec/helper-proto.h"
+#include "exec/page-protection.h"
+#include "exec/mmap-lock.h"
#include "qemu/main-loop.h"
#include "qemu/timer.h"
#include "qemu/bitops.h"
-#include "qemu/crc32c.h"
#include "qemu/qemu-print.h"
-#include "exec/exec-all.h"
-#include <zlib.h> /* For crc32 */
+#include "exec/cputlb.h"
+#include "exec/translation-block.h"
#include "hw/irq.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/kvm.h"
-#include "sysemu/tcg.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
+#include "system/kvm.h"
+#include "system/tcg.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#ifdef CONFIG_TCG
+#include "accel/tcg/probe.h"
+#include "accel/tcg/getpc.h"
#include "semihosting/common-semi.h"
#endif
#include "cpregs.h"
#include "target/arm/gtimer.h"
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
+
#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
static void switch_mode(CPUARMState *env, int mode);
@@ -219,7 +225,7 @@ static void count_cpreg(gpointer key, gpointer opaque)
}
}
-static gint cpreg_key_compare(gconstpointer a, gconstpointer b)
+static gint cpreg_key_compare(gconstpointer a, gconstpointer b, gpointer d)
{
uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a);
uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b);
@@ -243,7 +249,7 @@ void init_cpreg_list(ARMCPU *cpu)
int arraylen;
keys = g_hash_table_get_keys(cpu->cp_regs);
- keys = g_list_sort(keys, cpreg_key_compare);
+ keys = g_list_sort_with_data(keys, cpreg_key_compare, NULL);
cpu->cpreg_array_len = 0;
@@ -285,7 +291,7 @@ static CPAccessResult access_el3_aa32ns(CPUARMState *env,
{
if (!is_a64(env) && arm_current_el(env) == 3 &&
arm_is_secure_below_el3(env)) {
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
return CP_ACCESS_OK;
}
@@ -310,7 +316,7 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env,
return CP_ACCESS_TRAP_EL3;
}
/* This will be EL1 NS and EL2 NS, which just UNDEF */
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
/*
@@ -365,40 +371,6 @@ static CPAccessResult access_tacr(CPUARMState *env, const ARMCPRegInfo *ri,
return CP_ACCESS_OK;
}
-/* Check for traps from EL1 due to HCR_EL2.TTLB. */
-static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
-{
- if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TTLB)) {
- return CP_ACCESS_TRAP_EL2;
- }
- return CP_ACCESS_OK;
-}
-
-/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */
-static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
-{
- if (arm_current_el(env) == 1 &&
- (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) {
- return CP_ACCESS_TRAP_EL2;
- }
- return CP_ACCESS_OK;
-}
-
-#ifdef TARGET_AARCH64
-/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */
-static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
-{
- if (arm_current_el(env) == 1 &&
- (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) {
- return CP_ACCESS_TRAP_EL2;
- }
- return CP_ACCESS_OK;
-}
-#endif
-
static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
ARMCPU *cpu = env_archcpu(env);
@@ -438,12 +410,15 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
raw_write(env, ri, value);
}
-static int alle1_tlbmask(CPUARMState *env)
+int alle1_tlbmask(CPUARMState *env)
{
/*
* Note that the 'ALL' scope must invalidate both stage 1 and
* stage 2 translations, whereas most other scopes only invalidate
* stage 1 translations.
+ *
+ * For AArch32 this is only used for TLBIALLNSNH and VTTBR
+ * writes, so only needs to apply to NS PL1&0, not S PL1&0.
*/
return (ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
@@ -452,174 +427,6 @@ static int alle1_tlbmask(CPUARMState *env)
ARMMMUIdxBit_Stage2_S);
}
-
-/* IS variants of TLB operations must affect all cores */
-static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
-}
-
-static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
-}
-
-/*
- * Non-IS variants of TLB operations are upgraded to
- * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
- * force broadcast of these operations.
- */
-static bool tlb_force_broadcast(CPUARMState *env)
-{
- return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
-}
-
-static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate all (TLBIALL) */
- CPUState *cs = env_cpu(env);
-
- if (tlb_force_broadcast(env)) {
- tlb_flush_all_cpus_synced(cs);
- } else {
- tlb_flush(cs);
- }
-}
-
-static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
- CPUState *cs = env_cpu(env);
-
- value &= TARGET_PAGE_MASK;
- if (tlb_force_broadcast(env)) {
- tlb_flush_page_all_cpus_synced(cs, value);
- } else {
- tlb_flush_page(cs, value);
- }
-}
-
-static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by ASID (TLBIASID) */
- CPUState *cs = env_cpu(env);
-
- if (tlb_force_broadcast(env)) {
- tlb_flush_all_cpus_synced(cs);
- } else {
- tlb_flush(cs);
- }
-}
-
-static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
- CPUState *cs = env_cpu(env);
-
- value &= TARGET_PAGE_MASK;
- if (tlb_force_broadcast(env)) {
- tlb_flush_page_all_cpus_synced(cs, value);
- } else {
- tlb_flush_page(cs, value);
- }
-}
-
-static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_by_mmuidx(cs, alle1_tlbmask(env));
-}
-
-static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env));
-}
-
-
-static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2);
-}
-
-static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2);
-}
-
-static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2);
-}
-
-static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_E2);
-}
-
-static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
-}
-
-static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2);
-}
-
static const ARMCPRegInfo cp_reginfo[] = {
/*
* Define the secure and non-secure FCSE identifier CP registers
@@ -729,22 +536,6 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
*/
{ .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
- /*
- * MMU TLB control. Note that the wildcarding means we cover not just
- * the unified TLB ops but also the dside/iside/inner-shareable variants.
- */
- { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY,
- .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write,
- .type = ARM_CP_NO_RAW },
- { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY,
- .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write,
- .type = ARM_CP_NO_RAW },
- { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY,
- .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write,
- .type = ARM_CP_NO_RAW },
- { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY,
- .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write,
- .type = ARM_CP_NO_RAW },
{ .name = "PRRR", .cp = 15, .crn = 10, .crm = 2,
.opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NOP },
{ .name = "NMRR", .cp = 15, .crn = 10, .crm = 2,
@@ -1096,7 +887,7 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
if (el < 2 && (mdcr_el2 & MDCR_TPM)) {
return CP_ACCESS_TRAP_EL2;
@@ -2113,7 +1904,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
.accessfn = pmreg_access,
.fgt = FGT_PMCNTEN,
- .writefn = pmcntenclr_write,
+ .writefn = pmcntenclr_write, .raw_writefn = raw_write,
.type = ARM_CP_ALIAS | ARM_CP_IO },
{ .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2,
@@ -2121,7 +1912,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.fgt = FGT_PMCNTEN,
.type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
- .writefn = pmcntenclr_write },
+ .writefn = pmcntenclr_write, .raw_writefn = raw_write },
{ .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
.access = PL0_RW, .type = ARM_CP_IO,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr),
@@ -2238,16 +2029,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
{ .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
.access = PL1_RW, .accessfn = access_tpm,
.fgt = FGT_PMINTEN,
- .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
- .writefn = pmintenclr_write, },
+ .writefn = pmintenclr_write, .raw_writefn = raw_write },
{ .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2,
.access = PL1_RW, .accessfn = access_tpm,
.fgt = FGT_PMINTEN,
- .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
- .writefn = pmintenclr_write },
+ .writefn = pmintenclr_write, .raw_writefn = raw_write },
{ .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
.access = PL1_R,
@@ -2328,55 +2119,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
.fgt = FGT_ISR_EL1,
.type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read },
- /* 32 bit ITLB invalidates */
- { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiall_write },
- { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimva_write },
- { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiasid_write },
- /* 32 bit DTLB invalidates */
- { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiall_write },
- { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimva_write },
- { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiasid_write },
- /* 32 bit TLB invalidates */
- { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiall_write },
- { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimva_write },
- { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbiasid_write },
- { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimvaa_write },
-};
-
-static const ARMCPRegInfo v7mp_cp_reginfo[] = {
- /* 32 bit TLB invalidates, Inner Shareable */
- { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbiall_is_write },
- { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbimva_is_write },
- { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbiasid_is_write },
- { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbimvaa_is_write },
};
static const ARMCPRegInfo pmovsset_cp_reginfo[] = {
@@ -2423,7 +2165,7 @@ static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (arm_current_el(env) == 0 && (env->teecr & 1)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
return teecr_access(env, ri, isread);
}
@@ -2503,14 +2245,14 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri,
cntkctl = env->cp15.c14_cntkctl;
}
if (!extract32(cntkctl, 0, 2)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
break;
case 1:
if (!isread && ri->state == ARM_CP_STATE_AA32 &&
arm_is_secure_below_el3(env)) {
/* Accesses from 32-bit Secure EL1 UNDEF (*not* trap to EL3!) */
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
break;
case 2:
@@ -2519,7 +2261,7 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri,
}
if (!isread && el < arm_highest_el(env)) {
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
return CP_ACCESS_OK;
@@ -2542,7 +2284,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
/* CNT[PV]CT: not visible from PL0 if EL0[PV]CTEN is zero */
if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
/* fall through */
case 1:
@@ -2583,7 +2325,7 @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx,
* EL0 if EL0[PV]TEN is zero.
*/
if (!extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
/* fall through */
@@ -2649,7 +2391,10 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
switch (arm_current_el(env)) {
case 1:
if (!arm_is_secure(env)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_UNDEFINED;
+ }
+ if (arm_is_el2_enabled(env)) {
+ return CP_ACCESS_UNDEFINED;
}
if (!(env->cp15.scr_el3 & SCR_ST)) {
return CP_ACCESS_TRAP_EL3;
@@ -2657,7 +2402,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
return CP_ACCESS_OK;
case 0:
case 2:
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_UNDEFINED;
case 3:
return CP_ACCESS_OK;
default:
@@ -2665,6 +2410,45 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
}
}
+static CPAccessResult gt_sel2timer_access(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+ /*
+     * The AArch64 register view of the secure EL2 timers is mostly
+     * accessible from EL3 and EL2, although accesses can also be
+     * trapped to EL2 from EL1 depending on the nested-virt config.
+ */
+ switch (arm_current_el(env)) {
+ case 0: /* UNDEFINED */
+ return CP_ACCESS_UNDEFINED;
+ case 1:
+ if (!arm_is_secure(env)) {
+ /* UNDEFINED */
+ return CP_ACCESS_UNDEFINED;
+ } else if (arm_hcr_el2_eff(env) & HCR_NV) {
+            /* AArch64.SystemAccessTrap(EL2, 0x18) */
+ return CP_ACCESS_TRAP_EL2;
+ }
+ /* UNDEFINED */
+ return CP_ACCESS_UNDEFINED;
+ case 2:
+ if (!arm_is_secure(env)) {
+ /* UNDEFINED */
+ return CP_ACCESS_UNDEFINED;
+ }
+ return CP_ACCESS_OK;
+ case 3:
+ if (env->cp15.scr_el3 & SCR_EEL2) {
+ return CP_ACCESS_OK;
+ } else {
+ return CP_ACCESS_UNDEFINED;
+ }
+ default:
+ g_assert_not_reached();
+ }
+}
+
uint64_t gt_get_countervalue(CPUARMState *env)
{
ARMCPU *cpu = env_archcpu(env);
@@ -2716,12 +2500,80 @@ static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env)
return 0;
}
-static uint64_t gt_phys_cnt_offset(CPUARMState *env)
+static uint64_t gt_indirect_access_timer_offset(CPUARMState *env, int timeridx)
{
- if (arm_current_el(env) >= 2) {
+ /*
+ * Return the timer offset to use for indirect accesses to the timer.
+ * This is the Offset value as defined in D12.2.4.1 "Operation of the
+ * CompareValue views of the timers".
+ *
+ * The condition here is not always the same as the condition for
+ * whether to apply an offset register when doing a direct read of
+ * the counter sysreg; those conditions are described in the
+ * access pseudocode for each counter register.
+ */
+ switch (timeridx) {
+ case GTIMER_PHYS:
+ return gt_phys_raw_cnt_offset(env);
+ case GTIMER_VIRT:
+ return env->cp15.cntvoff_el2;
+ case GTIMER_HYP:
+ case GTIMER_SEC:
+ case GTIMER_HYPVIRT:
+ case GTIMER_S_EL2_PHYS:
+ case GTIMER_S_EL2_VIRT:
return 0;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx)
+{
+ /*
+ * Return the timer offset to use for direct accesses to the
+ * counter registers CNTPCT and CNTVCT, and for direct accesses
+ * to the CNT*_TVAL registers.
+ *
+ * This isn't exactly the same as the indirect-access offset,
+ * because here we also care about what EL the register access
+ * is being made from.
+ *
+ * This corresponds to the access pseudocode for the registers.
+ */
+ uint64_t hcr;
+
+ switch (timeridx) {
+ case GTIMER_PHYS:
+ if (arm_current_el(env) >= 2) {
+ return 0;
+ }
+ return gt_phys_raw_cnt_offset(env);
+ case GTIMER_VIRT:
+ switch (arm_current_el(env)) {
+ case 2:
+ hcr = arm_hcr_el2_eff(env);
+ if (hcr & HCR_E2H) {
+ return 0;
+ }
+ break;
+ case 0:
+ hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ return 0;
+ }
+ break;
+ }
+ return env->cp15.cntvoff_el2;
+ case GTIMER_HYP:
+ case GTIMER_SEC:
+ case GTIMER_HYPVIRT:
+ case GTIMER_S_EL2_PHYS:
+ case GTIMER_S_EL2_VIRT:
+ return 0;
+ default:
+ g_assert_not_reached();
}
- return gt_phys_raw_cnt_offset(env);
}
static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
@@ -2733,8 +2585,7 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
* Timer enabled: calculate and set current ISTATUS, irq, and
* reset timer to when ISTATUS next has to change
*/
- uint64_t offset = timeridx == GTIMER_VIRT ?
- cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env);
+ uint64_t offset = gt_indirect_access_timer_offset(&cpu->env, timeridx);
uint64_t count = gt_get_countervalue(&cpu->env);
/* Note that this must be unsigned 64 bit arithmetic: */
int istatus = count - offset >= gt->cval;
@@ -2797,34 +2648,14 @@ static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri,
static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- return gt_get_countervalue(env) - gt_phys_cnt_offset(env);
-}
-
-uint64_t gt_virt_cnt_offset(CPUARMState *env)
-{
- uint64_t hcr;
-
- switch (arm_current_el(env)) {
- case 2:
- hcr = arm_hcr_el2_eff(env);
- if (hcr & HCR_E2H) {
- return 0;
- }
- break;
- case 0:
- hcr = arm_hcr_el2_eff(env);
- if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
- return 0;
- }
- break;
- }
-
- return env->cp15.cntvoff_el2;
+ uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_PHYS);
+ return gt_get_countervalue(env) - offset;
}
static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- return gt_get_countervalue(env) - gt_virt_cnt_offset(env);
+ uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT);
+ return gt_get_countervalue(env) - offset;
}
static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2836,47 +2667,38 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
gt_recalc_timer(env_archcpu(env), timeridx);
}
-static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
- int timeridx)
+static uint64_t do_tval_read(CPUARMState *env, int timeridx, uint64_t offset)
{
- uint64_t offset = 0;
-
- switch (timeridx) {
- case GTIMER_VIRT:
- case GTIMER_HYPVIRT:
- offset = gt_virt_cnt_offset(env);
- break;
- case GTIMER_PHYS:
- offset = gt_phys_cnt_offset(env);
- break;
- }
-
return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
(gt_get_countervalue(env) - offset));
}
-static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
- int timeridx,
- uint64_t value)
+static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
+ int timeridx)
{
- uint64_t offset = 0;
+ uint64_t offset = gt_direct_access_timer_offset(env, timeridx);
- switch (timeridx) {
- case GTIMER_VIRT:
- case GTIMER_HYPVIRT:
- offset = gt_virt_cnt_offset(env);
- break;
- case GTIMER_PHYS:
- offset = gt_phys_cnt_offset(env);
- break;
- }
+ return do_tval_read(env, timeridx, offset);
+}
+static void do_tval_write(CPUARMState *env, int timeridx, uint64_t value,
+ uint64_t offset)
+{
trace_arm_gt_tval_write(timeridx, value);
env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset +
sextract64(value, 0, 32);
gt_recalc_timer(env_archcpu(env), timeridx);
}
+static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ int timeridx,
+ uint64_t value)
+{
+ uint64_t offset = gt_direct_access_timer_offset(env, timeridx);
+
+ do_tval_write(env, timeridx, value, offset);
+}
+
static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx,
uint64_t value)
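
[Editor's note] A minimal sketch of the TVAL arithmetic that do_tval_read()/do_tval_write() implement above: TVAL is a signed 32-bit downcounter view of the 64-bit comparator.

    #include <stdint.h>

    static uint32_t tval_read(uint64_t cval, uint64_t cnt, uint64_t offset)
    {
        return (uint32_t)(cval - (cnt - offset));
    }

    /* Writing TVAL re-anchors CVAL so the timer fires TVAL ticks from
     * "now" (sign-extended, so a negative TVAL is already expired). */
    static uint64_t cval_for_tval_write(uint32_t tval, uint64_t cnt,
                                        uint64_t offset)
    {
        return cnt - offset + (int64_t)(int32_t)tval;
    }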
@@ -3006,13 +2828,21 @@ static void gt_virt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
static uint64_t gt_virt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- return gt_tval_read(env, ri, GTIMER_VIRT);
+ /*
+ * This is CNTV_TVAL_EL02; unlike the underlying CNTV_TVAL_EL0
+ * we always apply CNTVOFF_EL2. Special case that here rather
+ * than going into the generic gt_tval_read() and then having
+ * to re-detect that it's this register.
+ * Note that the accessfn/perms mean we know we're at EL2 or EL3 here.
+ */
+ return do_tval_read(env, GTIMER_VIRT, env->cp15.cntvoff_el2);
}
static void gt_virt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- gt_tval_write(env, ri, GTIMER_VIRT, value);
+ /* Similarly for writes to CNTV_TVAL_EL02 */
+ do_tval_write(env, GTIMER_VIRT, value, env->cp15.cntvoff_el2);
}
static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3172,6 +3002,62 @@ static void gt_sec_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
gt_ctl_write(env, ri, GTIMER_SEC, value);
}
+static void gt_sec_pel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ gt_timer_reset(env, ri, GTIMER_S_EL2_PHYS);
+}
+
+static void gt_sec_pel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_cval_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static uint64_t gt_sec_pel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return gt_tval_read(env, ri, GTIMER_S_EL2_PHYS);
+}
+
+static void gt_sec_pel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_tval_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static void gt_sec_pel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_ctl_write(env, ri, GTIMER_S_EL2_PHYS, value);
+}
+
+static void gt_sec_vel2_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ gt_timer_reset(env, ri, GTIMER_S_EL2_VIRT);
+}
+
+static void gt_sec_vel2_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_cval_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
+static uint64_t gt_sec_vel2_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return gt_tval_read(env, ri, GTIMER_S_EL2_VIRT);
+}
+
+static void gt_sec_vel2_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_tval_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
+static void gt_sec_vel2_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_ctl_write(env, ri, GTIMER_S_EL2_VIRT, value);
+}
+
static void gt_hv_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
gt_timer_reset(env, ri, GTIMER_HYPVIRT);
@@ -3228,6 +3114,20 @@ void arm_gt_stimer_cb(void *opaque)
gt_recalc_timer(cpu, GTIMER_SEC);
}
+void arm_gt_sel2timer_cb(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ gt_recalc_timer(cpu, GTIMER_S_EL2_PHYS);
+}
+
+void arm_gt_sel2vtimer_cb(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ gt_recalc_timer(cpu, GTIMER_S_EL2_VIRT);
+}
+
void arm_gt_hvtimer_cb(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -3568,7 +3468,7 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri,
}
return CP_ACCESS_TRAP_EL3;
}
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
}
return CP_ACCESS_OK;
@@ -3599,11 +3499,12 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
GetPhysAddrResult res = {};
/*
- * I_MXTJT: Granule protection checks are not performed on the final address
- * of a successful translation.
+ * I_MXTJT: Granule protection checks are not performed on the final
+     * address of a successful translation. This is a translation, not a
+ * memory reference, so "memop = none = 0".
*/
- ret = get_phys_addr_with_space_nogpc(env, value, access_type, mmu_idx, ss,
- &res, &fi);
+ ret = get_phys_addr_with_space_nogpc(env, value, access_type, 0,
+ mmu_idx, ss, &res, &fi);
/*
* ATS operations only do S1 or S1+S2 translations, so we never
@@ -3775,7 +3676,11 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
/* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */
switch (el) {
case 3:
- mmu_idx = ARMMMUIdx_E3;
+ if (ri->crm == 9 && arm_pan_enabled(env)) {
+ mmu_idx = ARMMMUIdx_E30_3_PAN;
+ } else {
+ mmu_idx = ARMMMUIdx_E3;
+ }
break;
case 2:
g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
@@ -3795,7 +3700,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
/* stage 1 current state PL0: ATS1CUR, ATS1CUW */
switch (el) {
case 3:
- mmu_idx = ARMMMUIdx_E10_0;
+ mmu_idx = ARMMMUIdx_E30_0;
break;
case 2:
g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
@@ -3860,7 +3765,7 @@ static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri,
* scr_write() ensures that the NSE bit is not set otherwise.
*/
if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_UNDEFINED;
}
return CP_ACCESS_OK;
}
@@ -3870,7 +3775,7 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri,
{
if (arm_current_el(env) == 3 &&
!(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_UNDEFINED;
}
return at_e012_access(env, ri, isread);
}
@@ -4758,7 +4663,7 @@ static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
return CP_ACCESS_OK;
}
@@ -4848,9 +4753,9 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
/* Cache invalidate/clean to Point of Coherency or Persistence... */
switch (arm_current_el(env)) {
case 0:
- /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */
+ /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set. */
if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
/* fall through */
case 1:
@@ -4868,9 +4773,9 @@ static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags)
/* Cache invalidate/clean to Point of Unification... */
switch (arm_current_el(env)) {
case 0:
- /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */
+ /* ... EL0 must trap to EL1 unless SCTLR_EL1.UCI is set. */
if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
/* fall through */
case 1:
@@ -4895,489 +4800,6 @@ static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri,
return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU);
}
-/*
- * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
- * Page D4-1736 (DDI0487A.b)
- */
-
-static int vae1_tlbmask(CPUARMState *env)
-{
- uint64_t hcr = arm_hcr_el2_eff(env);
- uint16_t mask;
-
- if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
- mask = ARMMMUIdxBit_E20_2 |
- ARMMMUIdxBit_E20_2_PAN |
- ARMMMUIdxBit_E20_0;
- } else {
- mask = ARMMMUIdxBit_E10_1 |
- ARMMMUIdxBit_E10_1_PAN |
- ARMMMUIdxBit_E10_0;
- }
- return mask;
-}
-
-static int vae2_tlbmask(CPUARMState *env)
-{
- uint64_t hcr = arm_hcr_el2_eff(env);
- uint16_t mask;
-
- if (hcr & HCR_E2H) {
- mask = ARMMMUIdxBit_E20_2 |
- ARMMMUIdxBit_E20_2_PAN |
- ARMMMUIdxBit_E20_0;
- } else {
- mask = ARMMMUIdxBit_E2;
- }
- return mask;
-}
-
-/* Return 56 if TBI is enabled, 64 otherwise. */
-static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx,
- uint64_t addr)
-{
- uint64_t tcr = regime_tcr(env, mmu_idx);
- int tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
- int select = extract64(addr, 55, 1);
-
- return (tbi >> select) & 1 ? 56 : 64;
-}
-
-static int vae1_tlbbits(CPUARMState *env, uint64_t addr)
-{
- uint64_t hcr = arm_hcr_el2_eff(env);
- ARMMMUIdx mmu_idx;
-
- /* Only the regime of the mmu_idx below is significant. */
- if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
- mmu_idx = ARMMMUIdx_E20_0;
- } else {
- mmu_idx = ARMMMUIdx_E10_0;
- }
-
- return tlbbits_for_regime(env, mmu_idx, addr);
-}
-
-static int vae2_tlbbits(CPUARMState *env, uint64_t addr)
-{
- uint64_t hcr = arm_hcr_el2_eff(env);
- ARMMMUIdx mmu_idx;
-
- /*
- * Only the regime of the mmu_idx below is significant.
- * Regime EL2&0 has two ranges with separate TBI configuration, while EL2
- * only has one.
- */
- if (hcr & HCR_E2H) {
- mmu_idx = ARMMMUIdx_E20_2;
- } else {
- mmu_idx = ARMMMUIdx_E2;
- }
-
- return tlbbits_for_regime(env, mmu_idx, addr);
-}
-
-static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = vae1_tlbmask(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
-}
-
-static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = vae1_tlbmask(env);
-
- if (tlb_force_broadcast(env)) {
- tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
- } else {
- tlb_flush_by_mmuidx(cs, mask);
- }
-}
-
-static int e2_tlbmask(CPUARMState *env)
-{
- return (ARMMMUIdxBit_E20_0 |
- ARMMMUIdxBit_E20_2 |
- ARMMMUIdxBit_E20_2_PAN |
- ARMMMUIdxBit_E2);
-}
-
-static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = alle1_tlbmask(env);
-
- tlb_flush_by_mmuidx(cs, mask);
-}
-
-static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = e2_tlbmask(env);
-
- tlb_flush_by_mmuidx(cs, mask);
-}
-
-static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- ARMCPU *cpu = env_archcpu(env);
- CPUState *cs = CPU(cpu);
-
- tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3);
-}
-
-static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = alle1_tlbmask(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
-}
-
-static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = e2_tlbmask(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
-}
-
-static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3);
-}
-
-static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA, EL2
- * Currently handles both VAE2 and VALE2, since we don't support
- * flush-last-level-only.
- */
- CPUState *cs = env_cpu(env);
- int mask = vae2_tlbmask(env);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
- int bits = vae2_tlbbits(env, pageaddr);
-
- tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
-}
-
-static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA, EL3
- * Currently handles both VAE3 and VALE3, since we don't support
- * flush-last-level-only.
- */
- ARMCPU *cpu = env_archcpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3);
-}
-
-static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = vae1_tlbmask(env);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
- int bits = vae1_tlbbits(env, pageaddr);
-
- tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
-}
-
-static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA, EL1&0 (AArch64 version).
- * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
- * since we don't support flush-for-specific-ASID-only or
- * flush-last-level-only.
- */
- CPUState *cs = env_cpu(env);
- int mask = vae1_tlbmask(env);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
- int bits = vae1_tlbbits(env, pageaddr);
-
- if (tlb_force_broadcast(env)) {
- tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
- } else {
- tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
- }
-}
-
-static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = vae2_tlbmask(env);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
- int bits = vae2_tlbbits(env, pageaddr);
-
- tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
-}
-
-static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
- int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr);
-
- tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_E3, bits);
-}
-
-static int ipas2e1_tlbmask(CPUARMState *env, int64_t value)
-{
- /*
- * The MSB of value is the NS field, which only applies if SEL2
- * is implemented and SCR_EL3.NS is not set (i.e. in secure mode).
- */
- return (value >= 0
- && cpu_isar_feature(aa64_sel2, env_archcpu(env))
- && arm_is_secure_below_el3(env)
- ? ARMMMUIdxBit_Stage2_S
- : ARMMMUIdxBit_Stage2);
-}
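
An illustration of the sign test above: the TLBI IPAS2* payload carries the NS field in bit 63, so casting the register value to a signed 64-bit type makes "NS is zero" the same as "value is non-negative". A standalone sketch:

#include <stdbool.h>
#include <stdint.h>

static bool ipas2_ns_is_zero(uint64_t raw)
{
    return (int64_t)raw >= 0;   /* true iff bit 63 is clear */
}
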
-
-static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = ipas2e1_tlbmask(env, value);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
- if (tlb_force_broadcast(env)) {
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
- } else {
- tlb_flush_page_by_mmuidx(cs, pageaddr, mask);
- }
-}
-
-static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
- int mask = ipas2e1_tlbmask(env, value);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
-}
-
-#ifdef TARGET_AARCH64
-typedef struct {
- uint64_t base;
- uint64_t length;
-} TLBIRange;
-
-static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg)
-{
- /*
- * Note that the TLBI range TG field encoding differs from both
- * TG0 and TG1 encodings.
- */
- switch (tg) {
- case 1:
- return Gran4K;
- case 2:
- return Gran16K;
- case 3:
- return Gran64K;
- default:
- return GranInvalid;
- }
-}
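
For reference, a standalone sketch of the page shifts these granules decode to, taking the TLBI TG encoding directly (this mirrors what arm_granule_bits() is assumed to return: 4K, 16K and 64K pages are 2^12, 2^14 and 2^16 bytes):

static unsigned tg_page_shift(int tg)   /* TLBI range TG field, as above */
{
    switch (tg) {
    case 1: return 12;   /* Gran4K  */
    case 2: return 14;   /* Gran16K */
    case 3: return 16;   /* Gran64K */
    default: return 0;   /* GranInvalid */
    }
}
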
-
-static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
- uint64_t value)
-{
- unsigned int page_size_granule, page_shift, num, scale, exponent;
- /* Extract one bit to represent the va selector in use. */
- uint64_t select = sextract64(value, 36, 1);
- ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false);
- TLBIRange ret = { };
- ARMGranuleSize gran;
-
- page_size_granule = extract64(value, 46, 2);
- gran = tlbi_range_tg_to_gran_size(page_size_granule);
-
- /* The granule encoded in value must match the granule in use. */
- if (gran != param.gran) {
- qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n",
- page_size_granule);
- return ret;
- }
-
- page_shift = arm_granule_bits(gran);
- num = extract64(value, 39, 5);
- scale = extract64(value, 44, 2);
- exponent = (5 * scale) + 1;
-
- ret.length = (num + 1) << (exponent + page_shift);
-
- if (param.select) {
- ret.base = sextract64(value, 0, 37);
- } else {
- ret.base = extract64(value, 0, 37);
- }
- if (param.ds) {
- /*
- * With DS=1, BaseADDR is always shifted 16 so that it is able
- * to address all 52 va bits. The input address is perforce
- * aligned on a 64k boundary regardless of translation granule.
- */
- page_shift = 16;
- }
- ret.base <<= page_shift;
-
- return ret;
-}
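
A worked instance of the deleted range arithmetic, restated as a standalone helper with the same formula (exponent = 5*SCALE + 1): with a 4K granule (page_shift == 12), NUM == 31 and SCALE == 0, the range length is (31 + 1) << (1 + 12) = 256 KiB, i.e. 64 pages.

#include <stdint.h>

static uint64_t tlbi_range_length(unsigned num, unsigned scale,
                                  unsigned page_shift)
{
    unsigned exponent = (5 * scale) + 1;
    return (uint64_t)(num + 1) << (exponent + page_shift);
}
/* tlbi_range_length(31, 0, 12) == 0x40000, i.e. 256 KiB */
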
-
-static void do_rvae_write(CPUARMState *env, uint64_t value,
- int idxmap, bool synced)
-{
- ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap);
- TLBIRange range;
- int bits;
-
- range = tlbi_aa64_get_range(env, one_idx, value);
- bits = tlbbits_for_regime(env, one_idx, range.base);
-
- if (synced) {
- tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env),
- range.base,
- range.length,
- idxmap,
- bits);
- } else {
- tlb_flush_range_by_mmuidx(env_cpu(env), range.base,
- range.length, idxmap, bits);
- }
-}
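
The ctz32() step above derives the TBI parameters from one representative regime, taken as the lowest set bit of the mmu-index bitmap. A minimal equivalent using the GCC builtin (QEMU's ctz32() is assumed to behave the same for non-zero input):

#include <stdint.h>

static int lowest_set_index(uint32_t idxmap)
{
    /* caller guarantees idxmap != 0 */
    return __builtin_ctz(idxmap);
}
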
-
-static void tlbi_aa64_rvae1_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, EL1&0.
- * Currently handles all of RVAE1, RVAAE1, RVAALE1 and RVALE1,
- * since we don't support flush-for-specific-ASID-only or
- * flush-last-level-only.
- */
-
- do_rvae_write(env, value, vae1_tlbmask(env),
- tlb_force_broadcast(env));
-}
-
-static void tlbi_aa64_rvae1is_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, Inner/Outer Shareable EL1&0.
- * Currently handles all of RVAE1IS, RVAE1OS, RVAAE1IS, RVAAE1OS,
- * RVAALE1IS, RVAALE1OS, RVALE1IS and RVALE1OS, since we don't support
- * flush-for-specific-ASID-only, flush-last-level-only or inner/outer
- * shareable specific flushes.
- */
-
- do_rvae_write(env, value, vae1_tlbmask(env), true);
-}
-
-static void tlbi_aa64_rvae2_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, EL2.
- * Currently handles all of RVAE2 and RVALE2,
- * since we don't support flush-for-specific-ASID-only or
- * flush-last-level-only.
- */
-
- do_rvae_write(env, value, vae2_tlbmask(env),
- tlb_force_broadcast(env));
-
-
-}
-
-static void tlbi_aa64_rvae2is_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, Inner/Outer Shareable, EL2.
- * Currently handles all of RVAE2IS, RVAE2OS, RVALE2IS and RVALE2OS,
- * since we don't support flush-for-specific-ASID-only,
- * flush-last-level-only or inner/outer shareable specific flushes.
- */
-
- do_rvae_write(env, value, vae2_tlbmask(env), true);
-
-}
-
-static void tlbi_aa64_rvae3_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, EL3.
- * Currently handles all of RVAE3 and RVALE3,
- * since we don't support flush-for-specific-ASID-only or
- * flush-last-level-only.
- */
-
- do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env));
-}
-
-static void tlbi_aa64_rvae3is_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /*
- * Invalidate by VA range, EL3, Inner/Outer Shareable.
- * Currently handles all of RVAE3IS, RVAE3OS, RVALE3IS and RVALE3OS,
- * since we don't support flush-for-specific-ASID-only,
- * flush-last-level-only or inner/outer specific flushes.
- */
-
- do_rvae_write(env, value, ARMMMUIdxBit_E3, true);
-}
-
-static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- do_rvae_write(env, value, ipas2e1_tlbmask(env, value),
- tlb_force_broadcast(env));
-}
-
-static void tlbi_aa64_ripas2e1is_write(CPUARMState *env,
- const ARMCPRegInfo *ri,
- uint64_t value)
-{
- do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true);
-}
-#endif
-
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -5393,7 +4815,7 @@ static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
}
} else {
if (!(env->cp15.sctlr_el[1] & SCTLR_DZE)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
if (hcr & HCR_TDZ) {
return CP_ACCESS_TRAP_EL2;
@@ -5426,7 +4848,7 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
* Access to SP_EL0 is undefined if it's being used as
* the stack pointer.
*/
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
return CP_ACCESS_OK;
}
@@ -5568,7 +4990,7 @@ static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri,
mmap_lock();
- tb_invalidate_phys_range(start_address, end_address);
+ tb_invalidate_phys_range(env_cpu(env), start_address, end_address);
mmap_unlock();
}
@@ -5590,7 +5012,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
- .access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
+ .access = PL0_RW, .type = ARM_CP_FPU,
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
@@ -5672,99 +5094,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
.fgt = FGT_DCCISW,
.access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
- /* TLBI operations */
- { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVMALLE1IS,
- .writefn = tlbi_aa64_vmalle1is_write },
- { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAE1IS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIASIDE1IS,
- .writefn = tlbi_aa64_vmalle1is_write },
- { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAAE1IS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVALE1IS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAALE1IS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVMALLE1,
- .writefn = tlbi_aa64_vmalle1_write },
- { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAE1,
- .writefn = tlbi_aa64_vae1_write },
- { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIASIDE1,
- .writefn = tlbi_aa64_vmalle1_write },
- { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAAE1,
- .writefn = tlbi_aa64_vae1_write },
- { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVALE1,
- .writefn = tlbi_aa64_vae1_write },
- { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAALE1,
- .writefn = tlbi_aa64_vae1_write },
- { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
- { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1is_write },
- { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1is_write },
- { .name = "TLBI_VMALLS12E1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1is_write },
- { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
- { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ipas2e1_write },
- { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1_write },
- { .name = "TLBI_VMALLS12E1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1is_write },
#ifndef CONFIG_USER_ONLY
/* 64 bit address translation operations */
{ .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64,
@@ -5820,42 +5149,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.par_el[1]),
.writefn = par_write },
#endif
- /* TLB invalidate last level of translation table walk */
- { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbimva_is_write },
- { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
- .writefn = tlbimvaa_is_write },
- { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimva_write },
- { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
- .writefn = tlbimvaa_write },
- { .name = "TLBIMVALH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbimva_hyp_write },
- { .name = "TLBIMVALHIS",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbimva_hyp_is_write },
- { .name = "TLBIIPAS2",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_hyp_write },
- { .name = "TLBIIPAS2IS",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2is_hyp_write },
- { .name = "TLBIIPAS2L",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_hyp_write },
- { .name = "TLBIIPAS2LIS",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2is_hyp_write },
/* 32 bit cache operations */
{ .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
.type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab },
@@ -6038,6 +5331,11 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
/* Clear RES0 bits. */
value &= valid_mask;
+ /* RW is RAO/WI if EL1 is AArch64 only */
+ if (!cpu_isar_feature(aa64_aa32_el1, cpu)) {
+ value |= HCR_RW;
+ }
+
/*
* These bits change the MMU setup:
* HCR_VM enables stage 2 translation
@@ -6095,6 +5393,12 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
do_hcr_write(env, value, MAKE_64BIT_MASK(32, 32));
}
+static void hcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* hcr_write will set the RES1 bits on an AArch64-only CPU */
+ hcr_write(env, ri, 0);
+}
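
The added reset hook routes reset through the write function so that RAO/WI bits are forced in exactly one place. A standalone sketch of the resulting reset value, assuming only the RW behaviour shown above (HCR_EL2.RW is bit 31):

#include <stdbool.h>
#include <stdint.h>

static uint64_t hcr_reset_value(bool el1_is_aarch64_only)
{
    uint64_t v = 0;
    if (el1_is_aarch64_only) {
        v |= 1ULL << 31;    /* RW reads-as-one, writes ignored */
    }
    return v;
}
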
+
/*
* Return the effective value of HCR_EL2, at the given security state.
* Bits that are not included here:
@@ -6216,6 +5520,14 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
if (cpu_isar_feature(aa64_nmi, cpu)) {
valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI;
}
+ /* FEAT_CMOW adds CMOW */
+ if (cpu_isar_feature(aa64_cmow, cpu)) {
+ valid_mask |= HCRX_CMOW;
+ }
+ /* FEAT_XS adds FGTnXS, FnXS */
+ if (cpu_isar_feature(aa64_xs, cpu)) {
+ valid_mask |= HCRX_FGTNXS | HCRX_FNXS;
+ }
/* Clear RES0 bits. */
env->cp15.hcrx_el2 = value & valid_mask;
@@ -6322,6 +5634,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
.nv2_redirect_offset = 0x78,
+ .resetfn = hcr_reset,
.writefn = hcr_write, .raw_writefn = raw_write },
{ .name = "HCR", .state = ARM_CP_STATE_AA32,
.type = ARM_CP_ALIAS | ARM_CP_IO,
@@ -6437,50 +5750,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2,
.access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS,
.fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) },
- { .name = "TLBIALLNSNH",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiall_nsnh_write },
- { .name = "TLBIALLNSNHIS",
- .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiall_nsnh_is_write },
- { .name = "TLBIALLH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiall_hyp_write },
- { .name = "TLBIALLHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiall_hyp_is_write },
- { .name = "TLBIMVAH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbimva_hyp_write },
- { .name = "TLBIMVAHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbimva_hyp_is_write },
- { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_alle2_write },
- { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2_write },
- { .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2_write },
- { .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_alle2is_write },
- { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2is_write },
- { .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2is_write },
#ifndef CONFIG_USER_ONLY
/*
* Unlike the other EL2-related AT operations, these must
@@ -6581,7 +5850,7 @@ static CPAccessResult sel2_access(CPUARMState *env, const ARMCPRegInfo *ri,
if (arm_current_el(env) == 3 || arm_is_secure_below_el3(env)) {
return CP_ACCESS_OK;
}
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
@@ -6595,6 +5864,56 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
.access = PL2_RW, .accessfn = sel2_access,
.nv2_redirect_offset = 0x48,
.fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) },
+#ifndef CONFIG_USER_ONLY
+ /* Secure EL2 Physical Timer */
+ { .name = "CNTHPS_TVAL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .readfn = gt_sec_pel2_tval_read,
+ .writefn = gt_sec_pel2_tval_write,
+ .resetfn = gt_sec_pel2_timer_reset,
+ },
+ { .name = "CNTHPS_CTL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 1,
+ .type = ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].ctl),
+ .resetvalue = 0,
+ .writefn = gt_sec_pel2_ctl_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTHPS_CVAL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 5, .opc2 = 2,
+ .type = ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_PHYS].cval),
+ .writefn = gt_sec_pel2_cval_write, .raw_writefn = raw_write,
+ },
+ /* Secure EL2 Virtual Timer */
+ { .name = "CNTHVS_TVAL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .readfn = gt_sec_vel2_tval_read,
+ .writefn = gt_sec_vel2_tval_write,
+ .resetfn = gt_sec_vel2_timer_reset,
+ },
+ { .name = "CNTHVS_CTL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 1,
+ .type = ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].ctl),
+ .resetvalue = 0,
+ .writefn = gt_sec_vel2_ctl_write, .raw_writefn = raw_write,
+ },
+ { .name = "CNTHVS_CVAL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 4, .opc2 = 2,
+ .type = ARM_CP_IO, .access = PL2_RW,
+ .accessfn = gt_sel2timer_access,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_S_EL2_VIRT].cval),
+ .writefn = gt_sec_vel2_cval_write, .raw_writefn = raw_write,
+ },
+#endif
};
static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -6617,7 +5936,7 @@ static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
if (isread) {
return CP_ACCESS_OK;
}
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
static const ARMCPRegInfo el3_cp_reginfo[] = {
@@ -6693,30 +6012,6 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
.opc0 = 3, .opc1 = 6, .crn = 5, .crm = 1, .opc2 = 1,
.access = PL3_RW, .type = ARM_CP_CONST,
.resetvalue = 0 },
- { .name = "TLBI_ALLE3IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 0,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle3is_write },
- { .name = "TLBI_VAE3IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3is_write },
- { .name = "TLBI_VALE3IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3is_write },
- { .name = "TLBI_ALLE3", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 0,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle3_write },
- { .name = "TLBI_VAE3", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3_write },
- { .name = "TLBI_VALE3", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3_write },
};
#ifndef CONFIG_USER_ONLY
@@ -6729,7 +6024,7 @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
return CP_ACCESS_OK;
}
if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
return CP_ACCESS_OK;
}
@@ -6827,7 +6122,7 @@ static CPAccessResult el2_e2h_e12_access(CPUARMState *env,
}
/* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
- return CP_ACCESS_TRAP_UNCATEGORIZED;
+ return CP_ACCESS_UNDEFINED;
}
if (ri->orig_accessfn) {
return ri->orig_accessfn(env, ri->opaque, isread);
@@ -7004,7 +6299,7 @@ static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
}
} else {
if (!(env->cp15.sctlr_el[1] & SCTLR_UCT)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
if (hcr & HCR_TID2) {
return CP_ACCESS_TRAP_EL2;
@@ -7034,7 +6329,7 @@ static CPAccessResult access_terr(CPUARMState *env, const ARMCPRegInfo *ri,
if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TERR)) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && (env->cp15.scr_el3 & SCR_TERR)) {
+ if (!arm_is_el3_or_mon(env) && (env->cp15.scr_el3 & SCR_TERR)) {
return CP_ACCESS_TRAP_EL3;
}
return CP_ACCESS_OK;
@@ -7232,7 +6527,7 @@ uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
if (el <= 1 && !el_is_in_host(env, el)) {
len = MIN(len, 0xf & (uint32_t)cr[1]);
}
- if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+ if (el <= 2 && arm_is_el2_enabled(env)) {
len = MIN(len, 0xf & (uint32_t)cr[2]);
}
if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -7294,7 +6589,6 @@ static const ARMCPRegInfo zcr_reginfo[] = {
.writefn = zcr_write, .raw_writefn = raw_write },
};
-#ifdef TARGET_AARCH64
static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -7303,7 +6597,7 @@ static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri,
if (el == 0) {
uint64_t sctlr = arm_sctlr(env, el);
if (!(sctlr & SCTLR_EnTP2)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
}
/* TODO: FEAT_FGT */
@@ -7344,7 +6638,7 @@ static void arm_reset_sve_state(CPUARMState *env)
memset(env->vfp.zregs, 0, sizeof(env->vfp.zregs));
/* Recall that FFR is stored as pregs[16]. */
memset(env->vfp.pregs, 0, sizeof(env->vfp.pregs));
- vfp_set_fpcr(env, 0x0800009f);
+ vfp_set_fpsr(env, 0x0800009f);
}
void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
@@ -7459,14 +6753,6 @@ static const ARMCPRegInfo sme_reginfo[] = {
.type = ARM_CP_CONST, .resetvalue = 0 },
};
-static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush(cs);
-}
-
static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
@@ -7484,14 +6770,6 @@ static void gpccr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
env_archcpu(env)->reset_l0gptsz);
}
-static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPUState *cs = env_cpu(env);
-
- tlb_flush_all_cpus_synced(cs);
-}
-
static const ARMCPRegInfo rme_reginfo[] = {
{ .name = "GPCCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 6,
@@ -7503,28 +6781,6 @@ static const ARMCPRegInfo rme_reginfo[] = {
{ .name = "MFAR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 5,
.access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mfar_el3) },
- { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_paall_write },
- { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_paallos_write },
- /*
- * QEMU does not have a way to invalidate by physical address, thus
- * invalidating a range of physical addresses is accomplished by
- * flushing all tlb entries in the outer shareable domain,
- * just like PAALLOS.
- */
- { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_paallos_write },
- { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_paallos_write },
{ .name = "DC_CIPAPA", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 1,
.access = PL3_W, .type = ARM_CP_NOP },
@@ -7566,7 +6822,6 @@ static const ARMCPRegInfo nmi_reginfo[] = {
.writefn = aa64_allint_write, .readfn = aa64_allint_read,
.resetfn = arm_cp_reset_ignore },
};
-#endif /* TARGET_AARCH64 */
static void define_pmu_regs(ARMCPU *cpu)
{
@@ -7677,7 +6932,7 @@ static void define_pmu_regs(ARMCPU *cpu)
static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = env_archcpu(env);
- uint64_t pfr1 = cpu->isar.id_pfr1;
+ uint64_t pfr1 = GET_IDREG(&cpu->isar, ID_PFR1);
if (env->gicv3state) {
pfr1 |= 1 << 28;
@@ -7688,7 +6943,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = env_archcpu(env);
- uint64_t pfr0 = cpu->isar.id_aa64pfr0;
+ uint64_t pfr0 = GET_IDREG(&cpu->isar, ID_AA64PFR0);
if (env->gicv3state) {
pfr0 |= 1 << 24;
@@ -7719,8 +6974,8 @@ static CPAccessResult access_lor_other(CPUARMState *env,
const ARMCPRegInfo *ri, bool isread)
{
if (arm_is_secure_below_el3(env)) {
- /* Access denied in secure mode. */
- return CP_ACCESS_TRAP;
+ /* UNDEF if SCR_EL3.NS == 0 */
+ return CP_ACCESS_UNDEFINED;
}
return access_lor_ns(env, ri, isread);
}
@@ -7758,7 +7013,6 @@ static const ARMCPRegInfo lor_reginfo[] = {
.type = ARM_CP_CONST, .resetvalue = 0 },
};
-#ifdef TARGET_AARCH64
static CPAccessResult access_pauth(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -7830,210 +7084,6 @@ static const ARMCPRegInfo pauth_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, keys.apib.hi) },
};
-static const ARMCPRegInfo tlbirange_reginfo[] = {
- { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAE1IS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAAE1IS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVALE1IS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAALE1IS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAE1OS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAAE1OS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVALE1OS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAALE1OS,
- .writefn = tlbi_aa64_rvae1is_write },
- { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAE1,
- .writefn = tlbi_aa64_rvae1_write },
- { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAAE1,
- .writefn = tlbi_aa64_rvae1_write },
- { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVALE1,
- .writefn = tlbi_aa64_rvae1_write },
- { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIRVAALE1,
- .writefn = tlbi_aa64_rvae1_write },
- { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ripas2e1is_write },
- { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ripas2e1is_write },
- { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2is_write },
- { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2is_write },
- { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ripas2e1_write },
- { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_ripas2e1_write },
- { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2is_write },
- { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2is_write },
- { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2_write },
- { .name = "TLBI_RVALE2", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_rvae2_write },
- { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3is_write },
- { .name = "TLBI_RVALE3IS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3is_write },
- { .name = "TLBI_RVAE3OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3is_write },
- { .name = "TLBI_RVALE3OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3is_write },
- { .name = "TLBI_RVAE3", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3_write },
- { .name = "TLBI_RVALE3", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_rvae3_write },
-};
-
-static const ARMCPRegInfo tlbios_reginfo[] = {
- { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVMALLE1OS,
- .writefn = tlbi_aa64_vmalle1is_write },
- { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1,
- .fgt = FGT_TLBIVAE1OS,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIASIDE1OS,
- .writefn = tlbi_aa64_vmalle1is_write },
- { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAAE1OS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVALE1OS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7,
- .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
- .fgt = FGT_TLBIVAALE1OS,
- .writefn = tlbi_aa64_vae1is_write },
- { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_alle2is_write },
- { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2is_write },
- { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1is_write },
- { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
- .writefn = tlbi_aa64_vae2is_write },
- { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle1is_write },
- { .name = "TLBI_IPAS2E1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 0,
- .access = PL2_W, .type = ARM_CP_NOP },
- { .name = "TLBI_RIPAS2E1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 3,
- .access = PL2_W, .type = ARM_CP_NOP },
- { .name = "TLBI_IPAS2LE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 4,
- .access = PL2_W, .type = ARM_CP_NOP },
- { .name = "TLBI_RIPAS2LE1OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 7,
- .access = PL2_W, .type = ARM_CP_NOP },
- { .name = "TLBI_ALLE3OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_alle3is_write },
- { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3is_write },
- { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64,
- .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5,
- .access = PL3_W, .type = ARM_CP_NO_RAW,
- .writefn = tlbi_aa64_vae3is_write },
-};
-
static uint64_t rndr_readfn(CPUARMState *env, const ARMCPRegInfo *ri)
{
Error *err = NULL;
@@ -8345,7 +7395,7 @@ static CPAccessResult access_scxtnum(CPUARMState *env, const ARMCPRegInfo *ri,
if (hcr & HCR_TGE) {
return CP_ACCESS_TRAP_EL2;
}
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
} else if (el < 2 && (env->cp15.sctlr_el[2] & SCTLR_TSCXT)) {
return CP_ACCESS_TRAP_EL2;
@@ -8455,8 +7505,6 @@ static const ARMCPRegInfo nv2_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) },
};
-#endif /* TARGET_AARCH64 */
-
static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
@@ -8465,7 +7513,7 @@ static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
if (el == 0) {
uint64_t sctlr = arm_sctlr(env, el);
if (!(sctlr & SCTLR_EnRCTX)) {
- return CP_ACCESS_TRAP;
+ return CP_ACCESS_TRAP_EL1;
}
} else if (el == 1) {
uint64_t hcr = arm_hcr_el2_eff(env);
@@ -8702,6 +7750,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
{
/* Register all the coprocessor registers based on feature bits */
CPUARMState *env = &cpu->env;
+ ARMISARegisters *isar = &cpu->isar;
+
if (arm_feature(env, ARM_FEATURE_M)) {
/* M profile has no coprocessor registers */
return;
@@ -8716,6 +7766,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_arm_cp_regs(cpu, not_v8_cp_reginfo);
}
+#ifndef CONFIG_USER_ONLY
+ define_tlb_insn_regs(cpu);
+#endif
+
if (arm_feature(env, ARM_FEATURE_V6)) {
/* The ID registers all have impdef reset values */
ARMCPRegInfo v6_idregs[] = {
@@ -8723,7 +7777,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_pfr0 },
+ .resetvalue = GET_IDREG(isar, ID_PFR0)},
/*
* ID_PFR1 is not a plain ARM_CP_CONST because we don't know
* the value of the GIC field until after we define these regs.
@@ -8734,7 +7788,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.accessfn = access_aa32_tid3,
#ifdef CONFIG_USER_ONLY
.type = ARM_CP_CONST,
- .resetvalue = cpu->isar.id_pfr1,
+ .resetvalue = GET_IDREG(isar, ID_PFR1),
#else
.type = ARM_CP_NO_RAW,
.accessfn = access_aa32_tid3,
@@ -8746,7 +7800,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_dfr0 },
+ .resetvalue = GET_IDREG(isar, ID_DFR0)},
{ .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -8756,62 +7810,62 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_mmfr0 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR0)},
{ .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_mmfr1 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR1)},
{ .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_mmfr2 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR2)},
{ .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_mmfr3 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR3)},
{ .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar0 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR0)},
{ .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar1 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR1)},
{ .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar2 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR2)},
{ .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar3 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR3) },
{ .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar4 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR4) },
{ .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar5 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR5) },
{ .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_mmfr4 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR4)},
{ .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa32_tid3,
- .resetvalue = cpu->isar.id_isar6 },
+ .resetvalue = GET_IDREG(isar, ID_ISAR6) },
};
define_arm_cp_regs(cpu, v6_idregs);
define_arm_cp_regs(cpu, v6_cp_reginfo);
@@ -8821,10 +7875,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (arm_feature(env, ARM_FEATURE_V6K)) {
define_arm_cp_regs(cpu, v6k_cp_reginfo);
}
- if (arm_feature(env, ARM_FEATURE_V7MP) &&
- !arm_feature(env, ARM_FEATURE_PMSA)) {
- define_arm_cp_regs(cpu, v7mp_cp_reginfo);
- }
if (arm_feature(env, ARM_FEATURE_V7VE)) {
define_arm_cp_regs(cpu, pmovsset_cp_reginfo);
}
@@ -8866,7 +7916,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.access = PL1_R,
#ifdef CONFIG_USER_ONLY
.type = ARM_CP_CONST,
- .resetvalue = cpu->isar.id_aa64pfr0
+ .resetvalue = GET_IDREG(isar, ID_AA64PFR0)
#else
.type = ARM_CP_NO_RAW,
.accessfn = access_aa64_tid3,
@@ -8878,7 +7928,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64pfr1},
+ .resetvalue = GET_IDREG(isar, ID_AA64PFR1)},
{ .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -8893,12 +7943,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64zfr0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64ZFR0)},
{ .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64smfr0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64SMFR0)},
{ .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -8913,12 +7963,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64dfr0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64DFR0) },
{ .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64dfr1 },
+ .resetvalue = GET_IDREG(isar, ID_AA64DFR1) },
{ .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -8953,17 +8003,17 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64isar0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64ISAR0)},
{ .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64isar1 },
+ .resetvalue = GET_IDREG(isar, ID_AA64ISAR1)},
{ .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64isar2 },
+ .resetvalue = GET_IDREG(isar, ID_AA64ISAR2)},
{ .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -8993,22 +8043,22 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64mmfr0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64MMFR0)},
{ .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64mmfr1 },
+ .resetvalue = GET_IDREG(isar, ID_AA64MMFR1) },
{ .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64mmfr2 },
+ .resetvalue = GET_IDREG(isar, ID_AA64MMFR2) },
{ .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_aa64mmfr3 },
+ .resetvalue = GET_IDREG(isar, ID_AA64MMFR3) },
{ .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -9080,17 +8130,17 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_pfr2 },
+ .resetvalue = GET_IDREG(isar, ID_PFR2)},
{ .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_dfr1 },
+ .resetvalue = GET_IDREG(isar, ID_DFR1)},
{ .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = cpu->isar.id_mmfr5 },
+ .resetvalue = GET_IDREG(isar, ID_MMFR5)},
{ .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -9899,7 +8949,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo);
}
-#ifdef TARGET_AARCH64
if (cpu_isar_feature(aa64_sme, cpu)) {
define_arm_cp_regs(cpu, sme_reginfo);
}
@@ -9909,12 +8958,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (cpu_isar_feature(aa64_rndr, cpu)) {
define_arm_cp_regs(cpu, rndr_reginfo);
}
- if (cpu_isar_feature(aa64_tlbirange, cpu)) {
- define_arm_cp_regs(cpu, tlbirange_reginfo);
- }
- if (cpu_isar_feature(aa64_tlbios, cpu)) {
- define_arm_cp_regs(cpu, tlbios_reginfo);
- }
/* Data Cache clean instructions up to PoP */
if (cpu_isar_feature(aa64_dcpop, cpu)) {
define_one_arm_cp_reg(cpu, dcpop_reg);
@@ -9966,7 +9009,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (cpu_isar_feature(aa64_nmi, cpu)) {
define_arm_cp_regs(cpu, nmi_reginfo);
}
-#endif
if (cpu_isar_feature(any_predinv, cpu)) {
define_arm_cp_regs(cpu, predinv_reginfo);
@@ -10327,6 +9369,31 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
if (r->state != state && r->state != ARM_CP_STATE_BOTH) {
continue;
}
+ if ((r->type & ARM_CP_ADD_TLBI_NXS) &&
+ cpu_isar_feature(aa64_xs, cpu)) {
+ /*
+ * This is a TLBI insn which has an NXS variant. The
+ * NXS variant is at the same encoding except that
+ * crn is +1, and has the same behaviour except for
+ * fine-grained trapping. Add the NXS insn here and
+ * then fall through to add the normal register.
+ * add_cpreg_to_hashtable() copies the cpreg struct
+ * and name that it is passed, so it's OK to use
+ * a local struct here.
+ */
+ ARMCPRegInfo nxs_ri = *r;
+ g_autofree char *name = g_strdup_printf("%sNXS", r->name);
+
+ assert(state == ARM_CP_STATE_AA64);
+ assert(nxs_ri.crn < 0xf);
+ nxs_ri.crn++;
+ if (nxs_ri.fgt) {
+ nxs_ri.fgt |= R_FGT_NXS_MASK;
+ }
+ add_cpreg_to_hashtable(cpu, &nxs_ri, opaque, state,
+ ARM_CP_SECSTATE_NS,
+ crm, opc1, opc2, name);
+ }
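
The added block above implements the FEAT_XS rule that every affected TLBI has an ...NXS twin at crn + 1 with identical behaviour apart from fine-grained trapping; for example TLBI VAE1 at crn=8 pairs with TLBI VAE1NXS at crn=9. A one-line sketch of the encoding relationship:

#include <assert.h>

static unsigned nxs_crn(unsigned base_crn)
{
    assert(base_crn < 0xf);   /* mirrors the assert in the hunk above */
    return base_crn + 1;
}
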
if (state == ARM_CP_STATE_AA32) {
/*
* Under AArch32 CP registers can be common
@@ -10765,7 +9832,7 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
uint64_t hcr_el2;
if (arm_feature(env, ARM_FEATURE_EL3)) {
- rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
+ rw = arm_scr_rw_eff(env);
} else {
/*
* Either EL2 is the highest EL (and so the EL2 register width
@@ -10840,6 +9907,7 @@ void arm_log_exception(CPUState *cs)
[EXCP_NMI] = "NMI",
[EXCP_VINMI] = "Virtual IRQ NMI",
[EXCP_VFNMI] = "Virtual FIQ NMI",
+ [EXCP_MON_TRAP] = "Monitor Trap",
};
if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -11406,6 +10474,16 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
mask = CPSR_A | CPSR_I | CPSR_F;
offset = 0;
break;
+ case EXCP_MON_TRAP:
+ new_mode = ARM_CPU_MODE_MON;
+ addr = 0x04;
+ mask = CPSR_A | CPSR_I | CPSR_F;
+ if (env->thumb) {
+ offset = 2;
+ } else {
+ offset = 4;
+ }
+ break;
default:
cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
return; /* Never happens. Keep compiler happy. */
@@ -11539,7 +10617,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
unsigned int new_el = env->exception.target_el;
- target_ulong addr = env->cp15.vbar_el[new_el];
+ vaddr addr = env->cp15.vbar_el[new_el];
unsigned int new_mode = aarch64_pstate_mode(new_el, true);
unsigned int old_mode;
unsigned int cur_el = arm_current_el(env);
@@ -11563,7 +10641,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
switch (new_el) {
case 3:
- is_aa64 = (env->cp15.scr_el3 & SCR_RW) != 0;
+ is_aa64 = arm_scr_rw_eff(env);
break;
case 2:
hcr = arm_hcr_el2_eff(env);
@@ -11861,10 +10939,20 @@ void arm_cpu_do_interrupt(CPUState *cs)
uint64_t arm_sctlr(CPUARMState *env, int el)
{
- /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */
+ /* Only EL0 needs to be adjusted for EL1&0 or EL2&0 or EL3&0 */
if (el == 0) {
ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
- el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ el = 2;
+ break;
+ case ARMMMUIdx_E30_0:
+ el = 3;
+ break;
+ default:
+ el = 1;
+ break;
+ }
}
return env->cp15.sctlr_el[el];
}
@@ -12128,289 +11216,6 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
};
}
-/*
- * Note that signed overflow is undefined in C. The following routines are
- * careful to use unsigned types where modulo arithmetic is required.
- * Failure to do so _will_ break on newer gcc.
- */
-
-/* Signed saturating arithmetic. */
-
-/* Perform 16-bit signed saturating addition. */
-static inline uint16_t add16_sat(uint16_t a, uint16_t b)
-{
- uint16_t res;
-
- res = a + b;
- if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
- if (a & 0x8000) {
- res = 0x8000;
- } else {
- res = 0x7fff;
- }
- }
- return res;
-}
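
The deleted saturating helpers detect overflow with the classic identity: it can only occur when the operands share a sign and the result's sign differs from theirs. A small self-check, assuming add16_sat() as defined above (operands are raw uint16_t bit patterns):

#include <assert.h>

static void check_add16_sat(void)
{
    assert(add16_sat(0x7fff, 0x0001) == 0x7fff); /* clamps at INT16_MAX */
    assert(add16_sat(0x8000, 0xffff) == 0x8000); /* -32768 + -1 clamps */
    assert(add16_sat(0x0064, 0xffff) == 0x0063); /* 100 + -1 == 99 */
}
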
-
-/* Perform 8-bit signed saturating addition. */
-static inline uint8_t add8_sat(uint8_t a, uint8_t b)
-{
- uint8_t res;
-
- res = a + b;
- if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {
- if (a & 0x80) {
- res = 0x80;
- } else {
- res = 0x7f;
- }
- }
- return res;
-}
-
-/* Perform 16-bit signed saturating subtraction. */
-static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
-{
- uint16_t res;
-
- res = a - b;
- if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {
- if (a & 0x8000) {
- res = 0x8000;
- } else {
- res = 0x7fff;
- }
- }
- return res;
-}
-
-/* Perform 8-bit signed saturating subtraction. */
-static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
-{
- uint8_t res;
-
- res = a - b;
- if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {
- if (a & 0x80) {
- res = 0x80;
- } else {
- res = 0x7f;
- }
- }
- return res;
-}
-
-#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
-#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
-#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
-#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
-#define PFX q
-
-#include "op_addsub.h"
-
-/* Unsigned saturating arithmetic. */
-static inline uint16_t add16_usat(uint16_t a, uint16_t b)
-{
- uint16_t res;
- res = a + b;
- if (res < a) {
- res = 0xffff;
- }
- return res;
-}
-
-static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
-{
- if (a > b) {
- return a - b;
- } else {
- return 0;
- }
-}
-
-static inline uint8_t add8_usat(uint8_t a, uint8_t b)
-{
- uint8_t res;
- res = a + b;
- if (res < a) {
- res = 0xff;
- }
- return res;
-}
-
-static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
-{
- if (a > b) {
- return a - b;
- } else {
- return 0;
- }
-}
-
-#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
-#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
-#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
-#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
-#define PFX uq
-
-#include "op_addsub.h"
-
-/* Signed modulo arithmetic. */
-#define SARITH16(a, b, n, op) do { \
- int32_t sum; \
- sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
- RESULT(sum, n, 16); \
- if (sum >= 0) \
- ge |= 3 << (n * 2); \
- } while (0)
-
-#define SARITH8(a, b, n, op) do { \
- int32_t sum; \
- sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
- RESULT(sum, n, 8); \
- if (sum >= 0) \
- ge |= 1 << n; \
- } while (0)
-
-
-#define ADD16(a, b, n) SARITH16(a, b, n, +)
-#define SUB16(a, b, n) SARITH16(a, b, n, -)
-#define ADD8(a, b, n) SARITH8(a, b, n, +)
-#define SUB8(a, b, n) SARITH8(a, b, n, -)
-#define PFX s
-#define ARITH_GE
-
-#include "op_addsub.h"
-
-/* Unsigned modulo arithmetic. */
-#define ADD16(a, b, n) do { \
- uint32_t sum; \
- sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
- RESULT(sum, n, 16); \
- if ((sum >> 16) == 1) \
- ge |= 3 << (n * 2); \
- } while (0)
-
-#define ADD8(a, b, n) do { \
- uint32_t sum; \
- sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
- RESULT(sum, n, 8); \
- if ((sum >> 8) == 1) \
- ge |= 1 << n; \
- } while (0)
-
-#define SUB16(a, b, n) do { \
- uint32_t sum; \
- sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
- RESULT(sum, n, 16); \
- if ((sum >> 16) == 0) \
- ge |= 3 << (n * 2); \
- } while (0)
-
-#define SUB8(a, b, n) do { \
- uint32_t sum; \
- sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
- RESULT(sum, n, 8); \
- if ((sum >> 8) == 0) \
- ge |= 1 << n; \
- } while (0)
-
-#define PFX u
-#define ARITH_GE
-
-#include "op_addsub.h"
-
-/* Halved signed arithmetic. */
-#define ADD16(a, b, n) \
- RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
-#define SUB16(a, b, n) \
- RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
-#define ADD8(a, b, n) \
- RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
-#define SUB8(a, b, n) \
- RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
-#define PFX sh
-
-#include "op_addsub.h"
-
-/* Halved unsigned arithmetic. */
-#define ADD16(a, b, n) \
- RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
-#define SUB16(a, b, n) \
- RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
-#define ADD8(a, b, n) \
- RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
-#define SUB8(a, b, n) \
- RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
-#define PFX uh
-
-#include "op_addsub.h"
-
-static inline uint8_t do_usad(uint8_t a, uint8_t b)
-{
- if (a > b) {
- return a - b;
- } else {
- return b - a;
- }
-}
-
-/* Unsigned sum of absolute byte differences. */
-uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
-{
- uint32_t sum;
- sum = do_usad(a, b);
- sum += do_usad(a >> 8, b >> 8);
- sum += do_usad(a >> 16, b >> 16);
- sum += do_usad(a >> 24, b >> 24);
- return sum;
-}
-
-/* For ARMv6 SEL instruction. */
-uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
-{
- uint32_t mask;
-
- mask = 0;
- if (flags & 1) {
- mask |= 0xff;
- }
- if (flags & 2) {
- mask |= 0xff00;
- }
- if (flags & 4) {
- mask |= 0xff0000;
- }
- if (flags & 8) {
- mask |= 0xff000000;
- }
- return (a & mask) | (b & ~mask);
-}
-
-/*
- * CRC helpers.
- * The upper bytes of val (above the number specified by 'bytes') must have
- * been zeroed out by the caller.
- */
-uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
-{
- uint8_t buf[4];
-
- stl_le_p(buf, val);
-
- /* zlib crc32 converts the accumulator and output to one's complement. */
- return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
-}
-
-uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
-{
- uint8_t buf[4];
-
- stl_le_p(buf, val);
-
- /* Linux crc32c converts the output to one's complement. */
- return crc32c(acc, buf, bytes) ^ 0xffffffff;
-}
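Everything from the saturating helpers down to the CRC helpers is deleted from helper.c wholesale; the code itself survives the move (the new tcg/arith_helper.c in this series is the likely home). The load-bearing idiom in the signed-saturating routines is the sign-bit overflow test, restated compactly below as a self-contained sketch, not the moved code:

    /* Signed saturating add, condensed: overflow happened iff the operands
     * agree in sign and the result disagrees with them. */
    static inline int16_t sat_add16(int16_t a, int16_t b)
    {
        uint16_t ua = a, ub = b, res = ua + ub;  /* unsigned: wraparound is defined */
        if (((res ^ ua) & ~(ua ^ ub)) & 0x8000) {
            res = (ua & 0x8000) ? 0x8000 : 0x7fff;  /* clamp toward the operand sign */
        }
        return res;
    }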
/*
* Return the exception level to which FP-disabled exceptions should
@@ -12532,6 +11337,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
switch (mmu_idx) {
case ARMMMUIdx_E10_0:
case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E30_0:
return 0;
case ARMMMUIdx_E10_1:
case ARMMMUIdx_E10_1_PAN:
@@ -12541,6 +11347,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
case ARMMMUIdx_E20_2_PAN:
return 2;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_3_PAN:
return 3;
default:
g_assert_not_reached();
@@ -12569,6 +11376,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
hcr = arm_hcr_el2_eff(env);
if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
idx = ARMMMUIdx_E20_0;
+ } else if (arm_is_secure_below_el3(env) &&
+ !arm_el_is_aa64(env, 3)) {
+ idx = ARMMMUIdx_E30_0;
} else {
idx = ARMMMUIdx_E10_0;
}
@@ -12593,6 +11403,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
}
break;
case 3:
+ if (!arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) {
+ return ARMMMUIdx_E30_3_PAN;
+ }
return ARMMMUIdx_E3;
default:
g_assert_not_reached();
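These arm_mmu_idx_el() hunks introduce the Secure PL1&0 regime indexes for an AArch32 EL3: secure EL0 now maps to ARMMMUIdx_E30_0, and EL3 with PAN set maps to ARMMMUIdx_E30_3_PAN. The combined effect, distilled into one sketch for readability:

    /* Combined effect of the two hunks above (restated, not new logic): */
    if (el == 0 && arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) {
        idx = ARMMMUIdx_E30_0;        /* Secure PL0 under an AArch32 EL3 */
    } else if (el == 3 && !arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) {
        idx = ARMMMUIdx_E30_3_PAN;    /* AArch32 EL3 with PAN enabled */
    }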
@@ -12606,116 +11419,6 @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env)
return arm_mmu_idx_el(env, arm_current_el(env));
}
-static bool mve_no_pred(CPUARMState *env)
-{
- /*
- * Return true if there is definitely no predication of MVE
- * instructions by VPR or LTPSIZE. (Returning false even if there
- * isn't any predication is OK; generated code will just be
- * a little worse.)
- * If the CPU does not implement MVE then this TB flag is always 0.
- *
- * NOTE: if you change this logic, the "recalculate s->mve_no_pred"
- * logic in gen_update_fp_context() needs to be updated to match.
- *
- * We do not include the effect of the ECI bits here -- they are
- * tracked in other TB flags. This simplifies the logic for
- * "when did we emit code that changes the MVE_NO_PRED TB flag
- * and thus need to end the TB?".
- */
-    if (!cpu_isar_feature(aa32_mve, env_archcpu(env))) {
- return false;
- }
- if (env->v7m.vpr) {
- return false;
- }
- if (env->v7m.ltpsize < 4) {
- return false;
- }
- return true;
-}
-
-void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
- uint64_t *cs_base, uint32_t *pflags)
-{
- CPUARMTBFlags flags;
-
- assert_hflags_rebuild_correctly(env);
- flags = env->hflags;
-
- if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) {
- *pc = env->pc;
- if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
- DP_TBFLAG_A64(flags, BTYPE, env->btype);
- }
- } else {
- *pc = env->regs[15];
-
- if (arm_feature(env, ARM_FEATURE_M)) {
- if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
- FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
- != env->v7m.secure) {
- DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1);
- }
-
- if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
- (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
- (env->v7m.secure &&
- !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
- /*
- * ASPEN is set, but FPCA/SFPA indicate that there is no
- * active FP context; we must create a new FP context before
- * executing any FP insn.
- */
- DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1);
- }
-
- bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
- if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
- DP_TBFLAG_M32(flags, LSPACT, 1);
- }
-
- if (mve_no_pred(env)) {
- DP_TBFLAG_M32(flags, MVE_NO_PRED, 1);
- }
- } else {
- /*
- * Note that XSCALE_CPAR shares bits with VECSTRIDE.
- * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
- */
- if (arm_feature(env, ARM_FEATURE_XSCALE)) {
- DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar);
- } else {
- DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
- DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
- }
- if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
- DP_TBFLAG_A32(flags, VFPEN, 1);
- }
- }
-
- DP_TBFLAG_AM32(flags, THUMB, env->thumb);
- DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits);
- }
-
- /*
- * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
- * states defined in the ARM ARM for software singlestep:
- * SS_ACTIVE PSTATE.SS State
- * 0 x Inactive (the TB flag for SS is always 0)
- * 1 0 Active-pending
- * 1 1 Active-not-pending
- * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB.
- */
- if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) {
- DP_TBFLAG_ANY(flags, PSTATE__SS, 1);
- }
-
- *pflags = flags.flags;
- *cs_base = flags.flags2;
-}
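mve_no_pred() and cpu_get_tb_cpu_state() leave helper.c; the existing tcg/hflags.c is their likely destination in this series. The predicate condenses to a single expression, assuming LTPSIZE can never exceed 4 (a sketch of the moved logic, not the relocated code itself):

    /* "No MVE predication" iff MVE is implemented, VPR is clear and
     * LTPSIZE is at its maximum. */
    bool no_pred = cpu_isar_feature(aa32_mve, env_archcpu(env))
                   && env->v7m.vpr == 0
                   && env->v7m.ltpsize == 4;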
-
-#ifdef TARGET_AARCH64
/*
* The manual says that when SVE is enabled and VQ is widened the
* implementation is allowed to zero the previously inaccessible
@@ -12830,7 +11533,6 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
aarch64_sve_narrow_vq(env, new_len + 1);
}
}
-#endif
#ifndef CONFIG_USER_ONLY
ARMSecuritySpace arm_security_space(CPUARMState *env)
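The TARGET_AARCH64 guard around aarch64_sve_change_el() is dropped, pairing with the matching #ifdef removal earlier in this hunk series; AArch32-only builds presumably get the symbol from the new cpu32-stubs.c instead. A stub of the shape one would expect there (assumed, not quoted from the patch):

    /* Hypothetical 32-bit-only stub; the real one lives in cpu32-stubs.c. */
    void aarch64_sve_change_el(CPUARMState *env, int old_el,
                               int new_el, bool el0_a64)
    {
        g_assert_not_reached();  /* SVE state cannot exist without AArch64 */
    }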
diff --git a/target/arm/helper.h b/target/arm/helper.h
index eca2043..f340a49 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1,1093 +1,6 @@
-DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
-DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
+/* SPDX-License-Identifier: GPL-2.0-or-later */
-DEF_HELPER_3(add_setq, i32, env, i32, i32)
-DEF_HELPER_3(add_saturate, i32, env, i32, i32)
-DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
-DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
-DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32)
-DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32)
-DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32)
-
-#define PAS_OP(pfx) \
- DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
- DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \
- DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \
- DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \
- DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \
- DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr)
-
-PAS_OP(s)
-PAS_OP(u)
-#undef PAS_OP
-
-#define PAS_OP(pfx) \
- DEF_HELPER_2(pfx ## add8, i32, i32, i32) \
- DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \
- DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \
- DEF_HELPER_2(pfx ## add16, i32, i32, i32) \
- DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \
- DEF_HELPER_2(pfx ## subaddx, i32, i32, i32)
-PAS_OP(q)
-PAS_OP(sh)
-PAS_OP(uq)
-PAS_OP(uh)
-#undef PAS_OP
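The two PAS_OP blocks are a standard X-macro: one body stamps out the six parallel add/subtract declarations per prefix, first for the GE-flag variants (s/u, which take a pointer for the GE bits) and then for the saturating and halving variants (q/sh/uq/uh, which do not). In miniature (illustrative expansion only):

    #define PAS_OP(pfx) DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr)
    PAS_OP(s)  /* expands to DEF_HELPER_3(sadd16, i32, i32, i32, ptr) */
    #undef PAS_OP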
-
-DEF_HELPER_3(ssat, i32, env, i32, i32)
-DEF_HELPER_3(usat, i32, env, i32, i32)
-DEF_HELPER_3(ssat16, i32, env, i32, i32)
-DEF_HELPER_3(usat16, i32, env, i32, i32)
-
-DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
-
-DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
- i32, i32, i32, i32)
-DEF_HELPER_2(exception_internal, noreturn, env, i32)
-DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32)
-DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32)
-DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32)
-DEF_HELPER_2(exception_swstep, noreturn, env, i32)
-DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl)
-DEF_HELPER_1(setend, void, env)
-DEF_HELPER_2(wfi, void, env, i32)
-DEF_HELPER_1(wfe, void, env)
-DEF_HELPER_2(wfit, void, env, i64)
-DEF_HELPER_1(yield, void, env)
-DEF_HELPER_1(pre_hvc, void, env)
-DEF_HELPER_2(pre_smc, void, env, i32)
-DEF_HELPER_1(vesb, void, env)
-
-DEF_HELPER_3(cpsr_write, void, env, i32, i32)
-DEF_HELPER_2(cpsr_write_eret, void, env, i32)
-DEF_HELPER_1(cpsr_read, i32, env)
-
-DEF_HELPER_3(v7m_msr, void, env, i32, i32)
-DEF_HELPER_2(v7m_mrs, i32, env, i32)
-
-DEF_HELPER_2(v7m_bxns, void, env, i32)
-DEF_HELPER_2(v7m_blxns, void, env, i32)
-
-DEF_HELPER_3(v7m_tt, i32, env, i32, i32)
-
-DEF_HELPER_1(v7m_preserve_fp_state, void, env)
-
-DEF_HELPER_2(v7m_vlstm, void, env, i32)
-DEF_HELPER_2(v7m_vlldm, void, env, i32)
-
-DEF_HELPER_2(v8m_stackcheck, void, env, i32)
-
-DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32)
-
-DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32)
-DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32)
-DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_3(set_cp_reg, void, env, cptr, i32)
-DEF_HELPER_2(get_cp_reg, i32, env, cptr)
-DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64)
-DEF_HELPER_2(get_cp_reg64, i64, env, cptr)
-
-DEF_HELPER_2(get_r13_banked, i32, env, i32)
-DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
-
-DEF_HELPER_3(mrs_banked, i32, env, i32, i32)
-DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
-
-DEF_HELPER_2(get_user_reg, i32, env, i32)
-DEF_HELPER_3(set_user_reg, void, env, i32, i32)
-
-DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int)
-DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
-DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
-
-DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
-
-DEF_HELPER_1(vfp_get_fpscr, i32, env)
-DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
-
-DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
-DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
-DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
-DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
-DEF_HELPER_2(vfp_sqrth, f16, f16, env)
-DEF_HELPER_2(vfp_sqrts, f32, f32, env)
-DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
-DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
-DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
-DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
-DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env)
-DEF_HELPER_3(vfp_cmpes, void, f32, f32, env)
-DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
-
-DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
-DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
-DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr)
-DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, ptr)
-
-DEF_HELPER_2(vfp_uitoh, f16, i32, ptr)
-DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
-DEF_HELPER_2(vfp_uitod, f64, i32, ptr)
-DEF_HELPER_2(vfp_sitoh, f16, i32, ptr)
-DEF_HELPER_2(vfp_sitos, f32, i32, ptr)
-DEF_HELPER_2(vfp_sitod, f64, i32, ptr)
-
-DEF_HELPER_2(vfp_touih, i32, f16, ptr)
-DEF_HELPER_2(vfp_touis, i32, f32, ptr)
-DEF_HELPER_2(vfp_touid, i32, f64, ptr)
-DEF_HELPER_2(vfp_touizh, i32, f16, ptr)
-DEF_HELPER_2(vfp_touizs, i32, f32, ptr)
-DEF_HELPER_2(vfp_touizd, i32, f64, ptr)
-DEF_HELPER_2(vfp_tosih, s32, f16, ptr)
-DEF_HELPER_2(vfp_tosis, s32, f32, ptr)
-DEF_HELPER_2(vfp_tosid, s32, f64, ptr)
-DEF_HELPER_2(vfp_tosizh, s32, f16, ptr)
-DEF_HELPER_2(vfp_tosizs, s32, f32, ptr)
-DEF_HELPER_2(vfp_tosizd, s32, f64, ptr)
-
-DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_touqh, i64, f16, i32, ptr)
-DEF_HELPER_3(vfp_tosqh, i64, f16, i32, ptr)
-DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
-DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr)
-DEF_HELPER_3(vfp_touqs, i64, f32, i32, ptr)
-DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_tosqd, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_touqd, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_sqtos, f32, i64, i32, ptr)
-DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_uqtos, f32, i64, i32, ptr)
-DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_shtoh, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr)
-DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)
-
-DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, ptr)
-DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, ptr)
-DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, ptr)
-DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, ptr)
-
-DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
-
-DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32)
-DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32)
-DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, ptr, i32)
-DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
-
-DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
-DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
-DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
-
-DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
-DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
-DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
-DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
-DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64)
-
-DEF_HELPER_3(shl_cc, i32, env, i32, i32)
-DEF_HELPER_3(shr_cc, i32, env, i32, i32)
-DEF_HELPER_3(sar_cc, i32, env, i32, i32)
-DEF_HELPER_3(ror_cc, i32, env, i32, i32)
-
-DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
-DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
-DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
-
-DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env)
-DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr)
-
-DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32)
-
-/* neon_helper.c */
-DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
-DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
-DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
-DEF_HELPER_2(neon_pmin_s16, i32, i32, i32)
-DEF_HELPER_2(neon_pmax_u8, i32, i32, i32)
-DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
-DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
-DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
-
-DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_rshl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_rshl_s64, i64, i64, i64)
-DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
-DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_2(neon_add_u8, i32, i32, i32)
-DEF_HELPER_2(neon_add_u16, i32, i32, i32)
-DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
-DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
-DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-
-DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
-DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
-DEF_HELPER_2(neon_tst_u32, i32, i32, i32)
-
-DEF_HELPER_1(neon_clz_u8, i32, i32)
-DEF_HELPER_1(neon_clz_u16, i32, i32)
-DEF_HELPER_1(neon_cls_s8, i32, i32)
-DEF_HELPER_1(neon_cls_s16, i32, i32)
-DEF_HELPER_1(neon_cls_s32, i32, i32)
-DEF_HELPER_1(neon_cnt_u8, i32, i32)
-DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32)
-
-DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
-DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32)
-DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32)
-DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
-DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32)
-DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32)
-
-DEF_HELPER_1(neon_narrow_u8, i32, i64)
-DEF_HELPER_1(neon_narrow_u16, i32, i64)
-DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
-DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
-DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
-DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
-DEF_HELPER_1(neon_narrow_high_u16, i32, i64)
-DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64)
-DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64)
-DEF_HELPER_1(neon_widen_u8, i64, i32)
-DEF_HELPER_1(neon_widen_s8, i64, i32)
-DEF_HELPER_1(neon_widen_u16, i64, i32)
-DEF_HELPER_1(neon_widen_s16, i64, i32)
-
-DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
-DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
-DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
-DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
-DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
-DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
-DEF_HELPER_2(neon_abdl_s16, i64, i32, i32)
-DEF_HELPER_2(neon_abdl_u32, i64, i32, i32)
-DEF_HELPER_2(neon_abdl_s32, i64, i32, i32)
-DEF_HELPER_2(neon_abdl_u64, i64, i32, i32)
-DEF_HELPER_2(neon_abdl_s64, i64, i32, i32)
-DEF_HELPER_2(neon_mull_u8, i64, i32, i32)
-DEF_HELPER_2(neon_mull_s8, i64, i32, i32)
-DEF_HELPER_2(neon_mull_u16, i64, i32, i32)
-DEF_HELPER_2(neon_mull_s16, i64, i32, i32)
-
-DEF_HELPER_1(neon_negl_u16, i64, i64)
-DEF_HELPER_1(neon_negl_u32, i64, i64)
-
-DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64)
-DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
-DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
-
-DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr)
-DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr)
-DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr)
-
-/* iwmmxt_helper.c */
-DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64)
-DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64)
-DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64)
-
-#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \
-DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \
-
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackl)
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackh)
-
-DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(mins)
-DEF_IWMMXT_HELPER_SIZE_ENV(minu)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxs)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxu)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(subn)
-DEF_IWMMXT_HELPER_SIZE_ENV(addn)
-DEF_IWMMXT_HELPER_SIZE_ENV(subu)
-DEF_IWMMXT_HELPER_SIZE_ENV(addu)
-DEF_IWMMXT_HELPER_SIZE_ENV(subs)
-DEF_IWMMXT_HELPER_SIZE_ENV(adds)
-
-DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
-
-DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
-DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
-
-DEF_HELPER_1(iwmmxt_bcstb, i64, i32)
-DEF_HELPER_1(iwmmxt_bcstw, i64, i32)
-DEF_HELPER_1(iwmmxt_bcstl, i64, i32)
-
-DEF_HELPER_1(iwmmxt_addcb, i64, i64)
-DEF_HELPER_1(iwmmxt_addcw, i64, i64)
-DEF_HELPER_1(iwmmxt_addcl, i64, i64)
-
-DEF_HELPER_1(iwmmxt_msbb, i32, i64)
-DEF_HELPER_1(iwmmxt_msbw, i32, i64)
-DEF_HELPER_1(iwmmxt_msbl, i32, i64)
-
-DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32)
-
-DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64)
-
-DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32)
-DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32)
-DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32)
-
-DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
-
-DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-
-DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr)
-
-DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+#include "tcg/helper.h"
#ifdef TARGET_AARCH64
#include "tcg/helper-a64.h"
diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c
new file mode 100644
index 0000000..ff13726
--- /dev/null
+++ b/target/arm/hvf-stub.c
@@ -0,0 +1,20 @@
+/*
+ * QEMU Hypervisor.framework (HVF) stubs for ARM
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hvf_arm.h"
+
+uint32_t hvf_arm_get_default_ipa_bit_size(void)
+{
+ g_assert_not_reached();
+}
+
+uint32_t hvf_arm_get_max_ipa_bit_size(void)
+{
+ g_assert_not_reached();
+}
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index ef9bc42..7b6d291 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -11,26 +11,30 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qemu/log.h"
-#include "sysemu/runstate.h"
-#include "sysemu/hvf.h"
-#include "sysemu/hvf_int.h"
-#include "sysemu/hw_accel.h"
+#include "system/runstate.h"
+#include "system/hvf.h"
+#include "system/hvf_int.h"
+#include "system/hw_accel.h"
#include "hvf_arm.h"
#include "cpregs.h"
+#include "cpu-sysregs.h"
#include <mach/mach_time.h>
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
+#include "hw/boards.h"
#include "hw/irq.h"
#include "qemu/main-loop.h"
-#include "sysemu/cpus.h"
+#include "system/cpus.h"
#include "arm-powerctl.h"
#include "target/arm/cpu.h"
#include "target/arm/internals.h"
#include "target/arm/multiprocessing.h"
#include "target/arm/gtimer.h"
-#include "trace/trace-target_arm_hvf.h"
+#include "trace.h"
#include "migration/vmstate.h"
#include "gdbstub/enums.h"
@@ -183,6 +187,7 @@ void hvf_arm_init_debug(void)
#define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4)
#define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4)
#define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1)
+#define SYSREG_CNTP_CTL_EL0 SYSREG(3, 3, 14, 2, 1)
#define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0)
#define SYSREG_PMUSERENR_EL0 SYSREG(3, 3, 9, 14, 0)
#define SYSREG_PMCNTENSET_EL0 SYSREG(3, 3, 9, 12, 1)
@@ -297,6 +302,8 @@ void hvf_arm_init_debug(void)
static void hvf_wfi(CPUState *cpu);
+static uint32_t chosen_ipa_bit_size;
+
typedef struct HVFVTimer {
/* Vtimer value during migration and paused state */
uint64_t vtimer_val;
@@ -839,6 +846,19 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt)
return val;
}
+static void clamp_id_aa64mmfr0_parange_to_ipa_size(ARMISARegisters *isar)
+{
+ uint32_t ipa_size = chosen_ipa_bit_size ?
+ chosen_ipa_bit_size : hvf_arm_get_max_ipa_bit_size();
+ uint64_t id_aa64mmfr0;
+
+ /* Clamp down the PARange to the IPA size the kernel supports. */
+ uint8_t index = round_down_to_parange_index(ipa_size);
+ id_aa64mmfr0 = GET_IDREG(isar, ID_AA64MMFR0);
+ id_aa64mmfr0 = (id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index;
+ SET_IDREG(isar, ID_AA64MMFR0, id_aa64mmfr0);
+}
+
static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
ARMISARegisters host_isar = {};
@@ -846,16 +866,16 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
int reg;
uint64_t *val;
} regs[] = {
- { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 },
- { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 },
- { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 },
- { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 },
- { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 },
- { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 },
+ { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.idregs[ID_AA64PFR0_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.idregs[ID_AA64PFR1_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.idregs[ID_AA64DFR0_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.idregs[ID_AA64DFR1_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.idregs[ID_AA64ISAR0_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.idregs[ID_AA64ISAR1_EL1_IDX] },
/* Add ID_AA64ISAR2_EL1 here when HVF supports it */
- { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 },
- { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 },
- { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 },
+ { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.idregs[ID_AA64MMFR0_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.idregs[ID_AA64MMFR1_EL1_IDX] },
+ { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.idregs[ID_AA64MMFR2_EL1_IDX] },
/* Add ID_AA64MMFR3_EL1 here when HVF supports it */
};
hv_vcpu_t fd;
@@ -863,7 +883,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
hv_vcpu_exit_t *exit;
int i;
- ahcf->dtb_compatible = "arm,arm-v8";
+ ahcf->dtb_compatible = "arm,armv8";
ahcf->features = (1ULL << ARM_FEATURE_V8) |
(1ULL << ARM_FEATURE_NEON) |
(1ULL << ARM_FEATURE_AARCH64) |
@@ -882,6 +902,21 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr);
r |= hv_vcpu_destroy(fd);
+ clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar);
+
+ /*
+ * Disable SME, which is not properly handled by QEMU hvf yet.
+ * To allow this through we would need to:
+ * - make sure that the SME state is correctly handled in the
+ * get_registers/put_registers functions
+ * - get the SME-specific CPU properties to work with accelerators
+ * other than TCG
+ * - fix any assumptions we made that SME implies SVE (since
+ * on the M4 there is SME but not SVE)
+ */
+ SET_IDREG(&host_isar, ID_AA64PFR1,
+ GET_IDREG(&host_isar, ID_AA64PFR1) & ~R_ID_AA64PFR1_SME_MASK);
+
ahcf->isar = host_isar;
/*
@@ -897,13 +932,37 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
ahcf->reset_sctlr |= 0x00800000;
/* Make sure we don't advertise AArch32 support for EL0/EL1 */
- if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) {
+ if ((GET_IDREG(&host_isar, ID_AA64PFR0) & 0xff) != 0x11) {
return false;
}
return r == HV_SUCCESS;
}
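
For readers decoding the 0x11 check above: bits [3:0] of ID_AA64PFR0_EL1 are the EL0 field and bits [7:4] the EL1 field, and the value 1 in each means that exception level supports AArch64 only. A hypothetical, more explicit spelling of the same test using QEMU's register field macros might look like this (sketch, not part of the patch):

    /* Sketch: equivalent to "(pfr0 & 0xff) != 0x11" above */
    uint64_t pfr0 = GET_IDREG(&host_isar, ID_AA64PFR0);
    if (FIELD_EX64(pfr0, ID_AA64PFR0, EL0) != 1 ||   /* EL0 not AArch64-only */
        FIELD_EX64(pfr0, ID_AA64PFR0, EL1) != 1) {   /* EL1 not AArch64-only */
        return false;
    }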
+uint32_t hvf_arm_get_default_ipa_bit_size(void)
+{
+ uint32_t default_ipa_size;
+ hv_return_t ret = hv_vm_config_get_default_ipa_size(&default_ipa_size);
+ assert_hvf_ok(ret);
+
+ return default_ipa_size;
+}
+
+uint32_t hvf_arm_get_max_ipa_bit_size(void)
+{
+ uint32_t max_ipa_size;
+ hv_return_t ret = hv_vm_config_get_max_ipa_size(&max_ipa_size);
+ assert_hvf_ok(ret);
+
+ /*
+ * We clamp any IPA size we want to back the VM with to a valid PARange
+ * value so the guest doesn't try to map memory outside of the valid range.
+ * This just rounds the passed-in IPA bit size down to the largest valid
+ * PARange value less than or equal to it.
+ */
+ return round_down_to_parange_bit_size(max_ipa_size);
+}
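
As a concrete illustration of that rounding (assuming the standard PARange encodings of 32/36/40/42/44/48/52 bits from the Arm ARM; illustrative values only):

    round_down_to_parange_bit_size(42);  /* -> 42: exact match, encoding 3 */
    round_down_to_parange_bit_size(41);  /* -> 40: rounded down, encoding 2 */
    round_down_to_parange_index(48);     /* -> 5: value for the PARANGE field */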
+
void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu)
{
if (!arm_host_cpu_features.dtb_compatible) {
@@ -929,6 +988,25 @@ void hvf_arch_vcpu_destroy(CPUState *cpu)
{
}
+hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range)
+{
+ hv_return_t ret;
+ hv_vm_config_t config = hv_vm_config_create();
+
+ ret = hv_vm_config_set_ipa_size(config, pa_range);
+ if (ret != HV_SUCCESS) {
+ goto cleanup;
+ }
+ chosen_ipa_bit_size = pa_range;
+
+ ret = hv_vm_create(config);
+
+cleanup:
+ os_release(config);
+
+ return ret;
+}
+
int hvf_arch_init_vcpu(CPUState *cpu)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -992,7 +1070,12 @@ int hvf_arch_init_vcpu(CPUState *cpu)
/* We're limited to underlying hardware caps, override internal versions */
ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1,
- &arm_cpu->isar.id_aa64mmfr0);
+ &arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]);
+ assert_hvf_ok(ret);
+
+ clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar);
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1,
+ arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]);
assert_hvf_ok(ret);
return 0;
@@ -1005,13 +1088,13 @@ void hvf_kick_vcpu_thread(CPUState *cpu)
}
static void hvf_raise_exception(CPUState *cpu, uint32_t excp,
- uint32_t syndrome)
+ uint32_t syndrome, int target_el)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
CPUARMState *env = &arm_cpu->env;
cpu->exception_index = excp;
- env->exception.target_el = 1;
+ env->exception.target_el = target_el;
env->exception.syndrome = syndrome;
arm_cpu_do_interrupt(cpu);
@@ -1199,57 +1282,61 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
return false;
}
-static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
CPUARMState *env = &arm_cpu->env;
- uint64_t val = 0;
+
+ if (arm_feature(env, ARM_FEATURE_PMU)) {
+ switch (reg) {
+ case SYSREG_PMCR_EL0:
+ *val = env->cp15.c9_pmcr;
+ return 0;
+ case SYSREG_PMCCNTR_EL0:
+ pmu_op_start(env);
+ *val = env->cp15.c15_ccnt;
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMCNTENCLR_EL0:
+ *val = env->cp15.c9_pmcnten;
+ return 0;
+ case SYSREG_PMOVSCLR_EL0:
+ *val = env->cp15.c9_pmovsr;
+ return 0;
+ case SYSREG_PMSELR_EL0:
+ *val = env->cp15.c9_pmselr;
+ return 0;
+ case SYSREG_PMINTENCLR_EL1:
+ *val = env->cp15.c9_pminten;
+ return 0;
+ case SYSREG_PMCCFILTR_EL0:
+ *val = env->cp15.pmccfiltr_el0;
+ return 0;
+ case SYSREG_PMCNTENSET_EL0:
+ *val = env->cp15.c9_pmcnten;
+ return 0;
+ case SYSREG_PMUSERENR_EL0:
+ *val = env->cp15.c9_pmuserenr;
+ return 0;
+ case SYSREG_PMCEID0_EL0:
+ case SYSREG_PMCEID1_EL0:
+ /* We can't really count anything yet; declare all events invalid */
+ *val = 0;
+ return 0;
+ }
+ }
switch (reg) {
case SYSREG_CNTPCT_EL0:
- val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
+ *val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
gt_cntfrq_period_ns(arm_cpu);
- break;
- case SYSREG_PMCR_EL0:
- val = env->cp15.c9_pmcr;
- break;
- case SYSREG_PMCCNTR_EL0:
- pmu_op_start(env);
- val = env->cp15.c15_ccnt;
- pmu_op_finish(env);
- break;
- case SYSREG_PMCNTENCLR_EL0:
- val = env->cp15.c9_pmcnten;
- break;
- case SYSREG_PMOVSCLR_EL0:
- val = env->cp15.c9_pmovsr;
- break;
- case SYSREG_PMSELR_EL0:
- val = env->cp15.c9_pmselr;
- break;
- case SYSREG_PMINTENCLR_EL1:
- val = env->cp15.c9_pminten;
- break;
- case SYSREG_PMCCFILTR_EL0:
- val = env->cp15.pmccfiltr_el0;
- break;
- case SYSREG_PMCNTENSET_EL0:
- val = env->cp15.c9_pmcnten;
- break;
- case SYSREG_PMUSERENR_EL0:
- val = env->cp15.c9_pmuserenr;
- break;
- case SYSREG_PMCEID0_EL0:
- case SYSREG_PMCEID1_EL0:
- /* We can't really count anything yet, declare all events invalid */
- val = 0;
- break;
+ return 0;
case SYSREG_OSLSR_EL1:
- val = env->cp15.oslsr_el1;
- break;
+ *val = env->cp15.oslsr_el1;
+ return 0;
case SYSREG_OSDLR_EL1:
/* Dummy register */
- break;
+ return 0;
case SYSREG_ICC_AP0R0_EL1:
case SYSREG_ICC_AP0R1_EL1:
case SYSREG_ICC_AP0R2_EL1:
@@ -1276,8 +1363,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
case SYSREG_ICC_SRE_EL1:
case SYSREG_ICC_CTLR_EL1:
/* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
- if (!hvf_sysreg_read_cp(cpu, reg, &val)) {
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ if (hvf_sysreg_read_cp(cpu, reg, val)) {
+ return 0;
}
break;
case SYSREG_DBGBVR0_EL1:
@@ -1296,8 +1383,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
case SYSREG_DBGBVR13_EL1:
case SYSREG_DBGBVR14_EL1:
case SYSREG_DBGBVR15_EL1:
- val = env->cp15.dbgbvr[SYSREG_CRM(reg)];
- break;
+ *val = env->cp15.dbgbvr[SYSREG_CRM(reg)];
+ return 0;
case SYSREG_DBGBCR0_EL1:
case SYSREG_DBGBCR1_EL1:
case SYSREG_DBGBCR2_EL1:
@@ -1314,8 +1401,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
case SYSREG_DBGBCR13_EL1:
case SYSREG_DBGBCR14_EL1:
case SYSREG_DBGBCR15_EL1:
- val = env->cp15.dbgbcr[SYSREG_CRM(reg)];
- break;
+ *val = env->cp15.dbgbcr[SYSREG_CRM(reg)];
+ return 0;
case SYSREG_DBGWVR0_EL1:
case SYSREG_DBGWVR1_EL1:
case SYSREG_DBGWVR2_EL1:
@@ -1332,8 +1419,8 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
case SYSREG_DBGWVR13_EL1:
case SYSREG_DBGWVR14_EL1:
case SYSREG_DBGWVR15_EL1:
- val = env->cp15.dbgwvr[SYSREG_CRM(reg)];
- break;
+ *val = env->cp15.dbgwvr[SYSREG_CRM(reg)];
+ return 0;
case SYSREG_DBGWCR0_EL1:
case SYSREG_DBGWCR1_EL1:
case SYSREG_DBGWCR2_EL1:
@@ -1350,35 +1437,25 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
case SYSREG_DBGWCR13_EL1:
case SYSREG_DBGWCR14_EL1:
case SYSREG_DBGWCR15_EL1:
- val = env->cp15.dbgwcr[SYSREG_CRM(reg)];
- break;
+ *val = env->cp15.dbgwcr[SYSREG_CRM(reg)];
+ return 0;
default:
if (is_id_sysreg(reg)) {
/* ID system registers read as RES0 */
- val = 0;
- break;
+ *val = 0;
+ return 0;
}
- cpu_synchronize_state(cpu);
- trace_hvf_unhandled_sysreg_read(env->pc, reg,
- SYSREG_OP0(reg),
- SYSREG_OP1(reg),
- SYSREG_CRN(reg),
- SYSREG_CRM(reg),
- SYSREG_OP2(reg));
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
- return 1;
}
- trace_hvf_sysreg_read(reg,
- SYSREG_OP0(reg),
- SYSREG_OP1(reg),
- SYSREG_CRN(reg),
- SYSREG_CRM(reg),
- SYSREG_OP2(reg),
- val);
- hvf_set_reg(cpu, rt, val);
-
- return 0;
+ cpu_synchronize_state(cpu);
+ trace_hvf_unhandled_sysreg_read(env->pc, reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg));
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1);
+ return 1;
}
static void pmu_update_irq(CPUARMState *env)
@@ -1497,70 +1574,82 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
SYSREG_OP2(reg),
val);
- switch (reg) {
- case SYSREG_PMCCNTR_EL0:
- pmu_op_start(env);
- env->cp15.c15_ccnt = val;
- pmu_op_finish(env);
- break;
- case SYSREG_PMCR_EL0:
- pmu_op_start(env);
-
- if (val & PMCRC) {
- /* The counter has been reset */
- env->cp15.c15_ccnt = 0;
- }
+ if (arm_feature(env, ARM_FEATURE_PMU)) {
+ switch (reg) {
+ case SYSREG_PMCCNTR_EL0:
+ pmu_op_start(env);
+ env->cp15.c15_ccnt = val;
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMCR_EL0:
+ pmu_op_start(env);
+
+ if (val & PMCRC) {
+ /* The counter has been reset */
+ env->cp15.c15_ccnt = 0;
+ }
- if (val & PMCRP) {
- unsigned int i;
- for (i = 0; i < pmu_num_counters(env); i++) {
- env->cp15.c14_pmevcntr[i] = 0;
+ if (val & PMCRP) {
+ unsigned int i;
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ env->cp15.c14_pmevcntr[i] = 0;
+ }
}
- }
- env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
- env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK);
+ env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
+ env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK);
+
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMUSERENR_EL0:
+ env->cp15.c9_pmuserenr = val & 0xf;
+ return 0;
+ case SYSREG_PMCNTENSET_EL0:
+ env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env));
+ return 0;
+ case SYSREG_PMCNTENCLR_EL0:
+ env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env));
+ return 0;
+ case SYSREG_PMINTENCLR_EL1:
+ pmu_op_start(env);
+ env->cp15.c9_pminten |= val;
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMOVSCLR_EL0:
+ pmu_op_start(env);
+ env->cp15.c9_pmovsr &= ~val;
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMSWINC_EL0:
+ pmu_op_start(env);
+ pmswinc_write(env, val);
+ pmu_op_finish(env);
+ return 0;
+ case SYSREG_PMSELR_EL0:
+ env->cp15.c9_pmselr = val & 0x1f;
+ return 0;
+ case SYSREG_PMCCFILTR_EL0:
+ pmu_op_start(env);
+ env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0;
+ pmu_op_finish(env);
+ return 0;
+ }
+ }
- pmu_op_finish(env);
- break;
- case SYSREG_PMUSERENR_EL0:
- env->cp15.c9_pmuserenr = val & 0xf;
- break;
- case SYSREG_PMCNTENSET_EL0:
- env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env));
- break;
- case SYSREG_PMCNTENCLR_EL0:
- env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env));
- break;
- case SYSREG_PMINTENCLR_EL1:
- pmu_op_start(env);
- env->cp15.c9_pminten |= val;
- pmu_op_finish(env);
- break;
- case SYSREG_PMOVSCLR_EL0:
- pmu_op_start(env);
- env->cp15.c9_pmovsr &= ~val;
- pmu_op_finish(env);
- break;
- case SYSREG_PMSWINC_EL0:
- pmu_op_start(env);
- pmswinc_write(env, val);
- pmu_op_finish(env);
- break;
- case SYSREG_PMSELR_EL0:
- env->cp15.c9_pmselr = val & 0x1f;
- break;
- case SYSREG_PMCCFILTR_EL0:
- pmu_op_start(env);
- env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0;
- pmu_op_finish(env);
- break;
+ switch (reg) {
case SYSREG_OSLAR_EL1:
env->cp15.oslsr_el1 = val & 1;
- break;
+ return 0;
+ case SYSREG_CNTP_CTL_EL0:
+ /*
+ * Guests should not rely on the physical counter, but macOS emits
+ * disable writes to it. Let it do so, but ignore the requests.
+ */
+ qemu_log_mask(LOG_UNIMP, "Unsupported write to CNTP_CTL_EL0\n");
+ return 0;
case SYSREG_OSDLR_EL1:
/* Dummy register */
- break;
+ return 0;
case SYSREG_ICC_AP0R0_EL1:
case SYSREG_ICC_AP0R1_EL1:
case SYSREG_ICC_AP0R2_EL1:
@@ -1587,13 +1676,13 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
case SYSREG_ICC_SGI1R_EL1:
case SYSREG_ICC_SRE_EL1:
/* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
- if (!hvf_sysreg_write_cp(cpu, reg, val)) {
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ if (hvf_sysreg_write_cp(cpu, reg, val)) {
+ return 0;
}
break;
case SYSREG_MDSCR_EL1:
env->cp15.mdscr_el1 = val;
- break;
+ return 0;
case SYSREG_DBGBVR0_EL1:
case SYSREG_DBGBVR1_EL1:
case SYSREG_DBGBVR2_EL1:
@@ -1611,7 +1700,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
case SYSREG_DBGBVR14_EL1:
case SYSREG_DBGBVR15_EL1:
env->cp15.dbgbvr[SYSREG_CRM(reg)] = val;
- break;
+ return 0;
case SYSREG_DBGBCR0_EL1:
case SYSREG_DBGBCR1_EL1:
case SYSREG_DBGBCR2_EL1:
@@ -1629,7 +1718,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
case SYSREG_DBGBCR14_EL1:
case SYSREG_DBGBCR15_EL1:
env->cp15.dbgbcr[SYSREG_CRM(reg)] = val;
- break;
+ return 0;
case SYSREG_DBGWVR0_EL1:
case SYSREG_DBGWVR1_EL1:
case SYSREG_DBGWVR2_EL1:
@@ -1647,7 +1736,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
case SYSREG_DBGWVR14_EL1:
case SYSREG_DBGWVR15_EL1:
env->cp15.dbgwvr[SYSREG_CRM(reg)] = val;
- break;
+ return 0;
case SYSREG_DBGWCR0_EL1:
case SYSREG_DBGWCR1_EL1:
case SYSREG_DBGWCR2_EL1:
@@ -1665,20 +1754,18 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
case SYSREG_DBGWCR14_EL1:
case SYSREG_DBGWCR15_EL1:
env->cp15.dbgwcr[SYSREG_CRM(reg)] = val;
- break;
- default:
- cpu_synchronize_state(cpu);
- trace_hvf_unhandled_sysreg_write(env->pc, reg,
- SYSREG_OP0(reg),
- SYSREG_OP1(reg),
- SYSREG_CRN(reg),
- SYSREG_CRM(reg),
- SYSREG_OP2(reg));
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
- return 1;
+ return 0;
}
- return 0;
+ cpu_synchronize_state(cpu);
+ trace_hvf_unhandled_sysreg_write(env->pc, reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg));
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1);
+ return 1;
}
static int hvf_inject_interrupts(CPUState *cpu)
@@ -1828,7 +1915,17 @@ int hvf_vcpu_exec(CPUState *cpu)
flush_cpu_state(cpu);
bql_unlock();
- assert_hvf_ok(hv_vcpu_run(cpu->accel->fd));
+ r = hv_vcpu_run(cpu->accel->fd);
+ bql_lock();
+ switch (r) {
+ case HV_SUCCESS:
+ break;
+ case HV_ILLEGAL_GUEST_STATE:
+ trace_hvf_illegal_guest_state();
+ /* fall through */
+ default:
+ g_assert_not_reached();
+ }
/* handle VMEXIT */
uint64_t exit_reason = hvf_exit->reason;
@@ -1836,7 +1933,6 @@ int hvf_vcpu_exec(CPUState *cpu)
uint32_t ec = syn_get_ec(syndrome);
ret = 0;
- bql_lock();
switch (exit_reason) {
case HV_EXIT_REASON_EXCEPTION:
/* This is the main one, handle below. */
@@ -1871,7 +1967,7 @@ int hvf_vcpu_exec(CPUState *cpu)
if (!hvf_find_sw_breakpoint(cpu, env->pc)) {
/* Re-inject into the guest */
ret = 0;
- hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0));
+ hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0), 1);
}
break;
}
@@ -1902,6 +1998,7 @@ int hvf_vcpu_exec(CPUState *cpu)
bool isv = syndrome & ARM_EL_ISV;
bool iswrite = (syndrome >> 6) & 1;
bool s1ptw = (syndrome >> 7) & 1;
+ bool sse = (syndrome >> 21) & 1;
uint32_t sas = (syndrome >> 22) & 3;
uint32_t len = 1 << sas;
uint32_t srt = (syndrome >> 16) & 0x1f;
@@ -1929,6 +2026,9 @@ int hvf_vcpu_exec(CPUState *cpu)
address_space_read(&address_space_memory,
hvf_exit->exception.physical_address,
MEMTXATTRS_UNSPECIFIED, &val, len);
+ if (sse) {
+ val = sextract64(val, 0, len * 8);
+ }
hvf_set_reg(cpu, srt, val);
}
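
The new sse path sign-extends narrow MMIO reads when the syndrome's sign-extend bit is set; sextract64() comes from qemu/bitops.h. For example, a one-byte read that returns 0x80 is widened as follows (illustrative values):

    uint64_t val = 0x80;               /* 1-byte MMIO read result, len = 1 */
    val = sextract64(val, 0, 1 * 8);   /* -> 0xffffffffffffff80 */
    hvf_set_reg(cpu, srt, val);        /* destination register sees the
                                        * sign-extended value */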
@@ -1943,7 +2043,17 @@ int hvf_vcpu_exec(CPUState *cpu)
int sysreg_ret = 0;
if (isread) {
- sysreg_ret = hvf_sysreg_read(cpu, reg, rt);
+ sysreg_ret = hvf_sysreg_read(cpu, reg, &val);
+ if (!sysreg_ret) {
+ trace_hvf_sysreg_read(reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg),
+ val);
+ hvf_set_reg(cpu, rt, val);
+ }
} else {
val = hvf_get_reg(cpu, rt);
sysreg_ret = hvf_sysreg_write(cpu, reg, val);
@@ -1962,13 +2072,13 @@ int hvf_vcpu_exec(CPUState *cpu)
cpu_synchronize_state(cpu);
if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) {
if (!hvf_handle_psci_call(cpu)) {
- trace_hvf_unknown_hvc(env->xregs[0]);
+ trace_hvf_unknown_hvc(env->pc, env->xregs[0]);
/* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
env->xregs[0] = -1;
}
} else {
- trace_hvf_unknown_hvc(env->xregs[0]);
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ trace_hvf_unknown_hvc(env->pc, env->xregs[0]);
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1);
}
break;
case EC_AA64_SMC:
@@ -1983,7 +2093,7 @@ int hvf_vcpu_exec(CPUState *cpu)
}
} else {
trace_hvf_unknown_smc(env->xregs[0]);
- hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1);
}
break;
default:
@@ -2182,28 +2292,23 @@ static inline bool hvf_arm_hw_debug_active(CPUState *cpu)
return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}
-static void hvf_arch_set_traps(void)
+static void hvf_arch_set_traps(CPUState *cpu)
{
- CPUState *cpu;
bool should_enable_traps = false;
hv_return_t r = HV_SUCCESS;
/* If guest debugging is enabled for this vCPU, enable exiting
* the guest on debug exceptions and debug register accesses */
- CPU_FOREACH(cpu) {
- should_enable_traps |= cpu->accel->guest_debug_enabled;
- }
- CPU_FOREACH(cpu) {
- /* Set whether debug exceptions exit the guest */
- r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd,
- should_enable_traps);
- assert_hvf_ok(r);
+ should_enable_traps |= cpu->accel->guest_debug_enabled;
+ /* Set whether debug exceptions exit the guest */
+ r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd,
+ should_enable_traps);
+ assert_hvf_ok(r);
- /* Set whether accesses to debug registers exit the guest */
- r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd,
- should_enable_traps);
- assert_hvf_ok(r);
- }
+ /* Set whether accesses to debug registers exit the guest */
+ r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd,
+ should_enable_traps);
+ assert_hvf_ok(r);
}
void hvf_arch_update_guest_debug(CPUState *cpu)
@@ -2244,7 +2349,7 @@ void hvf_arch_update_guest_debug(CPUState *cpu)
deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0);
}
- hvf_arch_set_traps();
+ hvf_arch_set_traps(cpu);
}
bool hvf_arch_supports_guest_debug(void)
diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
index 4fbbe4b..b49746f 100644
--- a/target/arm/hvf/trace-events
+++ b/target/arm/hvf/trace-events
@@ -5,9 +5,10 @@ hvf_inject_irq(void) "injecting IRQ"
hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]"
hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64
hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")"
-hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
+hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64
hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
-hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x"
+hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpuid=0x%x"
hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]"
hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]"
+hvf_illegal_guest_state(void) "HV_ILLEGAL_GUEST_STATE"
diff --git a/target/arm/hvf/trace.h b/target/arm/hvf/trace.h
new file mode 100644
index 0000000..04a19c1
--- /dev/null
+++ b/target/arm/hvf/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-target_arm_hvf.h"
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
index e848c1d..ea82f26 100644
--- a/target/arm/hvf_arm.h
+++ b/target/arm/hvf_arm.h
@@ -11,7 +11,7 @@
#ifndef QEMU_HVF_ARM_H
#define QEMU_HVF_ARM_H
-#include "cpu.h"
+#include "target/arm/cpu-qom.h"
/**
* hvf_arm_init_debug() - initialize guest debug capabilities
@@ -22,4 +22,7 @@ void hvf_arm_init_debug(void);
void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu);
+uint32_t hvf_arm_get_default_ipa_bit_size(void);
+uint32_t hvf_arm_get_max_ipa_bit_size(void);
+
#endif
diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c
index f120d55..bb59697 100644
--- a/target/arm/hyp_gdbstub.c
+++ b/target/arm/hyp_gdbstub.c
@@ -54,7 +54,7 @@ GArray *hw_breakpoints, *hw_watchpoints;
* here so future PC comparisons will work properly.
*/
-int insert_hw_breakpoint(target_ulong addr)
+int insert_hw_breakpoint(vaddr addr)
{
HWBreakpoint brk = {
.bcr = 0x1, /* BCR E=1, enable */
@@ -80,7 +80,7 @@ int insert_hw_breakpoint(target_ulong addr)
* Delete a breakpoint and shuffle any above down
*/
-int delete_hw_breakpoint(target_ulong pc)
+int delete_hw_breakpoint(vaddr pc)
{
int i;
for (i = 0; i < hw_breakpoints->len; i++) {
@@ -125,7 +125,7 @@ int delete_hw_breakpoint(target_ulong pc)
* need to ensure you mask the address as required and set BAS=0xff
*/
-int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type)
+int insert_hw_watchpoint(vaddr addr, vaddr len, int type)
{
HWWatchpoint wp = {
.wcr = R_DBGWCR_E_MASK, /* E=1, enable */
@@ -158,7 +158,6 @@ int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type)
break;
default:
g_assert_not_reached();
- break;
}
if (len <= 8) {
/* we align the address and set the bits in BAS */
@@ -183,7 +182,7 @@ int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type)
return 0;
}
-bool check_watchpoint_in_range(int i, target_ulong addr)
+bool check_watchpoint_in_range(int i, vaddr addr)
{
HWWatchpoint *wp = get_hw_wp(i);
uint64_t addr_top, addr_bottom = wp->wvr;
@@ -215,7 +214,7 @@ bool check_watchpoint_in_range(int i, target_ulong addr)
* Delete a breakpoint and shuffle any above down
*/
-int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type)
+int delete_hw_watchpoint(vaddr addr, vaddr len, int type)
{
int i;
for (i = 0; i < cur_hw_wps; i++) {
@@ -227,7 +226,7 @@ int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type)
return -ENOENT;
}
-bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
+bool find_hw_breakpoint(CPUState *cpu, vaddr pc)
{
int i;
@@ -240,7 +239,7 @@ bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
return false;
}
-CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
+CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr)
{
int i;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 11b5da2..21a8d67 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -25,9 +25,13 @@
#ifndef TARGET_ARM_INTERNALS_H
#define TARGET_ARM_INTERNALS_H
+#include "exec/hwaddr.h"
+#include "exec/vaddr.h"
#include "exec/breakpoint.h"
+#include "accel/tcg/tb-cpu-state.h"
#include "hw/registerfields.h"
#include "tcg/tcg-gvec-desc.h"
+#include "system/memory.h"
#include "syndrome.h"
#include "cpu-features.h"
@@ -350,20 +354,30 @@ static inline int r14_bank_number(int mode)
}
void arm_cpu_register(const ARMCPUInfo *info);
-void aarch64_cpu_register(const ARMCPUInfo *info);
void register_cp_regs_for_features(ARMCPU *cpu);
void init_cpreg_list(ARMCPU *cpu);
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
void arm_translate_init(void);
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
+
+void arm_cpu_register_gdb_commands(ARMCPU *cpu);
+void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
+ GPtrArray *, GPtrArray *);
void arm_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data);
#ifdef CONFIG_TCG
+TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs);
void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
+
+/* Our implementation of TCGCPUOps::cpu_exec_halt */
+bool arm_cpu_exec_halt(CPUState *cs);
+int arm_cpu_mmu_index(CPUState *cs, bool ifetch);
#endif /* CONFIG_TCG */
typedef enum ARMFPRounding {
@@ -383,6 +397,141 @@ static inline FloatRoundMode arm_rmode_to_sf(ARMFPRounding rmode)
return arm_rmode_to_sf_map[rmode];
}
+/* Return the effective value of SCR_EL3.RW */
+static inline bool arm_scr_rw_eff(CPUARMState *env)
+{
+ /*
+ * SCR_EL3.RW has an effective value of 1 if:
+ * - we are NS and EL2 is implemented but doesn't support AArch32
+ * - we are S and EL2 is enabled (in which case it must be AArch64)
+ */
+ ARMCPU *cpu = env_archcpu(env);
+
+ if (env->cp15.scr_el3 & SCR_RW) {
+ return true;
+ }
+ if (env->cp15.scr_el3 & SCR_NS) {
+ return arm_feature(env, ARM_FEATURE_EL2) &&
+ !cpu_isar_feature(aa64_aa32_el2, cpu);
+ } else {
+ return env->cp15.scr_el3 & SCR_EEL2;
+ }
+}
+
+/* Return true if the specified exception level is running in AArch64 state. */
+static inline bool arm_el_is_aa64(CPUARMState *env, int el)
+{
+ /*
+ * This isn't valid for EL0 (if we're in EL0, is_a64() is what you want,
+ * and if we're not in EL0 then the state of EL0 isn't well defined).
+ */
+ assert(el >= 1 && el <= 3);
+ bool aa64 = arm_feature(env, ARM_FEATURE_AARCH64);
+
+ /*
+ * The highest exception level is always at the maximum supported
+ * register width, and then lower levels have a register width controlled
+ * by bits in the SCR or HCR registers.
+ */
+ if (el == 3) {
+ return aa64;
+ }
+
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ aa64 = aa64 && arm_scr_rw_eff(env);
+ }
+
+ if (el == 2) {
+ return aa64;
+ }
+
+ if (arm_is_el2_enabled(env)) {
+ aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
+ }
+
+ return aa64;
+}
+
+/*
+ * Return the current Exception Level (as per ARMv8; note that this differs
+ * from the ARMv7 Privilege Level).
+ */
+static inline int arm_current_el(CPUARMState *env)
+{
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[env->v7m.secure] & 1);
+ }
+
+ if (is_a64(env)) {
+ return extract32(env->pstate, 2, 2);
+ }
+
+ switch (env->uncached_cpsr & 0x1f) {
+ case ARM_CPU_MODE_USR:
+ return 0;
+ case ARM_CPU_MODE_HYP:
+ return 2;
+ case ARM_CPU_MODE_MON:
+ return 3;
+ default:
+ if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+ /* If EL3 is 32-bit then all secure privileged modes run in EL3 */
+ return 3;
+ }
+
+ return 1;
+ }
+}
+
+static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
+ bool sctlr_b)
+{
+#ifdef CONFIG_USER_ONLY
+ /*
+ * In system mode, BE32 is modelled in line with the
+ * architecture (as word-invariant big-endianness), where loads
+ * and stores are done little endian but from addresses which
+ * are adjusted by XORing with the appropriate constant. So the
+ * endianness to use for the raw data access is not affected by
+ * SCTLR.B.
+ * In user mode, however, we model BE32 as byte-invariant
+ * big-endianness (because user-only code cannot tell the
+ * difference), and so we need to use a data access endianness
+ * that depends on SCTLR.B.
+ */
+ if (sctlr_b) {
+ return true;
+ }
+#endif
+ /* In 32-bit mode, endianness is determined by the CPSR E bit */
+ return env->uncached_cpsr & CPSR_E;
+}
+
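
For reference, the word-invariant address adjustment mentioned in the comment above amounts to XORing the low address bits by the access size; a minimal sketch (not the actual QEMU implementation, which folds this into the TCG memory operations):

    static inline vaddr be32_adjust_addr(vaddr addr, unsigned size)
    {
        switch (size) {
        case 1: return addr ^ 3;  /* bytes swap within the 32-bit word */
        case 2: return addr ^ 2;  /* halfwords swap pairwise */
        default: return addr;     /* word and larger are unchanged */
        }
    }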
+static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
+{
+ return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
+}
+
+/* Return true if the processor is in big-endian mode. */
+static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
+{
+ if (!is_a64(env)) {
+ return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
+ } else {
+ int cur_el = arm_current_el(env);
+ uint64_t sctlr = arm_sctlr(env, cur_el);
+ return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
+ }
+}
+
+#ifdef CONFIG_USER_ONLY
+static inline bool arm_cpu_bswap_data(CPUARMState *env)
+{
+ return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
+}
+#endif
+
static inline void aarch64_save_sp(CPUARMState *env, int el)
{
if (env->pstate & PSTATE_SP) {
@@ -429,6 +578,25 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm)
*/
unsigned int arm_pamax(ARMCPU *cpu);
+/*
+ * round_down_to_parange_index
+ * @bit_size: physical address size in bits
+ *
+ * Rounds the supplied bit_size down to the nearest supported ARM physical
+ * address range size and returns the index of that size. The index is
+ * intended to be used to set ID_AA64MMFR0_EL1's PARANGE bits.
+ */
+uint8_t round_down_to_parange_index(uint8_t bit_size);
+
+/*
+ * round_down_to_parange_bit_size
+ * @bit_size: physical address size in bits
+ *
+ * Rounds the supplied bit_size down to the nearest supported ARM physical
+ * address range size and returns that size in bits.
+ */
+uint8_t round_down_to_parange_bit_size(uint8_t bit_size);
+
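
A minimal sketch of how these two helpers could be implemented, assuming the PARange encodings from the Arm ARM (the in-tree definitions may differ in detail):

    /* PARange encodings 0..6 correspond to these physical address sizes */
    static const uint8_t pamax_map[] = { 32, 36, 40, 42, 44, 48, 52 };

    uint8_t round_down_to_parange_index(uint8_t bit_size)
    {
        for (int i = ARRAY_SIZE(pamax_map) - 1; i >= 0; i--) {
            if (pamax_map[i] <= bit_size) {
                return i;
            }
        }
        g_assert_not_reached();
    }

    uint8_t round_down_to_parange_bit_size(uint8_t bit_size)
    {
        return pamax_map[round_down_to_parange_index(bit_size)];
    }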
/* Return true if extended addresses are enabled.
* This is always the case if our translation regime is 64 bit,
* but depends on TTBCR.EAE for 32 bit.
@@ -482,16 +650,12 @@ static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
{
return false;
}
-static inline void arm_handle_psci_call(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
#else
/* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */
bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
+#endif
/* Actually handle a PSCI call */
void arm_handle_psci_call(ARMCPU *cpu);
-#endif
/**
* arm_clear_exclusive: clear the exclusive monitor
@@ -561,8 +725,8 @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
struct ARMMMUFaultInfo {
ARMFaultType type;
ARMGPCF gpcf;
- target_ulong s2addr;
- target_ulong paddr;
+ hwaddr s2addr;
+ hwaddr paddr;
ARMSecuritySpace paddr_space;
int level;
int domain;
@@ -776,9 +940,9 @@ void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr,
void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr,
MMUAccessType access_type, uintptr_t ra);
#else
-bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr);
+bool arm_cpu_tlb_fill_align(CPUState *cs, CPUTLBEntryFull *out, vaddr addr,
+ MMUAccessType access_type, int mmu_idx,
+ MemOp memop, int size, bool probe, uintptr_t ra);
#endif
static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
@@ -845,7 +1009,16 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu)
}
}
-/* Return true if this address translation regime has two ranges. */
+/*
+ * Return true if this address translation regime has two ranges.
+ * Note that this will not return the correct answer for AArch32
+ * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is
+ * never called from a context where EL3 can be AArch32. (The
+ * correct return value for ARMMMUIdx_E3 would be different for
+ * that case, so we can't just make the function return the
+ * correct value anyway; we would need an extra "bool e3_is_aarch32"
+ * argument which all the current callsites would pass as 'false'.)
+ */
static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
{
switch (mmu_idx) {
@@ -870,6 +1043,7 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx)
case ARMMMUIdx_Stage1_E1_PAN:
case ARMMMUIdx_E10_1_PAN:
case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_E30_3_PAN:
return true;
default:
return false;
@@ -893,10 +1067,11 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
case ARMMMUIdx_E2:
return 2;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
return 3;
case ARMMMUIdx_E10_0:
case ARMMMUIdx_Stage1_E0:
- return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3;
case ARMMMUIdx_Stage1_E1:
case ARMMMUIdx_Stage1_E1_PAN:
case ARMMMUIdx_E10_1:
@@ -918,7 +1093,9 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
{
switch (mmu_idx) {
+ case ARMMMUIdx_E10_0:
case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E30_0:
case ARMMMUIdx_Stage1_E0:
case ARMMMUIdx_MUser:
case ARMMMUIdx_MSUser:
@@ -927,10 +1104,6 @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
return true;
default:
return false;
- case ARMMMUIdx_E10_0:
- case ARMMMUIdx_E10_1:
- case ARMMMUIdx_E10_1_PAN:
- g_assert_not_reached();
}
}
@@ -998,7 +1171,7 @@ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
static inline int arm_num_brps(ARMCPU *cpu)
{
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1;
+ return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1;
} else {
return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1;
}
@@ -1012,7 +1185,7 @@ static inline int arm_num_brps(ARMCPU *cpu)
static inline int arm_num_wrps(ARMCPU *cpu)
{
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1;
+ return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1;
} else {
return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1;
}
@@ -1026,7 +1199,7 @@ static inline int arm_num_wrps(ARMCPU *cpu)
static inline int arm_num_ctx_cmps(ARMCPU *cpu)
{
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1;
+ return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, CTX_CMPS) + 1;
} else {
return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1;
}
@@ -1387,6 +1560,7 @@ typedef struct GetPhysAddrResult {
* @env: CPUARMState
* @address: virtual address to get physical address for
* @access_type: 0 for read, 1 for write, 2 for execute
+ * @memop: memory operation feeding this access, or 0 for none
* @mmu_idx: MMU index indicating required translation regime
* @result: set on translation success.
* @fi: set to fault info if the translation fails
@@ -1404,8 +1578,8 @@ typedef struct GetPhysAddrResult {
* * for PSMAv5 based systems we don't bother to return a full FSR format
* value.
*/
-bool get_phys_addr(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
+bool get_phys_addr(CPUARMState *env, vaddr address,
+ MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx,
GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
__attribute__((nonnull));
@@ -1415,6 +1589,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
* @env: CPUARMState
* @address: virtual address to get physical address for
* @access_type: 0 for read, 1 for write, 2 for execute
+ * @memop: memory operation feeding this access, or 0 for none
* @mmu_idx: MMU index indicating required translation regime
* @space: security space for the access
* @result: set on translation success.
@@ -1423,8 +1598,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
* Similar to get_phys_addr, but use the given security space and don't perform
* a Granule Protection Check on the resulting address.
*/
-bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address,
- MMUAccessType access_type,
+bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address,
+ MMUAccessType access_type, MemOp memop,
ARMMMUIdx mmu_idx, ARMSecuritySpace space,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
@@ -1632,7 +1807,6 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env)
return (1ULL << 31) | ((1ULL << pmu_num_counters(env)) - 1);
}
-#ifdef TARGET_AARCH64
GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg);
int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg);
int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg);
@@ -1640,6 +1814,8 @@ int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg);
int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg);
int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg);
int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg);
+int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg);
+int aarch64_gdb_set_tag_ctl_reg(CPUState *cs, uint8_t *buf, int reg);
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp);
void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp);
void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp);
@@ -1648,7 +1824,12 @@ void aarch64_max_tcg_initfn(Object *obj);
void aarch64_add_pauth_properties(Object *obj);
void aarch64_add_sve_properties(Object *obj);
void aarch64_add_sme_properties(Object *obj);
-#endif
+
+/* Return true if the gdbstub is presenting an AArch64 CPU */
+static inline bool arm_gdbstub_is_aarch64(ARMCPU *cpu)
+{
+ return arm_feature(&cpu->env, ARM_FEATURE_AARCH64);
+}
/* Read the CONTROL register as the MRS instruction would. */
uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure);
@@ -1688,6 +1869,9 @@ static inline uint64_t pauth_ptr_mask(ARMVAParameters param)
/* Add the cpreg definitions for debug related system registers */
void define_debug_regs(ARMCPU *cpu);
+/* Add the cpreg definitions for TLBI instructions */
+void define_tlb_insn_regs(ARMCPU *cpu);
+
/* Effective value of MDCR_EL2 */
static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env)
{
@@ -1719,8 +1903,6 @@ static inline bool arm_fgt_active(CPUARMState *env, int el)
(!arm_feature(env, ARM_FEATURE_EL3) || (env->cp15.scr_el3 & SCR_FGTEN));
}
-void assert_hflags_rebuild_correctly(CPUARMState *env);
-
/*
* Although the ARM implementation of hardware assisted debugging
* allows for different breakpoints per-core, the current GDB
@@ -1762,20 +1944,42 @@ extern GArray *hw_breakpoints, *hw_watchpoints;
#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))
-bool find_hw_breakpoint(CPUState *cpu, target_ulong pc);
-int insert_hw_breakpoint(target_ulong pc);
-int delete_hw_breakpoint(target_ulong pc);
+bool find_hw_breakpoint(CPUState *cpu, vaddr pc);
+int insert_hw_breakpoint(vaddr pc);
+int delete_hw_breakpoint(vaddr pc);
-bool check_watchpoint_in_range(int i, target_ulong addr);
-CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr);
-int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type);
-int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type);
+bool check_watchpoint_in_range(int i, vaddr addr);
+CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, vaddr addr);
+int insert_hw_watchpoint(vaddr addr, vaddr len, int type);
+int delete_hw_watchpoint(vaddr addr, vaddr len, int type);
/* Return the current value of the system counter in ticks */
uint64_t gt_get_countervalue(CPUARMState *env);
/*
* Return the currently applicable offset between the system counter
- * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2).
+ * and the counter for the specified timer, as used for direct register
+ * accesses.
*/
-uint64_t gt_virt_cnt_offset(CPUARMState *env);
+uint64_t gt_direct_access_timer_offset(CPUARMState *env, int timeridx);
+
+/*
+ * Return mask of ARMMMUIdxBit values corresponding to an "invalidate
+ * all EL1" scope; this covers stage 1 and stage 2.
+ */
+int alle1_tlbmask(CPUARMState *env);
+
+/* Set the float_status behaviour to match the Arm defaults */
+void arm_set_default_fp_behaviours(float_status *s);
+/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
+void arm_set_ah_fp_behaviours(float_status *s);
+/* Read the float_status info and return the appropriate FPSR value */
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env);
+/* Clear the exception status flags from all float_status fields */
+void vfp_clear_float_status_exc_flags(CPUARMState *env);
+/*
+ * Update the float_status fields to reflect a change of the FPCR bits
+ * selected by mask to the values given in val.
+ */
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
+
#endif
diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c
index 965a486..34e57fa 100644
--- a/target/arm/kvm-stub.c
+++ b/target/arm/kvm-stub.c
@@ -22,3 +22,100 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
{
g_assert_not_reached();
}
+
+/*
+ * It's safe to call these functions without KVM support.
+ * They should either do nothing or return "not supported".
+ */
+bool kvm_arm_aarch32_supported(void)
+{
+ return false;
+}
+
+bool kvm_arm_pmu_supported(void)
+{
+ return false;
+}
+
+bool kvm_arm_sve_supported(void)
+{
+ return false;
+}
+
+bool kvm_arm_mte_supported(void)
+{
+ return false;
+}
+
+/*
+ * These functions should never actually be called without KVM support.
+ */
+void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
+{
+ g_assert_not_reached();
+}
+
+int kvm_arm_vgic_probe(void)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_pmu_init(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
+
+uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level)
+{
+ g_assert_not_reached();
+}
+
+void kvm_arm_cpu_pre_save(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
+
+bool kvm_arm_cpu_post_load(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 70f79ed..426f8b1 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -20,17 +20,18 @@
#include "qemu/main-loop.h"
#include "qom/object.h"
#include "qapi/error.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/runstate.h"
-#include "sysemu/kvm.h"
-#include "sysemu/kvm_int.h"
+#include "system/system.h"
+#include "system/runstate.h"
+#include "system/kvm.h"
+#include "system/kvm_int.h"
#include "kvm_arm.h"
#include "cpu.h"
+#include "cpu-sysregs.h"
#include "trace.h"
#include "internals.h"
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "gdbstub/enums.h"
#include "hw/boards.h"
#include "hw/irq.h"
@@ -39,8 +40,10 @@
#include "hw/acpi/acpi.h"
#include "hw/acpi/ghes.h"
#include "target/arm/gtimer.h"
+#include "migration/blocker.h"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
+ KVM_CAP_INFO(DEVICE_CTRL),
KVM_CAP_LAST_INFO
};
@@ -98,8 +101,7 @@ static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature)
return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature);
}
-bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
- int *fdarray,
+bool kvm_arm_create_scratch_host_vcpu(int *fdarray,
struct kvm_vcpu_init *init)
{
int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
@@ -119,6 +121,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
if (vmfd < 0) {
goto err;
}
+
+ /*
+ * The MTE capability must be enabled by the VMM before creating
+ * any VCPUs in order to allow the MTE bits of the ID_AA64PFR1
+ * register to be probed correctly, as they are masked if MTE
+ * is not enabled.
+ */
+ if (kvm_arm_mte_supported()) {
+ KVMState kvm_state;
+
+ kvm_state.fd = kvmfd;
+ kvm_state.vmfd = vmfd;
+ kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
+ }
+
cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
if (cpufd < 0) {
goto err;
@@ -133,40 +150,13 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
struct kvm_vcpu_init preferred;
ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
- if (!ret) {
- init->target = preferred.target;
- }
- }
- if (ret >= 0) {
- ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
if (ret < 0) {
goto err;
}
- } else if (cpus_to_try) {
- /* Old kernel which doesn't know about the
- * PREFERRED_TARGET ioctl: we know it will only support
- * creating one kind of guest CPU which is its preferred
- * CPU type.
- */
- struct kvm_vcpu_init try;
-
- while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
- try.target = *cpus_to_try++;
- memcpy(try.features, init->features, sizeof(init->features));
- ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
- if (ret >= 0) {
- break;
- }
- }
- if (ret < 0) {
- goto err;
- }
- init->target = try.target;
- } else {
- /* Treat a NULL cpus_to_try argument the same as an empty
- * list, which means we will fail the call since this must
- * be an old kernel which doesn't support PREFERRED_TARGET.
- */
+ init->target = preferred.target;
+ }
+ ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
+ if (ret < 0) {
goto err;
}
@@ -229,6 +219,28 @@ static bool kvm_arm_pauth_supported(void)
kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC));
}
+
+static uint64_t idregs_sysreg_to_kvm_reg(ARMSysRegs sysreg)
+{
+ return ARM64_SYS_REG((sysreg & CP_REG_ARM64_SYSREG_OP0_MASK) >> CP_REG_ARM64_SYSREG_OP0_SHIFT,
+ (sysreg & CP_REG_ARM64_SYSREG_OP1_MASK) >> CP_REG_ARM64_SYSREG_OP1_SHIFT,
+ (sysreg & CP_REG_ARM64_SYSREG_CRN_MASK) >> CP_REG_ARM64_SYSREG_CRN_SHIFT,
+ (sysreg & CP_REG_ARM64_SYSREG_CRM_MASK) >> CP_REG_ARM64_SYSREG_CRM_SHIFT,
+ (sysreg & CP_REG_ARM64_SYSREG_OP2_MASK) >> CP_REG_ARM64_SYSREG_OP2_SHIFT);
+}
+
+/* read a sysreg value and store it in the idregs */
+static int get_host_cpu_reg(int fd, ARMHostCPUFeatures *ahcf, ARMIDRegisterIdx index)
+{
+ uint64_t *reg;
+ int ret;
+
+ reg = &ahcf->isar.idregs[index];
+ ret = read_sys_reg64(fd, reg,
+ idregs_sysreg_to_kvm_reg(id_register_sysreg[index]));
+ return ret;
+}
+
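
As a worked example, assuming id_register_sysreg[] holds the CP_REG_ARM64_SYSREG encoding for each index (as the new cpu-sysregs.h suggests), the conversion for ID_AA64MMFR0_EL1 (op0=3, op1=0, CRn=0, CRm=7, op2=0) must match the ARM64_SYS_REG(3, 0, 0, 7, 0) constant that the removed open-coded reads used:

    uint64_t a = idregs_sysreg_to_kvm_reg(id_register_sysreg[ID_AA64MMFR0_EL1_IDX]);
    uint64_t b = ARM64_SYS_REG(3, 0, 0, 7, 0);   /* ID_AA64MMFR0_EL1 */
    g_assert(a == b);   /* both forms yield the same KVM register ID */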
static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
/* Identify the feature bits corresponding to the host CPU, and
@@ -242,17 +254,6 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
uint64_t features = 0;
int err;
- /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
- * we know these will only support creating one kind of guest CPU,
- * which is its preferred CPU type. Fortunately these old kernels
- * support only a very limited number of CPUs.
- */
- static const uint32_t cpus_to_try[] = {
- KVM_ARM_TARGET_AEM_V8,
- KVM_ARM_TARGET_FOUNDATION_V8,
- KVM_ARM_TARGET_CORTEX_A57,
- QEMU_KVM_ARM_TARGET_NONE
- };
/*
* target = -1 informs kvm_arm_create_scratch_host_vcpu()
* to use the preferred target
@@ -280,17 +281,18 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
if (kvm_arm_pmu_supported()) {
init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
pmu_supported = true;
+ features |= 1ULL << ARM_FEATURE_PMU;
}
- if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
+ if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) {
return false;
}
ahcf->target = init.target;
- ahcf->dtb_compatible = "arm,arm-v8";
+ ahcf->dtb_compatible = "arm,armv8";
+ int fd = fdarray[2];
- err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
- ARM64_SYS_REG(3, 0, 0, 4, 0));
+ err = get_host_cpu_reg(fd, ahcf, ID_AA64PFR0_EL1_IDX);
if (unlikely(err < 0)) {
/*
* Before v4.15, the kernel only exposed a limited number of system
@@ -308,31 +310,20 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* ??? Either of these sounds like too much effort just
* to work around running a modern host kernel.
*/
- ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
+ SET_IDREG(&ahcf->isar, ID_AA64PFR0, 0x00000011); /* EL1&0, AArch64 only */
err = 0;
} else {
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
- ARM64_SYS_REG(3, 0, 0, 4, 1));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
- ARM64_SYS_REG(3, 0, 0, 4, 5));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
- ARM64_SYS_REG(3, 0, 0, 5, 0));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
- ARM64_SYS_REG(3, 0, 0, 5, 1));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
- ARM64_SYS_REG(3, 0, 0, 6, 0));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
- ARM64_SYS_REG(3, 0, 0, 6, 1));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2,
- ARM64_SYS_REG(3, 0, 0, 6, 2));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
- ARM64_SYS_REG(3, 0, 0, 7, 0));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
- ARM64_SYS_REG(3, 0, 0, 7, 1));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
- ARM64_SYS_REG(3, 0, 0, 7, 2));
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3,
- ARM64_SYS_REG(3, 0, 0, 7, 3));
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64SMFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR2_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR2_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR3_EL1_IDX);
/*
* Note that if AArch32 support is not present in the host,
@@ -341,49 +332,31 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* than skipping the reads and leaving 0, as we must avoid
* considering the values in every case.
*/
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
- ARM64_SYS_REG(3, 0, 0, 1, 0));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
- ARM64_SYS_REG(3, 0, 0, 1, 1));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
- ARM64_SYS_REG(3, 0, 0, 1, 2));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
- ARM64_SYS_REG(3, 0, 0, 1, 4));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
- ARM64_SYS_REG(3, 0, 0, 1, 5));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
- ARM64_SYS_REG(3, 0, 0, 1, 6));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
- ARM64_SYS_REG(3, 0, 0, 1, 7));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
- ARM64_SYS_REG(3, 0, 0, 2, 0));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
- ARM64_SYS_REG(3, 0, 0, 2, 1));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
- ARM64_SYS_REG(3, 0, 0, 2, 2));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
- ARM64_SYS_REG(3, 0, 0, 2, 3));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
- ARM64_SYS_REG(3, 0, 0, 2, 4));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
- ARM64_SYS_REG(3, 0, 0, 2, 5));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
- ARM64_SYS_REG(3, 0, 0, 2, 6));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
- ARM64_SYS_REG(3, 0, 0, 2, 7));
-
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
+ err |= get_host_cpu_reg(fd, ahcf, ID_PFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_PFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_DFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR2_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR3_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR0_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR2_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR3_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR4_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR5_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_ISAR6_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR4_EL1_IDX);
+
+ err |= read_sys_reg32(fd, &ahcf->isar.mvfr0,
ARM64_SYS_REG(3, 0, 0, 3, 0));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
+ err |= read_sys_reg32(fd, &ahcf->isar.mvfr1,
ARM64_SYS_REG(3, 0, 0, 3, 1));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
+ err |= read_sys_reg32(fd, &ahcf->isar.mvfr2,
ARM64_SYS_REG(3, 0, 0, 3, 2));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2,
- ARM64_SYS_REG(3, 0, 0, 3, 4));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1,
- ARM64_SYS_REG(3, 0, 0, 3, 5));
- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5,
- ARM64_SYS_REG(3, 0, 0, 3, 6));
+ err |= get_host_cpu_reg(fd, ahcf, ID_PFR2_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_DFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_MMFR5_EL1_IDX);
/*
* DBGDIDR is a bit complicated because the kernel doesn't
@@ -395,14 +368,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
* We only do this if the CPU supports AArch32 at EL1.
*/
- if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
- int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
- int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
+ if (FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL1) >= 2) {
+ int wrps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, WRPS);
+ int brps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, BRPS);
int ctx_cmps =
- FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
+ FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, CTX_CMPS);
int version = 6; /* ARMv8 debug architecture */
bool has_el3 =
- !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
+ !!FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL3);
uint32_t dbgdidr = 0;
dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
@@ -417,7 +390,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
if (pmu_supported) {
/* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0,
+ err |= read_sys_reg64(fd, &ahcf->isar.reset_pmcr_el0,
ARM64_SYS_REG(3, 3, 9, 12, 0));
}
@@ -429,8 +402,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
* enabled SVE support, which resulted in an error rather than RAZ.
* So only read the register if we set KVM_ARM_VCPU_SVE above.
*/
- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
- ARM64_SYS_REG(3, 0, 0, 4, 4));
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64ZFR0_EL1_IDX);
}
}
@@ -448,7 +420,6 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
features |= 1ULL << ARM_FEATURE_V8;
features |= 1ULL << ARM_FEATURE_NEON;
features |= 1ULL << ARM_FEATURE_AARCH64;
- features |= 1ULL << ARM_FEATURE_PMU;
features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;
ahcf->features = features;
@@ -675,19 +646,11 @@ static void kvm_arm_set_device_addr(KVMDevice *kd)
{
struct kvm_device_attr *attr = &kd->kdattr;
int ret;
+ uint64_t addr = kd->kda.addr;
- /* If the device control API is available and we have a device fd on the
- * KVMDevice struct, let's use the newer API
- */
- if (kd->dev_fd >= 0) {
- uint64_t addr = kd->kda.addr;
-
- addr |= kd->kda_addr_ormask;
- attr->addr = (uintptr_t)&addr;
- ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
- } else {
- ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
- }
+ addr |= kd->kda_addr_ormask;
+ attr->addr = (uintptr_t)&addr;
+ ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
if (ret < 0) {
fprintf(stderr, "Failed to set device address: %s\n",
@@ -968,13 +931,24 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu)
}
}
-void kvm_arm_cpu_post_load(ARMCPU *cpu)
+bool kvm_arm_cpu_post_load(ARMCPU *cpu)
{
+ if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) {
+ return false;
+ }
+ /*
+ * Note that it's OK for the TCG side not to know about
+ * every register in the list; KVM is authoritative if
+ * we're using it.
+ */
+ write_list_to_cpustate(cpu);
+
/* KVM virtual time adjustment */
if (cpu->kvm_adjvtime) {
cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT);
cpu->kvm_vtime_dirty = true;
}
+
+ return true;
}
void kvm_arm_reset_vcpu(ARMCPU *cpu)
@@ -1793,6 +1767,11 @@ bool kvm_arm_sve_supported(void)
return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
}
+bool kvm_arm_mte_supported(void)
+{
+ return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
+}
+
QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
@@ -1821,7 +1800,7 @@ uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
probed = true;
- if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
+ if (!kvm_arm_create_scratch_host_vcpu(fdarray, &init)) {
error_report("failed to create scratch VCPU with SVE enabled");
abort();
}
@@ -1860,6 +1839,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu)
#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret;
@@ -1868,8 +1852,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
CPUARMState *env = &cpu->env;
uint64_t psciver;
- if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
- !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
+ if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
error_report("KVM is not supported for this guest CPU type");
return -EINVAL;
}
@@ -1888,13 +1871,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (!arm_feature(env, ARM_FEATURE_AARCH64)) {
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
}
- if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
- cpu->has_pmu = false;
- }
if (cpu->has_pmu) {
cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
- } else {
- env->features &= ~(1ULL << ARM_FEATURE_PMU);
}
if (cpu_isar_feature(aa64_sve, cpu)) {
assert(kvm_arm_sve_supported());
@@ -2047,7 +2025,7 @@ static int kvm_arch_put_sve(CPUState *cs)
return 0;
}
-int kvm_arch_put_registers(CPUState *cs, int level)
+int kvm_arch_put_registers(CPUState *cs, int level, Error **errp)
{
uint64_t val;
uint32_t fpr;
@@ -2231,7 +2209,7 @@ static int kvm_arch_get_sve(CPUState *cs)
return 0;
}
-int kvm_arch_get_registers(CPUState *cs)
+int kvm_arch_get_registers(CPUState *cs, Error **errp)
{
uint64_t val;
unsigned int el;
@@ -2378,7 +2356,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
*/
if (code == BUS_MCEERR_AR) {
kvm_cpu_synchronize_state(c);
- if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
+ if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
kvm_inject_arm_sea(c);
} else {
error_report("failed to record the error");
@@ -2422,3 +2400,69 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
}
return 0;
}
+
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+ static bool tried_to_enable;
+ static bool succeeded_to_enable;
+ Error *mte_migration_blocker = NULL;
+ ARMCPU *cpu = ARM_CPU(cpuobj);
+ int ret;
+
+ if (!tried_to_enable) {
+ /*
+ * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
+ * sense), and we only want a single migration blocker as well.
+ */
+ tried_to_enable = true;
+
+ ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
+ return;
+ }
+
+ /* TODO: Add migration support with MTE enabled */
+ error_setg(&mte_migration_blocker,
+ "Live migration disabled due to MTE enabled");
+ if (migrate_add_blocker(&mte_migration_blocker, errp)) {
+ error_free(mte_migration_blocker);
+ return;
+ }
+
+ succeeded_to_enable = true;
+ }
+
+ if (succeeded_to_enable) {
+ cpu->kvm_mte = true;
+ }
+}
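+
+/*
+ * Caller sketch (the realize-path wiring is an assumption here, not
+ * part of this patch):
+ *
+ *     Error *err = NULL;
+ *     kvm_arm_enable_mte(OBJECT(cpu), &err);
+ *     if (err) {
+ *         error_propagate(errp, err);
+ *     }
+ *
+ * KVM_CAP_ARM_MTE is per-VM, so only the first call issues the ioctl;
+ * later vCPUs merely inherit succeeded_to_enable and set cpu->kvm_mte.
+ */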
+
+void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level)
+{
+ ARMCPU *cpu = arm_cpu;
+ CPUARMState *env = &cpu->env;
+ CPUState *cs = CPU(cpu);
+ uint32_t linestate_bit;
+ int irq_id;
+
+ switch (irq) {
+ case ARM_CPU_IRQ:
+ irq_id = KVM_ARM_IRQ_CPU_IRQ;
+ linestate_bit = CPU_INTERRUPT_HARD;
+ break;
+ case ARM_CPU_FIQ:
+ irq_id = KVM_ARM_IRQ_CPU_FIQ;
+ linestate_bit = CPU_INTERRUPT_FIQ;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (level) {
+ env->irq_line_state |= linestate_bit;
+ } else {
+ env->irq_line_state &= ~linestate_bit;
+ }
+ kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level);
+}
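+
+/*
+ * Usage sketch (assumes the usual qemu_allocate_irqs() wiring on the
+ * CPU side, which is not part of this patch):
+ *
+ *     qemu_irq *irqs = qemu_allocate_irqs(arm_cpu_kvm_set_irq, cpu, 2);
+ *     qemu_set_irq(irqs[ARM_CPU_IRQ], 1);
+ *
+ * asserts the line and passing level 0 deasserts it; env->irq_line_state
+ * mirrors the level so migration still sees the line state even though
+ * KVM owns the actual delivery.
+ */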
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index cfaa0d9..7dc83ca 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -11,7 +11,8 @@
#ifndef QEMU_KVM_ARM_H
#define QEMU_KVM_ARM_H
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
+#include "target/arm/cpu-qom.h"
#define KVM_ARM_VGIC_V2 (1 << 0)
#define KVM_ARM_VGIC_V3 (1 << 1)
@@ -22,17 +23,15 @@
* @devid: the KVM device ID
* @group: device control API group for setting addresses
* @attr: device control API address type
- * @dev_fd: device control device file descriptor (or -1 if not supported)
+ * @dev_fd: device control device file descriptor
* @addr_ormask: value to be OR'ed with resolved address
*
- * Remember the memory region @mr, and when it is mapped by the
- * machine model, tell the kernel that base address using the
- * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API. @devid
- * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or
- * the arm-vgic device in the device control API.
- * The machine model may map
- * and unmap the device multiple times; the kernel will only be told the final
- * address at the point where machine init is complete.
+ * Remember the memory region @mr, and when it is mapped by the machine
+ * model, tell the kernel that base address using the device control API.
+ * @devid should be the ID of the device as defined by the arm-vgic device
+ * in the device control API. The machine model may map and unmap the device
+ * multiple times; the kernel will only be told the final address at the
+ * point where machine init is complete.
*/
void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
uint64_t attr, int dev_fd, uint64_t addr_ormask);
@@ -85,8 +84,10 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu);
* @cpu: ARMCPU
*
* Called from cpu_post_load() to update KVM CPU state from the cpreg list.
+ *
+ * Returns: true on success, or false if write_list_to_kvmstate failed.
*/
-void kvm_arm_cpu_post_load(ARMCPU *cpu);
+bool kvm_arm_cpu_post_load(ARMCPU *cpu);
/**
* kvm_arm_reset_vcpu:
@@ -96,13 +97,9 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu);
*/
void kvm_arm_reset_vcpu(ARMCPU *cpu);
-#ifdef CONFIG_KVM
+struct kvm_vcpu_init;
/**
* kvm_arm_create_scratch_host_vcpu:
- * @cpus_to_try: array of QEMU_KVM_ARM_TARGET_* values (terminated with
- * QEMU_KVM_ARM_TARGET_NONE) to try as fallback if the kernel does not
- * know the PREFERRED_TARGET ioctl. Passing NULL is the same as passing
- * an empty array.
* @fdarray: filled in with kvmfd, vmfd, cpufd file descriptors in that order
* @init: filled in with the necessary values for creating a host
* vcpu. If NULL is provided, will not init the vCPU (though the cpufd
@@ -115,8 +112,7 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu);
* Returns: true on success (and fdarray and init are filled in),
* false on failure (and fdarray and init are not valid).
*/
-bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
- int *fdarray,
+bool kvm_arm_create_scratch_host_vcpu(int *fdarray,
struct kvm_vcpu_init *init);
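+
+/*
+ * Typical use, as a sketch (mirrors kvm_arm_get_host_cpu_features()):
+ *
+ *     int fdarray[3];
+ *     struct kvm_vcpu_init init = { .target = -1, };
+ *
+ *     if (kvm_arm_create_scratch_host_vcpu(fdarray, &init)) {
+ *         (read registers via the vcpu fd, fdarray[2])
+ *         kvm_arm_destroy_scratch_host_vcpu(fdarray);
+ *     }
+ */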
/**
@@ -189,6 +185,13 @@ bool kvm_arm_pmu_supported(void);
bool kvm_arm_sve_supported(void);
/**
+ * kvm_arm_mte_supported:
+ *
+ * Returns: true if KVM can enable MTE, and false otherwise.
+ */
+bool kvm_arm_mte_supported(void);
+
+/**
* kvm_arm_get_max_vm_ipa_size:
* @ms: Machine state handle
* @fixed_ipa: True when the IPA limit is fixed at 40. This is the case
@@ -214,75 +217,8 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
-#else
-
-/*
- * It's safe to call these functions without KVM support.
- * They should either do nothing or return "not supported".
- */
-static inline bool kvm_arm_aarch32_supported(void)
-{
- return false;
-}
-
-static inline bool kvm_arm_pmu_supported(void)
-{
- return false;
-}
-
-static inline bool kvm_arm_sve_supported(void)
-{
- return false;
-}
-
-/*
- * These functions should never actually be called without KVM support.
- */
-static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
-
-static inline void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
-
-static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
-{
- g_assert_not_reached();
-}
-
-static inline int kvm_arm_vgic_probe(void)
-{
- g_assert_not_reached();
-}
-
-static inline void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq)
-{
- g_assert_not_reached();
-}
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
-static inline void kvm_arm_pmu_init(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
-
-static inline void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa)
-{
- g_assert_not_reached();
-}
-
-static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
-{
- g_assert_not_reached();
-}
-
-static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
-{
- g_assert_not_reached();
-}
-
-#endif
+void arm_cpu_kvm_set_irq(void *arm_cpu, int irq, int level);
#endif
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 0a722ca..e442d48 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -1,12 +1,13 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "qemu/error-report.h"
-#include "sysemu/kvm.h"
-#include "sysemu/tcg.h"
+#include "system/kvm.h"
+#include "system/tcg.h"
#include "kvm_arm.h"
#include "internals.h"
#include "cpu-features.h"
-#include "migration/cpu.h"
+#include "migration/qemu-file-types.h"
+#include "migration/vmstate.h"
#include "target/arm/gtimer.h"
static bool vfp_needed(void *opaque)
@@ -18,6 +19,35 @@ static bool vfp_needed(void *opaque)
: cpu_isar_feature(aa32_vfp_simd, cpu));
}
+static bool vfp_fpcr_fpsr_needed(void *opaque)
+{
+ /*
+ * If either the FPCR or the FPSR include set bits that are not
+ * visible in the AArch32 FPSCR view of floating point control/status
+ * then we must send the FPCR and FPSR as two separate fields in the
+ * cpu/vfp/fpcr_fpsr subsection, and we will send a 0 for the old
+ * FPSCR field in cpu/vfp.
+ *
+ * If all the set bits are representable in an AArch32 FPSCR then we
+ * send that value as the cpu/vfp FPSCR field, and don't send the
+ * cpu/vfp/fpcr_fpsr subsection.
+ *
+ * On incoming migration, if the cpu/vfp FPSCR field is non-zero we
+ * use it, and if the fpcr_fpsr subsection is present we use that.
+ * (The subsection will never be present with a non-zero FPSCR field,
+ * and if FPSCR is zero and the subsection is not present that means
+ * that FPSCR/FPSR/FPCR are zero.)
+ *
+ * This preserves migration compatibility with older QEMU versions,
+ * in both directions.
+ */
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ return (vfp_get_fpcr(env) & ~FPSCR_FPCR_MASK) ||
+ (vfp_get_fpsr(env) & ~FPSCR_FPSR_MASK);
+}
+
static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
const VMStateField *field)
{
@@ -25,7 +55,10 @@ static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
CPUARMState *env = &cpu->env;
uint32_t val = qemu_get_be32(f);
- vfp_set_fpscr(env, val);
+ if (val) {
+ /* 0 means we might have the data in the fpcr_fpsr subsection */
+ vfp_set_fpscr(env, val);
+ }
return 0;
}
@@ -34,8 +67,9 @@ static int put_fpscr(QEMUFile *f, void *opaque, size_t size,
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
+ uint32_t fpscr = vfp_fpcr_fpsr_needed(opaque) ? 0 : vfp_get_fpscr(env);
- qemu_put_be32(f, vfp_get_fpscr(env));
+ qemu_put_be32(f, fpscr);
return 0;
}
@@ -45,6 +79,86 @@ static const VMStateInfo vmstate_fpscr = {
.put = put_fpscr,
};
+static int get_fpcr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+ uint64_t val = qemu_get_be64(f);
+
+ vfp_set_fpcr(env, val);
+ return 0;
+}
+
+static int put_fpcr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ qemu_put_be64(f, vfp_get_fpcr(env));
+ return 0;
+}
+
+static const VMStateInfo vmstate_fpcr = {
+ .name = "fpcr",
+ .get = get_fpcr,
+ .put = put_fpcr,
+};
+
+static int get_fpsr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+ uint64_t val = qemu_get_be64(f);
+
+ vfp_set_fpsr(env, val);
+ return 0;
+}
+
+static int put_fpsr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ qemu_put_be64(f, vfp_get_fpsr(env));
+ return 0;
+}
+
+static const VMStateInfo vmstate_fpsr = {
+ .name = "fpsr",
+ .get = get_fpsr,
+ .put = put_fpsr,
+};
+
+static const VMStateDescription vmstate_vfp_fpcr_fpsr = {
+ .name = "cpu/vfp/fpcr_fpsr",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vfp_fpcr_fpsr_needed,
+ .fields = (const VMStateField[]) {
+ {
+ .name = "fpcr",
+ .version_id = 0,
+ .size = sizeof(uint64_t),
+ .info = &vmstate_fpcr,
+ .flags = VMS_SINGLE,
+ .offset = 0,
+ },
+ {
+ .name = "fpsr",
+ .version_id = 0,
+ .size = sizeof(uint64_t),
+ .info = &vmstate_fpsr,
+ .flags = VMS_SINGLE,
+ .offset = 0,
+ },
+ VMSTATE_END_OF_LIST()
+ },
+};
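+
+/*
+ * Concrete trigger, as a sketch: with FEAT_AFP, FPCR.FIZ/AH/NEP sit in
+ * bits [2:0] and have no FPSCR equivalent, so e.g.
+ *
+ *     vfp_set_fpcr(env, vfp_get_fpcr(env) | (1 << 1));
+ *
+ * (setting FPCR.AH) makes vfp_fpcr_fpsr_needed() return true; the
+ * cpu/vfp FPSCR field is then sent as 0 and the real state travels in
+ * this subsection instead.
+ */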
+
static const VMStateDescription vmstate_vfp = {
.name = "cpu/vfp",
.version_id = 3,
@@ -100,6 +214,10 @@ static const VMStateDescription vmstate_vfp = {
.offset = 0,
},
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_vfp_fpcr_fpsr,
+ NULL
}
};
@@ -123,7 +241,6 @@ static const VMStateDescription vmstate_iwmmxt = {
}
};
-#ifdef TARGET_AARCH64
/* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build,
* and ARMPredicateReg is actively empty. This triggers errors
* in the expansion of the VMSTATE macros.
@@ -203,7 +320,6 @@ static const VMStateDescription vmstate_za = {
VMSTATE_END_OF_LIST()
}
};
-#endif /* AARCH64 */
static bool serror_needed(void *opaque)
{
@@ -785,6 +901,20 @@ static int cpu_pre_load(void *opaque)
CPUARMState *env = &cpu->env;
/*
+ * In an inbound migration where on the source FPSCR/FPSR/FPCR are 0,
+ * there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr()
+ * and vfp_set_fpsr() from get_fpcr() and get_fpsr(); also the get_fpscr()
+ * function will not call vfp_set_fpscr() because it will see a 0 in the
+ * inbound data. Ensure that in this case we have a correctly set up
+ * zero FPSCR/FPCR/FPSR.
+ *
+ * This is not strictly needed because FPSCR is zero out of reset, but
+ * it avoids the possibility of future confusing migration bugs if some
+ * future architecture change makes the reset value non-zero.
+ */
+ vfp_set_fpscr(env, 0);
+
+ /*
* Pre-initialize irq_line_state to a value that's never valid as
* real data, so cpu_post_load() can tell whether we've seen the
* irq-line-state subsection in the incoming migration state.
@@ -846,15 +976,9 @@ static int cpu_post_load(void *opaque, int version_id)
}
if (kvm_enabled()) {
- if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) {
+ if (!kvm_arm_cpu_post_load(cpu)) {
return -1;
}
- /* Note that it's OK for the TCG side not to know about
- * every register in the list; KVM is authoritative if
- * we're using it.
- */
- write_list_to_cpustate(cpu);
- kvm_arm_cpu_post_load(cpu);
} else {
if (!write_list_to_cpustate(cpu)) {
return -1;
@@ -970,10 +1094,8 @@ const VMStateDescription vmstate_arm_cpu = {
&vmstate_pmsav7,
&vmstate_pmsav8,
&vmstate_m_security,
-#ifdef TARGET_AARCH64
&vmstate_sve,
&vmstate_za,
-#endif
&vmstate_serror,
&vmstate_irq_line_state,
&vmstate_wfxt_timer,
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 2e10464..7aa81e3 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -1,41 +1,58 @@
arm_ss = ss.source_set()
+arm_common_ss = ss.source_set()
arm_ss.add(files(
- 'cpu.c',
- 'debug_helper.c',
'gdbstub.c',
- 'helper.c',
- 'vfp_helper.c',
))
-arm_ss.add(zlib)
-
-arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'), if_false: files('kvm-stub.c'))
-arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c'))
arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'cpu64.c',
- 'gdbstub64.c',
-))
+ 'gdbstub64.c'))
arm_system_ss = ss.source_set()
+arm_common_system_ss = ss.source_set()
arm_system_ss.add(files(
+ 'arm-qmp-cmds.c',
+))
+arm_system_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'))
+arm_system_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c'))
+
+arm_user_ss = ss.source_set()
+arm_user_ss.add(files('cpu.c'))
+arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files(
+ 'cpu32-stubs.c',
+))
+arm_user_ss.add(files(
+ 'debug_helper.c',
+ 'helper.c',
+ 'vfp_fpscr.c',
+))
+
+arm_common_system_ss.add(files('cpu.c'))
+arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files(
+ 'cpu32-stubs.c'))
+arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
+arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
+arm_common_system_ss.add(files(
'arch_dump.c',
'arm-powerctl.c',
- 'arm-qmp-cmds.c',
'cortex-regs.c',
+ 'debug_helper.c',
+ 'helper.c',
'machine.c',
'ptw.c',
+ 'vfp_fpscr.c',
))
-arm_user_ss = ss.source_set()
-
subdir('hvf')
if 'CONFIG_TCG' in config_all_accel
subdir('tcg')
else
- arm_ss.add(files('tcg-stubs.c'))
+ arm_common_system_ss.add(files('tcg-stubs.c'))
endif
target_arch += {'arm': arm_ss}
target_system_arch += {'arm': arm_system_ss}
target_user_arch += {'arm': arm_user_ss}
+target_common_arch += {'arm': arm_common_ss}
+target_common_system_arch += {'arm': arm_common_system_ss}
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 4476b32..561bf26 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -10,15 +10,14 @@
#include "qemu/log.h"
#include "qemu/range.h"
#include "qemu/main-loop.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
+#include "accel/tcg/probe.h"
#include "cpu.h"
#include "internals.h"
#include "cpu-features.h"
#include "idau.h"
-#ifdef CONFIG_TCG
-# include "tcg/oversized-guest.h"
-#endif
typedef struct S1Translate {
/*
@@ -74,17 +73,21 @@ typedef struct S1Translate {
} S1Translate;
static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
- target_ulong address,
- MMUAccessType access_type,
+ vaddr address,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi);
static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
- target_ulong address,
- MMUAccessType access_type,
+ vaddr address,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi);
+static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
+ int user_rw, int prot_rw, int xn, int pxn,
+ ARMSecuritySpace in_pa, ARMSecuritySpace out_pa);
+
/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
static const uint8_t pamax_map[] = {
[0] = 32,
@@ -96,6 +99,21 @@ static const uint8_t pamax_map[] = {
[6] = 52,
};
+uint8_t round_down_to_parange_index(uint8_t bit_size)
+{
+ for (int i = ARRAY_SIZE(pamax_map) - 1; i >= 0; i--) {
+ if (pamax_map[i] <= bit_size) {
+ return i;
+ }
+ }
+ g_assert_not_reached();
+}
+
+uint8_t round_down_to_parange_bit_size(uint8_t bit_size)
+{
+ return pamax_map[round_down_to_parange_index(bit_size)];
+}
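+
+/*
+ * Worked example: 43 has no exact PARANGE encoding, so
+ * round_down_to_parange_index(43) scans down from pamax_map[6] == 52
+ * and returns 3 (pamax_map[3] == 42), making
+ * round_down_to_parange_bit_size(43) == 42. Anything below 32 hits
+ * g_assert_not_reached().
+ */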
+
/*
* The cpu-specific constant value of PAMax; also used by hw/arm/virt.
* Note that machvirt_init calls this on a CPU that is inited but not realized!
@@ -104,7 +122,7 @@ unsigned int arm_pamax(ARMCPU *cpu)
{
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
unsigned int parange =
- FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
+ FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE);
/*
* id_aa64mmfr0 is a read-only register so values outside of the
@@ -265,6 +283,8 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx,
case ARMMMUIdx_E20_2_PAN:
case ARMMMUIdx_E2:
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
break;
case ARMMMUIdx_Phys_S:
@@ -312,7 +332,7 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress,
* physical address size is invalid.
*/
pps = FIELD_EX64(gpccr, GPCCR, PPS);
- if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) {
+ if (pps > FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE)) {
goto fault_walk;
}
pps = pamax_map[pps];
@@ -564,7 +584,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
};
GetPhysAddrResult s2 = { };
- if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, &s2, fi)) {
+ if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, 0, &s2, fi)) {
goto fail;
}
@@ -717,7 +737,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
uint64_t new_val, S1Translate *ptw,
ARMMMUFaultInfo *fi)
{
-#if defined(TARGET_AARCH64) && defined(CONFIG_TCG)
+#if defined(CONFIG_ATOMIC64) && defined(CONFIG_TCG)
uint64_t cur_val;
void *host = ptw->out_host;
@@ -819,7 +839,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
ptw->out_rw = true;
}
-#ifdef CONFIG_ATOMIC64
if (ptw->out_be) {
old_val = cpu_to_be64(old_val);
new_val = cpu_to_be64(new_val);
@@ -831,36 +850,6 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
cur_val = le64_to_cpu(cur_val);
}
-#else
- /*
- * We can't support the full 64-bit atomic cmpxchg on the host.
- * Because this is only used for FEAT_HAFDBS, which is only for AA64,
- * we know that TCG_OVERSIZED_GUEST is set, which means that we are
- * running in round-robin mode and could only race with dma i/o.
- */
-#if !TCG_OVERSIZED_GUEST
-# error "Unexpected configuration"
-#endif
- bool locked = bql_locked();
- if (!locked) {
- bql_lock();
- }
- if (ptw->out_be) {
- cur_val = ldq_be_p(host);
- if (cur_val == old_val) {
- stq_be_p(host, new_val);
- }
- } else {
- cur_val = ldq_le_p(host);
- if (cur_val == old_val) {
- stq_le_p(host, new_val);
- }
- }
- if (!locked) {
- bql_unlock();
- }
-#endif
-
return cur_val;
#else
/* AArch32 does not have FEAT_HADFS; non-TCG guests only use debug-mode. */
@@ -1131,7 +1120,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
hwaddr phys_addr;
uint32_t dacr;
bool ns;
- int user_prot;
+ ARMSecuritySpace out_space;
/* Pagetable walk. */
/* Lookup l1 descriptor. */
@@ -1223,16 +1212,19 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
g_assert_not_reached();
}
}
+ out_space = ptw->in_space;
+ if (ns) {
+ /*
+ * The NS bit will (as required by the architecture) have no effect if
+ * the CPU doesn't support TZ or this is a non-secure translation
+ * regime, because the output space will already be non-secure.
+ */
+ out_space = ARMSS_NonSecure;
+ }
if (domain_prot == 3) {
result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
} else {
- if (pxn && !regime_is_user(env, mmu_idx)) {
- xn = 1;
- }
- if (xn && access_type == MMU_INST_FETCH) {
- fi->type = ARMFault_Permission;
- goto do_fault;
- }
+ int user_rw, prot_rw;
if (arm_feature(env, ARM_FEATURE_V6K) &&
(regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
@@ -1242,37 +1234,23 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
fi->type = ARMFault_AccessFlag;
goto do_fault;
}
- result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
- user_prot = simple_ap_to_rw_prot_is_user(ap >> 1, 1);
+ prot_rw = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
+ user_rw = simple_ap_to_rw_prot_is_user(ap >> 1, 1);
} else {
- result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
- user_prot = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1);
- }
- if (result->f.prot && !xn) {
- result->f.prot |= PAGE_EXEC;
+ prot_rw = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
+ user_rw = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1);
}
+
+ result->f.prot = get_S1prot(env, mmu_idx, false, user_rw, prot_rw,
+ xn, pxn, result->f.attrs.space, out_space);
if (!(result->f.prot & (1 << access_type))) {
/* Access permission fault. */
fi->type = ARMFault_Permission;
goto do_fault;
}
- if (regime_is_pan(env, mmu_idx) &&
- !regime_is_user(env, mmu_idx) &&
- user_prot &&
- access_type != MMU_INST_FETCH) {
- /* Privileged Access Never fault */
- fi->type = ARMFault_Permission;
- goto do_fault;
- }
- }
- if (ns) {
- /* The NS bit will (as required by the architecture) have no effect if
- * the CPU doesn't support TZ or this is a non-secure translation
- * regime, because the attribute will already be non-secure.
- */
- result->f.attrs.secure = false;
- result->f.attrs.space = ARMSS_NonSecure;
}
+ result->f.attrs.space = out_space;
+ result->f.attrs.secure = arm_space_is_secure(out_space);
result->f.phys_addr = phys_addr;
return false;
do_fault:
@@ -1340,25 +1318,24 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
* @env: CPUARMState
* @mmu_idx: MMU index indicating required translation regime
* @is_aa64: TRUE if AArch64
- * @ap: The 2-bit simple AP (AP[2:1])
+ * @user_rw: Translated AP for user access
+ * @prot_rw: Translated AP for privileged access
* @xn: XN (execute-never) bit
* @pxn: PXN (privileged execute-never) bit
* @in_pa: The original input pa space
* @out_pa: The output pa space, modified by NSTable, NS, and NSE
*/
static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
- int ap, int xn, int pxn,
+ int user_rw, int prot_rw, int xn, int pxn,
ARMSecuritySpace in_pa, ARMSecuritySpace out_pa)
{
ARMCPU *cpu = env_archcpu(env);
bool is_user = regime_is_user(env, mmu_idx);
- int prot_rw, user_rw;
bool have_wxn;
int wxn = 0;
assert(!regime_is_stage2(mmu_idx));
- user_rw = simple_ap_to_rw_prot_is_user(ap, true);
if (is_user) {
prot_rw = user_rw;
} else {
@@ -1376,8 +1353,6 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
regime_is_pan(env, mmu_idx) &&
(regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) {
prot_rw = 0;
- } else {
- prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
}
}
@@ -1669,12 +1644,13 @@ static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw)
* @ptw: Current and next stage parameters for the walk.
* @address: virtual address to get physical address for
* @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
+ * @memop: memory operation feeding this access, or 0 for none
* @result: set on translation success,
* @fi: set to fault info if the translation fails
*/
static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
uint64_t address,
- MMUAccessType access_type,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
{
ARMCPU *cpu = env_archcpu(env);
@@ -1684,7 +1660,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
uint64_t ttbr;
hwaddr descaddr, indexmask, indexmask_grainsize;
uint32_t tableattrs;
- target_ulong page_size;
+ uint64_t page_size;
uint64_t attrs;
int32_t stride;
int addrsize, inputsize, outputsize;
@@ -1727,7 +1703,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* ID_AA64MMFR0 is a read-only register so values outside of the
* supported mappings can be considered an implementation error.
*/
- ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
+ ps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE);
ps = MIN(ps, param.ps);
assert(ps < ARRAY_SIZE(pamax_map));
outputsize = pamax_map[ps];
@@ -1757,7 +1733,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* validation to do here.
*/
if (inputsize < addrsize) {
- target_ulong top_bits = sextract64(address, inputsize,
+ uint64_t top_bits = sextract64(address, inputsize,
addrsize - inputsize);
if (-top_bits != param.select) {
/* The gap between the two regions is a Translation fault */
@@ -2013,8 +1989,21 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
xn = extract64(attrs, 53, 2);
result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0);
}
+
+ result->cacheattrs.is_s2_format = true;
+ result->cacheattrs.attrs = extract32(attrs, 2, 4);
+ /*
+ * Security state does not really affect HCR_EL2.FWB;
+ * we only need to filter FWB for aa32 or other FEAT.
+ */
+ device = S2_attrs_are_device(arm_hcr_el2_eff(env),
+ result->cacheattrs.attrs);
} else {
int nse, ns = extract32(attrs, 5, 1);
+ uint8_t attrindx;
+ uint64_t mair;
+ int user_rw, prot_rw;
+
switch (out_space) {
case ARMSS_Root:
/*
@@ -2080,12 +2069,58 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
xn = 0;
ap &= ~1;
}
+
+ user_rw = simple_ap_to_rw_prot_is_user(ap, true);
+ prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
/*
* Note that we modified ptw->in_space earlier for NSTable, but
* result->f.attrs retains a copy of the original security space.
*/
- result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, xn, pxn,
- result->f.attrs.space, out_space);
+ result->f.prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw,
+ xn, pxn, result->f.attrs.space, out_space);
+
+ /* Index into MAIR registers for cache attributes */
+ attrindx = extract32(attrs, 2, 3);
+ mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
+ assert(attrindx <= 7);
+ result->cacheattrs.is_s2_format = false;
+ result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8);
+
+ /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */
+ if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) {
+ result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */
+ }
+ device = S1_attrs_are_device(result->cacheattrs.attrs);
+ }
+
+ /*
+ * Enable alignment checks on Device memory.
+ *
+ * Per R_XCHFJ, the correct ordering for alignment, permission,
+ * and stage 2 faults is:
+ * - Alignment fault caused by the memory type
+ * - Permission fault
+ * - A stage 2 fault on the memory access
+ * Perform the alignment check now, so that we recognize it in
+ * the correct order. Set TLB_CHECK_ALIGNED so that any subsequent
+ * softmmu tlb hit will also check the alignment; clear along the
+ * non-device path so that tlb_fill_flags is consistent in the
+ * event of restart_atomic_update.
+ *
+ * In v7, for a CPU without the Virtualization Extensions this
+ * access is UNPREDICTABLE; we choose to make it take the alignment
+ * fault as is required for a v7VE CPU. (QEMU doesn't emulate any
+ * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.)
+ */
+ if (device) {
+ unsigned a_bits = memop_atomicity_bits(memop);
+ if (address & ((1 << a_bits) - 1)) {
+ fi->type = ARMFault_Alignment;
+ goto do_fault;
+ }
+ result->f.tlb_fill_flags = TLB_CHECK_ALIGNED;
+ } else {
+ result->f.tlb_fill_flags = 0;
}
if (!(result->f.prot & (1 << access_type))) {
@@ -2115,51 +2150,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
result->f.attrs.space = out_space;
result->f.attrs.secure = arm_space_is_secure(out_space);
- if (regime_is_stage2(mmu_idx)) {
- result->cacheattrs.is_s2_format = true;
- result->cacheattrs.attrs = extract32(attrs, 2, 4);
- /*
- * Security state does not really affect HCR_EL2.FWB;
- * we only need to filter FWB for aa32 or other FEAT.
- */
- device = S2_attrs_are_device(arm_hcr_el2_eff(env),
- result->cacheattrs.attrs);
- } else {
- /* Index into MAIR registers for cache attributes */
- uint8_t attrindx = extract32(attrs, 2, 3);
- uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
- assert(attrindx <= 7);
- result->cacheattrs.is_s2_format = false;
- result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8);
-
- /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */
- if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) {
- result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */
- }
- device = S1_attrs_are_device(result->cacheattrs.attrs);
- }
-
- /*
- * Enable alignment checks on Device memory.
- *
- * Per R_XCHFJ, this check is mis-ordered. The correct ordering
- * for alignment, permission, and stage 2 faults should be:
- * - Alignment fault caused by the memory type
- * - Permission fault
- * - A stage 2 fault on the memory access
- * but due to the way the TCG softmmu TLB operates, we will have
- * implicitly done the permission check and the stage2 lookup in
- * finding the TLB entry, so the alignment check cannot be done sooner.
- *
- * In v7, for a CPU without the Virtualization Extensions this
- * access is UNPREDICTABLE; we choose to make it take the alignment
- * fault as is required for a v7VE CPU. (QEMU doesn't emulate any
- * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.)
- */
- if (device) {
- result->f.tlb_fill_flags |= TLB_CHECK_ALIGNED;
- }
-
/*
* For FEAT_LPA2 and effective DS, the SH field in the attributes
* was re-purposed for output address bits. The SH attribute in
@@ -3202,7 +3192,7 @@ static ARMCacheAttrs combine_cacheattrs(uint64_t hcr,
*/
static bool get_phys_addr_disabled(CPUARMState *env,
S1Translate *ptw,
- target_ulong address,
+ vaddr address,
MMUAccessType access_type,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
@@ -3285,8 +3275,8 @@ static bool get_phys_addr_disabled(CPUARMState *env,
}
static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
- target_ulong address,
- MMUAccessType access_type,
+ vaddr address,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
{
@@ -3298,7 +3288,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
ARMSecuritySpace ipa_space;
uint64_t hcr;
- ret = get_phys_addr_nogpc(env, ptw, address, access_type, result, fi);
+ ret = get_phys_addr_nogpc(env, ptw, address, access_type,
+ memop, result, fi);
/* If S1 fails, return early. */
if (ret) {
@@ -3324,7 +3315,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
cacheattrs1 = result->cacheattrs;
memset(result, 0, sizeof(*result));
- ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, result, fi);
+ ret = get_phys_addr_nogpc(env, ptw, ipa, access_type,
+ memop, result, fi);
fi->s2addr = ipa;
/* Combine the S1 and S2 perms. */
@@ -3390,8 +3382,8 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
}
static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
- target_ulong address,
- MMUAccessType access_type,
+ vaddr address,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
{
@@ -3454,7 +3446,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
if (arm_feature(env, ARM_FEATURE_EL2) &&
!regime_translation_disabled(env, ARMMMUIdx_Stage2, ptw->in_space)) {
return get_phys_addr_twostage(env, ptw, address, access_type,
- result, fi);
+ memop, result, fi);
}
/* fall through */
@@ -3517,7 +3509,8 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
}
if (regime_using_lpae_format(env, mmu_idx)) {
- return get_phys_addr_lpae(env, ptw, address, access_type, result, fi);
+ return get_phys_addr_lpae(env, ptw, address, access_type,
+ memop, result, fi);
} else if (arm_feature(env, ARM_FEATURE_V7) ||
regime_sctlr(env, mmu_idx) & SCTLR_XP) {
return get_phys_addr_v6(env, ptw, address, access_type, result, fi);
@@ -3527,12 +3520,13 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
}
static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
- target_ulong address,
- MMUAccessType access_type,
+ vaddr address,
+ MMUAccessType access_type, MemOp memop,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
{
- if (get_phys_addr_nogpc(env, ptw, address, access_type, result, fi)) {
+ if (get_phys_addr_nogpc(env, ptw, address, access_type,
+ memop, result, fi)) {
return true;
}
if (!granule_protection_check(env, result->f.phys_addr,
@@ -3543,8 +3537,8 @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
return false;
}
-bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address,
- MMUAccessType access_type,
+bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address,
+ MMUAccessType access_type, MemOp memop,
ARMMMUIdx mmu_idx, ARMSecuritySpace space,
GetPhysAddrResult *result,
ARMMMUFaultInfo *fi)
@@ -3553,16 +3547,13 @@ bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address,
.in_mmu_idx = mmu_idx,
.in_space = space,
};
- return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi);
+ return get_phys_addr_nogpc(env, &ptw, address, access_type,
+ memop, result, fi);
}
-bool get_phys_addr(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+static ARMSecuritySpace
+arm_mmu_idx_to_security_space(CPUARMState *env, ARMMMUIdx mmu_idx)
{
- S1Translate ptw = {
- .in_mmu_idx = mmu_idx,
- };
ARMSecuritySpace ss;
switch (mmu_idx) {
@@ -3604,6 +3595,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
ss = ARMSS_Secure;
break;
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
if (arm_feature(env, ARM_FEATURE_AARCH64) &&
cpu_isar_feature(aa64_rme, env_archcpu(env))) {
ss = ARMSS_Root;
@@ -3621,27 +3614,33 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
g_assert_not_reached();
}
- ptw.in_space = ss;
- return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi);
+ return ss;
}
-hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
- MemTxAttrs *attrs)
+bool get_phys_addr(CPUARMState *env, vaddr address,
+ MMUAccessType access_type, MemOp memop, ARMMMUIdx mmu_idx,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- ARMMMUIdx mmu_idx = arm_mmu_idx(env);
- ARMSecuritySpace ss = arm_security_space(env);
S1Translate ptw = {
.in_mmu_idx = mmu_idx,
- .in_space = ss,
+ .in_space = arm_mmu_idx_to_security_space(env, mmu_idx),
+ };
+
+ return get_phys_addr_gpc(env, &ptw, address, access_type,
+ memop, result, fi);
+}
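+
+/*
+ * Note on the memop parameter: callers that are not performing a real
+ * data access (stage 1 page-table walks, debug reads) pass 0, as
+ * S1_ptw_translate() and arm_cpu_get_phys_page() do elsewhere in this
+ * file; only genuine loads/stores feed a MemOp, so Device-memory
+ * alignment is checked with the right atomicity.
+ */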
+
+static hwaddr arm_cpu_get_phys_page(CPUARMState *env, vaddr addr,
+ MemTxAttrs *attrs, ARMMMUIdx mmu_idx)
+{
+ S1Translate ptw = {
+ .in_mmu_idx = mmu_idx,
+ .in_space = arm_mmu_idx_to_security_space(env, mmu_idx),
.in_debug = true,
};
GetPhysAddrResult res = {};
ARMMMUFaultInfo fi = {};
- bool ret;
-
- ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi);
+ bool ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, 0, &res, &fi);
*attrs = res.f.attrs;
if (ret) {
@@ -3649,3 +3648,33 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
}
return res.f.phys_addr;
}
+
+hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
+ MemTxAttrs *attrs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+
+ hwaddr res = arm_cpu_get_phys_page(env, addr, attrs, mmu_idx);
+
+ if (res != -1) {
+ return res;
+ }
+
+ /*
+ * Memory may be accessible for an "unprivileged load/store" variant.
+ * In this case, the get_a64_user_mem_index() function generates an op
+ * using an unprivileged mmu idx, so we need to retry with that.
+ */
+ switch (mmu_idx) {
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E10_0);
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return arm_cpu_get_phys_page(env, addr, attrs, ARMMMUIdx_E20_0);
+ default:
+ return -1;
+ }
+}
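+
+/*
+ * Illustrative case: while translating at EL1 (ARMMMUIdx_E10_1), a
+ * debugger access to a page mapped with user-only permissions fails the
+ * first walk; retrying with ARMMMUIdx_E10_0 matches what an LDTR/STTR
+ * "unprivileged" access would use at runtime, so gdb can still read it.
+ */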
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
index 152b172..5e5166c 100644
--- a/target/arm/tcg-stubs.c
+++ b/target/arm/tcg-stubs.c
@@ -21,7 +21,30 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
{
g_assert_not_reached();
}
-/* Temporarily while cpu_get_tb_cpu_state() is still in common code */
-void assert_hflags_rebuild_correctly(CPUARMState *env)
+
+/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */
+void define_tlb_insn_regs(ARMCPU *cpu)
+{
+}
+
+/* With KVM, we never use float_status, so these can be no-ops */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+}
+
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+}
+
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
+{
+ return 0;
+}
+
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
+{
+}
+
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
}
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 2b7a325..8c798cd 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -21,28 +21,40 @@
%rd 0:5
%esz_sd 22:1 !function=plus_2
+%esz_hs 22:1 !function=plus_1
%esz_hsd 22:2 !function=xor_2
%hl 11:1 21:1
%hlm 11:1 20:2
&r rn
+&rrr rd rn rm
&ri rd imm
+&rr rd rn
+&rr_sf rd rn sf
&rri_sf rd rn imm sf
+&rrr_sf rd rn rm sf
&i imm
&rr_e rd rn esz
+&rri_e rd rn imm esz
&rrr_e rd rn rm esz
&rrx_e rd rn rm idx esz
&rrrr_e rd rn rm ra esz
&qrr_e q rd rn esz
+&qrri_e q rd rn imm esz
&qrrr_e q rd rn rm esz
&qrrx_e q rd rn rm idx esz
&qrrrr_e q rd rn rm ra esz
@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
+@rr_s ........ ... ..... ...... rn:5 rd:5 &rr_e esz=2
@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3
+@rr_e ........ esz:2 . ..... ...... rn:5 rd:5 &rr_e
@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
+@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd
+@rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0
@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
+@rrr_s ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=2
@rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3
@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
@@ -54,13 +66,23 @@
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
+@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+@qrr_b . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=0
+@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
+@qrr_s . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=2
+@qrr_bh . q:1 ...... . esz:1 ...... ...... rn:5 rd:5 &qrr_e
+@qrr_hs . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_hs
+@qrr_sd . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_sd
+@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
+
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
+@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd
@qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e
@qr2r_e . q:1 ...... esz:2 . ..... ...... rm:5 rd:5 &qrrr_e rn=%rd
@@ -154,7 +176,7 @@ UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32
EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1
EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0
-# Branches
+### Branches
%imm26 0:s26 !function=times_4
@branch . ..... .......................... &i imm=%imm26
@@ -238,6 +260,9 @@ WFIT 1101 0101 0000 0011 0001 0000 001 rd:5
CLREX 1101 0101 0000 0011 0011 ---- 010 11111
DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111
+# For the DSB nXS variant, types always equals MBReqTypes_All and we ignore the
+# domain bits.
+DSB_nXS 1101 0101 0000 0011 0011 -- 10 001 11111
ISB 1101 0101 0000 0011 0011 ---- 110 11111
SB 1101 0101 0000 0011 0011 0000 111 11111
@@ -284,7 +309,7 @@ HLT 1101 0100 010 ................ 000 00 @i16
# DCPS2 1101 0100 101 ................ 000 10 @i16
# DCPS3 1101 0100 101 ................ 000 11 @i16
-# Loads and stores
+### Loads and stores
&stxr rn rt rt2 rs sz lasr
&stlr rn rt sz lasr
@@ -519,7 +544,7 @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5
LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm
&ldapr_stlr_i rn rt imm sz sign ext
-@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i
+@ldapr_stlr_i .. ...... .. . imm:s9 .. rn:5 rt:5 &ldapr_stlr_i
STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0
@@ -642,6 +667,138 @@ CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy
CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy
CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy
+### Data Processing (register)
+
+# Data Processing (2-source)
+
+@rrr . .......... rm:5 ...... rn:5 rd:5 &rrr
+@rrr_sf sf:1 .......... rm:5 ...... rn:5 rd:5 &rrr_sf
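+
+# Reading aid (informal): matched against @rrr_sf above, a pattern such
+# as "UDIV . 00 11010110 ..... 00001 0 ..... ....." fixes the opcode
+# bits and extracts sf from insn bit 31, rm from bits 20:16, rn from
+# bits 9:5 and rd from bits 4:0 into the &rrr_sf argument set.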
+
+UDIV . 00 11010110 ..... 00001 0 ..... ..... @rrr_sf
+SDIV . 00 11010110 ..... 00001 1 ..... ..... @rrr_sf
+LSLV . 00 11010110 ..... 00100 0 ..... ..... @rrr_sf
+LSRV . 00 11010110 ..... 00100 1 ..... ..... @rrr_sf
+ASRV . 00 11010110 ..... 00101 0 ..... ..... @rrr_sf
+RORV . 00 11010110 ..... 00101 1 ..... ..... @rrr_sf
+
+CRC32 0 00 11010110 ..... 0100 00 ..... ..... @rrr_b
+CRC32 0 00 11010110 ..... 0100 01 ..... ..... @rrr_h
+CRC32 0 00 11010110 ..... 0100 10 ..... ..... @rrr_s
+CRC32 1 00 11010110 ..... 0100 11 ..... ..... @rrr_d
+
+CRC32C 0 00 11010110 ..... 0101 00 ..... ..... @rrr_b
+CRC32C 0 00 11010110 ..... 0101 01 ..... ..... @rrr_h
+CRC32C 0 00 11010110 ..... 0101 10 ..... ..... @rrr_s
+CRC32C 1 00 11010110 ..... 0101 11 ..... ..... @rrr_d
+
+SUBP 1 00 11010110 ..... 000000 ..... ..... @rrr
+SUBPS 1 01 11010110 ..... 000000 ..... ..... @rrr
+IRG 1 00 11010110 ..... 000100 ..... ..... @rrr
+GMI 1 00 11010110 ..... 000101 ..... ..... @rrr
+
+PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr
+
+# Data Processing (1-source)
+
+@rr . .......... ..... ...... rn:5 rd:5 &rr
+@rr_sf sf:1 .......... ..... ...... rn:5 rd:5 &rr_sf
+
+RBIT . 10 11010110 00000 000000 ..... ..... @rr_sf
+REV16 . 10 11010110 00000 000001 ..... ..... @rr_sf
+REV32 . 10 11010110 00000 000010 ..... ..... @rr_sf
+REV64 1 10 11010110 00000 000011 ..... ..... @rr
+
+CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf
+CLS . 10 11010110 00000 000101 ..... ..... @rr_sf
+
+&pacaut rd rn z
+@pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut
+
+PACIA 1 10 11010110 00001 00.000 ..... ..... @pacaut
+PACIB 1 10 11010110 00001 00.001 ..... ..... @pacaut
+PACDA 1 10 11010110 00001 00.010 ..... ..... @pacaut
+PACDB 1 10 11010110 00001 00.011 ..... ..... @pacaut
+
+AUTIA 1 10 11010110 00001 00.100 ..... ..... @pacaut
+AUTIB 1 10 11010110 00001 00.101 ..... ..... @pacaut
+AUTDA 1 10 11010110 00001 00.110 ..... ..... @pacaut
+AUTDB 1 10 11010110 00001 00.111 ..... ..... @pacaut
+
+XPACI 1 10 11010110 00001 010000 11111 rd:5
+XPACD 1 10 11010110 00001 010001 11111 rd:5
+
+# Logical (shifted reg)
+
+&logic_shift rd rn rm sf sa st n
+@logic_shift sf:1 .. ..... st:2 n:1 rm:5 sa:6 rn:5 rd:5 &logic_shift
+
+AND_r . 00 01010 .. . ..... ...... ..... ..... @logic_shift
+ORR_r . 01 01010 .. . ..... ...... ..... ..... @logic_shift
+EOR_r . 10 01010 .. . ..... ...... ..... ..... @logic_shift
+ANDS_r . 11 01010 .. . ..... ...... ..... ..... @logic_shift
+
+# Add/subtract (shifted reg)
+
+&addsub_shift rd rn rm sf sa st
+@addsub_shift sf:1 .. ..... st:2 . rm:5 sa:6 rn:5 rd:5 &addsub_shift
+
+ADD_r . 00 01011 .. 0 ..... ...... ..... ..... @addsub_shift
+SUB_r . 10 01011 .. 0 ..... ...... ..... ..... @addsub_shift
+ADDS_r . 01 01011 .. 0 ..... ...... ..... ..... @addsub_shift
+SUBS_r . 11 01011 .. 0 ..... ...... ..... ..... @addsub_shift
+
+# Add/subtract (extended reg)
+
+&addsub_ext rd rn rm sf sa st
+@addsub_ext sf:1 .. ........ rm:5 st:3 sa:3 rn:5 rd:5 &addsub_ext
+
+ADD_ext . 00 01011001 ..... ... ... ..... ..... @addsub_ext
+SUB_ext . 10 01011001 ..... ... ... ..... ..... @addsub_ext
+ADDS_ext . 01 01011001 ..... ... ... ..... ..... @addsub_ext
+SUBS_ext . 11 01011001 ..... ... ... ..... ..... @addsub_ext
+
+# Add/subtract (carry)
+
+ADC . 00 11010000 ..... 000000 ..... ..... @rrr_sf
+ADCS . 01 11010000 ..... 000000 ..... ..... @rrr_sf
+SBC . 10 11010000 ..... 000000 ..... ..... @rrr_sf
+SBCS . 11 11010000 ..... 000000 ..... ..... @rrr_sf
+
+# Rotate right into flags
+
+RMIF 1 01 11010000 imm:6 00001 rn:5 0 mask:4
+
+# Evaluate into flags
+
+SETF8 0 01 11010000 00000 000010 rn:5 01101
+SETF16 0 01 11010000 00000 010010 rn:5 01101
+
+# Conditional compare
+
+CCMP sf:1 op:1 1 11010010 y:5 cond:4 imm:1 0 rn:5 0 nzcv:4
+
+# Conditional select
+
+CSEL sf:1 else_inv:1 011010100 rm:5 cond:4 0 else_inc:1 rn:5 rd:5
+
+# Data Processing (3-source)
+
+&rrrr rd rn rm ra
+@rrrr . .. ........ rm:5 . ra:5 rn:5 rd:5 &rrrr
+
+MADD_w 0 00 11011000 ..... 0 ..... ..... ..... @rrrr
+MSUB_w 0 00 11011000 ..... 1 ..... ..... ..... @rrrr
+MADD_x 1 00 11011000 ..... 0 ..... ..... ..... @rrrr
+MSUB_x 1 00 11011000 ..... 1 ..... ..... ..... @rrrr
+
+SMADDL 1 00 11011001 ..... 0 ..... ..... ..... @rrrr
+SMSUBL 1 00 11011001 ..... 1 ..... ..... ..... @rrrr
+UMADDL 1 00 11011101 ..... 0 ..... ..... ..... @rrrr
+UMSUBL 1 00 11011101 ..... 1 ..... ..... ..... @rrrr
+
+SMULH 1 00 11011010 ..... 0 11111 ..... ..... @rrr
+UMULH 1 00 11011110 ..... 0 11111 ..... ..... @rrr
+
### Cryptographic AES
AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
@@ -781,6 +938,16 @@ CMEQ_s 0111 1110 111 ..... 10001 1 ..... ..... @rrr_d
SQDMULH_s 0101 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
+SQRDMLAH_s 0111 1110 ..0 ..... 10000 1 ..... ..... @rrr_e
+SQRDMLSH_s 0111 1110 ..0 ..... 10001 1 ..... ..... @rrr_e
+
+# Decode scalar x scalar as scalar x indexed, with index 0.
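+# This reuses the scalar-by-element translators for the plain scalar
+# forms: element 0 of the Rm vector register is the scalar operand.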
+SQDMULL_si 0101 1110 011 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMULL_si 0101 1110 101 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=2
+SQDMLAL_si 0101 1110 011 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMLAL_si 0101 1110 101 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=2
+SQDMLSL_si 0101 1110 011 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMLSL_si 0101 1110 101 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=2
### Advanced SIMD scalar pairwise
@@ -941,6 +1108,59 @@ MLS_v 0.10 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e
SQDMULH_v 0.00 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e
SQRDMULH_v 0.10 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e
+SQRDMLAH_v 0.10 1110 ..0 ..... 10000 1 ..... ..... @qrrr_e
+SQRDMLSH_v 0.10 1110 ..0 ..... 10001 1 ..... ..... @qrrr_e
+
+SDOT_v 0.00 1110 100 ..... 10010 1 ..... ..... @qrrr_s
+UDOT_v 0.10 1110 100 ..... 10010 1 ..... ..... @qrrr_s
+USDOT_v 0.00 1110 100 ..... 10011 1 ..... ..... @qrrr_s
+BFDOT_v 0.10 1110 010 ..... 11111 1 ..... ..... @qrrr_s
+BFMLAL_v 0.10 1110 110 ..... 11111 1 ..... ..... @qrrr_h
+BFMMLA 0110 1110 010 ..... 11101 1 ..... ..... @rrr_q1e0
+SMMLA 0100 1110 100 ..... 10100 1 ..... ..... @rrr_q1e0
+UMMLA 0110 1110 100 ..... 10100 1 ..... ..... @rrr_q1e0
+USMMLA 0100 1110 100 ..... 10101 1 ..... ..... @rrr_q1e0
+
+FCADD_90 0.10 1110 ..0 ..... 11100 1 ..... ..... @qrrr_e
+FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e
+
+FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
+
+SMULL_v 0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+UMULL_v 0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+SMLAL_v 0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+UMLAL_v 0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+SMLSL_v 0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+UMLSL_v 0.10 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+
+SADDL_v 0.00 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+UADDL_v 0.10 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+SSUBL_v 0.00 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+USUBL_v 0.10 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+SABAL_v 0.00 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+UABAL_v 0.10 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+SABDL_v 0.00 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+UABDL_v 0.10 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+
+SQDMULL_v 0.00 1110 011 ..... 11010 0 ..... ..... @qrrr_h
+SQDMULL_v 0.00 1110 101 ..... 11010 0 ..... ..... @qrrr_s
+SQDMLAL_v 0.00 1110 011 ..... 10010 0 ..... ..... @qrrr_h
+SQDMLAL_v 0.00 1110 101 ..... 10010 0 ..... ..... @qrrr_s
+SQDMLSL_v 0.00 1110 011 ..... 10110 0 ..... ..... @qrrr_h
+SQDMLSL_v 0.00 1110 101 ..... 10110 0 ..... ..... @qrrr_s
+
+SADDW 0.00 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+UADDW 0.10 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+SSUBW 0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+USUBW 0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+
+ADDHN 0.00 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+RADDHN 0.10 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+SUBHN 0.00 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+RSUBHN 0.10 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+
+PMULL_p8 0.00 1110 001 ..... 11100 0 ..... ..... @qrrr_b
+PMULL_p64 0.00 1110 111 ..... 11100 0 ..... ..... @qrrr_b
### Advanced SIMD scalar x indexed element
@@ -966,6 +1186,21 @@ SQDMULH_si 0101 1111 10 .. .... 1100 . 0 ..... ..... @rrx_s
SQRDMULH_si 0101 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h
SQRDMULH_si 0101 1111 10 . ..... 1101 . 0 ..... ..... @rrx_s
+SQRDMLAH_si 0111 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h
+SQRDMLAH_si 0111 1111 10 .. .... 1101 . 0 ..... ..... @rrx_s
+
+SQRDMLSH_si 0111 1111 01 .. .... 1111 . 0 ..... ..... @rrx_h
+SQRDMLSH_si 0111 1111 10 .. .... 1111 . 0 ..... ..... @rrx_s
+
+SQDMULL_si 0101 1111 01 .. .... 1011 . 0 ..... ..... @rrx_h
+SQDMULL_si 0101 1111 10 . ..... 1011 . 0 ..... ..... @rrx_s
+
+SQDMLAL_si 0101 1111 01 .. .... 0011 . 0 ..... ..... @rrx_h
+SQDMLAL_si 0101 1111 10 . ..... 0011 . 0 ..... ..... @rrx_s
+
+SQDMLSL_si 0101 1111 01 .. .... 0111 . 0 ..... ..... @rrx_h
+SQDMLSL_si 0101 1111 10 . ..... 0111 . 0 ..... ..... @rrx_s
+
### Advanced SIMD vector x indexed element
FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
@@ -1004,6 +1239,47 @@ SQDMULH_vi 0.00 1111 10 . ..... 1100 . 0 ..... ..... @qrrx_s
SQRDMULH_vi 0.00 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h
SQRDMULH_vi 0.00 1111 10 . ..... 1101 . 0 ..... ..... @qrrx_s
+SQRDMLAH_vi 0.10 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h
+SQRDMLAH_vi 0.10 1111 10 .. .... 1101 . 0 ..... ..... @qrrx_s
+
+SQRDMLSH_vi 0.10 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_h
+SQRDMLSH_vi 0.10 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s
+
+SDOT_vi 0.00 1111 10 .. .... 1110 . 0 ..... ..... @qrrx_s
+UDOT_vi 0.10 1111 10 .. .... 1110 . 0 ..... ..... @qrrx_s
+SUDOT_vi 0.00 1111 00 .. .... 1111 . 0 ..... ..... @qrrx_s
+USDOT_vi 0.00 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s
+BFDOT_vi 0.00 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_s
+BFMLAL_vi 0.00 1111 11 .. .... 1111 . 0 ..... ..... @qrrx_h
+
+FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0
+FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1
+FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1
+
+SMULL_vi 0.00 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
+SMULL_vi 0.00 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
+UMULL_vi 0.10 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
+UMULL_vi 0.10 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
+
+SMLAL_vi 0.00 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
+SMLAL_vi 0.00 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
+UMLAL_vi 0.10 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
+UMLAL_vi 0.10 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
+
+SMLSL_vi 0.00 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
+SMLSL_vi 0.00 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
+UMLSL_vi 0.10 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
+UMLSL_vi 0.10 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
+
+SQDMULL_vi 0.00 1111 01 .. .... 1011 . 0 ..... ..... @qrrx_h
+SQDMULL_vi 0.00 1111 10 . ..... 1011 . 0 ..... ..... @qrrx_s
+
+SQDMLAL_vi 0.00 1111 01 .. .... 0011 . 0 ..... ..... @qrrx_h
+SQDMLAL_vi 0.00 1111 10 . ..... 0011 . 0 ..... ..... @qrrx_s
+
+SQDMLSL_vi 0.00 1111 01 .. .... 0111 . 0 ..... ..... @qrrx_h
+SQDMLSL_vi 0.00 1111 10 . ..... 0111 . 0 ..... ..... @qrrx_s
+
# Floating-point conditional select
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
@@ -1016,3 +1292,605 @@ FMADD 0001 1111 .. 0 ..... 0 ..... ..... ..... @rrrr_hsd
FMSUB 0001 1111 .. 0 ..... 1 ..... ..... ..... @rrrr_hsd
FNMADD 0001 1111 .. 1 ..... 0 ..... ..... ..... @rrrr_hsd
FNMSUB 0001 1111 .. 1 ..... 1 ..... ..... ..... @rrrr_hsd
+
+# Advanced SIMD Extract
+
+EXT_d 0010 1110 00 0 rm:5 00 imm:3 0 rn:5 rd:5
+EXT_q 0110 1110 00 0 rm:5 0 imm:4 0 rn:5 rd:5
+
+# Advanced SIMD Table Lookup
+
+TBL_TBX 0 q:1 00 1110 000 rm:5 0 len:2 tbx:1 00 rn:5 rd:5
+
+# Advanced SIMD Permute
+
+UZP1 0.00 1110 .. 0 ..... 0 001 10 ..... ..... @qrrr_e
+UZP2 0.00 1110 .. 0 ..... 0 101 10 ..... ..... @qrrr_e
+TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
+TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
+ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
+ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
+
+# Advanced SIMD Across Lanes
+
+ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
+SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
+UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
+SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
+UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
+SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
+UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
+
+FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
+FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
+
+FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
+FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
+
+FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
+FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
+
+FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
+FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
+
+# Conversion between floating-point and fixed-point (general register)
+
+&fcvt rd rn esz sf shift
+%fcvt_shift32 10:5 !function=rsub_32
+%fcvt_shift64 10:6 !function=rsub_64
+
+@fcvt32 0 ....... .. ...... 1..... rn:5 rd:5 \
+ &fcvt sf=0 esz=%esz_hsd shift=%fcvt_shift32
+@fcvt64 1 ....... .. ...... ...... rn:5 rd:5 \
+ &fcvt sf=1 esz=%esz_hsd shift=%fcvt_shift64
+
+SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt32
+SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt64
+UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt32
+UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt64
+
+FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt32
+FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt64
+FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt32
+FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt64
+
+# Conversion between floating-point and integer (general register)
+
+@icvt sf:1 ....... .. ...... ...... rn:5 rd:5 \
+ &fcvt esz=%esz_hsd shift=0
+
+SCVTF_g . 0011110 .. 100010 000000 ..... ..... @icvt
+UCVTF_g . 0011110 .. 100011 000000 ..... ..... @icvt
+
+FCVTNS_g . 0011110 .. 100000 000000 ..... ..... @icvt
+FCVTNU_g . 0011110 .. 100001 000000 ..... ..... @icvt
+FCVTPS_g . 0011110 .. 101000 000000 ..... ..... @icvt
+FCVTPU_g . 0011110 .. 101001 000000 ..... ..... @icvt
+FCVTMS_g . 0011110 .. 110000 000000 ..... ..... @icvt
+FCVTMU_g . 0011110 .. 110001 000000 ..... ..... @icvt
+FCVTZS_g . 0011110 .. 111000 000000 ..... ..... @icvt
+FCVTZU_g . 0011110 .. 111001 000000 ..... ..... @icvt
+FCVTAS_g . 0011110 .. 100100 000000 ..... ..... @icvt
+FCVTAU_g . 0011110 .. 100101 000000 ..... ..... @icvt
+
+FJCVTZS 0 0011110 01 111110 000000 ..... ..... @rr
+
+FMOV_ws 0 0011110 00 100110 000000 ..... ..... @rr
+FMOV_sw 0 0011110 00 100111 000000 ..... ..... @rr
+
+FMOV_xd 1 0011110 01 100110 000000 ..... ..... @rr
+FMOV_dx 1 0011110 01 100111 000000 ..... ..... @rr
+
+# Move to/from upper half of 128-bit
+FMOV_xu 1 0011110 10 101110 000000 ..... ..... @rr
+FMOV_ux 1 0011110 10 101111 000000 ..... ..... @rr
+
+# Half-precision allows both sf=0 and sf=1 with identical results
+FMOV_xh - 0011110 11 100110 000000 ..... ..... @rr
+FMOV_hx - 0011110 11 100111 000000 ..... ..... @rr
+
+# Floating-point data processing (1 source)
+
+FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd
+FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd
+FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd
+FSQRT_s 00011110 .. 1 000011 10000 ..... ..... @rr_hsd
+
+FRINTN_s 00011110 .. 1 001000 10000 ..... ..... @rr_hsd
+FRINTP_s 00011110 .. 1 001001 10000 ..... ..... @rr_hsd
+FRINTM_s 00011110 .. 1 001010 10000 ..... ..... @rr_hsd
+FRINTZ_s 00011110 .. 1 001011 10000 ..... ..... @rr_hsd
+FRINTA_s 00011110 .. 1 001100 10000 ..... ..... @rr_hsd
+FRINTX_s 00011110 .. 1 001110 10000 ..... ..... @rr_hsd
+FRINTI_s 00011110 .. 1 001111 10000 ..... ..... @rr_hsd
+
+BFCVT_s 00011110 01 1 000110 10000 ..... ..... @rr_s
+
+FRINT32Z_s 00011110 0. 1 010000 10000 ..... ..... @rr_sd
+FRINT32X_s 00011110 0. 1 010001 10000 ..... ..... @rr_sd
+FRINT64Z_s 00011110 0. 1 010010 10000 ..... ..... @rr_sd
+FRINT64X_s 00011110 0. 1 010011 10000 ..... ..... @rr_sd
+
+FCVT_s_ds 00011110 00 1 000101 10000 ..... ..... @rr
+FCVT_s_hs 00011110 00 1 000111 10000 ..... ..... @rr
+FCVT_s_sd 00011110 01 1 000100 10000 ..... ..... @rr
+FCVT_s_hd 00011110 01 1 000111 10000 ..... ..... @rr
+FCVT_s_sh 00011110 11 1 000100 10000 ..... ..... @rr
+FCVT_s_dh 00011110 11 1 000101 10000 ..... ..... @rr
+
+# Floating-point Immediate
+
+FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd
+
+# Floating-point Compare
+
+FCMP 00011110 .. 1 rm:5 001000 rn:5 e:1 z:1 000 esz=%esz_hsd
+
+# Floating-point Conditional Compare
+
+FCCMP 00011110 .. 1 rm:5 cond:4 01 rn:5 e:1 nzcv:4 esz=%esz_hsd
+
+# Advanced SIMD Modified Immediate / Shift by Immediate
+
+%abcdefgh 16:3 5:5
+
+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%neon_rshift_i6 16:6 !function=rsub_64
+%neon_rshift_i5 16:5 !function=rsub_32
+%neon_rshift_i4 16:4 !function=rsub_16
+%neon_rshift_i3 16:3 !function=rsub_8
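+
+# For illustration only: the !function extractors above are expected to
+# be trivial C helpers on the translator side (a sketch, assuming the
+# usual decodetree extractor signature):
+#     static int rsub_64(DisasContext *s, int x) { return 64 - x; }
+# so an encoded field value of 48 with esz=3 denotes a right shift of
+# 64 - 48 = 16.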
+
+@q_shri_b . q:1 .. ..... 0001 ... ..... . rn:5 rd:5 \
+ &qrri_e esz=0 imm=%neon_rshift_i3
+@q_shri_h . q:1 .. ..... 001 .... ..... . rn:5 rd:5 \
+ &qrri_e esz=1 imm=%neon_rshift_i4
+@q_shri_s . q:1 .. ..... 01 ..... ..... . rn:5 rd:5 \
+ &qrri_e esz=2 imm=%neon_rshift_i5
+@q_shri_d . 1 .. ..... 1 ...... ..... . rn:5 rd:5 \
+ &qrri_e esz=3 imm=%neon_rshift_i6 q=1
+
+@q_shli_b . q:1 .. ..... 0001 imm:3 ..... . rn:5 rd:5 &qrri_e esz=0
+@q_shli_h . q:1 .. ..... 001 imm:4 ..... . rn:5 rd:5 &qrri_e esz=1
+@q_shli_s . q:1 .. ..... 01 imm:5 ..... . rn:5 rd:5 &qrri_e esz=2
+@q_shli_d . 1 .. ..... 1 imm:6 ..... . rn:5 rd:5 &qrri_e esz=3 q=1
+
+FMOVI_v_h 0 q:1 00 1111 00000 ... 1111 11 ..... rd:5 %abcdefgh
+
+# MOVI, MVNI, ORR, BIC, FMOV are all intermixed via cmode.
+Vimm 0 q:1 op:1 0 1111 00000 ... cmode:4 01 ..... rd:5 %abcdefgh
+
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_b
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_h
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_s
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_d
+
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_b
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_h
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_s
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_d
+
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_b
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_h
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_s
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_d
+
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_b
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_h
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_s
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_d
+
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_b
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_h
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_s
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_d
+
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_b
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_h
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_s
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_d
+
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_b
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_h
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_s
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_d
+
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_b
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_h
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_s
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_d
+
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_b
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_h
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_s
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_d
+
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_b
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_h
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_s
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_d
+
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_b
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_h
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_s
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_d
+
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_b
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_h
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_s
+
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_b
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_h
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_s
+
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_b
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_h
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_s
+
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_b
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_h
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_s
+
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_b
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_h
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_s
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_b
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_h
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_s
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_b
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_h
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_s
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_d
+
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_b
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_h
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_s
+
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_b
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_h
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_s
+
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_b
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_h
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_s
+
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_b
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_h
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_s
+
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_b
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_h
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_s
+
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_b
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_h
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_s
+
+# Advanced SIMD scalar shift by immediate
+
+@shri_b .... ..... 0001 ... ..... . rn:5 rd:5 \
+ &rri_e esz=0 imm=%neon_rshift_i3
+@shri_h .... ..... 001 .... ..... . rn:5 rd:5 \
+ &rri_e esz=1 imm=%neon_rshift_i4
+@shri_s .... ..... 01 ..... ..... . rn:5 rd:5 \
+ &rri_e esz=2 imm=%neon_rshift_i5
+@shri_d .... ..... 1 ...... ..... . rn:5 rd:5 \
+ &rri_e esz=3 imm=%neon_rshift_i6
+
+@shli_b .... ..... 0001 imm:3 ..... . rn:5 rd:5 &rri_e esz=0
+@shli_h .... ..... 001 imm:4 ..... . rn:5 rd:5 &rri_e esz=1
+@shli_s .... ..... 01 imm:5 ..... . rn:5 rd:5 &rri_e esz=2
+@shli_d .... ..... 1 imm:6 ..... . rn:5 rd:5 &rri_e esz=3
+
+SSHR_s 0101 11110 .... ... 00000 1 ..... ..... @shri_d
+USHR_s 0111 11110 .... ... 00000 1 ..... ..... @shri_d
+SSRA_s 0101 11110 .... ... 00010 1 ..... ..... @shri_d
+USRA_s 0111 11110 .... ... 00010 1 ..... ..... @shri_d
+SRSHR_s 0101 11110 .... ... 00100 1 ..... ..... @shri_d
+URSHR_s 0111 11110 .... ... 00100 1 ..... ..... @shri_d
+SRSRA_s 0101 11110 .... ... 00110 1 ..... ..... @shri_d
+URSRA_s 0111 11110 .... ... 00110 1 ..... ..... @shri_d
+SRI_s 0111 11110 .... ... 01000 1 ..... ..... @shri_d
+
+SHL_s 0101 11110 .... ... 01010 1 ..... ..... @shli_d
+SLI_s 0111 11110 .... ... 01010 1 ..... ..... @shli_d
+
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_b
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_h
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_s
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_d
+
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_b
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_h
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_s
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_d
+
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_b
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_h
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_s
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_d
+
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_b
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_h
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_s
+
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_b
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_h
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_s
+
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_b
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_h
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_s
+
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_b
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_h
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_s
+
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_b
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_h
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_s
+
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_b
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_h
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_s
+
+# Advanced SIMD scalar two-register miscellaneous
+
+SQABS_s 0101 1110 ..1 00000 01111 0 ..... ..... @rr_e
+SQNEG_s 0111 1110 ..1 00000 01111 0 ..... ..... @rr_e
+ABS_s 0101 1110 111 00000 10111 0 ..... ..... @rr
+NEG_s 0111 1110 111 00000 10111 0 ..... ..... @rr
+CMGT0_s 0101 1110 111 00000 10001 0 ..... ..... @rr
+CMGE0_s 0111 1110 111 00000 10001 0 ..... ..... @rr
+CMEQ0_s 0101 1110 111 00000 10011 0 ..... ..... @rr
+CMLE0_s 0111 1110 111 00000 10011 0 ..... ..... @rr
+CMLT0_s 0101 1110 111 00000 10101 0 ..... ..... @rr
+
+SQXTUN_s 0111 1110 ..1 00001 00101 0 ..... ..... @rr_e
+SQXTN_s 0101 1110 ..1 00001 01001 0 ..... ..... @rr_e
+UQXTN_s 0111 1110 ..1 00001 01001 0 ..... ..... @rr_e
+
+FCVTXN_s 0111 1110 011 00001 01101 0 ..... ..... @rr_s
+
+FCMGT0_s 0101 1110 111 11000 11001 0 ..... ..... @rr_h
+FCMGT0_s 0101 1110 1.1 00000 11001 0 ..... ..... @rr_sd
+
+FCMGE0_s 0111 1110 111 11000 11001 0 ..... ..... @rr_h
+FCMGE0_s 0111 1110 1.1 00000 11001 0 ..... ..... @rr_sd
+
+FCMEQ0_s 0101 1110 111 11000 11011 0 ..... ..... @rr_h
+FCMEQ0_s 0101 1110 1.1 00000 11011 0 ..... ..... @rr_sd
+
+FCMLE0_s 0111 1110 111 11000 11011 0 ..... ..... @rr_h
+FCMLE0_s 0111 1110 1.1 00000 11011 0 ..... ..... @rr_sd
+
+FCMLT0_s 0101 1110 111 11000 11101 0 ..... ..... @rr_h
+FCMLT0_s 0101 1110 1.1 00000 11101 0 ..... ..... @rr_sd
+
+FRECPE_s 0101 1110 111 11001 11011 0 ..... ..... @rr_h
+FRECPE_s 0101 1110 1.1 00001 11011 0 ..... ..... @rr_sd
+
+FRECPX_s 0101 1110 111 11001 11111 0 ..... ..... @rr_h
+FRECPX_s 0101 1110 1.1 00001 11111 0 ..... ..... @rr_sd
+
+FRSQRTE_s 0111 1110 111 11001 11011 0 ..... ..... @rr_h
+FRSQRTE_s 0111 1110 1.1 00001 11011 0 ..... ..... @rr_sd
+
+@icvt_h . ....... .. ...... ...... rn:5 rd:5 \
+ &fcvt sf=0 esz=1 shift=0
+@icvt_sd . ....... .. ...... ...... rn:5 rd:5 \
+ &fcvt sf=0 esz=%esz_sd shift=0
+
+SCVTF_f 0101 1110 011 11001 11011 0 ..... ..... @icvt_h
+SCVTF_f 0101 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
+
+UCVTF_f 0111 1110 011 11001 11011 0 ..... ..... @icvt_h
+UCVTF_f 0111 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
+
+FCVTNS_f 0101 1110 011 11001 10101 0 ..... ..... @icvt_h
+FCVTNS_f 0101 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
+FCVTNU_f 0111 1110 011 11001 10101 0 ..... ..... @icvt_h
+FCVTNU_f 0111 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
+
+FCVTPS_f 0101 1110 111 11001 10101 0 ..... ..... @icvt_h
+FCVTPS_f 0101 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
+FCVTPU_f 0111 1110 111 11001 10101 0 ..... ..... @icvt_h
+FCVTPU_f 0111 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
+
+FCVTMS_f 0101 1110 011 11001 10111 0 ..... ..... @icvt_h
+FCVTMS_f 0101 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
+FCVTMU_f 0111 1110 011 11001 10111 0 ..... ..... @icvt_h
+FCVTMU_f 0111 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
+
+FCVTZS_f 0101 1110 111 11001 10111 0 ..... ..... @icvt_h
+FCVTZS_f 0101 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
+FCVTZU_f 0111 1110 111 11001 10111 0 ..... ..... @icvt_h
+FCVTZU_f 0111 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
+
+FCVTAS_f 0101 1110 011 11001 11001 0 ..... ..... @icvt_h
+FCVTAS_f 0101 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
+FCVTAU_f 0111 1110 011 11001 11001 0 ..... ..... @icvt_h
+FCVTAU_f 0111 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
+
+%fcvt_f_sh_h 16:4 !function=rsub_16
+%fcvt_f_sh_s 16:5 !function=rsub_32
+%fcvt_f_sh_d 16:6 !function=rsub_64
+
+@fcvt_fixed_h .... .... . 001 .... ...... rn:5 rd:5 \
+ &fcvt sf=0 esz=1 shift=%fcvt_f_sh_h
+@fcvt_fixed_s .... .... . 01 ..... ...... rn:5 rd:5 \
+ &fcvt sf=0 esz=2 shift=%fcvt_f_sh_s
+@fcvt_fixed_d .... .... . 1 ...... ...... rn:5 rd:5 \
+ &fcvt sf=0 esz=3 shift=%fcvt_f_sh_d
+
+SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
+SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
+SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
+
+UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
+UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
+UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
+
+FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
+FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
+FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
+
+FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
+FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
+FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
+
+# Advanced SIMD two-register miscellaneous
+
+SQABS_v 0.00 1110 ..1 00000 01111 0 ..... ..... @qrr_e
+SQNEG_v 0.10 1110 ..1 00000 01111 0 ..... ..... @qrr_e
+ABS_v 0.00 1110 ..1 00000 10111 0 ..... ..... @qrr_e
+NEG_v 0.10 1110 ..1 00000 10111 0 ..... ..... @qrr_e
+CLS_v 0.00 1110 ..1 00000 01001 0 ..... ..... @qrr_e
+CLZ_v 0.10 1110 ..1 00000 01001 0 ..... ..... @qrr_e
+CNT_v 0.00 1110 001 00000 01011 0 ..... ..... @qrr_b
+NOT_v 0.10 1110 001 00000 01011 0 ..... ..... @qrr_b
+RBIT_v 0.10 1110 011 00000 01011 0 ..... ..... @qrr_b
+CMGT0_v 0.00 1110 ..1 00000 10001 0 ..... ..... @qrr_e
+CMGE0_v 0.10 1110 ..1 00000 10001 0 ..... ..... @qrr_e
+CMEQ0_v 0.00 1110 ..1 00000 10011 0 ..... ..... @qrr_e
+CMLE0_v 0.10 1110 ..1 00000 10011 0 ..... ..... @qrr_e
+CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
+
+REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
+REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... @qrr_bh
+REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
+
+SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
+UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e
+
+XTN 0.00 1110 ..1 00001 00101 0 ..... ..... @qrr_e
+SQXTUN_v 0.10 1110 ..1 00001 00101 0 ..... ..... @qrr_e
+SQXTN_v 0.00 1110 ..1 00001 01001 0 ..... ..... @qrr_e
+UQXTN_v 0.10 1110 ..1 00001 01001 0 ..... ..... @qrr_e
+
+FCVTN_v 0.00 1110 0.1 00001 01101 0 ..... ..... @qrr_hs
+FCVTXN_v 0.10 1110 011 00001 01101 0 ..... ..... @qrr_s
+BFCVTN_v 0.00 1110 101 00001 01101 0 ..... ..... @qrr_h
+
+SHLL_v 0.10 1110 ..1 00001 00111 0 ..... ..... @qrr_e
+
+FABS_v 0.00 1110 111 11000 11111 0 ..... ..... @qrr_h
+FABS_v 0.00 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
+
+FNEG_v 0.10 1110 111 11000 11111 0 ..... ..... @qrr_h
+FNEG_v 0.10 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
+
+FSQRT_v 0.10 1110 111 11001 11111 0 ..... ..... @qrr_h
+FSQRT_v 0.10 1110 1.1 00001 11111 0 ..... ..... @qrr_sd
+
+FRINTN_v 0.00 1110 011 11001 10001 0 ..... ..... @qrr_h
+FRINTN_v 0.00 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
+
+FRINTM_v 0.00 1110 011 11001 10011 0 ..... ..... @qrr_h
+FRINTM_v 0.00 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
+
+FRINTP_v 0.00 1110 111 11001 10001 0 ..... ..... @qrr_h
+FRINTP_v 0.00 1110 1.1 00001 10001 0 ..... ..... @qrr_sd
+
+FRINTZ_v 0.00 1110 111 11001 10011 0 ..... ..... @qrr_h
+FRINTZ_v 0.00 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
+
+FRINTA_v 0.10 1110 011 11001 10001 0 ..... ..... @qrr_h
+FRINTA_v 0.10 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
+
+FRINTX_v 0.10 1110 011 11001 10011 0 ..... ..... @qrr_h
+FRINTX_v 0.10 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
+
+FRINTI_v 0.10 1110 111 11001 10011 0 ..... ..... @qrr_h
+FRINTI_v 0.10 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
+
+FRINT32Z_v 0.00 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
+FRINT32X_v 0.10 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
+FRINT64Z_v 0.00 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
+FRINT64X_v 0.10 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
+
+SCVTF_vi 0.00 1110 011 11001 11011 0 ..... ..... @qrr_h
+SCVTF_vi 0.00 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
+
+UCVTF_vi 0.10 1110 011 11001 11011 0 ..... ..... @qrr_h
+UCVTF_vi 0.10 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
+
+FCVTNS_vi 0.00 1110 011 11001 10101 0 ..... ..... @qrr_h
+FCVTNS_vi 0.00 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
+FCVTNU_vi 0.10 1110 011 11001 10101 0 ..... ..... @qrr_h
+FCVTNU_vi 0.10 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
+
+FCVTPS_vi 0.00 1110 111 11001 10101 0 ..... ..... @qrr_h
+FCVTPS_vi 0.00 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
+FCVTPU_vi 0.10 1110 111 11001 10101 0 ..... ..... @qrr_h
+FCVTPU_vi 0.10 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
+
+FCVTMS_vi 0.00 1110 011 11001 10111 0 ..... ..... @qrr_h
+FCVTMS_vi 0.00 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
+FCVTMU_vi 0.10 1110 011 11001 10111 0 ..... ..... @qrr_h
+FCVTMU_vi 0.10 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
+
+FCVTZS_vi 0.00 1110 111 11001 10111 0 ..... ..... @qrr_h
+FCVTZS_vi 0.00 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
+FCVTZU_vi 0.10 1110 111 11001 10111 0 ..... ..... @qrr_h
+FCVTZU_vi 0.10 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
+
+FCVTAS_vi 0.00 1110 011 11001 11001 0 ..... ..... @qrr_h
+FCVTAS_vi 0.00 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
+FCVTAU_vi 0.10 1110 011 11001 11001 0 ..... ..... @qrr_h
+FCVTAU_vi 0.10 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
+
+FCMGT0_v 0.00 1110 111 11000 11001 0 ..... ..... @qrr_h
+FCMGT0_v 0.00 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
+
+FCMGE0_v 0.10 1110 111 11000 11001 0 ..... ..... @qrr_h
+FCMGE0_v 0.10 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
+
+FCMEQ0_v 0.00 1110 111 11000 11011 0 ..... ..... @qrr_h
+FCMEQ0_v 0.00 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
+
+FCMLE0_v 0.10 1110 111 11000 11011 0 ..... ..... @qrr_h
+FCMLE0_v 0.10 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
+
+FCMLT0_v 0.00 1110 111 11000 11101 0 ..... ..... @qrr_h
+FCMLT0_v 0.00 1110 1.1 00000 11101 0 ..... ..... @qrr_sd
+
+FRECPE_v 0.00 1110 111 11001 11011 0 ..... ..... @qrr_h
+FRECPE_v 0.00 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
+
+FRSQRTE_v 0.10 1110 111 11001 11011 0 ..... ..... @qrr_h
+FRSQRTE_v 0.10 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
+
+URECPE_v 0.00 1110 101 00001 11001 0 ..... ..... @qrr_s
+URSQRTE_v 0.10 1110 101 00001 11001 0 ..... ..... @qrr_s
+
+FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd
+
+&fcvt_q rd rn esz q shift
+@fcvtq_h . q:1 . ...... 001 .... ...... rn:5 rd:5 \
+ &fcvt_q esz=1 shift=%fcvt_f_sh_h
+@fcvtq_s . q:1 . ...... 01 ..... ...... rn:5 rd:5 \
+ &fcvt_q esz=2 shift=%fcvt_f_sh_s
+@fcvtq_d . q:1 . ...... 1 ...... ...... rn:5 rd:5 \
+ &fcvt_q esz=3 shift=%fcvt_f_sh_d
+
+SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_h
+SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_s
+SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_d
+
+UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_h
+UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_s
+UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_d
+
+FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_h
+FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_s
+FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_d
+
+FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_h
+FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_s
+FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_d
diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c
new file mode 100644
index 0000000..6701398
--- /dev/null
+++ b/target/arm/tcg/arith_helper.c
@@ -0,0 +1,297 @@
+/*
+ * ARM generic helpers for various arithmetical operations.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* for crc32 */
+
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
+
+/*
+ * Note that signed overflow is undefined in C. The following routines are
+ * careful to use unsigned types where modulo arithmetic is required.
+ * Failure to do so _will_ break on newer gcc.
+ */
+
+/* Signed saturating arithmetic. */
+
+/* Perform 16-bit signed saturating addition. */
+static inline uint16_t add16_sat(uint16_t a, uint16_t b)
+{
+ uint16_t res;
+
+ res = a + b;
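+ /*
+  * Signed overflow occurred iff the operands have the same sign
+  * (bit 15 of a ^ b clear) but the result's sign differs from a's
+  * (bit 15 of res ^ a set); saturate towards the sign of a.
+  */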
+ if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
+ if (a & 0x8000) {
+ res = 0x8000;
+ } else {
+ res = 0x7fff;
+ }
+ }
+ return res;
+}
+
+/* Perform 8-bit signed saturating addition. */
+static inline uint8_t add8_sat(uint8_t a, uint8_t b)
+{
+ uint8_t res;
+
+ res = a + b;
+ if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {
+ if (a & 0x80) {
+ res = 0x80;
+ } else {
+ res = 0x7f;
+ }
+ }
+ return res;
+}
+
+/* Perform 16-bit signed saturating subtraction. */
+static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
+{
+ uint16_t res;
+
+ res = a - b;
+ if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {
+ if (a & 0x8000) {
+ res = 0x8000;
+ } else {
+ res = 0x7fff;
+ }
+ }
+ return res;
+}
+
+/* Perform 8-bit signed saturating subtraction. */
+static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
+{
+ uint8_t res;
+
+ res = a - b;
+ if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {
+ if (a & 0x80) {
+ res = 0x80;
+ } else {
+ res = 0x7f;
+ }
+ }
+ return res;
+}
+
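+/*
+ * op_addsub.c.inc is a template: each inclusion below instantiates one
+ * family of parallel add/sub helpers, with PFX naming the variant and
+ * ADD16/SUB16/ADD8/SUB8 supplying the per-lane operation.
+ */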
+#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
+#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
+#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
+#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
+#define PFX q
+
+#include "op_addsub.c.inc"
+
+/* Unsigned saturating arithmetic. */
+static inline uint16_t add16_usat(uint16_t a, uint16_t b)
+{
+ uint16_t res;
+ res = a + b;
+ if (res < a) {
+ res = 0xffff;
+ }
+ return res;
+}
+
+static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
+{
+ if (a > b) {
+ return a - b;
+ } else {
+ return 0;
+ }
+}
+
+static inline uint8_t add8_usat(uint8_t a, uint8_t b)
+{
+ uint8_t res;
+ res = a + b;
+ if (res < a) {
+ res = 0xff;
+ }
+ return res;
+}
+
+static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
+{
+ if (a > b) {
+ return a - b;
+ } else {
+ return 0;
+ }
+}
+
+#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
+#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
+#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
+#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
+#define PFX uq
+
+#include "op_addsub.c.inc"
+
+/*
+ * Signed modulo arithmetic. These implement the ARMv6 parallel
+ * add/subtract helpers: RESULT() packs each lane's result into the
+ * return value, and the GE bits (two per 16-bit lane, one per byte
+ * lane) record whether the lane result was non-negative, for use by
+ * a later SEL instruction.
+ */
+#define SARITH16(a, b, n, op) do { \
+ int32_t sum; \
+ sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
+ RESULT(sum, n, 16); \
+ if (sum >= 0) \
+ ge |= 3 << (n * 2); \
+ } while (0)
+
+#define SARITH8(a, b, n, op) do { \
+ int32_t sum; \
+ sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
+ RESULT(sum, n, 8); \
+ if (sum >= 0) \
+ ge |= 1 << n; \
+ } while (0)
+
+
+#define ADD16(a, b, n) SARITH16(a, b, n, +)
+#define SUB16(a, b, n) SARITH16(a, b, n, -)
+#define ADD8(a, b, n) SARITH8(a, b, n, +)
+#define SUB8(a, b, n) SARITH8(a, b, n, -)
+#define PFX s
+#define ARITH_GE
+
+#include "op_addsub.c.inc"
+
+/*
+ * Unsigned modulo arithmetic. Here the GE bits record a carry-out on
+ * addition and the absence of a borrow on subtraction.
+ */
+#define ADD16(a, b, n) do { \
+ uint32_t sum; \
+ sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
+ RESULT(sum, n, 16); \
+ if ((sum >> 16) == 1) \
+ ge |= 3 << (n * 2); \
+ } while (0)
+
+#define ADD8(a, b, n) do { \
+ uint32_t sum; \
+ sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
+ RESULT(sum, n, 8); \
+ if ((sum >> 8) == 1) \
+ ge |= 1 << n; \
+ } while (0)
+
+#define SUB16(a, b, n) do { \
+ uint32_t sum; \
+ sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
+ RESULT(sum, n, 16); \
+ if ((sum >> 16) == 0) \
+ ge |= 3 << (n * 2); \
+ } while (0)
+
+#define SUB8(a, b, n) do { \
+ uint32_t sum; \
+ sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
+ RESULT(sum, n, 8); \
+ if ((sum >> 8) == 0) \
+ ge |= 1 << n; \
+ } while (0)
+
+#define PFX u
+#define ARITH_GE
+
+#include "op_addsub.c.inc"
+
+/* Halved signed arithmetic. */
+#define ADD16(a, b, n) \
+ RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
+#define SUB16(a, b, n) \
+ RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
+#define ADD8(a, b, n) \
+ RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
+#define SUB8(a, b, n) \
+ RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
+#define PFX sh
+
+#include "op_addsub.c.inc"
+
+/* Halved unsigned arithmetic. */
+#define ADD16(a, b, n) \
+ RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
+#define SUB16(a, b, n) \
+ RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
+#define ADD8(a, b, n) \
+ RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
+#define SUB8(a, b, n) \
+ RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
+#define PFX uh
+
+#include "op_addsub.c.inc"
+
+static inline uint8_t do_usad(uint8_t a, uint8_t b)
+{
+ if (a > b) {
+ return a - b;
+ } else {
+ return b - a;
+ }
+}
+
+/* Unsigned sum of absolute byte differences. */
+uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
+{
+ uint32_t sum;
+ sum = do_usad(a, b);
+ sum += do_usad(a >> 8, b >> 8);
+ sum += do_usad(a >> 16, b >> 16);
+ sum += do_usad(a >> 24, b >> 24);
+ return sum;
+}
+
+/* For the ARMv6 SEL instruction: 'flags' holds the four GE bits. */
+uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
+{
+ uint32_t mask;
+
+ mask = 0;
+ if (flags & 1) {
+ mask |= 0xff;
+ }
+ if (flags & 2) {
+ mask |= 0xff00;
+ }
+ if (flags & 4) {
+ mask |= 0xff0000;
+ }
+ if (flags & 8) {
+ mask |= 0xff000000;
+ }
+ return (a & mask) | (b & ~mask);
+}
+
+/*
+ * CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+ uint8_t buf[4];
+
+ stl_le_p(buf, val);
+
+ /*
+  * zlib's crc32 inverts the accumulator on entry and the result on
+  * exit; the pre- and post-inversion here cancels that, yielding the
+  * raw CRC accumulation that the instruction requires.
+  */
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
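+
+/*
+ * Illustrative note (not part of the helper itself): CRC accumulation
+ * is byte-serial and the inversions cancel when chaining, so
+ *     helper_crc32(helper_crc32(acc, b0, 1), b1, 1)
+ * equals
+ *     helper_crc32(acc, b0 | (b1 << 8), 2).
+ */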
+
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+ uint8_t buf[4];
+
+ stl_le_p(buf, val);
+
+ /* Linux crc32c converts the output to one's complement. */
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index c059c68..eddd711 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -10,7 +10,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
#include "internals.h"
#if !defined(CONFIG_USER_ONLY)
@@ -19,7 +19,6 @@
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
- CPUClass *cc = CPU_GET_CLASS(cs);
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
bool ret = false;
@@ -35,7 +34,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
if (interrupt_request & CPU_INTERRUPT_HARD
&& (armv7m_nvic_can_take_pending_exception(env->nvic))) {
cs->exception_index = EXCP_IRQ;
- cc->tcg_ops->do_interrupt(cs);
+ cs->cc->tcg_ops->do_interrupt(cs);
ret = true;
}
return ret;
@@ -46,6 +45,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V6);
set_feature(&cpu->env, ARM_FEATURE_M);
@@ -59,51 +59,53 @@ static void cortex_m0_initfn(Object *obj)
* by looking at ID register fields. We use the same values as
* for the M3.
*/
- cpu->isar.id_pfr0 = 0x00000030;
- cpu->isar.id_pfr1 = 0x00000200;
- cpu->isar.id_dfr0 = 0x00100000;
+ SET_IDREG(isar, ID_PFR0, 0x00000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000200);
+ SET_IDREG(isar, ID_DFR0, 0x00100000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00000030;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x00000000;
- cpu->isar.id_mmfr3 = 0x00000000;
- cpu->isar.id_isar0 = 0x01141110;
- cpu->isar.id_isar1 = 0x02111000;
- cpu->isar.id_isar2 = 0x21112231;
- cpu->isar.id_isar3 = 0x01111110;
- cpu->isar.id_isar4 = 0x01310102;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00000030);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x00000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000000);
+ SET_IDREG(isar, ID_ISAR0, 0x01141110);
+ SET_IDREG(isar, ID_ISAR1, 0x02111000);
+ SET_IDREG(isar, ID_ISAR2, 0x21112231);
+ SET_IDREG(isar, ID_ISAR3, 0x01111110);
+ SET_IDREG(isar, ID_ISAR4, 0x01310102);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
}
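/*
 * Illustrative sketch (an assumption, not part of this patch): GET_IDREG,
 * SET_IDREG and FIELD_DP32_IDREG are taken to be thin wrappers over the
 * ID-register array introduced by cpu-sysregs.h, roughly:
 *
 *     #define GET_IDREG(isar, name)    ((isar)->idregs[name ## _IDX])
 *     #define SET_IDREG(isar, name, v) ((isar)->idregs[name ## _IDX] = (v))
 *
 * The exact index naming here is hypothetical.
 */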
static void cortex_m3_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_M);
set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
cpu->midr = 0x410fc231;
cpu->pmsav7_dregion = 8;
- cpu->isar.id_pfr0 = 0x00000030;
- cpu->isar.id_pfr1 = 0x00000200;
- cpu->isar.id_dfr0 = 0x00100000;
+ SET_IDREG(isar, ID_PFR0, 0x00000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000200);
+ SET_IDREG(isar, ID_DFR0, 0x00100000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00000030;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x00000000;
- cpu->isar.id_mmfr3 = 0x00000000;
- cpu->isar.id_isar0 = 0x01141110;
- cpu->isar.id_isar1 = 0x02111000;
- cpu->isar.id_isar2 = 0x21112231;
- cpu->isar.id_isar3 = 0x01111110;
- cpu->isar.id_isar4 = 0x01310102;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00000030);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x00000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000000);
+ SET_IDREG(isar, ID_ISAR0, 0x01141110);
+ SET_IDREG(isar, ID_ISAR1, 0x02111000);
+ SET_IDREG(isar, ID_ISAR2, 0x21112231);
+ SET_IDREG(isar, ID_ISAR3, 0x01111110);
+ SET_IDREG(isar, ID_ISAR4, 0x01310102);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
}
static void cortex_m4_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_M);
@@ -114,26 +116,27 @@ static void cortex_m4_initfn(Object *obj)
cpu->isar.mvfr0 = 0x10110021;
cpu->isar.mvfr1 = 0x11000011;
cpu->isar.mvfr2 = 0x00000000;
- cpu->isar.id_pfr0 = 0x00000030;
- cpu->isar.id_pfr1 = 0x00000200;
- cpu->isar.id_dfr0 = 0x00100000;
+ SET_IDREG(isar, ID_PFR0, 0x00000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000200);
+ SET_IDREG(isar, ID_DFR0, 0x00100000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00000030;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x00000000;
- cpu->isar.id_mmfr3 = 0x00000000;
- cpu->isar.id_isar0 = 0x01141110;
- cpu->isar.id_isar1 = 0x02111000;
- cpu->isar.id_isar2 = 0x21112231;
- cpu->isar.id_isar3 = 0x01111110;
- cpu->isar.id_isar4 = 0x01310102;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00000030);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x00000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000000);
+ SET_IDREG(isar, ID_ISAR0, 0x01141110);
+ SET_IDREG(isar, ID_ISAR1, 0x02111000);
+ SET_IDREG(isar, ID_ISAR2, 0x21112231);
+ SET_IDREG(isar, ID_ISAR3, 0x01111110);
+ SET_IDREG(isar, ID_ISAR4, 0x01310102);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
}
static void cortex_m7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_M);
@@ -144,26 +147,27 @@ static void cortex_m7_initfn(Object *obj)
cpu->isar.mvfr0 = 0x10110221;
cpu->isar.mvfr1 = 0x12000011;
cpu->isar.mvfr2 = 0x00000040;
- cpu->isar.id_pfr0 = 0x00000030;
- cpu->isar.id_pfr1 = 0x00000200;
- cpu->isar.id_dfr0 = 0x00100000;
+ SET_IDREG(isar, ID_PFR0, 0x00000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000200);
+ SET_IDREG(isar, ID_DFR0, 0x00100000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00100030;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x01000000;
- cpu->isar.id_mmfr3 = 0x00000000;
- cpu->isar.id_isar0 = 0x01101110;
- cpu->isar.id_isar1 = 0x02112000;
- cpu->isar.id_isar2 = 0x20232231;
- cpu->isar.id_isar3 = 0x01111131;
- cpu->isar.id_isar4 = 0x01310132;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00100030);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000000);
+ SET_IDREG(isar, ID_ISAR0, 0x01101110);
+ SET_IDREG(isar, ID_ISAR1, 0x02112000);
+ SET_IDREG(isar, ID_ISAR2, 0x20232231);
+ SET_IDREG(isar, ID_ISAR3, 0x01111131);
+ SET_IDREG(isar, ID_ISAR4, 0x01310132);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
}
static void cortex_m33_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_M);
@@ -176,21 +180,21 @@ static void cortex_m33_initfn(Object *obj)
cpu->isar.mvfr0 = 0x10110021;
cpu->isar.mvfr1 = 0x11000011;
cpu->isar.mvfr2 = 0x00000040;
- cpu->isar.id_pfr0 = 0x00000030;
- cpu->isar.id_pfr1 = 0x00000210;
- cpu->isar.id_dfr0 = 0x00200000;
+ SET_IDREG(isar, ID_PFR0, 0x00000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000210);
+ SET_IDREG(isar, ID_DFR0, 0x00200000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00101F40;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x01000000;
- cpu->isar.id_mmfr3 = 0x00000000;
- cpu->isar.id_isar0 = 0x01101110;
- cpu->isar.id_isar1 = 0x02212000;
- cpu->isar.id_isar2 = 0x20232232;
- cpu->isar.id_isar3 = 0x01111131;
- cpu->isar.id_isar4 = 0x01310132;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00101F40);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000000);
+ SET_IDREG(isar, ID_ISAR0, 0x01101110);
+ SET_IDREG(isar, ID_ISAR1, 0x02212000);
+ SET_IDREG(isar, ID_ISAR2, 0x20232232);
+ SET_IDREG(isar, ID_ISAR3, 0x01111131);
+ SET_IDREG(isar, ID_ISAR4, 0x01310132);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
cpu->clidr = 0x00000000;
cpu->ctr = 0x8000c000;
}
@@ -198,6 +202,7 @@ static void cortex_m33_initfn(Object *obj)
static void cortex_m55_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_V8_1M);
@@ -213,37 +218,47 @@ static void cortex_m55_initfn(Object *obj)
cpu->isar.mvfr0 = 0x10110221;
cpu->isar.mvfr1 = 0x12100211;
cpu->isar.mvfr2 = 0x00000040;
- cpu->isar.id_pfr0 = 0x20000030;
- cpu->isar.id_pfr1 = 0x00000230;
- cpu->isar.id_dfr0 = 0x10200000;
+ SET_IDREG(isar, ID_PFR0, 0x20000030);
+ SET_IDREG(isar, ID_PFR1, 0x00000230);
+ SET_IDREG(isar, ID_DFR0, 0x10200000);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00111040;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x01000000;
- cpu->isar.id_mmfr3 = 0x00000011;
- cpu->isar.id_isar0 = 0x01103110;
- cpu->isar.id_isar1 = 0x02212000;
- cpu->isar.id_isar2 = 0x20232232;
- cpu->isar.id_isar3 = 0x01111131;
- cpu->isar.id_isar4 = 0x01310132;
- cpu->isar.id_isar5 = 0x00000000;
- cpu->isar.id_isar6 = 0x00000000;
+ SET_IDREG(isar, ID_MMFR0, 0x00111040);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01000000);
+ SET_IDREG(isar, ID_MMFR3, 0x00000011);
+ SET_IDREG(isar, ID_ISAR0, 0x01103110);
+ SET_IDREG(isar, ID_ISAR1, 0x02212000);
+ SET_IDREG(isar, ID_ISAR2, 0x20232232);
+ SET_IDREG(isar, ID_ISAR3, 0x01111131);
+ SET_IDREG(isar, ID_ISAR4, 0x01310132);
+ SET_IDREG(isar, ID_ISAR5, 0x00000000);
+ SET_IDREG(isar, ID_ISAR6, 0x00000000);
cpu->clidr = 0x00000000; /* caches not implemented */
cpu->ctr = 0x8303c003;
}
static const TCGCPUOps arm_v7m_tcg_ops = {
+ /* ARM processors have a weak memory model */
+ .guest_default_memory_order = 0,
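+ /* i.e. no TCG_MO_* ordering bits are implied on guest memory accesses */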
+ .mttcg_supported = true,
+
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
+ .get_tb_cpu_state = arm_get_tb_cpu_state,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
+ .mmu_index = arm_cpu_mmu_index,
#ifdef CONFIG_USER_ONLY
.record_sigsegv = arm_cpu_record_sigsegv,
.record_sigbus = arm_cpu_record_sigbus,
#else
- .tlb_fill = arm_cpu_tlb_fill,
+ .tlb_fill_align = arm_cpu_tlb_fill_align,
+ .pointer_wrap = cpu_pointer_wrap_uint32,
.cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
+ .cpu_exec_halt = arm_cpu_exec_halt,
+ .cpu_exec_reset = cpu_reset,
.do_interrupt = arm_v7m_cpu_do_interrupt,
.do_transaction_failed = arm_cpu_do_transaction_failed,
.do_unaligned_access = arm_cpu_do_unaligned_access,
@@ -253,14 +268,13 @@ static const TCGCPUOps arm_v7m_tcg_ops = {
#endif /* !CONFIG_USER_ONLY */
};
-static void arm_v7m_class_init(ObjectClass *oc, void *data)
+static void arm_v7m_class_init(ObjectClass *oc, const void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
CPUClass *cc = CPU_CLASS(oc);
acc->info = data;
cc->tcg_ops = &arm_v7m_tcg_ops;
- cc->gdb_core_xml_file = "arm-m-profile.xml";
}
static const ARMCPUInfo arm_v7m_cpus[] = {
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
index bdd82d9..942b636 100644
--- a/target/arm/tcg/cpu32.c
+++ b/target/arm/tcg/cpu32.c
@@ -10,7 +10,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
#include "internals.h"
#include "target/arm/idau.h"
#if !defined(CONFIG_USER_ONLY)
@@ -23,18 +23,19 @@
void aa32_max_features(ARMCPU *cpu)
{
uint32_t t;
+ ARMISARegisters *isar = &cpu->isar;
/* Add additional features supported by QEMU */
- t = cpu->isar.id_isar5;
+ t = GET_IDREG(isar, ID_ISAR5);
t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */
t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */
t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */
t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */
t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* FEAT_FCMA */
- cpu->isar.id_isar5 = t;
+ SET_IDREG(isar, ID_ISAR5, t);
- t = cpu->isar.id_isar6;
+ t = GET_IDREG(isar, ID_ISAR6);
t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */
t = FIELD_DP32(t, ID_ISAR6, DP, 1); /* Feat_DotProd */
t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */
@@ -42,7 +43,7 @@ void aa32_max_features(ARMCPU *cpu)
t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */
t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */
t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */
- cpu->isar.id_isar6 = t;
+ SET_IDREG(isar, ID_ISAR6, t);
t = cpu->isar.mvfr1;
t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */
@@ -54,42 +55,64 @@ void aa32_max_features(ARMCPU *cpu)
t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */
cpu->isar.mvfr2 = t;
- t = cpu->isar.id_mmfr3;
- t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */
- cpu->isar.id_mmfr3 = t;
+ FIELD_DP32_IDREG(isar, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */
- t = cpu->isar.id_mmfr4;
+ t = GET_IDREG(isar, ID_MMFR4);
t = FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */
t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */
t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */
t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */
- cpu->isar.id_mmfr4 = t;
+ SET_IDREG(isar, ID_MMFR4, t);
- t = cpu->isar.id_mmfr5;
- t = FIELD_DP32(t, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */
- cpu->isar.id_mmfr5 = t;
+ FIELD_DP32_IDREG(isar, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */
- t = cpu->isar.id_pfr0;
- t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CVS2 */
+ t = GET_IDREG(isar, ID_PFR0);
+ t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CSV2 */
t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */
t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */
- cpu->isar.id_pfr0 = t;
+ SET_IDREG(isar, ID_PFR0, t);
- t = cpu->isar.id_pfr2;
+ t = GET_IDREG(isar, ID_PFR2);
t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */
t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */
- cpu->isar.id_pfr2 = t;
+ SET_IDREG(isar, ID_PFR2, t);
- t = cpu->isar.id_dfr0;
- t = FIELD_DP32(t, ID_DFR0, COPDBG, 9); /* FEAT_Debugv8p4 */
- t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */
+ t = GET_IDREG(isar, ID_DFR0);
+ t = FIELD_DP32(t, ID_DFR0, COPDBG, 10); /* FEAT_Debugv8p8 */
+ t = FIELD_DP32(t, ID_DFR0, COPSDBG, 10); /* FEAT_Debugv8p8 */
t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */
- cpu->isar.id_dfr0 = t;
-
- t = cpu->isar.id_dfr1;
- t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */
- cpu->isar.id_dfr1 = t;
+ SET_IDREG(isar, ID_DFR0, t);
+
+ /* Debug ID registers. */
+
+ /* Bit[15] is RES1, Bit[13] and Bits[11:0] are RES0. */
+ t = 0x00008000;
+ t = FIELD_DP32(t, DBGDIDR, SE_IMP, 1);
+ t = FIELD_DP32(t, DBGDIDR, NSUHD_IMP, 1);
+ t = FIELD_DP32(t, DBGDIDR, VERSION, 10); /* FEAT_Debugv8p8 */
+ t = FIELD_DP32(t, DBGDIDR, CTX_CMPS, 1);
+ t = FIELD_DP32(t, DBGDIDR, BRPS, 5);
+ t = FIELD_DP32(t, DBGDIDR, WRPS, 3);
+ cpu->isar.dbgdidr = t;
+
+ t = 0;
+ t = FIELD_DP32(t, DBGDEVID, PCSAMPLE, 3);
+ t = FIELD_DP32(t, DBGDEVID, WPADDRMASK, 1);
+ t = FIELD_DP32(t, DBGDEVID, BPADDRMASK, 15);
+ t = FIELD_DP32(t, DBGDEVID, VECTORCATCH, 0);
+ t = FIELD_DP32(t, DBGDEVID, VIRTEXTNS, 1);
+ t = FIELD_DP32(t, DBGDEVID, DOUBLELOCK, 1);
+ t = FIELD_DP32(t, DBGDEVID, AUXREGS, 0);
+ t = FIELD_DP32(t, DBGDEVID, CIDMASK, 0);
+ cpu->isar.dbgdevid = t;
+
+ /* Bits[31:4] are RES0. */
+ t = 0;
+ t = FIELD_DP32(t, DBGDEVID1, PCSROFFSET, 2);
+ cpu->isar.dbgdevid1 = t;
+
+ FIELD_DP32_IDREG(isar, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */
}
/* CPU models. These are not needed for the AArch64 linux-user build. */
@@ -112,7 +135,7 @@ static void arm926_initfn(Object *obj)
* ARMv5 does not have the ID_ISAR registers, but we can still
* set the field to indicate Jazelle support within QEMU.
*/
- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+ FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1);
/*
* Similarly, we need to set MVFR0 fields to enable vfp and short vector
* support even though ARMv5 doesn't have this register.
@@ -154,7 +177,7 @@ static void arm1026_initfn(Object *obj)
* ARMv5 does not have the ID_ISAR registers, but we can still
* set the field to indicate Jazelle support within QEMU.
*/
- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+ FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1);
/*
* Similarly, we need to set MVFR0 fields to enable vfp and short vector
* support even though ARMv5 doesn't have this register.
@@ -178,6 +201,7 @@ static void arm1026_initfn(Object *obj)
static void arm1136_r2_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
/*
* What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an
* older core than plain "arm1136". In particular this does not
@@ -198,24 +222,25 @@ static void arm1136_r2_initfn(Object *obj)
cpu->isar.mvfr1 = 0x00000000;
cpu->ctr = 0x1dd20d2;
cpu->reset_sctlr = 0x00050078;
- cpu->isar.id_pfr0 = 0x111;
- cpu->isar.id_pfr1 = 0x1;
- cpu->isar.id_dfr0 = 0x2;
+ SET_IDREG(isar, ID_PFR0, 0x111);
+ SET_IDREG(isar, ID_PFR1, 0x1);
+ SET_IDREG(isar, ID_DFR0, 0x2);
cpu->id_afr0 = 0x3;
- cpu->isar.id_mmfr0 = 0x01130003;
- cpu->isar.id_mmfr1 = 0x10030302;
- cpu->isar.id_mmfr2 = 0x01222110;
- cpu->isar.id_isar0 = 0x00140011;
- cpu->isar.id_isar1 = 0x12002111;
- cpu->isar.id_isar2 = 0x11231111;
- cpu->isar.id_isar3 = 0x01102131;
- cpu->isar.id_isar4 = 0x141;
+ SET_IDREG(isar, ID_MMFR0, 0x01130003);
+ SET_IDREG(isar, ID_MMFR1, 0x10030302);
+ SET_IDREG(isar, ID_MMFR2, 0x01222110);
+ SET_IDREG(isar, ID_ISAR0, 0x00140011);
+ SET_IDREG(isar, ID_ISAR1, 0x12002111);
+ SET_IDREG(isar, ID_ISAR2, 0x11231111);
+ SET_IDREG(isar, ID_ISAR3, 0x01102131);
+ SET_IDREG(isar, ID_ISAR4, 0x141);
cpu->reset_auxcr = 7;
}
static void arm1136_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,arm1136";
set_feature(&cpu->env, ARM_FEATURE_V6K);
@@ -229,24 +254,25 @@ static void arm1136_initfn(Object *obj)
cpu->isar.mvfr1 = 0x00000000;
cpu->ctr = 0x1dd20d2;
cpu->reset_sctlr = 0x00050078;
- cpu->isar.id_pfr0 = 0x111;
- cpu->isar.id_pfr1 = 0x1;
- cpu->isar.id_dfr0 = 0x2;
+ SET_IDREG(isar, ID_PFR0, 0x111);
+ SET_IDREG(isar, ID_PFR1, 0x1);
+ SET_IDREG(isar, ID_DFR0, 0x2);
cpu->id_afr0 = 0x3;
- cpu->isar.id_mmfr0 = 0x01130003;
- cpu->isar.id_mmfr1 = 0x10030302;
- cpu->isar.id_mmfr2 = 0x01222110;
- cpu->isar.id_isar0 = 0x00140011;
- cpu->isar.id_isar1 = 0x12002111;
- cpu->isar.id_isar2 = 0x11231111;
- cpu->isar.id_isar3 = 0x01102131;
- cpu->isar.id_isar4 = 0x141;
+ SET_IDREG(isar, ID_MMFR0, 0x01130003);
+ SET_IDREG(isar, ID_MMFR1, 0x10030302);
+ SET_IDREG(isar, ID_MMFR2, 0x01222110);
+ SET_IDREG(isar, ID_ISAR0, 0x00140011);
+ SET_IDREG(isar, ID_ISAR1, 0x12002111);
+ SET_IDREG(isar, ID_ISAR2, 0x11231111);
+ SET_IDREG(isar, ID_ISAR3, 0x01102131);
+ SET_IDREG(isar, ID_ISAR4, 0x141);
cpu->reset_auxcr = 7;
}
static void arm1176_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,arm1176";
set_feature(&cpu->env, ARM_FEATURE_V6K);
@@ -261,24 +287,25 @@ static void arm1176_initfn(Object *obj)
cpu->isar.mvfr1 = 0x00000000;
cpu->ctr = 0x1dd20d2;
cpu->reset_sctlr = 0x00050078;
- cpu->isar.id_pfr0 = 0x111;
- cpu->isar.id_pfr1 = 0x11;
- cpu->isar.id_dfr0 = 0x33;
+ SET_IDREG(isar, ID_PFR0, 0x111);
+ SET_IDREG(isar, ID_PFR1, 0x11);
+ SET_IDREG(isar, ID_DFR0, 0x33);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x01130003;
- cpu->isar.id_mmfr1 = 0x10030302;
- cpu->isar.id_mmfr2 = 0x01222100;
- cpu->isar.id_isar0 = 0x0140011;
- cpu->isar.id_isar1 = 0x12002111;
- cpu->isar.id_isar2 = 0x11231121;
- cpu->isar.id_isar3 = 0x01102131;
- cpu->isar.id_isar4 = 0x01141;
+ SET_IDREG(isar, ID_MMFR0, 0x01130003);
+ SET_IDREG(isar, ID_MMFR1, 0x10030302);
+ SET_IDREG(isar, ID_MMFR2, 0x01222100);
+ SET_IDREG(isar, ID_ISAR0, 0x0140011);
+ SET_IDREG(isar, ID_ISAR1, 0x12002111);
+ SET_IDREG(isar, ID_ISAR2, 0x11231121);
+ SET_IDREG(isar, ID_ISAR3, 0x01102131);
+ SET_IDREG(isar, ID_ISAR4, 0x01141);
cpu->reset_auxcr = 7;
}
static void arm11mpcore_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,arm11mpcore";
set_feature(&cpu->env, ARM_FEATURE_V6K);
@@ -290,18 +317,18 @@ static void arm11mpcore_initfn(Object *obj)
cpu->isar.mvfr0 = 0x11111111;
cpu->isar.mvfr1 = 0x00000000;
cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
- cpu->isar.id_pfr0 = 0x111;
- cpu->isar.id_pfr1 = 0x1;
- cpu->isar.id_dfr0 = 0;
+ SET_IDREG(isar, ID_PFR0, 0x111);
+ SET_IDREG(isar, ID_PFR1, 0x1);
+ SET_IDREG(isar, ID_DFR0, 0);
cpu->id_afr0 = 0x2;
- cpu->isar.id_mmfr0 = 0x01100103;
- cpu->isar.id_mmfr1 = 0x10020302;
- cpu->isar.id_mmfr2 = 0x01222000;
- cpu->isar.id_isar0 = 0x00100011;
- cpu->isar.id_isar1 = 0x12002111;
- cpu->isar.id_isar2 = 0x11221011;
- cpu->isar.id_isar3 = 0x01102131;
- cpu->isar.id_isar4 = 0x141;
+ SET_IDREG(isar, ID_MMFR0, 0x01100103);
+ SET_IDREG(isar, ID_MMFR1, 0x10020302);
+ SET_IDREG(isar, ID_MMFR2, 0x01222000);
+ SET_IDREG(isar, ID_ISAR0, 0x00100011);
+ SET_IDREG(isar, ID_ISAR1, 0x12002111);
+ SET_IDREG(isar, ID_ISAR2, 0x11221011);
+ SET_IDREG(isar, ID_ISAR3, 0x01102131);
+ SET_IDREG(isar, ID_ISAR4, 0x141);
cpu->reset_auxcr = 1;
}
@@ -315,6 +342,7 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
static void cortex_a8_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a8";
set_feature(&cpu->env, ARM_FEATURE_V7);
@@ -329,19 +357,19 @@ static void cortex_a8_initfn(Object *obj)
cpu->isar.mvfr1 = 0x00011111;
cpu->ctr = 0x82048004;
cpu->reset_sctlr = 0x00c50078;
- cpu->isar.id_pfr0 = 0x1031;
- cpu->isar.id_pfr1 = 0x11;
- cpu->isar.id_dfr0 = 0x400;
+ SET_IDREG(isar, ID_PFR0, 0x1031);
+ SET_IDREG(isar, ID_PFR1, 0x11);
+ SET_IDREG(isar, ID_DFR0, 0x400);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x31100003;
- cpu->isar.id_mmfr1 = 0x20000000;
- cpu->isar.id_mmfr2 = 0x01202000;
- cpu->isar.id_mmfr3 = 0x11;
- cpu->isar.id_isar0 = 0x00101111;
- cpu->isar.id_isar1 = 0x12112111;
- cpu->isar.id_isar2 = 0x21232031;
- cpu->isar.id_isar3 = 0x11112131;
- cpu->isar.id_isar4 = 0x00111142;
+ SET_IDREG(isar, ID_MMFR0, 0x31100003);
+ SET_IDREG(isar, ID_MMFR1, 0x20000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01202000);
+ SET_IDREG(isar, ID_MMFR3, 0x11);
+ SET_IDREG(isar, ID_ISAR0, 0x00101111);
+ SET_IDREG(isar, ID_ISAR1, 0x12112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232031);
+ SET_IDREG(isar, ID_ISAR3, 0x11112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00111142);
cpu->isar.dbgdidr = 0x15141000;
cpu->clidr = (1 << 27) | (2 << 24) | 3;
cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
@@ -384,6 +412,7 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
static void cortex_a9_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a9";
set_feature(&cpu->env, ARM_FEATURE_V7);
@@ -404,19 +433,19 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.mvfr1 = 0x01111111;
cpu->ctr = 0x80038003;
cpu->reset_sctlr = 0x00c50078;
- cpu->isar.id_pfr0 = 0x1031;
- cpu->isar.id_pfr1 = 0x11;
- cpu->isar.id_dfr0 = 0x000;
+ SET_IDREG(isar, ID_PFR0, 0x1031);
+ SET_IDREG(isar, ID_PFR1, 0x11);
+ SET_IDREG(isar, ID_DFR0, 0x000);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x00100103;
- cpu->isar.id_mmfr1 = 0x20000000;
- cpu->isar.id_mmfr2 = 0x01230000;
- cpu->isar.id_mmfr3 = 0x00002111;
- cpu->isar.id_isar0 = 0x00101111;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232041;
- cpu->isar.id_isar3 = 0x11112131;
- cpu->isar.id_isar4 = 0x00111142;
+ SET_IDREG(isar, ID_MMFR0, 0x00100103);
+ SET_IDREG(isar, ID_MMFR1, 0x20000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01230000);
+ SET_IDREG(isar, ID_MMFR3, 0x00002111);
+ SET_IDREG(isar, ID_ISAR0, 0x00101111);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232041);
+ SET_IDREG(isar, ID_ISAR3, 0x11112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00111142);
cpu->isar.dbgdidr = 0x35141000;
cpu->clidr = (1 << 27) | (1 << 24) | 3;
cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
@@ -451,6 +480,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a7";
set_feature(&cpu->env, ARM_FEATURE_V7VE);
@@ -469,23 +499,23 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.mvfr1 = 0x11111111;
cpu->ctr = 0x84448003;
cpu->reset_sctlr = 0x00c50078;
- cpu->isar.id_pfr0 = 0x00001131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x02010555;
+ SET_IDREG(isar, ID_PFR0, 0x00001131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x02010555);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10101105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01240000;
- cpu->isar.id_mmfr3 = 0x02102211;
+ SET_IDREG(isar, ID_MMFR0, 0x10101105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01240000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
/*
* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
* table 4-41 gives 0x02101110, which includes the arm div insns.
*/
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232041;
- cpu->isar.id_isar3 = 0x11112131;
- cpu->isar.id_isar4 = 0x10011142;
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232041);
+ SET_IDREG(isar, ID_ISAR3, 0x11112131);
+ SET_IDREG(isar, ID_ISAR4, 0x10011142);
cpu->isar.dbgdidr = 0x3515f005;
cpu->isar.dbgdevid = 0x01110f13;
cpu->isar.dbgdevid1 = 0x1;
@@ -500,6 +530,7 @@ static void cortex_a7_initfn(Object *obj)
static void cortex_a15_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a15";
set_feature(&cpu->env, ARM_FEATURE_V7VE);
@@ -520,19 +551,19 @@ static void cortex_a15_initfn(Object *obj)
cpu->isar.mvfr1 = 0x11111111;
cpu->ctr = 0x8444c004;
cpu->reset_sctlr = 0x00c50078;
- cpu->isar.id_pfr0 = 0x00001131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x02010555;
+ SET_IDREG(isar, ID_PFR0, 0x00001131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x02010555);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x20000000;
- cpu->isar.id_mmfr2 = 0x01240000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232041;
- cpu->isar.id_isar3 = 0x11112131;
- cpu->isar.id_isar4 = 0x10011142;
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x20000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01240000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232041);
+ SET_IDREG(isar, ID_ISAR3, 0x11112131);
+ SET_IDREG(isar, ID_ISAR4, 0x10011142);
cpu->isar.dbgdidr = 0x3515f021;
cpu->isar.dbgdevid = 0x01110f13;
cpu->isar.dbgdevid1 = 0x0;
@@ -546,9 +577,9 @@ static void cortex_a15_initfn(Object *obj)
static const ARMCPRegInfo cortexr5_cp_reginfo[] = {
/* Dummy the TCM region regs for the moment */
- { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
+ { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
.access = PL1_RW, .type = ARM_CP_CONST },
- { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
+ { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
.access = PL1_RW, .type = ARM_CP_CONST },
{ .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5,
.opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP },
@@ -557,27 +588,28 @@ static const ARMCPRegInfo cortexr5_cp_reginfo[] = {
static void cortex_r5_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_V7MP);
set_feature(&cpu->env, ARM_FEATURE_PMSA);
set_feature(&cpu->env, ARM_FEATURE_PMU);
cpu->midr = 0x411fc153; /* r1p3 */
- cpu->isar.id_pfr0 = 0x0131;
- cpu->isar.id_pfr1 = 0x001;
- cpu->isar.id_dfr0 = 0x010400;
+ SET_IDREG(isar, ID_PFR0, 0x0131);
+ SET_IDREG(isar, ID_PFR1, 0x001);
+ SET_IDREG(isar, ID_DFR0, 0x010400);
cpu->id_afr0 = 0x0;
- cpu->isar.id_mmfr0 = 0x0210030;
- cpu->isar.id_mmfr1 = 0x00000000;
- cpu->isar.id_mmfr2 = 0x01200000;
- cpu->isar.id_mmfr3 = 0x0211;
- cpu->isar.id_isar0 = 0x02101111;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232141;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x0010142;
- cpu->isar.id_isar5 = 0x0;
- cpu->isar.id_isar6 = 0x0;
+ SET_IDREG(isar, ID_MMFR0, 0x0210030);
+ SET_IDREG(isar, ID_MMFR1, 0x00000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01200000);
+ SET_IDREG(isar, ID_MMFR3, 0x0211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101111);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232141);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x0010142);
+ SET_IDREG(isar, ID_ISAR5, 0x0);
+ SET_IDREG(isar, ID_ISAR6, 0x0);
cpu->mp_is_up = true;
cpu->pmsav7_dregion = 16;
cpu->isar.reset_pmcr_el0 = 0x41151800;
@@ -692,6 +724,7 @@ static const ARMCPRegInfo cortex_r52_cp_reginfo[] = {
static void cortex_r52_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_EL2);
@@ -709,21 +742,21 @@ static void cortex_r52_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x8144c004;
cpu->reset_sctlr = 0x30c50838;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x10111001;
- cpu->isar.id_dfr0 = 0x03010006;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x10111001);
+ SET_IDREG(isar, ID_DFR0, 0x03010006);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x00211040;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01200000;
- cpu->isar.id_mmfr3 = 0xf0102211;
- cpu->isar.id_mmfr4 = 0x00000010;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232142;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x00010001;
+ SET_IDREG(isar, ID_MMFR0, 0x00211040);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01200000);
+ SET_IDREG(isar, ID_MMFR3, 0xf0102211);
+ SET_IDREG(isar, ID_MMFR4, 0x00000010);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232142);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x00010001);
cpu->isar.dbgdidr = 0x77168000;
cpu->clidr = (1 << 27) | (1 << 24) | 0x3;
cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
@@ -921,6 +954,7 @@ static void pxa270c5_initfn(Object *obj)
static void arm_max_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
/* aarch64_a57_initfn, advertising none of the aarch64 features */
cpu->dtb_compatible = "arm,cortex-a57";
@@ -940,24 +974,21 @@ static void arm_max_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x8444c004;
cpu->reset_sctlr = 0x00c50838;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x03010066;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x03010066);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10101105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x00011121;
- cpu->isar.id_isar6 = 0;
- cpu->isar.dbgdidr = 0x3516d000;
- cpu->isar.dbgdevid = 0x00110f13;
- cpu->isar.dbgdevid1 = 0x2;
+ SET_IDREG(isar, ID_MMFR0, 0x10101105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x00011121);
+ SET_IDREG(isar, ID_ISAR6, 0);
cpu->isar.reset_pmcr_el0 = 0x41013000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
@@ -1001,19 +1032,31 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "ti925t", .initfn = ti925t_initfn },
{ .name = "sa1100", .initfn = sa1100_initfn },
{ .name = "sa1110", .initfn = sa1110_initfn },
- { .name = "pxa250", .initfn = pxa250_initfn },
- { .name = "pxa255", .initfn = pxa255_initfn },
- { .name = "pxa260", .initfn = pxa260_initfn },
- { .name = "pxa261", .initfn = pxa261_initfn },
- { .name = "pxa262", .initfn = pxa262_initfn },
+ { .name = "pxa250", .initfn = pxa250_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa255", .initfn = pxa255_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa260", .initfn = pxa260_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa261", .initfn = pxa261_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa262", .initfn = pxa262_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
/* "pxa270" is an alias for "pxa270-a0" */
- { .name = "pxa270", .initfn = pxa270a0_initfn },
- { .name = "pxa270-a0", .initfn = pxa270a0_initfn },
- { .name = "pxa270-a1", .initfn = pxa270a1_initfn },
- { .name = "pxa270-b0", .initfn = pxa270b0_initfn },
- { .name = "pxa270-b1", .initfn = pxa270b1_initfn },
- { .name = "pxa270-c0", .initfn = pxa270c0_initfn },
- { .name = "pxa270-c5", .initfn = pxa270c5_initfn },
+ { .name = "pxa270", .initfn = pxa270a0_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-a0", .initfn = pxa270a0_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-a1", .initfn = pxa270a1_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-b0", .initfn = pxa270b0_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-b1", .initfn = pxa270b1_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-c0", .initfn = pxa270c0_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
+ { .name = "pxa270-c5", .initfn = pxa270c5_initfn,
+ .deprecation_note = "iwMMXt CPUs are no longer supported", },
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 0899251..937f29e2 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -29,35 +29,10 @@
#include "cpu-features.h"
#include "cpregs.h"
-static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
- unsigned cachesize)
-{
- unsigned lg_linesize = ctz32(linesize);
- unsigned sets;
-
- /*
- * The 64-bit CCSIDR_EL1 format is:
- * [55:32] number of sets - 1
- * [23:3] associativity - 1
- * [2:0] log2(linesize) - 4
- * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
- */
- assert(assoc != 0);
- assert(is_power_of_2(linesize));
- assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
-
- /* sets * associativity * linesize == cachesize. */
- sets = cachesize / (assoc * linesize);
- assert(cachesize % (assoc * linesize) == 0);
-
- return ((uint64_t)(sets - 1) << 32)
- | ((assoc - 1) << 3)
- | (lg_linesize - 4);
-}
-
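
make_ccsidr64() goes away in favour of a shared make_ccsidr() helper that handles both the legacy 32-bit CCSIDR layout and the 64-bit CCIDX layout documented in the removed comment. A sketch of what such a helper could look like; the legacy field layout (flags [31:28], NumSets-1 [27:13], Associativity-1 [12:3], log2(linesize)-4 [2:0]) is inferred from the encodings this patch replaces, and the sanity asserts from the removed function are omitted for brevity (ctz32() as in qemu/host-utils.h):

    typedef enum {
        CCSIDR_FORMAT_LEGACY,
        CCSIDR_FORMAT_CCIDX,
    } CCSIDRFormat;

    static uint64_t make_ccsidr(CCSIDRFormat format, unsigned assoc,
                                unsigned linesize, unsigned cachesize,
                                unsigned flags)
    {
        unsigned lg_linesize = ctz32(linesize);
        unsigned sets = cachesize / (assoc * linesize);

        if (format == CCSIDR_FORMAT_LEGACY) {
            /* flags [31:28], sets-1 [27:13], assoc-1 [12:3], log2(ls)-4 [2:0] */
            return ((uint64_t)flags << 28) | ((sets - 1) << 13)
                   | ((assoc - 1) << 3) | (lg_linesize - 4);
        }
        /* CCIDX: sets-1 [55:32], assoc-1 [23:3], log2(ls)-4 [2:0] */
        return ((uint64_t)(sets - 1) << 32) | ((assoc - 1) << 3)
               | (lg_linesize - 4);
    }

Sanity check against the first conversion below: make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7) gives 128 sets and evaluates to 0x700fe01a, exactly the raw constant it replaces.
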
static void aarch64_a35_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a35";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -74,28 +49,28 @@ static void aarch64_a35_initfn(Object *obj)
cpu->midr = 0x411fd040;
cpu->revidr = 0;
cpu->ctr = 0x84448004;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x03010066;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x03010066);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x00011121;
- cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->isar.id_aa64pfr1 = 0;
- cpu->isar.id_aa64dfr0 = 0x10305106;
- cpu->isar.id_aa64dfr1 = 0;
- cpu->isar.id_aa64isar0 = 0x00011120;
- cpu->isar.id_aa64isar1 = 0;
- cpu->isar.id_aa64mmfr0 = 0x00101122;
- cpu->isar.id_aa64mmfr1 = 0;
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x00011121);
+ SET_IDREG(isar, ID_AA64PFR0, 0x00002222);
+ SET_IDREG(isar, ID_AA64PFR1, 0);
+ SET_IDREG(isar, ID_AA64DFR0, 0x10305106);
+ SET_IDREG(isar, ID_AA64DFR1, 0);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x00011120);
+ SET_IDREG(isar, ID_AA64ISAR1, 0);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x00101122);
+ SET_IDREG(isar, ID_AA64MMFR1, 0);
cpu->clidr = 0x0a200023;
cpu->dcz_blocksize = 4;
@@ -106,9 +81,12 @@ static void aarch64_a35_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x410a3000;
/* From B2.29 Cache ID registers */
- cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
- cpu->ccsidr[2] = 0x703fe03a; /* 512KB L2 cache */
+ /* 32KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7);
+ /* 32KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 2);
+ /* 512KB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 512 * KiB, 7);
/* From B3.5 VGIC Type register */
cpu->gic_num_lrs = 4;
@@ -180,11 +158,8 @@ static bool cpu_arm_get_rme(Object *obj, Error **errp)
static void cpu_arm_set_rme(Object *obj, bool value, Error **errp)
{
ARMCPU *cpu = ARM_CPU(obj);
- uint64_t t;
- t = cpu->isar.id_aa64pfr0;
- t = FIELD_DP64(t, ID_AA64PFR0, RME, value);
- cpu->isar.id_aa64pfr0 = t;
+ FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, RME, value);
}
static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name,
@@ -221,12 +196,13 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name,
visit_type_uint32(v, name, &value, errp);
}
-static Property arm_cpu_lpa2_property =
+static const Property arm_cpu_lpa2_property =
DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true);
static void aarch64_a55_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a55";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -243,38 +219,41 @@ static void aarch64_a55_initfn(Object *obj)
cpu->clidr = 0x82000023;
cpu->ctr = 0x84448004; /* L1Ip = VIPT */
cpu->dcz_blocksize = 4; /* 64 bytes */
- cpu->isar.id_aa64dfr0 = 0x0000000010305408ull;
- cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
- cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
- cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
- cpu->isar.id_aa64pfr0 = 0x0000000010112222ull;
- cpu->isar.id_aa64pfr1 = 0x0000000000000010ull;
+ SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull);
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull);
+ SET_IDREG(isar, ID_AA64PFR0, 0x0000000010112222ull);
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_dfr0 = 0x04010088;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x01011121;
- cpu->isar.id_isar6 = 0x00000010;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_mmfr4 = 0x00021110;
- cpu->isar.id_pfr0 = 0x10010131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_pfr2 = 0x00000011;
+ SET_IDREG(isar, ID_DFR0, 0x04010088);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x01011121);
+ SET_IDREG(isar, ID_ISAR6, 0x00000010);
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_MMFR4, 0x00021110);
+ SET_IDREG(isar, ID_PFR0, 0x10010131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
cpu->midr = 0x412FD050; /* r2p0 */
cpu->revidr = 0;
/* From B2.23 CCSIDR_EL1 */
- cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */
- cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */
+ /* 32KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7);
+ /* 32KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 2);
+ /* 512KB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 512 * KiB, 7);
/* From B2.96 SCTLR_EL3 */
cpu->reset_sctlr = 0x30c50838;
@@ -296,6 +275,7 @@ static void aarch64_a55_initfn(Object *obj)
static void aarch64_a72_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a72";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -315,32 +295,35 @@ static void aarch64_a72_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x8444c004;
cpu->reset_sctlr = 0x00c50838;
- cpu->isar.id_pfr0 = 0x00000131;
- cpu->isar.id_pfr1 = 0x00011011;
- cpu->isar.id_dfr0 = 0x03010066;
+ SET_IDREG(isar, ID_PFR0, 0x00000131);
+ SET_IDREG(isar, ID_PFR1, 0x00011011);
+ SET_IDREG(isar, ID_DFR0, 0x03010066);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02102211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00011142;
- cpu->isar.id_isar5 = 0x00011121;
- cpu->isar.id_aa64pfr0 = 0x00002222;
- cpu->isar.id_aa64dfr0 = 0x10305106;
- cpu->isar.id_aa64isar0 = 0x00011120;
- cpu->isar.id_aa64mmfr0 = 0x00001124;
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02102211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00011142);
+ SET_IDREG(isar, ID_ISAR5, 0x00011121);
+ SET_IDREG(isar, ID_AA64PFR0, 0x00002222);
+ SET_IDREG(isar, ID_AA64DFR0, 0x10305106);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x00011120);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x00001124);
cpu->isar.dbgdidr = 0x3516d000;
cpu->isar.dbgdevid = 0x01110f13;
cpu->isar.dbgdevid1 = 0x2;
cpu->isar.reset_pmcr_el0 = 0x41023000;
cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
- cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
+ /* 32KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7);
+ /* 48KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 3, 64, 48 * KiB, 2);
+ /* 1MB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 64, 1 * MiB, 7);
cpu->dcz_blocksize = 4; /* 64 bytes */
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
@@ -352,6 +335,7 @@ static void aarch64_a72_initfn(Object *obj)
static void aarch64_a76_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a76";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -368,38 +352,41 @@ static void aarch64_a76_initfn(Object *obj)
cpu->clidr = 0x82000023;
cpu->ctr = 0x8444C004;
cpu->dcz_blocksize = 4;
- cpu->isar.id_aa64dfr0 = 0x0000000010305408ull;
- cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
- cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
- cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
- cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */
- cpu->isar.id_aa64pfr1 = 0x0000000000000010ull;
+ SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull);
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull);
+ SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_dfr0 = 0x04010088;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x01011121;
- cpu->isar.id_isar6 = 0x00000010;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_mmfr4 = 0x00021110;
- cpu->isar.id_pfr0 = 0x10010131;
- cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
- cpu->isar.id_pfr2 = 0x00000011;
+ SET_IDREG(isar, ID_DFR0, 0x04010088);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x01011121);
+ SET_IDREG(isar, ID_ISAR6, 0x00000010);
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_MMFR4, 0x00021110);
+ SET_IDREG(isar, ID_PFR0, 0x10010131);
+ SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
cpu->midr = 0x414fd0b1; /* r4p1 */
cpu->revidr = 0;
/* From B2.18 CCSIDR_EL1 */
- cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
- cpu->ccsidr[2] = 0x707fe03a; /* 512KB L2 cache */
+ /* 64KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7);
+ /* 64KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2);
+ /* 512KB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 512 * KiB, 7);
/* From B2.93 SCTLR_EL3 */
cpu->reset_sctlr = 0x30c50838;
@@ -422,6 +409,7 @@ static void aarch64_a76_initfn(Object *obj)
static void aarch64_a64fx_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,a64fx";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -436,22 +424,25 @@ static void aarch64_a64fx_initfn(Object *obj)
cpu->revidr = 0x00000000;
cpu->ctr = 0x86668006;
cpu->reset_sctlr = 0x30000180;
- cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */
- cpu->isar.id_aa64pfr1 = 0x0000000000000000;
- cpu->isar.id_aa64dfr0 = 0x0000000010305408;
- cpu->isar.id_aa64dfr1 = 0x0000000000000000;
+ SET_IDREG(isar, ID_AA64PFR0, 0x0000000101111111); /* No RAS Extensions */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000000);
+ SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408);
+ SET_IDREG(isar, ID_AA64DFR1, 0x0000000000000000);
cpu->id_aa64afr0 = 0x0000000000000000;
cpu->id_aa64afr1 = 0x0000000000000000;
- cpu->isar.id_aa64mmfr0 = 0x0000000000001122;
- cpu->isar.id_aa64mmfr1 = 0x0000000011212100;
- cpu->isar.id_aa64mmfr2 = 0x0000000000001011;
- cpu->isar.id_aa64isar0 = 0x0000000010211120;
- cpu->isar.id_aa64isar1 = 0x0000000000010001;
- cpu->isar.id_aa64zfr0 = 0x0000000000000000;
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000001122);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000011212100);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x0000000010211120);
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000010001);
+ SET_IDREG(isar, ID_AA64ZFR0, 0x0000000000000000);
cpu->clidr = 0x0000000080000023;
- cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */
- cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */
- cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */
+ /* 64KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 7);
+ /* 64KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 2);
+ /* 8MB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 16, 256, 8 * MiB, 7);
cpu->dcz_blocksize = 6; /* 256 bytes */
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
@@ -592,6 +583,7 @@ static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu)
static void aarch64_neoverse_n1_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,neoverse-n1";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -608,38 +600,41 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
cpu->clidr = 0x82000023;
cpu->ctr = 0x8444c004;
cpu->dcz_blocksize = 4;
- cpu->isar.id_aa64dfr0 = 0x0000000110305408ull;
- cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
- cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
- cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
- cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */
- cpu->isar.id_aa64pfr1 = 0x0000000000000020ull;
+ SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull);
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull);
+ SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_dfr0 = 0x04010088;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x01011121;
- cpu->isar.id_isar6 = 0x00000010;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_mmfr4 = 0x00021110;
- cpu->isar.id_pfr0 = 0x10010131;
- cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
- cpu->isar.id_pfr2 = 0x00000011;
+ SET_IDREG(isar, ID_DFR0, 0x04010088);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x01011121);
+ SET_IDREG(isar, ID_ISAR6, 0x00000010);
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_MMFR4, 0x00021110);
+ SET_IDREG(isar, ID_PFR0, 0x10010131);
+ SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
cpu->midr = 0x414fd0c1; /* r4p1 */
cpu->revidr = 0;
/* From B2.23 CCSIDR_EL1 */
- cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
- cpu->ccsidr[2] = 0x70ffe03a; /* 1MB L2 cache */
+ /* 64KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7);
+ /* 64KB L1 icache */
+ cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2);
+ /* 1MB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 1 * MiB, 7);
/* From B2.98 SCTLR_EL3 */
cpu->reset_sctlr = 0x30c50838;
@@ -664,6 +659,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
static void aarch64_neoverse_v1_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,neoverse-v1";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -682,32 +678,32 @@ static void aarch64_neoverse_v1_initfn(Object *obj)
cpu->dcz_blocksize = 4;
cpu->id_aa64afr0 = 0x00000000;
cpu->id_aa64afr1 = 0x00000000;
- cpu->isar.id_aa64dfr0 = 0x000001f210305519ull;
- cpu->isar.id_aa64dfr1 = 0x00000000;
- cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */
- cpu->isar.id_aa64isar1 = 0x0111000001211032ull;
- cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull;
- cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */
- cpu->isar.id_aa64pfr1 = 0x0000000000000020ull;
+ SET_IDREG(isar, ID_AA64DFR0, 0x000001f210305519ull);
+ SET_IDREG(isar, ID_AA64DFR1, 0x00000000);
+ SET_IDREG(isar, ID_AA64ISAR0, 0x1011111110212120ull); /* with FEAT_RNG */
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0111000001211032ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x0220011102101011ull);
+ SET_IDREG(isar, ID_AA64PFR0, 0x1101110120111112ull); /* GIC filled in later */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull);
cpu->id_afr0 = 0x00000000;
- cpu->isar.id_dfr0 = 0x15011099;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x11011121;
- cpu->isar.id_isar6 = 0x01100111;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_mmfr4 = 0x01021110;
- cpu->isar.id_pfr0 = 0x21110131;
- cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
- cpu->isar.id_pfr2 = 0x00000011;
+ SET_IDREG(isar, ID_DFR0, 0x15011099);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x11011121);
+ SET_IDREG(isar, ID_ISAR6, 0x01100111);
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_MMFR4, 0x01021110);
+ SET_IDREG(isar, ID_PFR0, 0x21110131);
+ SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
cpu->midr = 0x411FD402; /* r1p2 */
cpu->revidr = 0;
@@ -721,9 +717,12 @@ static void aarch64_neoverse_v1_initfn(Object *obj)
* L2: 8-way set associative, 64 byte line size, either 512K or 1MB.
* L3: No L3 (this matches the CLIDR_EL1 value).
*/
- cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
- cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
- cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB); /* L2 cache */
+ /* 64KB L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0);
+ /* 64KB L1 icache */
+ cpu->ccsidr[1] = cpu->ccsidr[0];
+ /* 1MB L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 1 * MiB, 0);
/* From 3.2.115 SCTLR_EL3 */
cpu->reset_sctlr = 0x30c50838;
@@ -740,7 +739,7 @@ static void aarch64_neoverse_v1_initfn(Object *obj)
cpu->isar.mvfr2 = 0x00000043;
/* From 3.7.5 ID_AA64ZFR0_EL1 */
- cpu->isar.id_aa64zfr0 = 0x0000100000100000;
+ SET_IDREG(isar, ID_AA64ZFR0, 0x0000100000100000);
cpu->sve_vq.supported = (1 << 0) /* 128bit */
| (1 << 1); /* 256bit */
@@ -887,6 +886,7 @@ static const ARMCPRegInfo cortex_a710_cp_reginfo[] = {
static void aarch64_a710_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,cortex-a710";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -902,38 +902,38 @@ static void aarch64_a710_initfn(Object *obj)
/* Ordered by Section B.4: AArch64 registers */
cpu->midr = 0x412FD471; /* r2p1 */
cpu->revidr = 0;
- cpu->isar.id_pfr0 = 0x21110131;
- cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
- cpu->isar.id_dfr0 = 0x16011099;
+ SET_IDREG(isar, ID_PFR0, 0x21110131);
+ SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */
+ SET_IDREG(isar, ID_DFR0, 0x16011099);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x11011121; /* with Crypto */
- cpu->isar.id_mmfr4 = 0x21021110;
- cpu->isar.id_isar6 = 0x01111111;
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */
+ SET_IDREG(isar, ID_MMFR4, 0x21021110);
+ SET_IDREG(isar, ID_ISAR6, 0x01111111);
cpu->isar.mvfr0 = 0x10110222;
cpu->isar.mvfr1 = 0x13211111;
cpu->isar.mvfr2 = 0x00000043;
- cpu->isar.id_pfr2 = 0x00000011;
- cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */
- cpu->isar.id_aa64pfr1 = 0x0000000000000221ull;
- cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */
- cpu->isar.id_aa64dfr0 = 0x000011f010305619ull;
- cpu->isar.id_aa64dfr1 = 0;
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
+ SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull);
+ SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */
+ SET_IDREG(isar, ID_AA64DFR0, 0x000011f010305619ull);
+ SET_IDREG(isar, ID_AA64DFR1, 0);
cpu->id_aa64afr0 = 0;
cpu->id_aa64afr1 = 0;
- cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */
- cpu->isar.id_aa64isar1 = 0x0010111101211052ull;
- cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull;
+ SET_IDREG(isar, ID_AA64ISAR0, 0x0221111110212120ull); /* with Crypto */
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0010111101211052ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101122ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x1221011110101011ull);
cpu->clidr = 0x0000001482000023ull;
cpu->gm_blocksize = 4;
cpu->ctr = 0x000000049444c004ull;
@@ -959,9 +959,12 @@ static void aarch64_a710_initfn(Object *obj)
* L1: 4-way set associative 64-byte line size, total either 32K or 64K.
* L2: 8-way set associative 64 byte line size, total either 256K or 512K.
*/
- cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
- cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
- cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */
+ /* L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0);
+ /* L1 icache */
+ cpu->ccsidr[1] = cpu->ccsidr[0];
+ /* L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 512 * KiB, 0);
/* FIXME: Not documented -- copied from neoverse-v1 */
cpu->reset_sctlr = 0x30c50838;
@@ -985,6 +988,7 @@ static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = {
static void aarch64_neoverse_n2_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
cpu->dtb_compatible = "arm,neoverse-n2";
set_feature(&cpu->env, ARM_FEATURE_V8);
@@ -1000,38 +1004,38 @@ static void aarch64_neoverse_n2_initfn(Object *obj)
/* Ordered by Section B.5: AArch64 ID registers */
cpu->midr = 0x410FD493; /* r0p3 */
cpu->revidr = 0;
- cpu->isar.id_pfr0 = 0x21110131;
- cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
- cpu->isar.id_dfr0 = 0x16011099;
+ SET_IDREG(isar, ID_PFR0, 0x21110131);
+ SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */
+ SET_IDREG(isar, ID_DFR0, 0x16011099);
cpu->id_afr0 = 0;
- cpu->isar.id_mmfr0 = 0x10201105;
- cpu->isar.id_mmfr1 = 0x40000000;
- cpu->isar.id_mmfr2 = 0x01260000;
- cpu->isar.id_mmfr3 = 0x02122211;
- cpu->isar.id_isar0 = 0x02101110;
- cpu->isar.id_isar1 = 0x13112111;
- cpu->isar.id_isar2 = 0x21232042;
- cpu->isar.id_isar3 = 0x01112131;
- cpu->isar.id_isar4 = 0x00010142;
- cpu->isar.id_isar5 = 0x11011121; /* with Crypto */
- cpu->isar.id_mmfr4 = 0x01021110;
- cpu->isar.id_isar6 = 0x01111111;
+ SET_IDREG(isar, ID_MMFR0, 0x10201105);
+ SET_IDREG(isar, ID_MMFR1, 0x40000000);
+ SET_IDREG(isar, ID_MMFR2, 0x01260000);
+ SET_IDREG(isar, ID_MMFR3, 0x02122211);
+ SET_IDREG(isar, ID_ISAR0, 0x02101110);
+ SET_IDREG(isar, ID_ISAR1, 0x13112111);
+ SET_IDREG(isar, ID_ISAR2, 0x21232042);
+ SET_IDREG(isar, ID_ISAR3, 0x01112131);
+ SET_IDREG(isar, ID_ISAR4, 0x00010142);
+ SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */
+ SET_IDREG(isar, ID_MMFR4, 0x01021110);
+ SET_IDREG(isar, ID_ISAR6, 0x01111111);
cpu->isar.mvfr0 = 0x10110222;
cpu->isar.mvfr1 = 0x13211111;
cpu->isar.mvfr2 = 0x00000043;
- cpu->isar.id_pfr2 = 0x00000011;
- cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */
- cpu->isar.id_aa64pfr1 = 0x0000000000000221ull;
- cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */
- cpu->isar.id_aa64dfr0 = 0x000011f210305619ull;
- cpu->isar.id_aa64dfr1 = 0;
+ SET_IDREG(isar, ID_PFR2, 0x00000011);
+ SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */
+ SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull);
+ SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */
+ SET_IDREG(isar, ID_AA64DFR0, 0x000011f210305619ull);
+ SET_IDREG(isar, ID_AA64DFR1, 0);
cpu->id_aa64afr0 = 0;
cpu->id_aa64afr1 = 0;
- cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */
- cpu->isar.id_aa64isar1 = 0x0011111101211052ull;
- cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull;
- cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
- cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull;
+ SET_IDREG(isar, ID_AA64ISAR0, 0x1221111110212120ull); /* with Crypto and FEAT_RNG */
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0011111101211052ull);
+ SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101125ull);
+ SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull);
+ SET_IDREG(isar, ID_AA64MMFR2, 0x1221011112101011ull);
cpu->clidr = 0x0000001482000023ull;
cpu->gm_blocksize = 4;
cpu->ctr = 0x00000004b444c004ull;
@@ -1057,10 +1061,12 @@ static void aarch64_neoverse_n2_initfn(Object *obj)
* L1: 4-way set associative 64-byte line size, total 64K.
* L2: 8-way set associative 64 byte line size, total either 512K or 1024K.
*/
- cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
- cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
- cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */
-
+ /* L1 dcache */
+ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 4, 64, 64 * KiB, 0);
+ /* L1 icache */
+ cpu->ccsidr[1] = cpu->ccsidr[0];
+ /* L2 cache */
+ cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_CCIDX, 8, 64, 512 * KiB, 0);
/* FIXME: Not documented -- copied from neoverse-v1 */
cpu->reset_sctlr = 0x30c50838;
@@ -1083,6 +1089,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj)
void aarch64_max_tcg_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
+ ARMISARegisters *isar = &cpu->isar;
uint64_t t;
uint32_t u;
@@ -1133,7 +1140,7 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, CTR_EL0, DIC, 1);
cpu->ctr = t;
- t = cpu->isar.id_aa64isar0;
+ t = GET_IDREG(isar, ID_AA64ISAR0);
t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */
t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */
t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */
@@ -1148,9 +1155,9 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */
t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */
t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */
- cpu->isar.id_aa64isar0 = t;
+ SET_IDREG(isar, ID_AA64ISAR0, t);
- t = cpu->isar.id_aa64isar1;
+ t = GET_IDREG(isar, ID_AA64ISAR1);
t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */
t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED);
t = FIELD_DP64(t, ID_AA64ISAR1, API, 1);
@@ -1160,18 +1167,20 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */
t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */
t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */
- t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); /* FEAT_BF16 */
+ t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 2); /* FEAT_BF16, FEAT_EBF16 */
t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */
- cpu->isar.id_aa64isar1 = t;
+ t = FIELD_DP64(t, ID_AA64ISAR1, XS, 1); /* FEAT_XS */
+ SET_IDREG(isar, ID_AA64ISAR1, t);
- t = cpu->isar.id_aa64isar2;
+ t = GET_IDREG(isar, ID_AA64ISAR2);
+ t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */
t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
- t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
+ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */
- cpu->isar.id_aa64isar2 = t;
+ SET_IDREG(isar, ID_AA64ISAR2, t);
- t = cpu->isar.id_aa64pfr0;
+ t = GET_IDREG(isar, ID_AA64PFR0);
t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */
t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */
t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */
@@ -1180,9 +1189,9 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */
t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3); /* FEAT_CSV2_3 */
t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */
- cpu->isar.id_aa64pfr0 = t;
+ SET_IDREG(isar, ID_AA64PFR0, t);
- t = cpu->isar.id_aa64pfr1;
+ t = GET_IDREG(isar, ID_AA64PFR1);
t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */
t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */
/*
@@ -1195,9 +1204,9 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */
t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */
- cpu->isar.id_aa64pfr1 = t;
+ SET_IDREG(isar, ID_AA64PFR1, t);
- t = cpu->isar.id_aa64mmfr0;
+ t = GET_IDREG(isar, ID_AA64MMFR0);
t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */
@@ -1205,9 +1214,9 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */
t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */
t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */
- cpu->isar.id_aa64mmfr0 = t;
+ SET_IDREG(isar, ID_AA64MMFR0, t);
- t = cpu->isar.id_aa64mmfr1;
+ t = GET_IDREG(isar, ID_AA64MMFR1);
t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */
t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */
t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */
@@ -1217,10 +1226,12 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
+ t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
- cpu->isar.id_aa64mmfr1 = t;
+ t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
+ SET_IDREG(isar, ID_AA64MMFR1, t);
- t = cpu->isar.id_aa64mmfr2;
+ t = GET_IDREG(isar, ID_AA64MMFR2);
t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */
t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */
t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */
@@ -1234,31 +1245,29 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */
t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */
t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */
- cpu->isar.id_aa64mmfr2 = t;
+ SET_IDREG(isar, ID_AA64MMFR2, t);
- t = cpu->isar.id_aa64mmfr3;
- t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */
- cpu->isar.id_aa64mmfr3 = t;
+ FIELD_DP64_IDREG(isar, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */
- t = cpu->isar.id_aa64zfr0;
+ t = GET_IDREG(isar, ID_AA64ZFR0);
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */
t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */
- t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); /* FEAT_BF16 */
+ t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */
t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */
t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */
t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */
t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */
t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */
- cpu->isar.id_aa64zfr0 = t;
+ SET_IDREG(isar, ID_AA64ZFR0, t);
- t = cpu->isar.id_aa64dfr0;
- t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */
+ t = GET_IDREG(isar, ID_AA64DFR0);
+ t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 10); /* FEAT_Debugv8p8 */
t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */
t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */
- cpu->isar.id_aa64dfr0 = t;
+ SET_IDREG(isar, ID_AA64DFR0, t);
- t = cpu->isar.id_aa64smfr0;
+ t = GET_IDREG(isar, ID_AA64SMFR0);
t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
@@ -1266,7 +1275,7 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
- cpu->isar.id_aa64smfr0 = t;
+ SET_IDREG(isar, ID_AA64SMFR0, t);
/* Replicate the same data to the 32-bit id registers. */
aa32_max_features(cpu);
@@ -1312,7 +1321,7 @@ static void aarch64_cpu_register_types(void)
size_t i;
for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) {
- aarch64_cpu_register(&aarch64_cpus[i]);
+ arm_cpu_register(&aarch64_cpus[i]);
}
}
diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c
index 7cadd61..3428bd1 100644
--- a/target/arm/tcg/crypto_helper.c
+++ b/target/arm/tcg/crypto_helper.c
@@ -10,14 +10,16 @@
*/
#include "qemu/osdep.h"
+#include "qemu/bitops.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes-round.h"
#include "crypto/sm4.h"
#include "vec_internal.h"
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
+
union CRYPTO_STATE {
uint8_t bytes[16];
uint32_t words[4];
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 56a1dc1..01867f8 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -88,6 +88,25 @@ GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
#undef GEN_CMP0
+void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Signed shift out of range results in all-sign-bits */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+}
+
+void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Unsigned shift out of range results in all-zero-bits */
+ if (shift >= (8 << vece)) {
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+ }
+}
+
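
These wrappers pin down the architectural out-of-range behaviour that the generic gvec shift primitives leave undefined: an immediate shift by the full element size (or more) must still produce all sign bits for a signed shift and all zeroes for an unsigned one. A scalar analogue for 8-bit lanes (helper names illustrative; MIN() as in qemu/osdep.h):

    int8_t sshr8(int8_t x, unsigned shift)
    {
        /* shift >= 8 behaves like shift-by-7: the lane fills with sign bits */
        return x >> MIN(shift, 7u);
    }

    uint8_t ushr8(uint8_t x, unsigned shift)
    {
        /* shift >= 8 clears the lane */
        return shift >= 8 ? 0 : x >> shift;
    }
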
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_vec_sar8i_i64(a, a, shift);
@@ -285,7 +304,7 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
tcg_gen_add_i32(d, d, t);
}
- void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
TCGv_i64 t = tcg_temp_new_i64();
@@ -297,10 +316,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
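
The rounding shift adds back the last bit shifted out, i.e. d = (a >> sh) + ((a >> (sh - 1)) & 1); for a = 7 and sh = 1 that is 3 + 1 = 4, rounding 3.5 to nearest. The change here only swaps a temporary-plus-dupi pair for a constant vector of ones; the arithmetic is untouched. A scalar analogue, valid for 1 <= sh <= 63:

    int64_t srshr64(int64_t a, unsigned sh)
    {
        return (a >> sh) + ((a >> (sh - 1)) & 1);
    }
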
@@ -492,10 +510,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +702,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +790,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1060,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max, zero;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,26 +1079,21 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
- * The choice of LT (signed) and GEU (unsigned) are biased toward
+ * The choice of GE (signed) and GEU (unsigned) is biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
* is significant so we must use an unsigned compare; otherwise we
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
- tcg_gen_andc_vec(vece, lval, lval, lsh);
- tcg_gen_andc_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
} else {
- tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
- tcg_gen_and_vec(vece, lval, lval, lsh);
- tcg_gen_and_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
}
tcg_gen_or_vec(vece, dst, lval, rval);
}
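The cmpsel form folds the out-of-range test and the masking into a single select: lanes whose shift count is >= the element width become zero, all others keep the shifted value. Scalar model of USHL per element, with shift taken as the signed low byte of the shift-register lane:

#include <stdint.h>

static inline uint64_t ref_ushl(uint64_t v, int8_t shift, unsigned ebits)
{
    if (shift >= (int)ebits || shift <= -(int)ebits) {
        return 0;                   /* out of range in either direction */
    }
    return shift >= 0 ? v << shift : v >> -shift;
}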
@@ -1093,7 +1103,7 @@ void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_ushl_vec,
@@ -1169,7 +1179,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,29 +1190,28 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
/* Select in-bound left shift. */
- tcg_gen_andc_vec(vece, lval, lval, tmp);
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);
/* Select between left and right shift. */
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
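gen_sshl_vec() needs one extra wrinkle over the unsigned case: the right-shift count is clamped to ebits - 1 (the umin against max above), so an out-of-range negative shift yields all sign bits instead of zero. Scalar model:

#include <stdint.h>

static inline int64_t ref_sshl(int64_t v, int8_t shift, unsigned ebits)
{
    if (shift >= (int)ebits) {
        return 0;                            /* left shift out of range */
    }
    if (shift >= 0) {
        return v << shift;
    }
    /* Right shift saturates at ebits - 1: replicates the sign bit. */
    return v >> (-shift >= (int)ebits ? ebits - 1 : -shift);
}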
@@ -1211,7 +1220,7 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_sshl_vec,
@@ -1304,6 +1313,42 @@ void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
opr_sz, max_sz, 0, fns[vece]);
}
+void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
+ gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
+ gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
+ gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
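The new gen_neon_sqshli()/uqshli()/sqshlui() expanders go out of line because a saturating result must also set the sticky QC flag, hence the tcg_env argument. The scalar rule, sketched for the unsigned byte case (QC handling reduced to a flag):

#include <stdbool.h>
#include <stdint.h>

static inline uint8_t ref_uqshl8(uint8_t v, unsigned c, bool *qc)
{
    unsigned r = (unsigned)v << c;      /* c <= 8, per the asserts above */
    if (r > 0xff) {
        *qc = true;                     /* saturated: set sticky QC */
        return 0xff;
    }
    return (uint8_t)r;
}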
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
@@ -2313,3 +2358,372 @@ void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
assert(vece <= MO_32);
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}
+
+void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni4 = gen_helper_neon_cls_s8,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_cls_s16,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_clrsb_i32,
+ .vece = MO_32 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+static void gen_clz32_i32(TCGv_i32 d, TCGv_i32 n)
+{
+ tcg_gen_clzi_i32(d, n, 32);
+}
+
+void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni4 = gen_helper_neon_clz_u8,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_clz_u16,
+ .vece = MO_16 },
+ { .fni4 = gen_clz32_i32,
+ .vece = MO_32 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
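CLS counts the leading bits equal to the sign bit, excluding the sign bit itself; for 32-bit lanes that is exactly tcg_gen_clrsb_i32, while the narrower sizes still need helpers. A scalar reference using the GCC/Clang builtins (availability assumed):

#include <stdint.h>

static inline int ref_cls32(int32_t x)
{
    return __builtin_clrsb(x);          /* leading redundant sign bits */
}

static inline int ref_clz32(uint32_t x)
{
    return x ? __builtin_clz(x) : 32;   /* CLZ of zero is the width */
}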
+void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_cnt_b);
+}
+
+void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_rbit_b);
+}
+
+void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_rotli(MO_16, rd_ofs, rn_ofs, 8, opr_sz, max_sz);
+}
+
+static void gen_bswap32_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ tcg_gen_bswap64_i64(d, n);
+ tcg_gen_rotli_i64(d, d, 32);
+}
+
+void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g = {
+ .fni8 = gen_bswap32_i64,
+ .fni4 = tcg_gen_bswap32_i32,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_32
+ };
+
+ switch (vece) {
+ case MO_16:
+ tcg_gen_gvec_rotli(MO_32, rd_ofs, rn_ofs, 16, opr_sz, max_sz);
+ break;
+ case MO_8:
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
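gen_bswap32_i64() gets a per-32-bit byte reverse without a dedicated TCG op: a full bswap64 reverses all eight bytes, which also swaps the two halves, and a 32-bit rotate swaps them back. Scalar sketch:

#include <stdint.h>

static inline uint64_t ref_bswap32x2(uint64_t x)
{
    x = __builtin_bswap64(x);           /* bytes reversed, halves swapped */
    return (x << 32) | (x >> 32);       /* rotate halves back into place */
}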
+void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni8 = tcg_gen_bswap64_i64,
+ .vece = MO_64 },
+ { .fni8 = tcg_gen_hswap_i64,
+ .vece = MO_64 },
+ };
+
+ switch (vece) {
+ case MO_32:
+ tcg_gen_gvec_rotli(MO_64, rd_ofs, rn_ofs, 32, opr_sz, max_sz);
+ break;
+ case MO_8:
+ case MO_16:
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void gen_saddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ int half = 4 << vece;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, n, half);
+ tcg_gen_sari_vec(vece, d, n, half);
+ tcg_gen_sari_vec(vece, t, t, half);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+static void gen_saddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_ext32s_i64(t, n);
+ tcg_gen_sari_i64(d, n, 32);
+ tcg_gen_add_i64(d, d, t);
+}
+
+void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_helper_neon_addlp_s8,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_helper_neon_addlp_s16,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_saddlp_s_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+static void gen_sadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_saddlp_vec(vece, t, n);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+static void gen_sadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_helper_neon_addlp_s8(t, n);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_sadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_helper_neon_addlp_s16(t, n);
+ tcg_gen_vec_add32_i64(d, d, t);
+}
+
+static void gen_sadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_saddlp_s_i64(t, n);
+ tcg_gen_add_i64(d, d, t);
+}
+
+void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_b_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_h_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_s_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+static void gen_uaddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ int half = 4 << vece;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, half));
+
+ tcg_gen_shri_vec(vece, t, n, half);
+ tcg_gen_and_vec(vece, d, n, m);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+static void gen_uaddlp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0xff));
+
+ tcg_gen_shri_i64(t, n, 8);
+ tcg_gen_and_i64(d, n, m);
+ tcg_gen_and_i64(t, t, m);
+ /* No carry between widened unsigned elements. */
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaddlp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_32, 0xffff));
+
+ tcg_gen_shri_i64(t, n, 16);
+ tcg_gen_and_i64(d, n, m);
+ tcg_gen_and_i64(t, t, m);
+ /* No carry between widened unsigned elements. */
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_ext32u_i64(t, n);
+ tcg_gen_shri_i64(d, n, 32);
+ tcg_gen_add_i64(d, d, t);
+}
+
+void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_b_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_h_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_s_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
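All of the *addlp expansions rely on the same observation: once the two halves are masked (or sign-extended) into widened elements, a plain full-width add is safe because each addend occupies only the low half of its widened lane, so no carry can cross a lane boundary. Scalar sketch for the unsigned byte case, mirroring gen_uaddlp_b_i64():

#include <stdint.h>

static inline uint64_t ref_uaddlp_b(uint64_t n)
{
    uint64_t even = n & 0x00ff00ff00ff00ffull;          /* bytes 0,2,4,6 */
    uint64_t odd  = (n >> 8) & 0x00ff00ff00ff00ffull;   /* bytes 1,3,5,7 */
    return even + odd;          /* four 16-bit sums, no inter-lane carry */
}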
+static void gen_uadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_uaddlp_vec(vece, t, n);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+static void gen_uadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_b_i64(t, n);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_uadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_h_i64(t, n);
+ tcg_gen_vec_add32_i64(d, d, t);
+}
+
+static void gen_uadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_s_i64(t, n);
+ tcg_gen_add_i64(d, d, t);
+}
+
+void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_b_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_h_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_s_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ uint64_t s_bit = 1ull << ((8 << vece) - 1);
+ tcg_gen_gvec_andi(vece, dofs, aofs, s_bit - 1, oprsz, maxsz);
+}
+
+void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ uint64_t s_bit = 1ull << ((8 << vece) - 1);
+ tcg_gen_gvec_xori(vece, dofs, aofs, s_bit, oprsz, maxsz);
+}
+
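gen_gvec_fabs()/gen_gvec_fneg() never touch the FP unit: both are pure sign-bit manipulations, so integer andi/xori expansions are exact for every element size. In scalar form:

#include <stdint.h>

static inline uint32_t ref_fabs32(uint32_t f) { return f & 0x7fffffffu; }
static inline uint32_t ref_fneg32(uint32_t f) { return f ^ 0x80000000u; }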
+void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_32);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_urecpe_s);
+}
+
+void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_32);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_ursqrte_s);
+}
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 0ea8668..4f618ae 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -28,12 +28,20 @@
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "exec/cpu-common.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/helper-retaddr.h"
+#include "accel/tcg/probe.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
-#include <zlib.h> /* For crc32 */
+#include <zlib.h> /* for crc32 */
+#ifdef CONFIG_USER_ONLY
+#include "user/page-protection.h"
+#endif
+#include "vec_internal.h"
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -130,40 +138,38 @@ static inline uint32_t float_rel_to_flags(int res)
return flags;
}
-uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
+uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}
-uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
+uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
{
return float_rel_to_flags(float16_compare(x, y, fp_status));
}
-uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
+uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
{
return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}
-uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
+uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
{
return float_rel_to_flags(float32_compare(x, y, fp_status));
}
-uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
+uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
{
return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}
-uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
+uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
{
return float_rel_to_flags(float64_compare(x, y, fp_status));
}
-float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
{
- float_status *fpst = fpstp;
-
a = float32_squash_input_denormal(a, fpst);
b = float32_squash_input_denormal(b, fpst);
@@ -176,10 +182,8 @@ float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
return float32_mul(a, b, fpst);
}
-float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
{
- float_status *fpst = fpstp;
-
a = float64_squash_input_denormal(a, fpst);
b = float64_squash_input_denormal(b, fpst);
@@ -193,184 +197,71 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
}
/* 64bit/double versions of the neon float compare functions */
-uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float64_eq_quiet(a, b, fpst);
}
-uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float64_le(b, a, fpst);
}
-uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float64_lt(b, a, fpst);
}
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+/*
+ * Reciprocal step and sqrt step. Note that unlike the A32/T32
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
+ * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
*/
-
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_two;
- }
- return float16_muladd(a, b, float16_two, 0, fpst);
-}
-
-float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_two;
- }
- return float32_muladd(a, b, float32_two, 0, fpst);
-}
-
-float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_two;
- }
- return float64_muladd(a, b, float64_two, 0, fpst);
-}
-
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_one_point_five;
- }
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
-}
-
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_one_point_five;
- }
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
-}
-
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_one_point_five;
- }
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
-}
-
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
- uint64_t nsignmask = 0x0080008000800080ULL;
- uint64_t wsignmask = 0x8000800080008000ULL;
- uint64_t elementmask = 0x00ff00ff00ff00ffULL;
- uint64_t tmp1, tmp2;
- uint64_t res, signres;
-
- /* Extract odd elements, sign extend each to a 16 bit field */
- tmp1 = a & elementmask;
- tmp1 ^= nsignmask;
- tmp1 |= wsignmask;
- tmp1 = (tmp1 - nsignmask) ^ wsignmask;
- /* Ditto for the even elements */
- tmp2 = (a >> 8) & elementmask;
- tmp2 ^= nsignmask;
- tmp2 |= wsignmask;
- tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
- /* calculate the result by summing bits 0..14, 16..22, etc,
- * and then adjusting the sign bits 15, 23, etc manually.
- * This ensures the addition can't overflow the 16 bit field.
- */
- signres = (tmp1 ^ tmp2) & wsignmask;
- res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
- res ^= signres;
-
- return res;
-}
-
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x00ff00ff00ff00ffULL;
- tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
- return tmp;
-}
-
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
- int32_t reslo, reshi;
-
- reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
- reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
- return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x0000ffff0000ffffULL;
- tmp += (a >> 16) & 0x0000ffff0000ffffULL;
- return tmp;
-}
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ a = FLOATTYPE ## _ ## CHSFN(a); \
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+ return FLOATTYPE ## _two; \
+ } \
+ return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
+ }
+
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
+DO_RECPS(recpsf_f32, float32, float32, chs)
+DO_RECPS(recpsf_f64, float64, float64, chs)
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
+
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ a = FLOATTYPE ## _ ## CHSFN(a); \
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+ return FLOATTYPE ## _one_point_five; \
+ } \
+ return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
+ -1, 0, fpst); \
+ }
+
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
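Both macro families compute one Newton-Raphson step; the _ah_ variants differ only in a sign-flip helper (ah_chs) that leaves NaNs untouched, and DO_RSQRTSF now halves via float*_muladd_scalbn(..., -1, ...) rather than a halve-result flag. Setting aside the infinity-times-zero special cases, the math is:

/* One Newton-Raphson step each (special cases omitted):
 *   FRECPS(a, b)  = 2 - a * b       refines an estimate of 1/x
 *   FRSQRTS(a, b) = (3 - a * b) / 2 refines an estimate of 1/sqrt(x)
 */
static inline double ref_recps(double a, double b)  { return 2.0 - a * b; }
static inline double ref_rsqrts(double a, double b) { return (3.0 - a * b) / 2.0; }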
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
-uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
+uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
{
- float_status *fpst = fpstp;
uint16_t val16, sbit;
int16_t exp;
@@ -401,9 +292,8 @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
}
}
-float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
+float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
{
- float_status *fpst = fpstp;
uint32_t val32, sbit;
int32_t exp;
@@ -434,9 +324,8 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
}
}
-float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
+float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
{
- float_status *fpst = fpstp;
uint64_t val64, sbit;
int64_t exp;
@@ -467,28 +356,53 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
}
}
-float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
+float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
{
- /* Von Neumann rounding is implemented by using round-to-zero
- * and then setting the LSB of the result if Inexact was raised.
- */
float32 r;
- float_status *fpst = &env->vfp.fp_status;
- float_status tstat = *fpst;
- int exflags;
-
- set_float_rounding_mode(float_round_to_zero, &tstat);
- set_float_exception_flags(0, &tstat);
- r = float64_to_float32(a, &tstat);
- exflags = get_float_exception_flags(&tstat);
- if (exflags & float_flag_inexact) {
- r = make_float32(float32_val(r) | 1);
- }
- exflags |= get_float_exception_flags(fpst);
- set_float_exception_flags(exflags, fpst);
+ int old = get_float_rounding_mode(fpst);
+
+ set_float_rounding_mode(float_round_to_odd, fpst);
+ r = float64_to_float32(a, fpst);
+ set_float_rounding_mode(old, fpst);
return r;
}
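FCVTXN's "Von Neumann" rounding now comes straight from softfloat's float_round_to_odd instead of being synthesised from round-to-zero plus the inexact flag; the two constructions agree. On raw significand bits the rule is (sketch, 0 < drop < 64):

#include <stdint.h>

static inline uint32_t ref_round_to_odd(uint64_t frac, unsigned drop)
{
    uint32_t r = (uint32_t)(frac >> drop);
    if (frac & ((1ull << drop) - 1)) {
        r |= 1;         /* inexact: force the LSB odd */
    }
    return r;           /* a later narrower rounding cannot double-round */
}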
+/*
+ * AH=1 min/max have some odd special cases:
+ * comparing two zeroes (regardless of sign), (NaN, anything),
+ * or (anything, NaN) should return the second argument (possibly
+ * squashed to zero).
+ * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
+ */
+#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ bool save; \
+ CTYPE r; \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
+ return b; \
+ } \
+ if (FLOATTYPE ## _is_any_nan(a) || \
+ FLOATTYPE ## _is_any_nan(b)) { \
+ float_raise(float_flag_invalid, fpst); \
+ return b; \
+ } \
+ save = get_flush_to_zero(fpst); \
+ set_flush_to_zero(false, fpst); \
+ r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
+ set_flush_to_zero(save, fpst); \
+ return r; \
+ }
+
+AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
+AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
+AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
+AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
+AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
+AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
+
/* 64-bit versions of the CRC helpers. Note that although the operation
* (and the prototypes of crc32c() and crc32()) mean that only the bottom
* 32 bits of the accumulator and result are used, we pass and return
@@ -524,27 +438,17 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
#define ADVSIMD_HALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
+uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
return float16_ ## name(a, b, fpst); \
}
-ADVSIMD_HALFOP(add)
-ADVSIMD_HALFOP(sub)
-ADVSIMD_HALFOP(mul)
-ADVSIMD_HALFOP(div)
-ADVSIMD_HALFOP(min)
-ADVSIMD_HALFOP(max)
-ADVSIMD_HALFOP(minnum)
-ADVSIMD_HALFOP(maxnum)
-
#define ADVSIMD_TWOHALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \
+ float_status *fpst) \
{ \
float16 a1, a2, b1, b2; \
uint32_t r1, r2; \
- float_status *fpst = fpstp; \
a1 = extract32(two_a, 0, 16); \
a2 = extract32(two_a, 16, 16); \
b1 = extract32(two_b, 0, 16); \
@@ -564,10 +468,8 @@ ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)
/* Data processing - scalar floating-point and advanced SIMD */
-static float16 float16_mulx(float16 a, float16 b, void *fpstp)
+static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
{
- float_status *fpst = fpstp;
-
a = float16_squash_input_denormal(a, fpst);
b = float16_squash_input_denormal(b, fpst);
@@ -585,16 +487,14 @@ ADVSIMD_TWOHALFOP(mulx)
/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
- void *fpstp)
+ float_status *fpst)
{
- float_status *fpst = fpstp;
return float16_muladd(a, b, c, 0, fpst);
}
uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
- uint32_t two_c, void *fpstp)
+ uint32_t two_c, float_status *fpst)
{
- float_status *fpst = fpstp;
float16 a1, a2, b1, b2, c1, c2;
uint32_t r1, r2;
a1 = extract32(two_a, 0, 16);
@@ -616,31 +516,27 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
-uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
int compare = float16_compare_quiet(a, b, fpst);
return ADVSIMD_CMPRES(compare == float_relation_equal);
}
-uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
int compare = float16_compare(a, b, fpst);
return ADVSIMD_CMPRES(compare == float_relation_greater ||
compare == float_relation_equal);
}
-uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
int compare = float16_compare(a, b, fpst);
return ADVSIMD_CMPRES(compare == float_relation_greater);
}
-uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float16 f0 = float16_abs(a);
float16 f1 = float16_abs(b);
int compare = float16_compare(f0, f1, fpst);
@@ -648,9 +544,8 @@ uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
compare == float_relation_equal);
}
-uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float16 f0 = float16_abs(a);
float16 f1 = float16_abs(b);
int compare = float16_compare(f0, f1, fpst);
@@ -658,12 +553,12 @@ uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
}
/* round to integral */
-uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
+uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
{
return float16_round_to_int(x, fp_status);
}
-uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
+uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
float16 ret;
@@ -679,38 +574,6 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
return ret;
}
-/*
- * Half-precision floating point conversion functions
- *
- * There are a multitude of conversion functions with various
- * different rounding modes. This is dealt with by the calling code
- * setting the mode appropriately before calling the helper.
- */
-
-uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- /* Invalid if we are passed a NaN */
- if (float16_is_any_nan(a)) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int16(a, fpst);
-}
-
-uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- /* Invalid if we are passed a NaN */
- if (float16_is_any_nan(a)) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint16(a, fpst);
-}
-
static int el_from_spsr(uint32_t spsr)
{
/* Return the exception level that this SPSR is requesting a return to,
@@ -771,6 +634,7 @@ static void cpsr_write_from_spsr_elx(CPUARMState *env,
void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
+ ARMCPU *cpu = env_archcpu(env);
int cur_el = arm_current_el(env);
unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
uint32_t spsr = env->banked_spsr[spsr_idx];
@@ -817,12 +681,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
goto illegal_return;
}
+ if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
+ /* Return to AArch32 is illegal when the CPU is AArch64-only */
+ goto illegal_return;
+ }
+
if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
goto illegal_return;
}
bql_lock();
- arm_call_pre_el_change_hook(env_archcpu(env));
+ arm_call_pre_el_change_hook(cpu);
bql_unlock();
if (!return_to_aa64) {
@@ -850,7 +719,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
int tbii;
env->aarch64 = true;
- spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
+ spsr &= aarch64_pstate_valid_mask(&cpu->isar);
pstate_write(env, spsr);
if (!arm_singlestep_active(env)) {
env->pstate &= ~PSTATE_SS;
@@ -889,7 +758,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
bql_lock();
- arm_call_el_change_hook(env_archcpu(env));
+ arm_call_el_change_hook(cpu);
bql_unlock();
return;
@@ -915,19 +784,10 @@ illegal_return:
"resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}
-/*
- * Square Root and Reciprocal square root
- */
-
-uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
-{
- float_status *s = fpstp;
-
- return float16_sqrt(a, s);
-}
-
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
+ uintptr_t ra = GETPC();
+
/*
* Implement DC ZVA, which zeroes a fixed-length block of memory.
* Note that we do not implement the (architecturally mandated)
@@ -948,8 +808,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
#ifndef CONFIG_USER_ONLY
if (unlikely(!mem)) {
- uintptr_t ra = GETPC();
-
/*
* Trap if accessing an invalid page. DC_ZVA requires that we supply
* the original pointer for an invalid page. But watchpoints require
@@ -971,7 +829,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
}
#endif
+ set_helper_retaddr(ra);
memset(mem, 0, blocklen);
+ clear_helper_retaddr();
}
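The set_helper_retaddr()/clear_helper_retaddr() bracket added here (and around every direct host memset/memmove in this patch) publishes the helper's return address so that a SIGSEGV taken inside the libc call can still be unwound to the correct guest instruction. The idiom, as a sketch (zero_host_block is a hypothetical wrapper, assuming QEMU's accel/tcg/helper-retaddr.h is in scope):

#include <stdint.h>
#include <string.h>

static inline void zero_host_block(void *mem, size_t len, uintptr_t ra)
{
    set_helper_retaddr(ra);     /* make ra visible to the signal handler */
    memset(mem, 0, len);        /* direct host access that may fault */
    clear_helper_retaddr();
}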
void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
@@ -1120,7 +980,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
return setsize;
}
@@ -1163,7 +1025,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
return setsize;
}
@@ -1286,7 +1150,6 @@ static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
env->ZF = 1; /* our env->ZF encoding is inverted */
env->CF = 0;
env->VF = 0;
- return;
}
void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
@@ -1342,7 +1205,7 @@ static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
/* Do the actual memset: we leave the last partial page to SETE */
stagesetsize = setsize & TARGET_PAGE_MASK;
while (stagesetsize > 0) {
- step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
+ step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
toaddr += step;
setsize -= step;
stagesetsize -= step;
@@ -1497,7 +1360,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
}
#endif
/* Easy case: just memmove the host memory */
+ set_helper_retaddr(ra);
memmove(wmem, rmem, copysize);
+ clear_helper_retaddr();
return copysize;
}
@@ -1572,7 +1437,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
* Easy case: just memmove the host memory. Note that wmem and
* rmem here point to the *last* byte to copy.
*/
+ set_helper_retaddr(ra);
memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
+ clear_helper_retaddr();
return copysize;
}
@@ -1682,7 +1549,6 @@ static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
env->ZF = 1; /* our env->ZF encoding is inverted */
env->CF = 0;
env->VF = 0;
- return;
}
void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
@@ -1867,3 +1733,42 @@ void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
{
do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
}
+
+static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+ return page_get_flags(addr) & PAGE_BTI;
+#else
+ CPUTLBEntryFull *full;
+ void *host;
+ int mmu_idx = cpu_mmu_index(env_cpu(env), true);
+ int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
+ false, &host, &full, ra);
+
+ assert(!(flags & TLB_INVALID_MASK));
+ return full->extra.arm.guarded;
+#endif
+}
+
+void HELPER(guarded_page_check)(CPUARMState *env)
+{
+ /*
+ * We have already verified that bti is enabled, and that the
+ * instruction at PC is not ok for BTYPE. This is always at
+ * the beginning of a block, so PC is always up-to-date and
+ * no unwind is required.
+ */
+ if (is_guarded_page(env, env->pc, 0)) {
+ raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
+ exception_target_el(env));
+ }
+}
+
+void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
+{
+ /*
+ * We have already checked for branch via x16 and x17.
+ * What remains for choosing BTYPE is checking for a guarded page.
+ */
+ env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
+}
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index 371388f..8502346 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -23,64 +23,62 @@ DEF_HELPER_2(msr_i_spsel, void, env, i32)
DEF_HELPER_2(msr_i_daifset, void, env, i32)
DEF_HELPER_2(msr_i_daifclear, void, env, i32)
DEF_HELPER_1(msr_set_allint_el1, void, env)
-DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr)
-DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr)
-DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
-DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
-DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
-DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
-DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
-DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
-DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
-DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
-DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
-DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
-DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
-DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
-DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
+DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, fpst)
+DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, fpst)
+DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, fpst)
+DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, fpst)
+DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, fpst)
+DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, fpst)
+DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
+DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
+DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
+DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
+DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
+DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, fpst)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
-DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_FLAGS_3(advsimd_minnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_addh, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_subh, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_mulh, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_divh, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, ptr)
-DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, ptr)
-DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, ptr)
-DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, ptr)
-DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, ptr)
-DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr)
-DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr)
-DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr)
-DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
-DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
-DEF_HELPER_2(advsimd_rinth_exact, f16, f16, ptr)
-DEF_HELPER_2(advsimd_rinth, f16, f16, ptr)
-DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr)
-DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
-DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, fpst)
+DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, fpst)
+DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, fpst)
+DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, fpst)
+DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, fpst)
+DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, fpst)
+DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, fpst)
+DEF_HELPER_3(advsimd_add2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_div2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_max2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_min2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, fpst)
+DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, fpst)
+DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst)
+DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
+DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
+
+DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst)
DEF_HELPER_2(exception_return, void, env, i64)
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
@@ -133,14 +131,17 @@ DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
-DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_1(guarded_page_check, TCG_CALL_NO_WG, void, env)
+DEF_HELPER_FLAGS_2(guarded_page_br, TCG_CALL_NO_RWG, void, env, tl)
+
+DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
index 27eef49..858d691 100644
--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -121,13 +121,13 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index cc4e1d8..0b1b588 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -541,10 +541,18 @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -959,433 +967,545 @@ DEF_HELPER_FLAGS_4(sve_umini_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(sve_umini_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_5(gvec_recps_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_recps_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_recps_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_rsqrts_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_faddv_d, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxnmv_h, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxnmv_s, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxnmv_d, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminnmv_h, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminnmv_s, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminnmv_d, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxv_h, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxv_s, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fmaxv_d, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminv_h, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG,
- i64, ptr, ptr, ptr, i32)
+ i64, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG,
+ i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
- i64, i64, ptr, ptr, ptr, i32)
+ i64, i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
- i64, i64, ptr, ptr, ptr, i32)
+ i64, i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fadda_d, TCG_CALL_NO_RWG,
- i64, i64, ptr, ptr, ptr, i32)
+ i64, i64, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmge0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmge0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmge0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmgt0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmgt0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmgt0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmlt0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmlt0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmlt0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmle0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmle0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmle0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmeq0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmeq0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmeq0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmne0_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmul_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fdiv_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnum_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fabd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
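+/* FPCR.AH == 1 variant of fabd (alternate NaN handling for the final abs). */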
+DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fscalbn_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmulx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmulx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmulx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadds_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadds_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fadds_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubs_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubs_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubs_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmuls_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmuls_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmuls_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubrs_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubrs_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fsubrs_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnms_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnms_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxnms_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnms_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnms_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fminnms_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxs_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxs_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmaxs_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmins_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, i64, ptr, i32)
+ void, ptr, ptr, ptr, i64, fpst, i32)
+
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
+
+DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
+DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i64, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_hs, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_hd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_bfcvt, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_hh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_hs, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_ss, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_hd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_dd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_hh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_hs, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_ss, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_hd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzu_dd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frint_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frint_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frint_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frintx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frintx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frintx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frecpx_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frecpx_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_frecpx_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fsqrt_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fsqrt_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_fsqrt_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_hh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_sh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_dh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_ss, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_scvt_dd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_hh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_sh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_dh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_ss, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_ucvt_dd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmge_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmge_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmge_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmgt_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmgt_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmgt_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmeq_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmeq_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmeq_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmne_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmne_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmne_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmuo_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmuo_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcmuo_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facge_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facge_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facge_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facgt_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facgt_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_facgt_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcadd_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
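+/*
+ * FPCR.AH == 1 variants of the negating multiply-add ops, which differ
+ * in that the internal negation does not flip the sign of a NaN input.
+ */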
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve2_saddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_saddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -2582,39 +2702,39 @@ DEF_HELPER_FLAGS_4(sve2_xar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_xar_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_h, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_s, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve2_eor3, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_bcax, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -2682,8 +2802,8 @@ DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve2_sqrdmlah_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
@@ -2755,20 +2875,20 @@ DEF_HELPER_FLAGS_5(sve2_cdot_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_fcvtnt_sh, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve2_fcvtnt_ds, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve_bfcvtnt, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve2_fcvtlt_hs, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sve2_fcvtlt_sd, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+ void, ptr, ptr, ptr, fpst, i32)
-DEF_HELPER_FLAGS_5(flogb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(flogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(flogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(flogb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(flogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(flogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h
new file mode 100644
index 0000000..80db7c2
--- /dev/null
+++ b/target/arm/tcg/helper.h
@@ -0,0 +1,1153 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
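+/*
+ * Declarations for helpers callable from TCG-generated code.
+ * DEF_HELPER_FLAGS_<N>(name, flags, ret, args...) declares a helper with
+ * <N> arguments; DEF_HELPER_<N> is the same without flags.  The
+ * TCG_CALL_NO_RWG flag promises the helper reads and writes no TCG
+ * globals, TCG_CALL_NO_WG that it writes none, and the _SE suffix
+ * additionally marks the call free of side effects, so it may be
+ * optimized away if its result is unused.
+ */
+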
+DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
+
+DEF_HELPER_3(add_setq, i32, env, i32, i32)
+DEF_HELPER_3(add_saturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
+DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32)
+DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32)
+
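+/*
+ * Parallel add/subtract.  The signed (s) and unsigned (u) variants set
+ * the CPSR GE bits, which are passed in and out via the final pointer
+ * argument.
+ */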
+#define PAS_OP(pfx) \
+ DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
+ DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \
+ DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \
+ DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \
+ DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \
+ DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr)
+
+PAS_OP(s)
+PAS_OP(u)
+#undef PAS_OP
+
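+/*
+ * The saturating (q, uq) and halving (sh, uh) variants do not set the
+ * GE bits, so they omit the pointer argument used above.
+ */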
+#define PAS_OP(pfx) \
+ DEF_HELPER_2(pfx ## add8, i32, i32, i32) \
+ DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \
+ DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \
+ DEF_HELPER_2(pfx ## add16, i32, i32, i32) \
+ DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \
+ DEF_HELPER_2(pfx ## subaddx, i32, i32, i32)
+PAS_OP(q)
+PAS_OP(sh)
+PAS_OP(uq)
+PAS_OP(uh)
+#undef PAS_OP
+
+DEF_HELPER_3(ssat, i32, env, i32, i32)
+DEF_HELPER_3(usat, i32, env, i32, i32)
+DEF_HELPER_3(ssat16, i32, env, i32, i32)
+DEF_HELPER_3(usat16, i32, env, i32, i32)
+
+DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
+ i32, i32, i32, i32)
+DEF_HELPER_2(exception_internal, noreturn, env, i32)
+DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32)
+DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32)
+DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32)
+DEF_HELPER_2(exception_swstep, noreturn, env, i32)
+DEF_HELPER_2(exception_pc_alignment, noreturn, env, vaddr)
+DEF_HELPER_1(setend, void, env)
+DEF_HELPER_2(wfi, void, env, i32)
+DEF_HELPER_1(wfe, void, env)
+DEF_HELPER_2(wfit, void, env, i64)
+DEF_HELPER_1(yield, void, env)
+DEF_HELPER_1(pre_hvc, void, env)
+DEF_HELPER_2(pre_smc, void, env, i32)
+DEF_HELPER_1(vesb, void, env)
+
+DEF_HELPER_3(cpsr_write, void, env, i32, i32)
+DEF_HELPER_2(cpsr_write_eret, void, env, i32)
+DEF_HELPER_1(cpsr_read, i32, env)
+
+DEF_HELPER_3(v7m_msr, void, env, i32, i32)
+DEF_HELPER_2(v7m_mrs, i32, env, i32)
+
+DEF_HELPER_2(v7m_bxns, void, env, i32)
+DEF_HELPER_2(v7m_blxns, void, env, i32)
+
+DEF_HELPER_3(v7m_tt, i32, env, i32, i32)
+
+DEF_HELPER_1(v7m_preserve_fp_state, void, env)
+
+DEF_HELPER_2(v7m_vlstm, void, env, i32)
+DEF_HELPER_2(v7m_vlldm, void, env, i32)
+
+DEF_HELPER_2(v8m_stackcheck, void, env, i32)
+
+DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32)
+
+DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32)
+DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_3(set_cp_reg, void, env, cptr, i32)
+DEF_HELPER_2(get_cp_reg, i32, env, cptr)
+DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64)
+DEF_HELPER_2(get_cp_reg64, i64, env, cptr)
+
+DEF_HELPER_2(get_r13_banked, i32, env, i32)
+DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
+
+DEF_HELPER_3(mrs_banked, i32, env, i32, i32)
+DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
+
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int)
+DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
+
+DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, vaddr, i32, i32, i32)
+
+DEF_HELPER_1(vfp_get_fpscr, i32, env)
+DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
+
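+/*
+ * The fpst argument type is a pointer to the float_status that supplies
+ * the rounding mode and collects the cumulative exception flags.
+ */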
+DEF_HELPER_3(vfp_addh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_adds, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_addd, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_subh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_subs, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_subd, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_mulh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_muls, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_muld, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_divh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_divs, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_divd, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_maxh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_maxs, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_maxd, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_minh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_mins, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_mind, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_maxnums, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_minnumh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_minnums, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_minnumd, f64, f64, f64, fpst)
+DEF_HELPER_2(vfp_sqrth, f16, f16, fpst)
+DEF_HELPER_2(vfp_sqrts, f32, f32, fpst)
+DEF_HELPER_2(vfp_sqrtd, f64, f64, fpst)
+DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
+DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
+DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
+DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env)
+DEF_HELPER_3(vfp_cmpes, void, f32, f32, env)
+DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
+
+DEF_HELPER_2(vfp_fcvtds, f64, f32, fpst)
+DEF_HELPER_2(vfp_fcvtsd, f32, f64, fpst)
+DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, fpst)
+DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, fpst)
+
+DEF_HELPER_2(vfp_uitoh, f16, i32, fpst)
+DEF_HELPER_2(vfp_uitos, f32, i32, fpst)
+DEF_HELPER_2(vfp_uitod, f64, i32, fpst)
+DEF_HELPER_2(vfp_sitoh, f16, i32, fpst)
+DEF_HELPER_2(vfp_sitos, f32, i32, fpst)
+DEF_HELPER_2(vfp_sitod, f64, i32, fpst)
+
+DEF_HELPER_2(vfp_touih, i32, f16, fpst)
+DEF_HELPER_2(vfp_touis, i32, f32, fpst)
+DEF_HELPER_2(vfp_touid, i32, f64, fpst)
+DEF_HELPER_2(vfp_touizh, i32, f16, fpst)
+DEF_HELPER_2(vfp_touizs, i32, f32, fpst)
+DEF_HELPER_2(vfp_touizd, i32, f64, fpst)
+DEF_HELPER_2(vfp_tosih, s32, f16, fpst)
+DEF_HELPER_2(vfp_tosis, s32, f32, fpst)
+DEF_HELPER_2(vfp_tosid, s32, f64, fpst)
+DEF_HELPER_2(vfp_tosizh, s32, f16, fpst)
+DEF_HELPER_2(vfp_tosizs, s32, f32, fpst)
+DEF_HELPER_2(vfp_tosizd, s32, f64, fpst)
+
+DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_touhh, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toshh, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toulh, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_toslh, i32, f16, i32, fpst)
+DEF_HELPER_3(vfp_touqh, i64, f16, i32, fpst)
+DEF_HELPER_3(vfp_tosqh, i64, f16, i32, fpst)
+DEF_HELPER_3(vfp_toshs, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_tosls, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_tosqs, i64, f32, i32, fpst)
+DEF_HELPER_3(vfp_touhs, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_touls, i32, f32, i32, fpst)
+DEF_HELPER_3(vfp_touqs, i64, f32, i32, fpst)
+DEF_HELPER_3(vfp_toshd, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tosld, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tosqd, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_touhd, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_tould, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_touqd, i64, f64, i32, fpst)
+DEF_HELPER_3(vfp_shtos, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_sltos, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_sqtos, f32, i64, i32, fpst)
+DEF_HELPER_3(vfp_uhtos, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_ultos, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_uqtos, f32, i64, i32, fpst)
+DEF_HELPER_3(vfp_shtod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_sltod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_sqtod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_uhtod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_ultod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_uqtod, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_shtoh, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_sltoh, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_ultoh, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, fpst)
+DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, fpst)
+
+DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, fpst)
+DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, fpst)
+DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, fpst)
+DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, fpst)
+
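+/* Set the rounding mode in fpst, returning the previous mode. */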
+DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, fpst)
+
+DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, fpst, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, fpst, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, f16, fpst, i32)
+DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, fpst, i32)
+
+DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, fpst)
+DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, fpst)
+DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst)
+
+DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
+DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
+DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
+DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
+DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
+DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
+DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64)
+
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
+
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, fpst)
+DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, fpst)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, fpst)
+DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, fpst)
+
+DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env)
+DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, fpst)
+
+DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32)
+
+/* neon_helper.c */
+DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
+DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
+DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
+DEF_HELPER_2(neon_pmin_s16, i32, i32, i32)
+DEF_HELPER_2(neon_pmax_u8, i32, i32, i32)
+DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
+DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
+DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
+
+DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
+DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_s16, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_u32, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_s32, i32, i32, i32)
+DEF_HELPER_2(neon_rshl_u64, i64, i64, i64)
+DEF_HELPER_2(neon_rshl_s64, i64, i64, i64)
+DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
+DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
+DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_2(neon_add_u8, i32, i32, i32)
+DEF_HELPER_2(neon_add_u16, i32, i32, i32)
+DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
+DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
+DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
+DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
+
+DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
+DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
+DEF_HELPER_2(neon_tst_u32, i32, i32, i32)
+
+DEF_HELPER_1(neon_clz_u8, i32, i32)
+DEF_HELPER_1(neon_clz_u16, i32, i32)
+DEF_HELPER_1(neon_cls_s8, i32, i32)
+DEF_HELPER_1(neon_cls_s16, i32, i32)
+DEF_HELPER_1(neon_cls_s32, i32, i32)
+DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
+DEF_HELPER_4(neon_qrdmlah_s16, i32, env, i32, i32, i32)
+DEF_HELPER_4(neon_qrdmlsh_s16, i32, env, i32, i32, i32)
+DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32)
+DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
+DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32)
+DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32)
+
+DEF_HELPER_1(neon_narrow_u8, i64, i64)
+DEF_HELPER_1(neon_narrow_u16, i64, i64)
+DEF_HELPER_2(neon_unarrow_sat8, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u8, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s8, i64, env, i64)
+DEF_HELPER_2(neon_unarrow_sat16, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u16, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s16, i64, env, i64)
+DEF_HELPER_2(neon_unarrow_sat32, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u32, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s32, i64, env, i64)
+DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
+DEF_HELPER_1(neon_narrow_high_u16, i32, i64)
+DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64)
+DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64)
+DEF_HELPER_1(neon_widen_u8, i64, i32)
+DEF_HELPER_1(neon_widen_s8, i64, i32)
+DEF_HELPER_1(neon_widen_u16, i64, i32)
+DEF_HELPER_1(neon_widen_s16, i64, i32)
+
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
+DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
+DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
+DEF_HELPER_2(neon_abdl_s16, i64, i32, i32)
+DEF_HELPER_2(neon_abdl_u32, i64, i32, i32)
+DEF_HELPER_2(neon_abdl_s32, i64, i32, i32)
+DEF_HELPER_2(neon_abdl_u64, i64, i32, i32)
+DEF_HELPER_2(neon_abdl_s64, i64, i32, i32)
+DEF_HELPER_2(neon_mull_u8, i64, i32, i32)
+DEF_HELPER_2(neon_mull_s8, i64, i32, i32)
+DEF_HELPER_2(neon_mull_u16, i64, i32, i32)
+DEF_HELPER_2(neon_mull_s16, i64, i32, i32)
+
+DEF_HELPER_1(neon_negl_u16, i64, i64)
+DEF_HELPER_1(neon_negl_u32, i64, i64)
+
+DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, fpst)
+DEF_HELPER_3(neon_cge_f32, i32, i32, i32, fpst)
+DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, fpst)
+DEF_HELPER_3(neon_acge_f32, i32, i32, i32, fpst)
+DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst)
+DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst)
+DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst)
+
+/* iwmmxt_helper.c */
+DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64)
+DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64)
+
+#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \
+DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \
+DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \
+DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \
+
+DEF_IWMMXT_HELPER_SIZE_ENV(unpackl)
+DEF_IWMMXT_HELPER_SIZE_ENV(unpackh)
+
+DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64)
+DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64)
+
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq)
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu)
+DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts)
+
+DEF_IWMMXT_HELPER_SIZE_ENV(mins)
+DEF_IWMMXT_HELPER_SIZE_ENV(minu)
+DEF_IWMMXT_HELPER_SIZE_ENV(maxs)
+DEF_IWMMXT_HELPER_SIZE_ENV(maxu)
+
+DEF_IWMMXT_HELPER_SIZE_ENV(subn)
+DEF_IWMMXT_HELPER_SIZE_ENV(addn)
+DEF_IWMMXT_HELPER_SIZE_ENV(subu)
+DEF_IWMMXT_HELPER_SIZE_ENV(addu)
+DEF_IWMMXT_HELPER_SIZE_ENV(subs)
+DEF_IWMMXT_HELPER_SIZE_ENV(adds)
+
+DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
+
+DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
+DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
+
+DEF_HELPER_1(iwmmxt_bcstb, i64, i32)
+DEF_HELPER_1(iwmmxt_bcstw, i64, i32)
+DEF_HELPER_1(iwmmxt_bcstl, i64, i32)
+
+DEF_HELPER_1(iwmmxt_addcb, i64, i64)
+DEF_HELPER_1(iwmmxt_addcw, i64, i64)
+DEF_HELPER_1(iwmmxt_addcl, i64, i64)
+
+DEF_HELPER_1(iwmmxt_msbb, i32, i64)
+DEF_HELPER_1(iwmmxt_msbw, i32, i64)
+DEF_HELPER_1(iwmmxt_msbl, i32, i64)
+
+DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32)
+DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32)
+
+DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64)
+DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64)
+
+DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32)
+DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32)
+DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32)
+
+DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qunzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qunzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_zip8, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_zip16, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
+
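+/* crypto_helper.c */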
+DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+
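+/* vec_helper.c */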
+DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, fpst)
+DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, fpst)
+DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, fpst)
+
+DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
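A note on the declaration macros above: DEF_HELPER_FLAGS_N(name, flags, ret, arg1..argN) declares a TCG helper taking N arguments, and TCG_CALL_NO_RWG tells the optimizer that the helper never reads or writes TCG globals; the _SE variant additionally marks it side-effect free, so dead calls can be deleted. As a concrete illustration, the crc32 helper declared above boils down to an ordinary C function named helper_crc32; the body below follows the usual zlib-based approach (a sketch, not quoted from this patch):

    #include "qemu/osdep.h"
    #include "qemu/bswap.h"   /* stl_le_p() */
    #include <zlib.h>         /* zlib's crc32() */

    /* DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
     * declares this as helper_crc32 and registers its TCG signature.
     */
    uint32_t helper_crc32(uint32_t acc, uint32_t val, uint32_t bytes)
    {
        uint8_t buf[4];

        stl_le_p(buf, val);   /* feed up to 4 bytes of 'val', little-endian */
        /* zlib's crc32() expects one's-complemented accumulators */
        return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
    }

This is also why the meson.build hunk further down adds zlib to arm_common_ss.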
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index f03977b..1ccec63 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -9,9 +9,13 @@
#include "cpu.h"
#include "internals.h"
#include "cpu-features.h"
-#include "exec/helper-proto.h"
+#include "exec/translation-block.h"
+#include "accel/tcg/cpu-ops.h"
#include "cpregs.h"
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
+
static inline bool fgt_svc(CPUARMState *env, int el)
{
/*
@@ -63,6 +67,15 @@ static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
#endif
}
+bool access_secure_reg(CPUARMState *env)
+{
+ bool ret = (arm_feature(env, ARM_FEATURE_EL3) &&
+ !arm_el_is_aa64(env, 3) &&
+ !(env->cp15.scr_el3 & SCR_NS));
+
+ return ret;
+}
+
static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
ARMMMUIdx mmu_idx,
CPUARMTBFlags flags)
@@ -404,6 +417,19 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
}
+ if (env->vfp.fpcr & FPCR_AH) {
+ DP_TBFLAG_A64(flags, AH, 1);
+ }
+ if (env->vfp.fpcr & FPCR_NEP) {
+ /*
+ * In streaming-SVE without FA64, NEP behaves as if zero;
+ * compare pseudocode IsMerging().
+ */
+ if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) {
+ DP_TBFLAG_A64(flags, NEP, 1);
+ }
+ }
+
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
}
@@ -476,7 +502,7 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx);
}
-void assert_hflags_rebuild_correctly(CPUARMState *env)
+static void assert_hflags_rebuild_correctly(CPUARMState *env)
{
#ifdef CONFIG_DEBUG_TCG
CPUARMTBFlags c = env->hflags;
@@ -484,10 +510,123 @@ void assert_hflags_rebuild_correctly(CPUARMState *env)
if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) {
fprintf(stderr, "TCG hflags mismatch "
- "(current:(0x%08x,0x" TARGET_FMT_lx ")"
- " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n",
+ "(current:(0x%08x,0x%016" PRIx64 ")"
+ " rebuilt:(0x%08x,0x%016" PRIx64 ")\n",
c.flags, c.flags2, r.flags, r.flags2);
abort();
}
#endif
}
+
+static bool mve_no_pred(CPUARMState *env)
+{
+ /*
+ * Return true if there is definitely no predication of MVE
+ * instructions by VPR or LTPSIZE. (Returning false even if there
+ * isn't any predication is OK; generated code will just be
+ * a little worse.)
+ * If the CPU does not implement MVE then this TB flag is always 0.
+ *
+ * NOTE: if you change this logic, the "recalculate s->mve_no_pred"
+ * logic in gen_update_fp_context() needs to be updated to match.
+ *
+ * We do not include the effect of the ECI bits here -- they are
+ * tracked in other TB flags. This simplifies the logic for
+ * "when did we emit code that changes the MVE_NO_PRED TB flag
+ * and thus need to end the TB?".
+ */
+ if (!cpu_isar_feature(aa32_mve, env_archcpu(env))) {
+ return false;
+ }
+ if (env->v7m.vpr) {
+ return false;
+ }
+ if (env->v7m.ltpsize < 4) {
+ return false;
+ }
+ return true;
+}
+
+TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs)
+{
+ CPUARMState *env = cpu_env(cs);
+ CPUARMTBFlags flags;
+ vaddr pc;
+
+ assert_hflags_rebuild_correctly(env);
+ flags = env->hflags;
+
+ if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) {
+ pc = env->pc;
+ if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
+ DP_TBFLAG_A64(flags, BTYPE, env->btype);
+ }
+ } else {
+ pc = env->regs[15];
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+ FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
+ != env->v7m.secure) {
+ DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1);
+ }
+
+ if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
+ (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
+ (env->v7m.secure &&
+ !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+ /*
+ * ASPEN is set, but FPCA/SFPA indicate that there is no
+ * active FP context; we must create a new FP context before
+ * executing any FP insn.
+ */
+ DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1);
+ }
+
+ bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+ DP_TBFLAG_M32(flags, LSPACT, 1);
+ }
+
+ if (mve_no_pred(env)) {
+ DP_TBFLAG_M32(flags, MVE_NO_PRED, 1);
+ }
+ } else {
+ /*
+ * Note that XSCALE_CPAR shares bits with VECSTRIDE.
+ * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
+ */
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar);
+ } else {
+ DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
+ DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
+ }
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+ DP_TBFLAG_A32(flags, VFPEN, 1);
+ }
+ }
+
+ DP_TBFLAG_AM32(flags, THUMB, env->thumb);
+ DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits);
+ }
+
+ /*
+ * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+ * states defined in the ARM ARM for software singlestep:
+ * SS_ACTIVE PSTATE.SS State
+ * 0 x Inactive (the TB flag for SS is always 0)
+ * 1 0 Active-pending
+ * 1 1 Active-not-pending
+ * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB.
+ */
+ if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) {
+ DP_TBFLAG_ANY(flags, PSTATE__SS, 1);
+ }
+
+ return (TCGTBCPUState){
+ .pc = pc,
+ .flags = flags.flags,
+ .cs_base = flags.flags2,
+ };
+}
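The DP_TBFLAG_*/EX_TBFLAG_* calls that dominate arm_get_tb_cpu_state() are deposit/extract operations on the two words of CPUARMTBFlags: a 32-bit word for fields shared between A32 and A64 (plus the A32-only ones) and a 64-bit word for the rest, returned above as the TB's flags and cs_base respectively. A reduced sketch of the pattern with a made-up 2-bit field, since the real field layout lives in cpu.h:

    #include <stdint.h>

    /* Mirrors the shape of CPUARMTBFlags. */
    typedef struct {
        uint32_t flags;    /* ends up in tb->flags */
        uint64_t flags2;   /* ends up in tb->cs_base */
    } TBFlagsSketch;

    /* Hypothetical 2-bit field at bits [5:4] of the first word. */
    enum { DEMO_SHIFT = 4, DEMO_LEN = 2 };

    /* DP_TBFLAG_...: deposit a field value. */
    static inline void dp_demo(TBFlagsSketch *f, uint32_t val)
    {
        uint32_t mask = ((1u << DEMO_LEN) - 1) << DEMO_SHIFT;
        f->flags = (f->flags & ~mask) | ((val << DEMO_SHIFT) & mask);
    }

    /* EX_TBFLAG_...: extract it again at translate time. */
    static inline uint32_t ex_demo(TBFlagsSketch f)
    {
        return (f.flags >> DEMO_SHIFT) & ((1u << DEMO_LEN) - 1);
    }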
diff --git a/target/arm/tcg/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c
index 610b1b2..ba054b6 100644
--- a/target/arm/tcg/iwmmxt_helper.c
+++ b/target/arm/tcg/iwmmxt_helper.c
@@ -22,7 +22,9 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/helper-proto.h"
+
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
/* iwMMXt macros extracted from GNU gdb. */
diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
index 23d7f73..6614719 100644
--- a/target/arm/tcg/m_helper.c
+++ b/target/arm/tcg/m_helper.c
@@ -15,10 +15,9 @@
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "qemu/log.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
#ifdef CONFIG_TCG
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "semihosting/common-semi.h"
#endif
#if !defined(CONFIG_USER_ONLY)
@@ -222,7 +221,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
int exc;
bool exc_secure;
- if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &res, &fi)) {
+ if (get_phys_addr(env, addr, MMU_DATA_STORE, 0, mmu_idx, &res, &fi)) {
/* MPU/SAU lookup failed */
if (fi.type == ARMFault_QEMU_SFault) {
if (mode == STACK_LAZYFP) {
@@ -311,7 +310,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
bool exc_secure;
uint32_t value;
- if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) {
+ if (get_phys_addr(env, addr, MMU_DATA_LOAD, 0, mmu_idx, &res, &fi)) {
/* MPU/SAU lookup failed */
if (fi.type == ARMFault_QEMU_SFault) {
qemu_log_mask(CPU_LOG_INT,
@@ -2009,7 +2008,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool secure,
"...really SecureFault with SFSR.INVEP\n");
return false;
}
- if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &res, &fi)) {
+ if (get_phys_addr(env, addr, MMU_INST_FETCH, 0, mmu_idx, &res, &fi)) {
/* the MPU lookup failed */
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure);
@@ -2045,7 +2044,7 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx,
ARMMMUFaultInfo fi = {};
uint32_t value;
- if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) {
+ if (get_phys_addr(env, addr, MMU_DATA_LOAD, 0, mmu_idx, &res, &fi)) {
/* MPU/SAU lookup failed */
if (fi.type == ARMFault_QEMU_SFault) {
qemu_log_mask(CPU_LOG_INT,
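All four m_helper.c hunks make the same mechanical change: get_phys_addr() has grown a MemOp parameter between the access type and the MMU index, and these M-profile callers pass 0 because they have no access size or alignment to communicate. The prototype these call sites assume looks roughly as follows (a sketch; the authoritative declaration lives in internals.h):

    /* Passing 0 for 'memop' keeps the old behaviour: the lookup is
     * performed without any access-size/alignment hint.
     */
    bool get_phys_addr(CPUARMState *env, vaddr address,
                       MMUAccessType access_type, MemOp memop,
                       ARMMMUIdx mmu_idx,
                       GetPhysAddrResult *result, ARMMMUFaultInfo *fi);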
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 508932a..c59f0f0 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -30,14 +30,9 @@ arm_ss.add(files(
'translate-mve.c',
'translate-neon.c',
'translate-vfp.c',
- 'crypto_helper.c',
- 'hflags.c',
- 'iwmmxt_helper.c',
'm_helper.c',
'mve_helper.c',
- 'neon_helper.c',
'op_helper.c',
- 'tlb_helper.c',
'vec_helper.c',
))
@@ -60,3 +55,26 @@ arm_system_ss.add(files(
arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c'))
+
+arm_common_ss.add(zlib)
+
+arm_common_ss.add(files(
+ 'arith_helper.c',
+ 'crypto_helper.c',
+))
+
+arm_common_system_ss.add(files(
+ 'hflags.c',
+ 'iwmmxt_helper.c',
+ 'neon_helper.c',
+ 'tlb_helper.c',
+ 'tlb-insns.c',
+ 'vfp_helper.c',
+))
+arm_user_ss.add(files(
+ 'hflags.c',
+ 'iwmmxt_helper.c',
+ 'neon_helper.c',
+ 'tlb_helper.c',
+ 'vfp_helper.c',
+))
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index 037ac6d..0efc18a 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -21,15 +21,21 @@
#include "qemu/log.h"
#include "cpu.h"
#include "internals.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
-#include "exec/ram_addr.h"
-#include "exec/cpu_ldst.h"
+#ifdef CONFIG_USER_ONLY
+#include "user/cpu_loop.h"
+#include "user/page-protection.h"
+#else
+#include "system/ram_addr.h"
+#endif
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "exec/helper-proto.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "exec/tlb-flags.h"
+#include "accel/tcg/cpu-ops.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
-
+#include "mte_helper.h"
static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
{
@@ -50,44 +56,13 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
return tag;
}
-/**
- * allocation_tag_mem_probe:
- * @env: the cpu environment
- * @ptr_mmu_idx: the addressing regime to use for the virtual address
- * @ptr: the virtual address for which to look up tag memory
- * @ptr_access: the access to use for the virtual address
- * @ptr_size: the number of bytes in the normal memory access
- * @tag_access: the access to use for the tag memory
- * @probe: true to merely probe, never taking an exception
- * @ra: the return address for exception handling
- *
- * Our tag memory is formatted as a sequence of little-endian nibbles.
- * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
- * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
- * for the higher addr.
- *
- * Here, resolve the physical address from the virtual address, and return
- * a pointer to the corresponding tag byte.
- *
- * If there is no tag storage corresponding to @ptr, return NULL.
- *
- * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
- * three options:
- * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
- * accessible, and do not take watchpoint traps. The calling code must
- * handle those cases in the right priority compared to MTE traps.
- * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
- * that the page is going to be accessible. We will take watchpoint traps.
- * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
- * traps and watchpoint traps.
- * (probe = true, ra != 0 is invalid and will assert.)
- */
-static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
- uint64_t ptr, MMUAccessType ptr_access,
- int ptr_size, MMUAccessType tag_access,
- bool probe, uintptr_t ra)
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+ uint64_t ptr, MMUAccessType ptr_access,
+ int ptr_size, MMUAccessType tag_access,
+ bool probe, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
+ const size_t page_data_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1);
uint64_t clean_ptr = useronly_clean_ptr(ptr);
int flags = page_get_flags(clean_ptr);
uint8_t *tags;
@@ -96,6 +71,9 @@ static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
assert(!(probe && ra));
if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
+ if (probe) {
+ return NULL;
+ }
cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
!(flags & PAGE_VALID), ra);
}
@@ -105,7 +83,7 @@ static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
return NULL;
}
- tags = page_get_target_data(clean_ptr);
+ tags = page_get_target_data(clean_ptr, page_data_size);
index = extract32(ptr, LOG2_TAG_GRANULE + 1,
TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
@@ -284,7 +262,7 @@ uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
return address_with_allocation_tag(ptr + offset, rtag);
}
-static int load_tag1(uint64_t ptr, uint8_t *mem)
+int load_tag1(uint64_t ptr, uint8_t *mem)
{
int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
return extract32(*mem, ofs, 4);
@@ -318,7 +296,7 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
}
/* For use in a non-parallel context, store to the given nibble. */
-static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
{
int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
*mem = deposit32(*mem, ofs, 4, tag);
diff --git a/target/arm/tcg/mte_helper.h b/target/arm/tcg/mte_helper.h
new file mode 100644
index 0000000..1f471fb
--- /dev/null
+++ b/target/arm/tcg/mte_helper.h
@@ -0,0 +1,66 @@
+/*
+ * ARM MemTag operation helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TARGET_ARM_MTE_H
+#define TARGET_ARM_MTE_H
+
+#include "exec/mmu-access-type.h"
+
+/**
+ * allocation_tag_mem_probe:
+ * @env: the cpu environment
+ * @ptr_mmu_idx: the addressing regime to use for the virtual address
+ * @ptr: the virtual address for which to look up tag memory
+ * @ptr_access: the access to use for the virtual address
+ * @ptr_size: the number of bytes in the normal memory access
+ * @tag_access: the access to use for the tag memory
+ * @probe: true to merely probe, never taking an exception
+ * @ra: the return address for exception handling
+ *
+ * Our tag memory is formatted as a sequence of little-endian nibbles.
+ * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
+ * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
+ * for the higher addr.
+ *
+ * Here, resolve the physical address from the virtual address, and return
+ * a pointer to the corresponding tag byte.
+ *
+ * If there is no tag storage corresponding to @ptr, return NULL.
+ *
+ * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
+ * three options:
+ * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
+ * accessible, and do not take watchpoint traps. The calling code must
+ * handle those cases in the right priority compared to MTE traps.
+ * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
+ * that the page is going to be accessible. We will take watchpoint traps.
+ * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
+ * traps and watchpoint traps.
+ * (probe = true, ra != 0 is invalid and will assert.)
+ */
+uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+ uint64_t ptr, MMUAccessType ptr_access,
+ int ptr_size, MMUAccessType tag_access,
+ bool probe, uintptr_t ra);
+
+/**
+ * load_tag1 - Load 1 tag (nibble) from byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ */
+int load_tag1(uint64_t ptr, uint8_t *mem);
+
+/**
+ * store_tag1 - Store 1 tag (nibble) into byte
+ * @ptr: The tagged address
+ * @mem: The tag address (packed, 2 tags in byte)
+ * @tag: The tag to be stored in the nibble
+ */
+void store_tag1(uint64_t ptr, uint8_t *mem, int tag);
+
+#endif /* TARGET_ARM_MTE_H */
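The nibble packing described in the allocation_tag_mem_probe() comment is easiest to see with a worked example. Below is a self-contained sketch of the load_tag1()/store_tag1() arithmetic, using LOG2_TAG_GRANULE = 4 (one 4-bit tag per 16-byte granule); the sketch_* names are local to this illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_TAG_GRANULE 4

    /* Bit LOG2_TAG_GRANULE of the address picks the low or high nibble
     * of the shared tag byte, matching extract32(ptr, LOG2_TAG_GRANULE, 1).
     */
    static int sketch_load_tag1(uint64_t ptr, const uint8_t *mem)
    {
        int ofs = ((ptr >> LOG2_TAG_GRANULE) & 1) * 4;
        return (*mem >> ofs) & 0xf;
    }

    static void sketch_store_tag1(uint64_t ptr, uint8_t *mem, int tag)
    {
        int ofs = ((ptr >> LOG2_TAG_GRANULE) & 1) * 4;
        *mem = (*mem & ~(0xf << ofs)) | ((tag & 0xf) << ofs);
    }

    int main(void)
    {
        uint8_t tag_byte = 0;

        /* 0x1000..0x100f map to bits [3:0]; 0x1010..0x101f to [7:4]. */
        sketch_store_tag1(0x1000, &tag_byte, 0x3);
        sketch_store_tag1(0x1010, &tag_byte, 0xa);
        printf("tag byte = 0x%02x\n", tag_byte);   /* prints 0xa3 */
        printf("tag(0x1000) = %d\n", sketch_load_tag1(0x1000, &tag_byte));
        return 0;
    }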
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 8b99736..506d1c3 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -22,8 +22,7 @@
#include "internals.h"
#include "vec_internal.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/tcg.h"
#include "fpu/softfloat.h"
#include "crypto/clmul.h"
@@ -1115,21 +1114,21 @@ static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m,
if (update_flags) {
/* Store C, clear NZV. */
- env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
- env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
+ env->vfp.fpsr &= ~FPSR_NZCV_MASK;
+ env->vfp.fpsr |= carry_in * FPSR_C;
}
mve_advance_vpt(env);
}
void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
- bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ bool carry_in = env->vfp.fpsr & FPSR_C;
do_vadc(env, vd, vn, vm, 0, carry_in, false);
}
void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
- bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ bool carry_in = env->vfp.fpsr & FPSR_C;
do_vadc(env, vd, vn, vm, -1, carry_in, false);
}
@@ -2814,8 +2813,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2888,8 +2886,7 @@ DO_2OP_FP_ALL(vminnma, minnuma)
r[e] = 0; \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(tm & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2926,8 +2923,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2964,8 +2960,7 @@ DO_VFMA(vfmss, 4, float32, true)
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
continue; \
} \
- fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
fpst1 = fpst0; \
if (!(mask & 1)) { \
scratch_fpst = *fpst0; \
@@ -3049,8 +3044,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3084,8 +3078,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3116,9 +3109,8 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
unsigned e; \
TYPE *m = vm; \
TYPE ra = (TYPE)ra_in; \
- float_status *fpst = (ESIZE == 2) ? \
- &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ float_status *fpst = \
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
if (mask & 1) { \
TYPE v = m[H##ESIZE(e)]; \
@@ -3168,8 +3160,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3202,8 +3193,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3267,8 +3257,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3300,9 +3289,8 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
unsigned e; \
float_status *fpst; \
float_status scratch_fpst; \
- float_status *base_fpst = (ESIZE == 2) ? \
- &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ float_status *base_fpst = \
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
set_float_rounding_mode(rmode, base_fpst); \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
@@ -3343,11 +3331,11 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
uint32_t *m = vm;
uint16_t r;
uint16_t mask = mve_element_mask(env);
- bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ bool ieee = !(env->vfp.fpcr & FPCR_AHP);
unsigned e;
float_status *fpst;
float_status scratch_fpst;
- float_status *base_fpst = &env->vfp.standard_fp_status;
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
bool old_fz = get_flush_to_zero(base_fpst);
set_flush_to_zero(false, base_fpst);
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
@@ -3373,11 +3361,11 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
uint16_t *m = vm;
uint32_t r;
uint16_t mask = mve_element_mask(env);
- bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ bool ieee = !(env->vfp.fpcr & FPCR_AHP);
unsigned e;
float_status *fpst;
float_status scratch_fpst;
- float_status *base_fpst = &env->vfp.standard_fp_status;
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
set_flush_inputs_to_zero(false, base_fpst);
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
@@ -3427,8 +3415,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
- &env->vfp.standard_fp_status; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
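The change running through every mve_helper.c hunk above is the same: the two separately named float_status fields for the Neon/MVE "standard FPSCR" behaviour become entries in one indexed fp_status[] array, so each macro selects a status word by element size instead of by field name. A reduced sketch of the pattern (only the two enumerators used here are shown; the full enumeration lives in cpu.h):

    #include "fpu/softfloat-types.h"   /* float_status */

    enum {
        FPST_STD,        /* standard-FPSCR behaviour, fp32 and wider    */
        FPST_STD_F16,    /* standard-FPSCR behaviour for half precision */
        FPST_SKETCH_COUNT,
    };

    struct VFPSketch {
        float_status fp_status[FPST_SKETCH_COUNT];
    };

    /* Old: fpst = (ESIZE == 2) ? &vfp->standard_fp_status_f16
     *                          : &vfp->standard_fp_status;
     * New: index the array instead.
     */
    static inline float_status *mve_fpst_sketch(struct VFPSketch *vfp,
                                                int esize)
    {
        return &vfp->fp_status[esize == 2 ? FPST_STD_F16 : FPST_STD];
    }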
diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode
index 788578c..e883c6a 100644
--- a/target/arm/tcg/neon-dp.decode
+++ b/target/arm/tcg/neon-dp.decode
@@ -291,17 +291,17 @@ VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
-VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
+VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b
-VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
-VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index 082bfd8..2cc8241 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -9,11 +9,13 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "fpu/softfloat.h"
#include "vec_internal.h"
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
+
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
@@ -130,17 +132,28 @@ void HELPER(name)(void *vd, void *vn, void *vm, uint32_t desc) \
}
#define NEON_GVEC_VOP2_ENV(name, vtype) \
-void HELPER(name)(void *vd, void *vn, void *vm, void *venv, uint32_t desc) \
+void HELPER(name)(void *vd, void *vn, void *vm, CPUARMState *env, uint32_t desc) \
{ \
intptr_t i, opr_sz = simd_oprsz(desc); \
vtype *d = vd, *n = vn, *m = vm; \
- CPUARMState *env = venv; \
for (i = 0; i < opr_sz / sizeof(vtype); i++) { \
NEON_FN(d[i], n[i], m[i]); \
} \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
+#define NEON_GVEC_VOP2i_ENV(name, vtype) \
+void HELPER(name)(void *vd, void *vn, CPUARMState *env, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int imm = simd_data(desc); \
+ vtype *d = vd, *n = vn; \
+ for (i = 0; i < opr_sz / sizeof(vtype); i++) { \
+ NEON_FN(d[i], n[i], imm); \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
/* Pairwise operations. */
/* For 32-bit elements each segment only contains a single element, so
the elementwise and pairwise operations are the same. */
@@ -271,22 +284,26 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift)
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_b, uint8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_h, uint16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_s, uint32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_d, uint64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -303,22 +320,26 @@ uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_h, int16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_s, int32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_d, int64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -334,11 +355,13 @@ uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s8, neon_s8, 4)
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s16, neon_s16, 2)
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_h, int16_t)
#undef NEON_FN
uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -352,6 +375,16 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
}
#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_s, int32_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_d, int64_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t)
@@ -492,27 +525,6 @@ uint32_t HELPER(neon_cls_s32)(uint32_t x)
return count - 1;
}
-/* Bit count. */
-uint32_t HELPER(neon_cnt_u8)(uint32_t x)
-{
- x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f);
- return x;
-}
-
-/* Reverse bits in each 8 bit word */
-uint32_t HELPER(neon_rbit_u8)(uint32_t x)
-{
- x = ((x & 0xf0f0f0f0) >> 4)
- | ((x & 0x0f0f0f0f) << 4);
- x = ((x & 0x88888888) >> 3)
- | ((x & 0x44444444) >> 1)
- | ((x & 0x22222222) << 1)
- | ((x & 0x11111111) << 3);
- return x;
-}
-
#define NEON_QDMULH16(dest, src1, src2, round) do { \
uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
@@ -565,13 +577,15 @@ NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
#undef NEON_FN
#undef NEON_QDMULH32
-uint32_t HELPER(neon_narrow_u8)(uint64_t x)
+/* Only the low 32 bits of the output are significant. */
+uint64_t HELPER(neon_narrow_u8)(uint64_t x)
{
return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
| ((x >> 24) & 0xff000000u);
}
-uint32_t HELPER(neon_narrow_u16)(uint64_t x)
+/* Only the low 32 bits of the output are significant. */
+uint64_t HELPER(neon_narrow_u16)(uint64_t x)
{
return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
}
@@ -602,7 +616,8 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
}
-uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
+/* Only the low 32 bits of the output are significant. */
+uint64_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
{
uint16_t s;
uint8_t d;
@@ -629,7 +644,8 @@ uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
return res;
}
-uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
+/* Only the low 32 bits of the output are significant. */
+uint64_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
{
uint16_t s;
uint8_t d;
@@ -652,7 +668,8 @@ uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
return res;
}
-uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
{
int16_t s;
uint8_t d;
@@ -675,7 +692,8 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
return res;
}
-uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
{
uint32_t high;
uint32_t low;
@@ -695,10 +713,11 @@ uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
high = 0xffff;
SET_QC();
}
- return low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
-uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
{
uint32_t high;
uint32_t low;
@@ -712,10 +731,11 @@ uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
high = 0xffff;
SET_QC();
}
- return low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
-uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
{
int32_t low;
int32_t high;
@@ -729,10 +749,11 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
high = (high >> 31) ^ 0x7fff;
SET_QC();
}
- return (uint16_t)low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
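The deposit32() calls that replace the open-coded OR-and-shift come from QEMU's bitops.h. One detail worth noting: deposit32() masks both the base value and the field value to the field width, which is why the explicit (uint16_t) cast the signed s16 variant above used to need can go away. A minimal stand-alone model, assuming only the documented semantics:

    #include <stdint.h>

    /* Minimal model of qemu/bitops.h deposit32(); the real helper also
     * asserts 0 <= start and 0 < length <= 32 - start. */
    static uint32_t deposit32_model(uint32_t value, int start, int length,
                                    uint32_t fieldval)
    {
        uint32_t mask = (~0U >> (32 - length)) << start;
        return (value & ~mask) | ((fieldval << start) & mask);
    }

    /* deposit32_model(0xffff1234, 16, 16, 0xabcd5678) == 0x56781234:
     * both the high half of 'value' and the high half of 'fieldval'
     * are discarded by the mask. */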
-uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
{
if (x & 0x8000000000000000ull) {
SET_QC();
@@ -745,7 +766,8 @@ uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
return x;
}
-uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
{
if (x > 0xffffffffu) {
SET_QC();
@@ -754,13 +776,14 @@ uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
return x;
}
-uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
{
if ((int64_t)x != (int32_t)x) {
SET_QC();
- return ((int64_t)x >> 63) ^ 0x7fffffff;
+ return (uint32_t)((int64_t)x >> 63) ^ 0x7fffffff;
}
- return x;
+ return (uint32_t)x;
}
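The (uint32_t)((int64_t)x >> 63) ^ 0x7fffffff idiom above is the usual branch-free way to pick the saturation bound: the arithmetic shift smears the sign bit into 0 or -1, and the XOR turns that into INT32_MAX or INT32_MIN. A self-contained sketch (relying, as QEMU does, on arithmetic right shift of signed values):

    #include <assert.h>
    #include <stdint.h>

    /* Branch-free signed saturation to 32 bits, mirroring
     * neon_narrow_sat_s32 above. */
    static uint32_t sat_s32(int64_t x)
    {
        if (x != (int32_t)x) {
            /* x >> 63 is 0 for positive overflow, -1 for negative;
             * XOR with 0x7fffffff yields INT32_MAX or INT32_MIN. */
            return (uint32_t)(x >> 63) ^ 0x7fffffff;
        }
        return (uint32_t)x;
    }

    int main(void)
    {
        assert(sat_s32(INT64_MAX) == 0x7fffffffu);  /* saturates high */
        assert(sat_s32(INT64_MIN) == 0x80000000u);  /* saturates low */
        assert(sat_s32(-5) == 0xfffffffbu);         /* in range */
        return 0;
    }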
uint64_t HELPER(neon_widen_u8)(uint32_t x)
@@ -803,62 +826,47 @@ uint64_t HELPER(neon_widen_s16)(uint32_t x)
return ((uint32_t)(int16_t)x) | (high << 32);
}
-uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
-{
- uint64_t mask;
- mask = (a ^ b) & 0x8000800080008000ull;
- a &= ~0x8000800080008000ull;
- b &= ~0x8000800080008000ull;
- return (a + b) ^ mask;
-}
-
-uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
-{
- uint64_t mask;
- mask = (a ^ b) & 0x8000000080000000ull;
- a &= ~0x8000000080000000ull;
- b &= ~0x8000000080000000ull;
- return (a + b) ^ mask;
-}
-
-uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
-{
- uint64_t tmp;
- uint64_t tmp2;
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (e.g. _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract the even-numbered elements, sign-extending each to a 16-bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the odd-numbered elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* Calculate the result by summing bits 0..14, 16..30, etc,
+ * and then adjusting the sign bits 15, 31, etc manually.
+ * This ensures the addition can't overflow the 16-bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
- tmp = a & 0x0000ffff0000ffffull;
- tmp += (a >> 16) & 0x0000ffff0000ffffull;
- tmp2 = b & 0xffff0000ffff0000ull;
- tmp2 += (b << 16) & 0xffff0000ffff0000ull;
- return ( tmp & 0xffff)
- | ((tmp >> 16) & 0xffff0000ull)
- | ((tmp2 << 16) & 0xffff00000000ull)
- | ( tmp2 & 0xffff000000000000ull);
+ return res;
}
-uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
- uint32_t low = a + (a >> 32);
- uint32_t high = b + (b >> 32);
- return low + ((uint64_t)high << 32);
-}
+ int32_t reslo, reshi;
-uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
-{
- uint64_t mask;
- mask = (a ^ ~b) & 0x8000800080008000ull;
- a |= 0x8000800080008000ull;
- b &= ~0x8000800080008000ull;
- return (a - b) ^ mask;
-}
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
-{
- uint64_t mask;
- mask = (a ^ ~b) & 0x8000000080000000ull;
- a |= 0x8000000080000000ull;
- b &= ~0x8000000080000000ull;
- return (a - b) ^ mask;
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
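The mask arithmetic in neon_addlp_s8 sign-extends all eight byte lanes to 16 bits at once: (x ^ 0x80) - 0x80 sign-extends a byte within a wider field, and pre-setting each lane's top bit keeps the per-lane subtraction from borrowing across lanes. A stand-alone check of the trick against a per-lane reference:

    #include <assert.h>
    #include <stdint.h>

    /* Reference: sign-extend each byte into a 16-bit lane, then add
     * adjacent pairs one lane at a time. */
    static uint64_t addlp_s8_ref(uint64_t a)
    {
        uint64_t r = 0;
        for (int i = 0; i < 4; i++) {
            int8_t lo = (int8_t)(a >> (16 * i));
            int8_t hi = (int8_t)(a >> (16 * i + 8));
            r |= (uint64_t)(uint16_t)(lo + hi) << (16 * i);
        }
        return r;
    }

    /* The masked version from the hunk above, reproduced verbatim. */
    static uint64_t addlp_s8_trick(uint64_t a)
    {
        uint64_t nsignmask = 0x0080008000800080ULL;
        uint64_t wsignmask = 0x8000800080008000ULL;
        uint64_t elementmask = 0x00ff00ff00ff00ffULL;
        uint64_t tmp1, tmp2, res, signres;

        tmp1 = a & elementmask;
        tmp1 ^= nsignmask;
        tmp1 |= wsignmask;
        tmp1 = (tmp1 - nsignmask) ^ wsignmask;

        tmp2 = (a >> 8) & elementmask;
        tmp2 ^= nsignmask;
        tmp2 |= wsignmask;
        tmp2 = (tmp2 - nsignmask) ^ wsignmask;

        /* Sum bits 0..14 of each lane, then patch the sign bits, so
         * carries can never cross a lane boundary. */
        signres = (tmp1 ^ tmp2) & wsignmask;
        res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
        return res ^ signres;
    }

    int main(void)
    {
        uint64_t tests[] = { 0, 0x8080808080808080ULL,
                             0x7f7f7f7f7f7f7f7fULL, 0x0123456789abcdefULL };
        for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
            assert(addlp_s8_trick(tests[i]) == addlp_s8_ref(tests[i]));
        }
        return 0;
    }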
uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b)
@@ -1172,51 +1180,44 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
* Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
*/
-uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
}
-uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float32_le(make_float32(b), make_float32(a), fpst);
}
-uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
return -float32_lt(make_float32(b), make_float32(a), fpst);
}
-uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float32 f0 = float32_abs(make_float32(a));
float32 f1 = float32_abs(make_float32(b));
return -float32_le(f1, f0, fpst);
}
-uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float32 f0 = float32_abs(make_float32(a));
float32 f1 = float32_abs(make_float32(b));
return -float32_lt(f1, f0, fpst);
}
-uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp)
+uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float64 f0 = float64_abs(make_float64(a));
float64 f1 = float64_abs(make_float64(b));
return -float64_le(f1, f0, fpst);
}
-uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp)
+uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, float_status *fpst)
{
- float_status *fpst = fpstp;
float64 f0 = float64_abs(make_float64(a));
float64 f1 = float64_abs(make_float64(b));
return -float64_lt(f1, f0, fpst);
diff --git a/target/arm/op_addsub.h b/target/arm/tcg/op_addsub.c.inc
index ca4a189..ca4a189 100644
--- a/target/arm/op_addsub.h
+++ b/target/arm/tcg/op_addsub.c.inc
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index c083e5c..575e566 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -20,10 +20,11 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
+#include "exec/target_page.h"
#include "internals.h"
#include "cpu-features.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "cpregs.h"
#define SIGNBIT (uint32_t)0x80000000
@@ -313,15 +314,19 @@ void HELPER(check_bxj_trap)(CPUARMState *env, uint32_t rm)
}
#ifndef CONFIG_USER_ONLY
-/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
+/*
+ * Check whether WFx (WFI/WFE) instructions are set up to be trapped.
* The function returns the target EL (1-3) if the instruction is to be trapped;
* otherwise it returns 0 indicating it is not trapped.
+ * For a trap, *excp is updated with the EXCP_* trap type to use.
*/
-static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
+static inline int check_wfx_trap(CPUARMState *env, bool is_wfe, uint32_t *excp)
{
int cur_el = arm_current_el(env);
uint64_t mask;
+ *excp = EXCP_UDEF;
+
if (arm_feature(env, ARM_FEATURE_M)) {
/* M profile cores can never trap WFI/WFE. */
return 0;
@@ -331,18 +336,9 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
* WFx instructions being trapped to EL1. These trap bits don't exist in v7.
*/
if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) {
- int target_el;
-
mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI;
- if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) {
- /* Secure EL0 and Secure PL1 is at EL3 */
- target_el = 3;
- } else {
- target_el = 1;
- }
-
- if (!(env->cp15.sctlr_el[target_el] & mask)) {
- return target_el;
+ if (!(arm_sctlr(env, cur_el) & mask)) {
+ return exception_target_el(env);
}
}
@@ -358,9 +354,12 @@ static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
}
/* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */
- if (cur_el < 3) {
+ if (arm_feature(env, ARM_FEATURE_V8) && !arm_is_el3_or_mon(env)) {
mask = (is_wfe) ? SCR_TWE : SCR_TWI;
if (env->cp15.scr_el3 & mask) {
+ if (!arm_el_is_aa64(env, 3)) {
+ *excp = EXCP_MON_TRAP;
+ }
return 3;
}
}
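Taken together, the WFx trap logic now checks, in priority order: the SCTLR nTWI/nTWE controls for the current translation regime, the HCR_EL2 TWI/TWE bits, and finally SCR_EL3 TWI/TWE, with an AArch32 EL3 turning the last case into a Monitor trap. A deliberately flattened sketch of that decision; the state bits here are hypothetical stand-ins for what the real function reads out of env:

    #include <stdbool.h>

    /* Hypothetical, flattened inputs standing in for the CPU state
     * that check_wfx_trap() consults. */
    typedef struct {
        int cur_el;
        bool sctlr_ntw;   /* SCTLR_ELx.nTWI/nTWE for the current regime */
        bool hcr_tw;      /* HCR_EL2.TWI/TWE, if EL2 is enabled */
        bool scr_tw;      /* SCR_EL3.TWI/TWE, if EL3 is implemented */
        bool el3_aa64;    /* EL3 executing in AArch64? */
    } wfx_state;

    /* Returns the target EL (0 = no trap); *mon_trap is set when an
     * AArch32 EL3 means the trap must be delivered as a Monitor trap
     * rather than via an AArch64 syndrome register. */
    static int wfx_trap_el(const wfx_state *s, bool *mon_trap)
    {
        *mon_trap = false;
        if (s->cur_el < 1 && !s->sctlr_ntw) {
            return 1;   /* really exception_target_el(env) */
        }
        if (s->cur_el < 2 && s->hcr_tw) {
            return 2;
        }
        if (s->cur_el < 3 && s->scr_tw) {
            *mon_trap = !s->el3_aa64;
            return 3;
        }
        return 0;
    }

The real function additionally gates each check on the relevant feature bits, as the hunks show: v8 for the SCTLR and SCR checks, and EL2 being enabled for the HCR check.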
@@ -383,7 +382,8 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
return;
#else
CPUState *cs = env_cpu(env);
- int target_el = check_wfx_trap(env, false);
+ uint32_t excp;
+ int target_el = check_wfx_trap(env, false, &excp);
if (cpu_has_work(cs)) {
/* Don't bother to go into our "low power state" if
@@ -399,7 +399,7 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
env->regs[15] -= insn_len;
}
- raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, insn_len == 2),
+ raise_exception(env, excp, syn_wfx(1, 0xe, 0, insn_len == 2),
target_el);
}
@@ -424,10 +424,17 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout)
#else
ARMCPU *cpu = env_archcpu(env);
CPUState *cs = env_cpu(env);
- int target_el = check_wfx_trap(env, false);
+ uint32_t excp;
+ int target_el = check_wfx_trap(env, false, &excp);
/* The WFIT should time out when CNTVCT_EL0 >= the specified value. */
uint64_t cntval = gt_get_countervalue(env);
- uint64_t offset = gt_virt_cnt_offset(env);
+ /*
+ * We want the value that we would get if we read CNTVCT_EL0 from
+ * the current exception level, so the direct_access offset, not
+ * the indirect_access one. Compare the pseudocode LocalTimeoutEvent(),
+ * which calls VirtualCounterTimer().
+ */
+ uint64_t offset = gt_direct_access_timer_offset(env, GTIMER_VIRT);
uint64_t cntvct = cntval - offset;
uint64_t nexttick;
@@ -441,8 +448,7 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout)
if (target_el) {
env->pc -= 4;
- raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, false),
- target_el);
+ raise_exception(env, excp, syn_wfx(1, 0xe, 0, false), target_el);
}
if (uadd64_overflow(timeout, offset, &nexttick)) {
@@ -758,12 +764,13 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key);
CPAccessResult res = CP_ACCESS_OK;
int target_el;
+ uint32_t excp;
assert(ri != NULL);
if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14
&& extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) {
- res = CP_ACCESS_TRAP;
+ res = CP_ACCESS_UNDEFINED;
goto fail;
}
@@ -780,7 +787,7 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
* the other trap takes priority. So we take the "check HSTR_EL2" path
* for all of those cases.)
*/
- if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) == 0) &&
+ if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) < 2) &&
arm_current_el(env) == 0) {
goto fail;
}
@@ -817,6 +824,7 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
unsigned int idx = FIELD_EX32(ri->fgt, FGT, IDX);
unsigned int bitpos = FIELD_EX32(ri->fgt, FGT, BITPOS);
bool rev = FIELD_EX32(ri->fgt, FGT, REV);
+ bool nxs = FIELD_EX32(ri->fgt, FGT, NXS);
bool trapbit;
if (ri->fgt & FGT_EXEC) {
@@ -830,7 +838,15 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
trapword = env->cp15.fgt_write[idx];
}
- trapbit = extract64(trapword, bitpos, 1);
+ if (nxs && (arm_hcrx_el2_eff(env) & HCRX_FGTNXS)) {
+ /*
+ * If HCRX_EL2.FGTnXS is 1 then the fine-grained trap for
+ * TLBI maintenance insns does *not* apply to the nXS variant.
+ */
+ trapbit = 0;
+ } else {
+ trapbit = extract64(trapword, bitpos, 1);
+ }
if (trapbit != rev) {
res = CP_ACCESS_TRAP_EL2;
goto fail;
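In other words, HCRX_EL2.FGTnXS suppresses the fine-grained trap only for the nXS flavour of a TLBI; otherwise the trap bit keeps its usual (possibly inverted) sense. The test reduces to something like:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of the fine-grained-trap test above. 'trapword' is the
     * selected fine-grained trap register value, 'rev' marks bits
     * whose sense is inverted, and 'nxs_insn' is true for the nXS
     * variant of a TLBI maintenance instruction. */
    static bool fgt_traps_to_el2(uint64_t trapword, unsigned bitpos,
                                 bool rev, bool nxs_insn, bool fgtnxs)
    {
        bool trapbit;

        if (nxs_insn && fgtnxs) {
            /* HCRX_EL2.FGTnXS disables the FGT for nXS variants. */
            trapbit = false;
        } else {
            trapbit = (trapword >> bitpos) & 1;
        }
        return trapbit != rev;
    }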
@@ -842,12 +858,25 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
}
fail:
- switch (res & ~CP_ACCESS_EL_MASK) {
- case CP_ACCESS_TRAP:
+ excp = EXCP_UDEF;
+ switch (res) {
+ /* CP_ACCESS_TRAP* traps are always direct to a specified EL */
+ case CP_ACCESS_TRAP_EL3:
+ /*
+ * If EL3 is AArch32 then there's no syndrome register; the cases
+ * where we would raise a SystemAccessTrap to AArch64 EL3 all become
+ * raising a Monitor trap exception. (Because there's no visible
+ * syndrome it doesn't matter what we pass to raise_exception().)
+ */
+ if (!arm_el_is_aa64(env, 3)) {
+ excp = EXCP_MON_TRAP;
+ }
break;
- case CP_ACCESS_TRAP_UNCATEGORIZED:
- /* Only CP_ACCESS_TRAP traps are direct to a specified EL */
- assert((res & CP_ACCESS_EL_MASK) == 0);
+ case CP_ACCESS_TRAP_EL2:
+ case CP_ACCESS_TRAP_EL1:
+ break;
+ case CP_ACCESS_UNDEFINED:
+ /* CP_ACCESS_UNDEFINED is never direct to a specified EL */
if (cpu_isar_feature(aa64_ids, cpu) && isread &&
arm_cpreg_in_idspace(ri)) {
/*
@@ -867,6 +896,9 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
case 0:
target_el = exception_target_el(env);
break;
+ case 1:
+ assert(arm_current_el(env) < 2);
+ break;
case 2:
assert(arm_current_el(env) != 3);
assert(arm_is_el2_enabled(env));
@@ -875,11 +907,10 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
assert(arm_feature(env, ARM_FEATURE_EL3));
break;
default:
- /* No "direct" traps to EL1 */
g_assert_not_reached();
}
- raise_exception(env, EXCP_UDEF, syndrome, target_el);
+ raise_exception(env, excp, syndrome, target_el);
}
const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key)
@@ -912,7 +943,19 @@ void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome)
{
/* See arm_sctlr(), but we also need the sctlr el. */
ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
- int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+ int target_el;
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ target_el = 2;
+ break;
+ case ARMMMUIdx_E30_0:
+ target_el = 3;
+ break;
+ default:
+ target_el = 1;
+ break;
+ }
/*
* The bit is not valid unless the target el is aa64, but since the
@@ -1179,7 +1222,7 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
}
}
-void HELPER(probe_access)(CPUARMState *env, target_ulong ptr,
+void HELPER(probe_access)(CPUARMState *env, vaddr ptr,
uint32_t access_type, uint32_t mmu_idx,
uint32_t size)
{
diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c
index c4b1430..c591c30 100644
--- a/target/arm/tcg/pauth_helper.c
+++ b/target/arm/tcg/pauth_helper.c
@@ -21,8 +21,7 @@
#include "cpu.h"
#include "internals.h"
#include "cpu-features.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/xxhash.h"
diff --git a/target/arm/tcg/psci.c b/target/arm/tcg/psci.c
index 51d2ca3..cabed43 100644
--- a/target/arm/tcg/psci.c
+++ b/target/arm/tcg/psci.c
@@ -21,7 +21,7 @@
#include "exec/helper-proto.h"
#include "kvm-consts.h"
#include "qemu/main-loop.h"
-#include "sysemu/runstate.h"
+#include "system/runstate.h"
#include "internals.h"
#include "arm-powerctl.h"
#include "target/arm/multiprocessing.h"
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index e2e0575..de0c6e5 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -22,8 +22,8 @@
#include "internals.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/helper-retaddr.h"
#include "qemu/int128.h"
#include "fpu/softfloat.h"
#include "vec_internal.h"
@@ -517,6 +517,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
clr_fn(za, 0, reg_off);
}
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -529,6 +531,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -543,6 +547,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -554,6 +560,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += esize;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
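The new set_helper_retaddr()/clear_helper_retaddr() bracketing around the direct host-memory loops serves the user-mode fault path: if one of the raw host accesses faults, the SIGSEGV handler needs the helper's return address to unwind to the right guest instruction. Conceptually the mechanism is just a per-thread slot; a hedged model (the real implementation lives in QEMU's user-mode fault handling and uses atomics):

    #include <stdint.h>

    /* Hypothetical stand-in for the per-thread slot behind
     * set_helper_retaddr(); it is read while unwinding a SIGSEGV. */
    static __thread uintptr_t helper_retaddr;

    static inline void set_helper_retaddr(uintptr_t ra)
    {
        helper_retaddr = ra;
    }

    static inline void clear_helper_retaddr(void)
    {
        helper_retaddr = 0;
    }

The discipline visible in these hunks (and in the sve_ldN_r() changes further down) is that the slot is published only around the raw host loads and stores, and cleared again before falling back to the slow path, which may legitimately trap.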
@@ -701,6 +709,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -711,6 +721,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -725,6 +737,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -734,6 +748,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += 1 << esz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -888,7 +904,7 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
}
void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, void *vst, uint32_t desc)
+ void *vpm, float_status *fpst_in, uint32_t desc)
{
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) << 31;
@@ -900,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
* update the cumulative fp exception status. It also produces
* default nans.
*/
- fpst = *(float_status *)vst;
+ fpst = *fpst_in;
set_default_nan_mode(true, &fpst);
for (row = 0; row < oprsz; ) {
@@ -916,7 +932,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
if (pb & 1) {
uint32_t *a = vza_row + H1_4(col);
uint32_t *m = vzm + H1_4(col);
- *a = float32_muladd(n, *m, *a, 0, vst);
+ *a = float32_muladd(n, *m, *a, 0, &fpst);
}
col += 4;
pb >>= 4;
@@ -930,13 +946,13 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
}
void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, void *vst, uint32_t desc)
+ void *vpm, float_status *fpst_in, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
uint64_t neg = (uint64_t)simd_data(desc) << 63;
uint64_t *za = vza, *zn = vzn, *zm = vzm;
uint8_t *pn = vpn, *pm = vpm;
- float_status fpst = *(float_status *)vst;
+ float_status fpst = *fpst_in;
set_default_nan_mode(true, &fpst);
@@ -976,12 +992,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
}
static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
- float_status *s_std, float_status *s_odd)
+ float_status *s_f16, float_status *s_std,
+ float_status *s_odd)
{
- float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
- float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
- float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
- float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
+ /*
+ * We need three different float_status for different parts of this
+ * operation:
+ * - the input conversion of the float16 values must use the
+ * f16-specific float_status, so that the FPCR.FZ16 control is applied
+ * - operations on float32 including the final accumulation must use
+ * the normal float_status, so that FPCR.FZ is applied
+ * - we have a pre-set-up copy of s_std which is set to round-to-odd,
+ * for the multiply (see below)
+ */
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
float64 t64;
float32 t32;
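Round-to-odd (Von Neumann rounding) is what makes the two-step float64-then-float32 rounding safe here: the mode ORs any discarded sticky bits into the result's least-significant bit, so the intermediate can never land exactly on a float32 rounding boundary unless the exact value does. A model of the rounding step on an integer significand:

    #include <stdbool.h>
    #include <stdint.h>

    /* Model of float_round_to_odd on an integer significand: drop
     * 'shift' low bits, then OR the lost (sticky) bits into the new
     * least-significant bit. */
    static uint64_t round_to_odd(uint64_t frac, unsigned shift)
    {
        bool sticky = (frac & ((1ULL << shift) - 1)) != 0;
        return (frac >> shift) | sticky;
    }

Because the surviving LSB records whether anything was lost, the subsequent correctly-rounded conversion to float32 gives the same answer as a single rounding of the exact result.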
@@ -1003,20 +1030,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
}
void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, void *vst, uint32_t desc)
+ void *vpm, CPUARMState *env, uint32_t desc)
{
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) * 0x80008000u;
uint16_t *pn = vpn, *pm = vpm;
- float_status fpst_odd, fpst_std;
+ float_status fpst_odd, fpst_std, fpst_f16;
/*
- * Make a copy of float_status because this operation does not
- * update the cumulative fp exception status. It also produces
- * default nans. Make a second copy with round-to-odd -- see above.
+ * Make copies of the fp status fields we use, because this operation
+ * does not update the cumulative fp exception status. It also
+ * produces default NaNs. We also need a second copy of fp_status with
+ * round-to-odd -- see above.
*/
- fpst_std = *(float_status *)vst;
+ fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
+ fpst_std = env->vfp.fp_status[FPST_A64];
set_default_nan_mode(true, &fpst_std);
+ set_default_nan_mode(true, &fpst_f16);
fpst_odd = fpst_std;
set_float_rounding_mode(float_round_to_odd, &fpst_odd);
@@ -1036,7 +1066,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
m = f16mop_adj_pair(m, pcol, 0);
- *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
+ *a = f16_dotadd(*a, n, m,
+ &fpst_f16, &fpst_std, &fpst_odd);
}
col += 4;
pcol >>= 4;
@@ -1048,38 +1079,68 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
}
}
-void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, uint32_t desc)
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm,
+ void *vpn, void *vpm, CPUARMState *env, uint32_t desc)
{
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) * 0x80008000u;
uint16_t *pn = vpn, *pm = vpm;
+ float_status fpst, fpst_odd;
- for (row = 0; row < oprsz; ) {
- uint16_t prow = pn[H2(row >> 4)];
- do {
- void *vza_row = vza + tile_vslice_offset(row);
- uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+ if (is_ebf(env, &fpst, &fpst_odd)) {
+ for (row = 0; row < oprsz; ) {
+ uint16_t prow = pn[H2(row >> 4)];
+ do {
+ void *vza_row = vza + tile_vslice_offset(row);
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
- n = f16mop_adj_pair(n, prow, neg);
+ n = f16mop_adj_pair(n, prow, neg);
- for (col = 0; col < oprsz; ) {
- uint16_t pcol = pm[H2(col >> 4)];
- do {
- if (prow & pcol & 0b0101) {
- uint32_t *a = vza_row + H1_4(col);
- uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+ for (col = 0; col < oprsz; ) {
+ uint16_t pcol = pm[H2(col >> 4)];
+ do {
+ if (prow & pcol & 0b0101) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
- m = f16mop_adj_pair(m, pcol, 0);
- *a = bfdotadd(*a, n, m);
- }
- col += 4;
- pcol >>= 4;
- } while (col & 15);
- }
- row += 4;
- prow >>= 4;
- } while (row & 15);
+ m = f16mop_adj_pair(m, pcol, 0);
+ *a = bfdotadd_ebf(*a, n, m, &fpst, &fpst_odd);
+ }
+ col += 4;
+ pcol >>= 4;
+ } while (col & 15);
+ }
+ row += 4;
+ prow >>= 4;
+ } while (row & 15);
+ }
+ } else {
+ for (row = 0; row < oprsz; ) {
+ uint16_t prow = pn[H2(row >> 4)];
+ do {
+ void *vza_row = vza + tile_vslice_offset(row);
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+
+ n = f16mop_adj_pair(n, prow, neg);
+
+ for (col = 0; col < oprsz; ) {
+ uint16_t pcol = pm[H2(col >> 4)];
+ do {
+ if (prow & pcol & 0b0101) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+
+ m = f16mop_adj_pair(m, pcol, 0);
+ *a = bfdotadd(*a, n, m, &fpst);
+ }
+ col += 4;
+ pcol >>= 4;
+ } while (col & 15);
+ }
+ row += 4;
+ prow >>= 4;
+ } while (row & 15);
+ }
}
}
@@ -1146,10 +1207,10 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
uint64_t sum = 0; \
/* Apply P to N as a mask, making the inactive elements 0. */ \
n &= expand_pred_h(p); \
- sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
- sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
- sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
- sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
+ sum += (int64_t)(NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
+ sum += (int64_t)(NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
+ sum += (int64_t)(NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
+ sum += (int64_t)(NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
return neg ? a - sum : a + sum; \
}
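The (int64_t) casts fix a genuine undefined-behaviour bug: for the unsigned variants, NTYPE and MTYPE are uint16_t, which promote to signed int, and 0xffff * 0xffff overflows INT_MAX. Widening the first operand keeps the whole multiply in 64 bits:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t n = 0xffff, m = 0xffff;

        /* Both operands promote to (signed) int, so the product
         * 65535 * 65535 = 4294836225 exceeds INT_MAX: this line is
         * undefined behaviour, and is what the cast above avoids.
         *
         * int bad = n * m;
         */

        int64_t good = (int64_t)n * m;   /* multiply done in int64_t */
        printf("%" PRId64 "\n", good);   /* prints 4294836225 */
        return 0;
    }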
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index dd49e67..a2c363a 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -20,15 +20,22 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "internals.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
#include "tcg/tcg-gvec-desc.h"
#include "fpu/softfloat.h"
#include "tcg/tcg.h"
#include "vec_internal.h"
#include "sve_ldst_internal.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/helper-retaddr.h"
+#include "accel/tcg/cpu-ops.h"
+#include "accel/tcg/probe.h"
+#ifdef CONFIG_USER_ONLY
+#include "user/page-protection.h"
+#endif
/* Return a value for NZCV as per the ARM PredTest pseudofunction.
@@ -730,7 +737,7 @@ DO_ZPZZ_PAIR_D(sve2_sminp_zpzz_d, int64_t, DO_MIN)
#define DO_ZPZZ_PAIR_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i, opr_sz = simd_oprsz(desc); \
for (i = 0; i < opr_sz; ) { \
@@ -876,12 +883,28 @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
+#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N))
+#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N))
+#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N))
+
+DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H)
+DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S)
+DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D)
+
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
+#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N))
+#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N))
+#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N))
+
+DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H)
+DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S)
+DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D)
+
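The AH variants implement the FEAT_AFP rule that, with FPCR.AH = 1, FABS and FNEG pass NaN operands through untouched instead of acting on their sign bit. On a raw float16 bit pattern the rule looks like this:

    #include <stdbool.h>
    #include <stdint.h>

    /* FPCR.AH-aware FNEG on a raw float16 bit pattern, mirroring
     * DO_AH_FNEG_H above. */
    static uint16_t fneg16(uint16_t v, bool fpcr_ah)
    {
        bool is_nan = (v & 0x7c00) == 0x7c00 && (v & 0x03ff) != 0;

        if (fpcr_ah && is_nan) {
            return v;           /* NaN passes through unchanged */
        }
        return v ^ 0x8000;      /* ordinary sign-bit flip */
    }

The float*_maybe_ah_chs() helpers that the FTSSEL hunks below call evidently encode the same rule.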
#define DO_NOT(N) (~N)
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
@@ -2536,6 +2559,7 @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint16_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint16_t nn = n[i];
@@ -2543,13 +2567,17 @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float16_one;
}
- d[i] = nn ^ (mm & 2) << 14;
+ if (mm & 2) {
+ nn = float16_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint32_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint32_t nn = n[i];
@@ -2557,13 +2585,17 @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float32_one;
}
- d[i] = nn ^ (mm & 2) << 30;
+ if (mm & 2) {
+ nn = float32_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint64_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint64_t nn = n[i];
@@ -2571,7 +2603,10 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float64_one;
}
- d[i] = nn ^ (mm & 2) << 62;
+ if (mm & 2) {
+ nn = float64_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
@@ -4187,10 +4222,10 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
uintptr_t half = n / 2; \
TYPE lo = NAME##_reduce(data, status, half); \
TYPE hi = NAME##_reduce(data + half, status, half); \
- return TYPE##_##FUNC(lo, hi, status); \
+ return FUNC(lo, hi, status); \
} \
} \
-uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
+uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
{ \
uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
@@ -4205,34 +4240,45 @@ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
for (; i < maxsz; i += sizeof(TYPE)) { \
*(TYPE *)((void *)data + i) = IDENT; \
} \
- return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \
+ return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
}
-DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
-DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
-DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
+DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero)
+DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero)
+DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero)
/* Identity is floatN_default_nan, without the function call. */
-DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
-DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
-DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
+DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00)
+DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000)
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
+
+DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00)
+DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000)
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
-DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
-DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
-DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
+DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity)
+DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity)
+DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity)
-DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
-DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
-DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
+DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity))
+DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity))
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity))
-DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
-DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
-DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
+DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity)
+DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity)
+DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity)
+
+DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh,
+ float16_chs(float16_infinity))
+DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs,
+ float32_chs(float32_infinity))
+DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd,
+ float64_chs(float64_infinity))
#undef DO_REDUCE
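The DO_REDUCE expansion recurses by halving, so every vector length reduces in the same fixed tree order; the helper first copies the active elements out and pads the tail up to maxsz (which the caller rounds up to a power of two) with the operation's identity element. The shape of the recursion, sketched for a plain float addition:

    #include <stddef.h>

    /* Tree-shaped reduction matching the DO_REDUCE recursion above;
     * 'n' must be a power of two, which the identity padding in the
     * helper guarantees. */
    static float reduce_add(const float *data, size_t n)
    {
        if (n == 1) {
            return data[0];
        }
        size_t half = n / 2;
        return reduce_add(data, half) + reduce_add(data + half, half);
    }

A fixed association order matters for floating point: it makes the result independent of how an implementation might otherwise chunk the vector.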
uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc);
float16 result = nn;
@@ -4252,7 +4298,7 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
}
uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc);
float32 result = nn;
@@ -4272,7 +4318,7 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
}
uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8;
uint64_t *m = vm;
@@ -4292,7 +4338,7 @@ uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
*/
#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4333,6 +4379,14 @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
+DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
+DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
+DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
+
+DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
+DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
+DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
+
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
@@ -4356,9 +4410,31 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
return float64_abs(float64_sub(a, b, s));
}
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
+static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat)
+{
+ float16 r = float16_sub(op1, op2, stat);
+ return float16_is_any_nan(r) ? r : float16_abs(r);
+}
+
+static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat)
+{
+ float32 r = float32_sub(op1, op2, stat);
+ return float32_is_any_nan(r) ? r : float32_abs(r);
+}
+
+static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat)
+{
+ float64 r = float64_sub(op1, op2, stat);
+ return float64_is_any_nan(r) ? r : float64_abs(r);
+}
+
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
+DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h)
+DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s)
+DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d)
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
{
@@ -4381,7 +4457,7 @@ DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd)
*/
#define DO_ZPZS_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4445,11 +4521,20 @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
+DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh)
+DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs)
+DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd)
+
+DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh)
+DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins)
+DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind)
+
/* Fully general two-operand expander, controlled by a predicate,
* With the extra float_status parameter.
*/
#define DO_ZPZ_FP(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4654,7 +4739,7 @@ static int16_t do_float16_logb_as_int(float16 a, float_status *s)
return -15 - clz32(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0x1f)) {
if (frac == 0) {
@@ -4682,7 +4767,7 @@ static int32_t do_float32_logb_as_int(float32 a, float_status *s)
return -127 - clz32(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0xff)) {
if (frac == 0) {
@@ -4710,7 +4795,7 @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s)
return -1023 - clz64(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0x7ff)) {
if (frac == 0) {
@@ -4733,7 +4818,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4748,7 +4833,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
- r = float16_muladd(e1, e2, e3, 0, status);
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4756,32 +4841,53 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4796,7 +4902,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
- r = float32_muladd(e1, e2, e3, 0, status);
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4804,32 +4910,53 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4844,7 +4971,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
e3 = *(uint64_t *)(va + i) ^ neg3;
- r = float64_muladd(e1, e2, e3, 0, status);
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4852,27 +4979,48 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
@@ -4882,7 +5030,7 @@ void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
*/
#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
uint64_t *d = vd, *g = vg; \
@@ -4944,7 +5092,7 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT)
*/
#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
uint64_t *d = vd, *g = vg; \
@@ -4982,27 +5130,37 @@ DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE)
/* FP Trig Multiply-Add. */
-void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float16 coeff[16] = {
0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float16 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float16 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float16_is_neg(mm)) {
- mm = float16_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float16_abs(mm);
+ }
xx += 8;
}
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
}
}
-void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float32 coeff[16] = {
0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
@@ -5011,20 +5169,29 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float32 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float32 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float32_is_neg(mm)) {
- mm = float32_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float32_abs(mm);
+ }
xx += 8;
}
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
}
}
-void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float64 coeff[16] = {
0x3ff0000000000000ull, 0xbfc5555555555543ull,
@@ -5037,16 +5204,24 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float64 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float64 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float64_is_neg(mm)) {
- mm = float64_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float64_abs(mm);
+ }
xx += 8;
}
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
}
}
@@ -5055,12 +5230,12 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
*/
void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
- float16 neg_real = float16_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5072,27 +5247,33 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float16);
e0 = *(float16 *)(vn + H1_2(i));
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float16 *)(vm + H1_2(j));
e2 = *(float16 *)(vn + H1_2(j));
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float16 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, vs);
+ *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, vs);
+ *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
}
void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
- float32 neg_real = float32_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5104,27 +5285,33 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float32);
e0 = *(float32 *)(vn + H1_2(i));
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float32 *)(vm + H1_2(j));
e2 = *(float32 *)(vn + H1_2(j));
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float32 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, vs);
+ *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, vs);
+ *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
}
void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
- float64 neg_real = float64_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5136,15 +5323,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float64);
e0 = *(float64 *)(vn + H1_2(i));
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float64 *)(vm + H1_2(j));
e2 = *(float64 *)(vn + H1_2(j));
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float64 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, vs);
+ *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, vs);
+ *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
@@ -5155,16 +5348,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
*/
void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float16 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float16 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5181,18 +5379,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
mi = *(float16 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float16 *)(va + H1_2(i));
- d = float16_muladd(e2, e1, d, 0, status);
+ d = float16_muladd(e2, e1, d, negf_real, status);
*(float16 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float16 *)(va + H1_2(j));
- d = float16_muladd(e4, e3, d, 0, status);
+ d = float16_muladd(e4, e3, d, negf_imag, status);
*(float16 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
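The negx/negf split formalises the two ways of negating a multiply-add operand: with FPCR.AH = 0 the sign bit is XORed into the input up front (so a NaN input has its sign flipped too, as the legacy behaviour requires), while with AH = 1 the float_muladd_negate_product flag folds the negation into the fused operation, leaving NaN operands to propagate unchanged. One lane of the pattern, as a sketch assuming QEMU's softfloat API:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /* One float16 multiply-accumulate term with a selectable negation
     * strategy, mirroring the negx/negf setup above. */
    static float16 fmla_term(float16 e1, float16 e2, float16 acc,
                             bool negate, bool fpcr_ah, float_status *st)
    {
        float16 negx = (negate && !fpcr_ah) ? 0x8000 : 0;
        int negf = (negate && fpcr_ah) ? float_muladd_negate_product : 0;

        return float16_muladd(e1 ^ negx, e2, acc, negf, st);
    }

The same AH dispatch appears in the ftmad and AH fmla variants earlier in this file.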
@@ -5200,16 +5398,21 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float32 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float32 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5226,18 +5429,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
mi = *(float32 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float32 *)(va + H1_2(i));
- d = float32_muladd(e2, e1, d, 0, status);
+ d = float32_muladd(e2, e1, d, negf_real, status);
*(float32 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float32 *)(va + H1_2(j));
- d = float32_muladd(e4, e3, d, 0, status);
+ d = float32_muladd(e4, e3, d, negf_imag, status);
*(float32 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5245,16 +5448,21 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float64 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float64 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5271,18 +5479,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
mi = *(float64 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float64 *)(va + H1_2(i));
- d = float64_muladd(e2, e1, d, 0, status);
+ d = float64_muladd(e2, e1, d, negf_real, status);
*(float64 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float64 *)(va + H1_2(j));
- d = float64_muladd(e4, e3, d, 0, status);
+ d = float64_muladd(e4, e3, d, negf_imag, status);
*(float64 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5738,6 +5946,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5752,6 +5962,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -5771,6 +5983,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5784,6 +5998,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -5934,15 +6150,11 @@ DO_LDN_2(4, dd, MO_64)
/*
* Load contiguous data, first-fault and no-fault.
*
- * For user-only, one could argue that we should hold the mmap_lock during
- * the operation so that there is no race between page_check_range and the
- * load operation. However, unmapping pages out from under a running thread
- * is extraordinarily unlikely. This theoretical race condition also affects
- * linux-user/ in its get_user/put_user macros.
- *
- * TODO: Construct some helpers, written in assembly, that interact with
- * host_signal_handler to produce memory ops which can properly report errors
- * without racing.
+ * For user-only, we control the race between page_check_range and
+ * another thread's munmap by using set/clear_helper_retaddr. Any
+ * SEGV that occurs between those markers is assumed to be because
+ * the guest page vanished. Keep that block as small as possible
+ * so that unrelated QEMU bugs are not blamed on the guest.
*/
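The mechanism behind the rewritten comment is a per-thread marker: while it is set, the host SIGSEGV handler attributes the fault to the guest page having vanished rather than to a QEMU bug. A minimal standalone sketch of the idea (not the actual accel/tcg implementation):

    #include <stdint.h>

    static __thread uintptr_t helper_retaddr;

    static inline void set_helper_retaddr(uintptr_t ra) { helper_retaddr = ra; }
    static inline void clear_helper_retaddr(void)       { helper_retaddr = 0; }

    /* Called from the host signal handler: a fault is treated as a
     * guest memory fault only inside a set/clear bracket. */
    int segv_is_guest_fault(void)
    {
        return helper_retaddr != 0;
    }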
/* Fault on byte I. All bits in FFR from I are cleared. The vector
@@ -6093,6 +6305,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
do {
@@ -6101,9 +6315,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
(cpu_watchpoint_address_matches
(env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
+ clear_helper_retaddr();
goto do_fault;
}
if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ clear_helper_retaddr();
goto do_fault;
}
host_fn(vd, reg_off, host + mem_off);
@@ -6113,6 +6329,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
} while (reg_off <= reg_last);
+ clear_helper_retaddr();
+
/*
* MemSingleNF is allowed to fail for any reason. We have special
* code above to handle the first element crossing a page boundary.
@@ -6307,9 +6525,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
flags = info.page[0].flags | info.page[1].flags;
if (unlikely(flags != 0)) {
-#ifdef CONFIG_USER_ONLY
- g_assert_not_reached();
-#else
/*
* At least one page includes MMIO.
* Any bus operation can fail with cpu_transaction_failed,
@@ -6340,7 +6555,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off & 63);
} while (reg_off <= reg_last);
return;
-#endif
}
mem_off = info.mem_off_first[0];
@@ -6348,6 +6562,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6362,6 +6578,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -6381,6 +6599,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6394,6 +6614,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -6560,7 +6782,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
if (unlikely(info.flags & TLB_MMIO)) {
tlb_fn(env, &scratch, reg_off, addr, retaddr);
} else {
+ set_helper_retaddr(retaddr);
host_fn(&scratch, reg_off, info.host);
+ clear_helper_retaddr();
}
} else {
/* Element crosses the page boundary. */
@@ -6782,7 +7006,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
goto fault;
}
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, info.host);
+ clear_helper_retaddr();
}
reg_off += esize;
} while (reg_off & 63);
@@ -6986,7 +7212,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
do {
void *h = host[i];
if (likely(h != NULL)) {
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, h);
+ clear_helper_retaddr();
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
@@ -7369,7 +7597,7 @@ void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc)
}
void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4);
@@ -7407,7 +7635,7 @@ void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4);
@@ -7443,7 +7671,8 @@ void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
}
#define DO_FCVTNT(NAME, TYPEW, TYPEN, HW, HN, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -7464,7 +7693,8 @@ DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16)
DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32)
#define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h
index 4f159ec..f2243da 100644
--- a/target/arm/tcg/sve_ldst_internal.h
+++ b/target/arm/tcg/sve_ldst_internal.h
@@ -20,7 +20,7 @@
#ifndef TARGET_ARM_SVE_LDST_INTERNAL_H
#define TARGET_ARM_SVE_LDST_INTERNAL_H
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
/*
* Load one element into @vd + @reg_off from @host.
diff --git a/target/arm/tcg/tlb-insns.c b/target/arm/tcg/tlb-insns.c
new file mode 100644
index 0000000..95c26c6
--- /dev/null
+++ b/target/arm/tcg/tlb-insns.c
@@ -0,0 +1,1306 @@
+/*
+ * Helpers for TLBI insns
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "exec/cputlb.h"
+#include "exec/target_page.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "cpregs.h"
+
+/* Check for traps from EL1 due to HCR_EL2.TTLB. */
+static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TTLB)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_OK;
+}
+
+/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */
+static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 &&
+ (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_OK;
+}
+
+/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */
+static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 &&
+ (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_OK;
+}
+
+/* IS variants of TLB operations must affect all cores */
+static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_all_cpus_synced(cs);
+}
+
+static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_all_cpus_synced(cs);
+}
+
+static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
+}
+
+static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
+}
+
+/*
+ * Non-IS variants of TLB operations are upgraded to
+ * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
+ * force broadcast of these operations.
+ */
+static bool tlb_force_broadcast(CPUARMState *env)
+{
+ return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
+}
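The non-IS writefns below all share this shape; a hypothetical combined helper (not in the patch) makes the upgrade pattern explicit:

    static void tlb_flush_maybe_broadcast(CPUARMState *env)
    {
        CPUState *cs = env_cpu(env);

        /* HCR_EL2.FB at EL1 upgrades a local flush to a broadcast. */
        if (tlb_force_broadcast(env)) {
            tlb_flush_all_cpus_synced(cs);
        } else {
            tlb_flush(cs);
        }
    }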
+
+static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate all (TLBIALL) */
+ CPUState *cs = env_cpu(env);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_all_cpus_synced(cs);
+ } else {
+ tlb_flush(cs);
+ }
+}
+
+static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
+ CPUState *cs = env_cpu(env);
+
+ value &= TARGET_PAGE_MASK;
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_all_cpus_synced(cs, value);
+ } else {
+ tlb_flush_page(cs, value);
+ }
+}
+
+static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate by ASID (TLBIASID) */
+ CPUState *cs = env_cpu(env);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_all_cpus_synced(cs);
+ } else {
+ tlb_flush(cs);
+ }
+}
+
+static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
+ CPUState *cs = env_cpu(env);
+
+ value &= TARGET_PAGE_MASK;
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_all_cpus_synced(cs, value);
+ } else {
+ tlb_flush_page(cs, value);
+ }
+}
+
+static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
+
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2);
+}
+
+static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
+
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ ARMMMUIdxBit_E2);
+}
+
+static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
+
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
+}
+
+static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
+
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2);
+}
+
+static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_by_mmuidx(cs, alle1_tlbmask(env));
+}
+
+static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env));
+}
+
+static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2);
+}
+
+static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2);
+}
+
+/*
+ * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
+ * Page D4-1736 (DDI0487A.b)
+ */
+
+static int vae1_tlbmask(CPUARMState *env)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ uint16_t mask;
+
+ assert(arm_feature(env, ARM_FEATURE_AARCH64));
+
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ mask = ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0;
+ } else {
+ /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */
+ mask = ARMMMUIdxBit_E10_1 |
+ ARMMMUIdxBit_E10_1_PAN |
+ ARMMMUIdxBit_E10_0;
+ }
+ return mask;
+}
+
+static int vae2_tlbmask(CPUARMState *env)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ uint16_t mask;
+
+ if (hcr & HCR_E2H) {
+ mask = ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0;
+ } else {
+ mask = ARMMMUIdxBit_E2;
+ }
+ return mask;
+}
+
+/* Return 56 if TBI is enabled, 64 otherwise. */
+static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx,
+ uint64_t addr)
+{
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ int tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+ int select = extract64(addr, 55, 1);
+
+ return (tbi >> select) & 1 ? 56 : 64;
+}
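A standalone sketch (not QEMU code) of that computation: bit 55 of the address selects the VA range, and the selected range's TBI bit decides whether the top byte is ignored (56 significant bits) or not (64):

    #include <stdint.h>
    #include <stdio.h>

    static int tlbbits(unsigned tbi, uint64_t addr) /* tbi = {TBI1,TBI0} */
    {
        int select = (addr >> 55) & 1;
        return ((tbi >> select) & 1) ? 56 : 64;
    }

    int main(void)
    {
        printf("%d\n", tlbbits(0x1, 0x0000ffff00000000ull)); /* 56: TBI0 set */
        printf("%d\n", tlbbits(0x1, 0xffffffff00000000ull)); /* 64: TBI1 clear */
        return 0;
    }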
+
+static int vae1_tlbbits(CPUARMState *env, uint64_t addr)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ ARMMMUIdx mmu_idx;
+
+ assert(arm_feature(env, ARM_FEATURE_AARCH64));
+
+ /* Only the regime of the mmu_idx below is significant. */
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ mmu_idx = ARMMMUIdx_E20_0;
+ } else {
+ mmu_idx = ARMMMUIdx_E10_0;
+ }
+
+ return tlbbits_for_regime(env, mmu_idx, addr);
+}
+
+static int vae2_tlbbits(CPUARMState *env, uint64_t addr)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ ARMMMUIdx mmu_idx;
+
+ /*
+ * Only the regime of the mmu_idx below is significant.
+ * Regime EL2&0 has two ranges with separate TBI configuration, while EL2
+ * only has one.
+ */
+ if (hcr & HCR_E2H) {
+ mmu_idx = ARMMMUIdx_E20_2;
+ } else {
+ mmu_idx = ARMMMUIdx_E2;
+ }
+
+ return tlbbits_for_regime(env, mmu_idx, addr);
+}
+
+static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
+}
+
+static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
+ } else {
+ tlb_flush_by_mmuidx(cs, mask);
+ }
+}
+
+static int e2_tlbmask(CPUARMState *env)
+{
+ return (ARMMMUIdxBit_E20_0 |
+ ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E2);
+}
+
+static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = alle1_tlbmask(env);
+
+ tlb_flush_by_mmuidx(cs, mask);
+}
+
+static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = e2_tlbmask(env);
+
+ tlb_flush_by_mmuidx(cs, mask);
+}
+
+static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ CPUState *cs = CPU(cpu);
+
+ tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3);
+}
+
+static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = alle1_tlbmask(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
+}
+
+static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = e2_tlbmask(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
+}
+
+static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3);
+}
+
+static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA, EL2
+ * Currently handles both VAE2 and VALE2, since we don't support
+ * flush-last-level-only.
+ */
+ CPUState *cs = env_cpu(env);
+ int mask = vae2_tlbmask(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae2_tlbbits(env, pageaddr);
+
+ tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
+}
+
+static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA, EL3
+ * Currently handles both VAE3 and VALE3, since we don't support
+ * flush-last-level-only.
+ */
+ ARMCPU *cpu = env_archcpu(env);
+ CPUState *cs = CPU(cpu);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3);
+}
+
+static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae1_tlbbits(env, pageaddr);
+
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
+}
+
+static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA, EL1&0 (AArch64 version).
+ * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae1_tlbbits(env, pageaddr);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
+ } else {
+ tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
+ }
+}
+
+static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = vae2_tlbmask(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae2_tlbbits(env, pageaddr);
+
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
+}
+
+static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr);
+
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ ARMMMUIdxBit_E3, bits);
+}
+
+static int ipas2e1_tlbmask(CPUARMState *env, int64_t value)
+{
+ /*
+ * The MSB of value is the NS field, which only applies if SEL2
+ * is implemented and SCR_EL3.NS is not set (i.e. in secure mode).
+ */
+ return (value >= 0
+ && cpu_isar_feature(aa64_sel2, env_archcpu(env))
+ && arm_is_secure_below_el3(env)
+ ? ARMMMUIdxBit_Stage2_S
+ : ARMMMUIdxBit_Stage2);
+}
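Taking the operand as int64_t makes the NS check a simple sign test, since NS is bit 63; the secure stage-2 index is then chosen only when SEL2 is present and the CPU is Secure. A standalone illustration of the sign test (not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t ns_clear = 0x0000000000345000;             /* NS = 0 */
        int64_t ns_set   = (int64_t)0x8000000000345000ull; /* NS = 1 */

        printf("NS=0: value >= 0? %d\n", ns_clear >= 0); /* 1 */
        printf("NS=1: value >= 0? %d\n", ns_set >= 0);   /* 0 */
        return 0;
    }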
+
+static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = ipas2e1_tlbmask(env, value);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
+ } else {
+ tlb_flush_page_by_mmuidx(cs, pageaddr, mask);
+ }
+}
+
+static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = ipas2e1_tlbmask(env, value);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
+}
+
+static const ARMCPRegInfo tlbi_not_v7_cp_reginfo[] = {
+ /*
+ * MMU TLB control. Note that the wildcarding means we cover not just
+ * the unified TLB ops but also the dside/iside/inner-shareable variants.
+ */
+ { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY,
+ .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write,
+ .type = ARM_CP_NO_RAW },
+ { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY,
+ .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write,
+ .type = ARM_CP_NO_RAW },
+ { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY,
+ .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write,
+ .type = ARM_CP_NO_RAW },
+ { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY,
+ .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write,
+ .type = ARM_CP_NO_RAW },
+};
+
+static const ARMCPRegInfo tlbi_v7_cp_reginfo[] = {
+ /* 32 bit ITLB invalidates */
+ { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
+ { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
+ { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
+ /* 32 bit DTLB invalidates */
+ { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
+ { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
+ { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
+ /* 32 bit TLB invalidates */
+ { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
+ { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
+ { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
+ { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimvaa_write },
+};
+
+static const ARMCPRegInfo tlbi_v7mp_cp_reginfo[] = {
+ /* 32 bit TLB invalidates, Inner Shareable */
+ { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbiall_is_write },
+ { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimva_is_write },
+ { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbiasid_is_write },
+ { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimvaa_is_write },
+};
+
+static const ARMCPRegInfo tlbi_v8_cp_reginfo[] = {
+ /* AArch32 TLB invalidate last level of translation table walk */
+ { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimva_is_write },
+ { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimvaa_is_write },
+ { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
+ { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimvaa_write },
+ { .name = "TLBIMVALH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbimva_hyp_write },
+ { .name = "TLBIMVALHIS",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbimva_hyp_is_write },
+ { .name = "TLBIIPAS2",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiipas2_hyp_write },
+ { .name = "TLBIIPAS2IS",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiipas2is_hyp_write },
+ { .name = "TLBIIPAS2L",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiipas2_hyp_write },
+ { .name = "TLBIIPAS2LIS",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiipas2is_hyp_write },
+ /* AArch64 TLBI operations */
+ { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVMALLE1IS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAE1IS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIASIDE1IS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAAE1IS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVALE1IS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAALE1IS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVMALLE1,
+ .writefn = tlbi_aa64_vmalle1_write },
+ { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAE1,
+ .writefn = tlbi_aa64_vae1_write },
+ { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIASIDE1,
+ .writefn = tlbi_aa64_vmalle1_write },
+ { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAAE1,
+ .writefn = tlbi_aa64_vae1_write },
+ { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVALE1,
+ .writefn = tlbi_aa64_vae1_write },
+ { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAALE1,
+ .writefn = tlbi_aa64_vae1_write },
+ { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ipas2e1is_write },
+ { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ipas2e1is_write },
+ { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_VMALLS12E1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ipas2e1_write },
+ { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ipas2e1_write },
+ { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1_write },
+ { .name = "TLBI_VMALLS12E1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1is_write },
+};
+
+static const ARMCPRegInfo tlbi_el2_cp_reginfo[] = {
+ { .name = "TLBIALLNSNH",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiall_nsnh_write },
+ { .name = "TLBIALLNSNHIS",
+ .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiall_nsnh_is_write },
+ { .name = "TLBIALLH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiall_hyp_write },
+ { .name = "TLBIALLHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbiall_hyp_is_write },
+ { .name = "TLBIMVAH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbimva_hyp_write },
+ { .name = "TLBIMVAHIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1,
+ .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .writefn = tlbimva_hyp_is_write },
+ { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_alle2_write },
+ { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2_write },
+ { .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2_write },
+ { .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_alle2is_write },
+ { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+ { .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+};
+
+static const ARMCPRegInfo tlbi_el3_cp_reginfo[] = {
+ { .name = "TLBI_ALLE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 0,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle3is_write },
+ { .name = "TLBI_VAE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3is_write },
+ { .name = "TLBI_VALE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 3, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3is_write },
+ { .name = "TLBI_ALLE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 0,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle3_write },
+ { .name = "TLBI_VAE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3_write },
+ { .name = "TLBI_VALE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3_write },
+};
+
+typedef struct {
+ uint64_t base;
+ uint64_t length;
+} TLBIRange;
+
+static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg)
+{
+ /*
+ * Note that the TLBI range TG field encoding differs from both
+ * TG0 and TG1 encodings.
+ */
+ switch (tg) {
+ case 1:
+ return Gran4K;
+ case 2:
+ return Gran16K;
+ case 3:
+ return Gran64K;
+ default:
+ return GranInvalid;
+ }
+}
+
+static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
+ uint64_t value)
+{
+ unsigned int page_size_granule, page_shift, num, scale, exponent;
+ /* Extract one bit to represent the va selector in use. */
+ uint64_t select = sextract64(value, 36, 1);
+ ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false);
+ TLBIRange ret = { };
+ ARMGranuleSize gran;
+
+ page_size_granule = extract64(value, 46, 2);
+ gran = tlbi_range_tg_to_gran_size(page_size_granule);
+
+ /* The granule encoded in value must match the granule in use. */
+ if (gran != param.gran) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n",
+ page_size_granule);
+ return ret;
+ }
+
+ page_shift = arm_granule_bits(gran);
+ num = extract64(value, 39, 5);
+ scale = extract64(value, 44, 2);
+ exponent = (5 * scale) + 1;
+
+ ret.length = (num + 1) << (exponent + page_shift);
+
+ if (param.select) {
+ ret.base = sextract64(value, 0, 37);
+ } else {
+ ret.base = extract64(value, 0, 37);
+ }
+ if (param.ds) {
+ /*
+ * With DS=1, BaseADDR is always shifted 16 so that it is able
+ * to address all 52 va bits. The input address is perforce
+ * aligned on a 64k boundary regardless of translation granule.
+ */
+ page_shift = 16;
+ }
+ ret.base <<= page_shift;
+
+ return ret;
+}
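A standalone sketch (not QEMU code) of the length computation, for a hypothetical operand with TG=4K, SCALE=1, NUM=3, which works out to (3 + 1) << (5*1 + 1 + 12) = 1 MiB:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t extract64(uint64_t v, int start, int len)
    {
        return (v >> start) & (~0ull >> (64 - len));
    }

    int main(void)
    {
        /* TG=1 (4K) in [47:46], SCALE=1 in [45:44], NUM=3 in [43:39]. */
        uint64_t value = (1ull << 46) | (1ull << 44) | (3ull << 39);
        unsigned page_shift = 12;                  /* Gran4K */
        unsigned num = extract64(value, 39, 5);
        unsigned scale = extract64(value, 44, 2);
        unsigned exponent = 5 * scale + 1;
        uint64_t length = (uint64_t)(num + 1) << (exponent + page_shift);

        printf("num=%u scale=%u length=%#llx\n",
               num, scale, (unsigned long long)length);
        return 0;
    }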
+
+static void do_rvae_write(CPUARMState *env, uint64_t value,
+ int idxmap, bool synced)
+{
+ ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap);
+ TLBIRange range;
+ int bits;
+
+ range = tlbi_aa64_get_range(env, one_idx, value);
+ bits = tlbbits_for_regime(env, one_idx, range.base);
+
+ if (synced) {
+ tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env),
+ range.base,
+ range.length,
+ idxmap,
+ bits);
+ } else {
+ tlb_flush_range_by_mmuidx(env_cpu(env), range.base,
+ range.length, idxmap, bits);
+ }
+}
+
+static void tlbi_aa64_rvae1_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL1&0.
+ * Currently handles all of RVAE1, RVAAE1, RVAALE1 and RVALE1,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+
+ do_rvae_write(env, value, vae1_tlbmask(env),
+ tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae1is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, Inner/Outer Shareable EL1&0.
+ * Currently handles all of RVAE1IS, RVAE1OS, RVAAE1IS, RVAAE1OS,
+ * RVAALE1IS, RVAALE1OS, RVALE1IS and RVALE1OS, since we don't support
+ * flush-for-specific-ASID-only, flush-last-level-only or inner/outer
+ * shareable specific flushes.
+ */
+
+ do_rvae_write(env, value, vae1_tlbmask(env), true);
+}
+
+static void tlbi_aa64_rvae2_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL2.
+ * Currently handles all of RVAE2 and RVALE2,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+
+ do_rvae_write(env, value, vae2_tlbmask(env),
+ tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae2is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, Inner/Outer Shareable, EL2.
+ * Currently handles all of RVAE2IS, RVAE2OS, RVALE2IS and RVALE2OS,
+ * since we don't support flush-for-specific-ASID-only,
+ * flush-last-level-only or inner/outer shareable specific flushes.
+ */
+
+ do_rvae_write(env, value, vae2_tlbmask(env), true);
+}
+
+static void tlbi_aa64_rvae3_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL3.
+ * Currently handles all of RVAE3 and RVALE3,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+
+ do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae3is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL3, Inner/Outer Shareable.
+ * Currently handles all of RVAE3IS, RVAE3OS, RVALE3IS and RVALE3OS,
+ * since we don't support flush-for-specific-ASID-only,
+ * flush-last-level-only or inner/outer specific flushes.
+ */
+
+ do_rvae_write(env, value, ARMMMUIdxBit_E3, true);
+}
+
+static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ do_rvae_write(env, value, ipas2e1_tlbmask(env, value),
+ tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_ripas2e1is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true);
+}
+
+static const ARMCPRegInfo tlbirange_reginfo[] = {
+ { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAAE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVALE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbis,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAALE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAAE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVALE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAALE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAAE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVALE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlb,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIRVAALE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ripas2e1is_write },
+ { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ripas2e1is_write },
+ { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ripas2e1_write },
+ { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_ripas2e1_write },
+ { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2_write },
+ { .name = "TLBI_RVALE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2_write },
+ { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVALE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVAE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVALE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVAE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3_write },
+ { .name = "TLBI_RVALE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_rvae3_write },
+};
+
+static const ARMCPRegInfo tlbios_reginfo[] = {
+ { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVMALLE1OS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1,
+ .fgt = FGT_TLBIVAE1OS,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIASIDE1OS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAAE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVALE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbos,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .fgt = FGT_TLBIVAALE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_alle2is_write },
+ { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+ { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL2_W,
+ .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+ { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_IPAS2E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 0,
+ .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS },
+ { .name = "TLBI_RIPAS2E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 3,
+ .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS },
+ { .name = "TLBI_IPAS2LE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 4,
+ .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS },
+ { .name = "TLBI_RIPAS2LE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 7,
+ .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_ADD_TLBI_NXS },
+ { .name = "TLBI_ALLE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_alle3is_write },
+ { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3is_write },
+ { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_ADD_TLBI_NXS,
+ .writefn = tlbi_aa64_vae3is_write },
+};
+
+static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush(cs);
+}
+
+static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_all_cpus_synced(cs);
+}
+
+static const ARMCPRegInfo tlbi_rme_reginfo[] = {
+ { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paall_write },
+ { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+ /*
+ * QEMU does not have a way to invalidate by physical address, thus
+ * invalidating a range of physical addresses is accomplished by
+ * flushing all tlb entries in the outer shareable domain,
+ * just like PAALLOS.
+ */
+ { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+ { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+};
+
+void define_tlb_insn_regs(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+
+ if (!arm_feature(env, ARM_FEATURE_V7)) {
+ define_arm_cp_regs(cpu, tlbi_not_v7_cp_reginfo);
+ } else {
+ define_arm_cp_regs(cpu, tlbi_v7_cp_reginfo);
+ }
+ if (arm_feature(env, ARM_FEATURE_V7MP) &&
+ !arm_feature(env, ARM_FEATURE_PMSA)) {
+ define_arm_cp_regs(cpu, tlbi_v7mp_cp_reginfo);
+ }
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ define_arm_cp_regs(cpu, tlbi_v8_cp_reginfo);
+ }
+ /*
+ * We retain the existing logic for when to register these TLBI
+ * ops (i.e. matching the condition for el2_cp_reginfo[] in
+ * helper.c), but we will be able to simplify this later.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
+ define_arm_cp_regs(cpu, tlbi_el2_cp_reginfo);
+ }
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ define_arm_cp_regs(cpu, tlbi_el3_cp_reginfo);
+ }
+ if (cpu_isar_feature(aa64_tlbirange, cpu)) {
+ define_arm_cp_regs(cpu, tlbirange_reginfo);
+ }
+ if (cpu_isar_feature(aa64_tlbios, cpu)) {
+ define_arm_cp_regs(cpu, tlbios_reginfo);
+ }
+ if (cpu_isar_feature(aa64_rme, cpu)) {
+ define_arm_cp_regs(cpu, tlbi_rme_reginfo);
+ }
+}
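The file's only external entry point is define_tlb_insn_regs(); the expected call site is the existing cpreg registration path in helper.c (an assumption based on the registration flow this series refactors):

    /* Sketch, in helper.c's register_cp_regs_for_features()
     * (assumed call site): */
    define_tlb_insn_regs(cpu);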
diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c
index 885bf4e..23c72a9 100644
--- a/target/arm/tcg/tlb_helper.c
+++ b/target/arm/tcg/tlb_helper.c
@@ -9,9 +9,9 @@
#include "cpu.h"
#include "internals.h"
#include "cpu-features.h"
-#include "exec/exec-all.h"
-#include "exec/helper-proto.h"
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
/*
* Returns true if the stage 1 translation regime is using LPAE format page
@@ -277,7 +277,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
}
-void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc)
+void helper_exception_pc_alignment(CPUARMState *env, vaddr pc)
{
ARMMMUFaultInfo fi = { .type = ARMFault_Alignment };
int target_el = exception_target_el(env);
@@ -318,14 +318,13 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
}
-bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
+bool arm_cpu_tlb_fill_align(CPUState *cs, CPUTLBEntryFull *out, vaddr address,
+ MMUAccessType access_type, int mmu_idx,
+ MemOp memop, int size, bool probe, uintptr_t ra)
{
ARMCPU *cpu = ARM_CPU(cs);
GetPhysAddrResult res = {};
ARMMMUFaultInfo local_fi, *fi;
- int ret;
/*
* Allow S1_ptw_translate to see any fault generated here.
@@ -339,37 +338,27 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
}
/*
- * Walk the page table and (if the mapping exists) add the page
- * to the TLB. On success, return true. Otherwise, if probing,
- * return false. Otherwise populate fsr with ARM DFSR/IFSR fault
- * register format, and signal the fault.
+ * Per R_XCHFJ, alignment fault not due to memory type has
+     * highest precedence. Otherwise, walk the page table and
+     * collect the page description.
*/
- ret = get_phys_addr(&cpu->env, address, access_type,
- core_to_arm_mmu_idx(&cpu->env, mmu_idx),
- &res, fi);
- if (likely(!ret)) {
- /*
- * Map a single [sub]page. Regions smaller than our declared
- * target page size are handled specially, so for those we
- * pass in the exact addresses.
- */
- if (res.f.lg_page_size >= TARGET_PAGE_BITS) {
- res.f.phys_addr &= TARGET_PAGE_MASK;
- address &= TARGET_PAGE_MASK;
- }
-
+ if (address & ((1 << memop_alignment_bits(memop)) - 1)) {
+ fi->type = ARMFault_Alignment;
+ } else if (!get_phys_addr(&cpu->env, address, access_type, memop,
+ core_to_arm_mmu_idx(&cpu->env, mmu_idx),
+ &res, fi)) {
res.f.extra.arm.pte_attrs = res.cacheattrs.attrs;
res.f.extra.arm.shareability = res.cacheattrs.shareability;
-
- tlb_set_page_full(cs, mmu_idx, address, &res.f);
+ *out = res.f;
return true;
- } else if (probe) {
+ }
+ if (probe) {
return false;
- } else {
- /* now we have a real cpu fault */
- cpu_restore_state(cs, retaddr);
- arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
}
+
+ /* Now we have a real cpu fault. */
+ cpu_restore_state(cs, ra);
+ arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
}
#else
void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
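
The alignment test in the hunk above packs into a one-line mask check; a
self-contained illustration (not from the patch) of the predicate and a
worked value:

#include <stdbool.h>
#include <stdint.h>

/* 'align_bits' stands in for memop_alignment_bits(memop): the log2 of
 * the alignment the access requires. Any set low bit means a fault. */
static bool addr_misaligned(uint64_t address, unsigned align_bits)
{
    return (address & ((UINT64_C(1) << align_bits) - 1)) != 0;
}

/* e.g. addr_misaligned(0x1002, 2) -> true: a 4-byte access at 0x1002
 * raises ARMFault_Alignment before the page table is even walked. */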
diff --git a/target/arm/tcg/translate-a32.h b/target/arm/tcg/translate-a32.h
index 19de6e0..0b1fa57 100644
--- a/target/arm/tcg/translate-a32.h
+++ b/target/arm/tcg/translate-a32.h
@@ -83,6 +83,13 @@ void store_cpu_offset(TCGv_i32 var, int offset, int size);
sizeof_field(CPUARMState, name)); \
})
+/* Store to the low half of a 64-bit field from a TCGv_i32 */
+#define store_cpu_field_low32(val, name) \
+ ({ \
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8); \
+ store_cpu_offset(val, offsetoflow32(CPUARMState, name), 4); \
+ })
+
#define store_cpu_field_constant(val, name) \
store_cpu_field(tcg_constant_i32(val), name)
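
A usage sketch for the new macro (the field name below is hypothetical,
chosen only to show the shape of a 64-bit CPUARMState member):

/* Hypothetical: update only the low word of a 64-bit state field,
 * leaving bits [63:32] untouched; offsetoflow32() picks the correct
 * half for the host endianness. */
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
store_cpu_field_low32(tmp, some_u64_field);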
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 93543da..815225b 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -17,8 +17,7 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-
-#include "exec/exec-all.h"
+#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
@@ -75,17 +74,6 @@ static int scale_by_log2_tag_granule(DisasContext *s, int x)
#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"
-/* Table based decoder typedefs - used when the relevant bits for decode
- * are too awkwardly scattered across the instruction (eg SIMD).
- */
-typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
-
-typedef struct AArch64DecodeTable {
- uint32_t pattern;
- uint32_t mask;
- AArch64DecodeFn *disas_fn;
-} AArch64DecodeTable;
-
/* initialize TCG globals. */
void a64_translate_init(void)
{
@@ -294,7 +282,7 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
- desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
ret = tcg_temp_new_i64();
@@ -326,7 +314,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
- desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
ret = tcg_temp_new_i64();
@@ -445,12 +433,6 @@ static void gen_rebuild_hflags(DisasContext *s)
gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}
-static void gen_exception_internal(int excp)
-{
- assert(excp_is_internal(excp));
- gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
-}
-
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
gen_a64_update_pc(s, 0);
@@ -628,7 +610,16 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
return v;
}
-/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
+static void clear_vec(DisasContext *s, int rd)
+{
+ unsigned ofs = fp_reg_offset(s, rd, MO_64);
+ unsigned vsz = vec_full_reg_size(s);
+
+ tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
+}
+
+/*
+ * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
* If SVE is not enabled, then there are only 128 bits in the vector.
*/
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
@@ -656,6 +647,68 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
write_fp_dreg(s, reg, tmp);
}
+/*
+ * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
+ * - if FPCR.NEP == 0, clear the high elements of reg
+ * - if FPCR.NEP == 1, set the high elements of reg from mergereg
+ * (i.e. merge the result with those high elements)
+ * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
+ */
+static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
+ TCGv_i64 v)
+{
+ if (!s->fpcr_nep) {
+ write_fp_dreg(s, reg, v);
+ return;
+ }
+
+ /*
+ * Move from mergereg to reg; this sets the high elements and
+ * clears the bits above 128 as a side effect.
+ */
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
+ vec_full_reg_offset(s, mergereg),
+ 16, vec_full_reg_size(s));
+ tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
+}
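
Worked example of the merging behaviour (illustrative values, not from
the patch):

/*
 * FADD d0, d1, d2 with FPCR.NEP == 1 and mergereg == 1:
 *   V0[63:0]   = d1 + d2       (the scalar result)
 *   V0[127:64] = V1[127:64]    (merged from rn)
 *   V0[>127]   = 0             (R_WKYLB)
 * With FPCR.NEP == 0, V0[127:64] is zeroed instead.
 */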
+
+/*
+ * Write a single-prec result, but only clear the higher elements
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
+ */
+static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
+ TCGv_i32 v)
+{
+ if (!s->fpcr_nep) {
+ write_fp_sreg(s, reg, v);
+ return;
+ }
+
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
+ vec_full_reg_offset(s, mergereg),
+ 16, vec_full_reg_size(s));
+ tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
+}
+
+/*
+ * Write a half-prec result, but only clear the higher elements
+ * of the destination register if FPCR.NEP is 0; otherwise preserve them.
+ * The caller must ensure that the top 16 bits of v are zero.
+ */
+static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
+ TCGv_i32 v)
+{
+ if (!s->fpcr_nep) {
+ write_fp_sreg(s, reg, v);
+ return;
+ }
+
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
+ vec_full_reg_offset(s, mergereg),
+ 16, vec_full_reg_size(s));
+ tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
+}
+
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
GVecGen2Fn *gvec_fn, int vece)
@@ -714,10 +767,10 @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
* an out-of-line helper.
*/
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
- int rm, bool is_fp16, int data,
+ int rm, ARMFPStatusFlavour fpsttype, int data,
gen_helper_gvec_3_ptr *fn)
{
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm), fpst,
@@ -736,14 +789,31 @@ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
}
/*
+ * Expand a 4-operand operation using an out-of-line helper that takes
+ * a pointer to the CPU env.
+ */
+static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, int ra, int data,
+ gen_helper_gvec_4_ptr *fn)
+{
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
+/*
* Expand a 4-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
*/
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
- int rm, int ra, bool is_fp16, int data,
+ int rm, int ra, ARMFPStatusFlavour fpsttype,
+ int data,
gen_helper_gvec_4_ptr *fn)
{
- TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
+ TCGv_ptr fpst = fpstatus_ptr(fpsttype);
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
@@ -751,6 +821,111 @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
+/*
+ * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
+ * These functions implement
+ * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
+ * which for float32 is
+ *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
+ * and similarly for the other float sizes.
+ */
+static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
+{
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
+
+ gen_vfp_negh(chs_s, s);
+ gen_vfp_absh(abs_s, s);
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i32(0x7c00),
+ s, chs_s);
+}
+
+static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
+{
+ TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
+
+ gen_vfp_negs(chs_s, s);
+ gen_vfp_abss(abs_s, s);
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i32(0x7f800000UL),
+ s, chs_s);
+}
+
+static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
+{
+ TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
+
+ gen_vfp_negd(chs_s, s);
+ gen_vfp_absd(abs_s, s);
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
+ s, chs_s);
+}
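
Concrete float32 values for the movcond above (illustration only):

/*
 * gen_vfp_ah_negs: s = 0x7fc00000 (qNaN) -> |s| = 0x7fc00000 is
 * unsigned-greater than 0x7f800000, so d = s (sign bit preserved);
 * s = 0x3f800000 (1.0f) -> d = 0xbf800000 (-1.0f) as usual.
 */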
+
+/*
+ * These functions implement
+ * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
+ * which for float32 is
+ *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
+ * and similarly for the other float sizes.
+ */
+static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
+{
+ TCGv_i32 abs_s = tcg_temp_new_i32();
+
+ gen_vfp_absh(abs_s, s);
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i32(0x7c00),
+ s, abs_s);
+}
+
+static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
+{
+ TCGv_i32 abs_s = tcg_temp_new_i32();
+
+ gen_vfp_abss(abs_s, s);
+ tcg_gen_movcond_i32(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i32(0x7f800000UL),
+ s, abs_s);
+}
+
+static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
+{
+ TCGv_i64 abs_s = tcg_temp_new_i64();
+
+ gen_vfp_absd(abs_s, s);
+ tcg_gen_movcond_i64(TCG_COND_GTU, d,
+ abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
+ s, abs_s);
+}
+
+static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
+{
+ if (dc->fpcr_ah) {
+ gen_vfp_ah_negh(d, s);
+ } else {
+ gen_vfp_negh(d, s);
+ }
+}
+
+static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
+{
+ if (dc->fpcr_ah) {
+ gen_vfp_ah_negs(d, s);
+ } else {
+ gen_vfp_negs(d, s);
+ }
+}
+
+static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
+{
+ if (dc->fpcr_ah) {
+ gen_vfp_ah_negd(d, s);
+ } else {
+ gen_vfp_negd(d, s);
+ }
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@@ -894,11 +1069,9 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
TCGv_i64 cf_64 = tcg_temp_new_i64();
TCGv_i64 vf_64 = tcg_temp_new_i64();
TCGv_i64 tmp = tcg_temp_new_i64();
- TCGv_i64 zero = tcg_constant_i64(0);
tcg_gen_extu_i32_i64(cf_64, cpu_CF);
- tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
- tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
+ tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
gen_set_NZ64(result);
@@ -912,12 +1085,10 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
TCGv_i32 t0_32 = tcg_temp_new_i32();
TCGv_i32 t1_32 = tcg_temp_new_i32();
TCGv_i32 tmp = tcg_temp_new_i32();
- TCGv_i32 zero = tcg_constant_i32(0);
tcg_gen_extrl_i64_i32(t0_32, t0);
tcg_gen_extrl_i64_i32(t1_32, t1);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
+ tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);
tcg_gen_mov_i32(cpu_ZF, cpu_NF);
tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
@@ -1199,14 +1370,14 @@ static bool fp_access_check_only(DisasContext *s)
{
if (s->fp_excp_el) {
assert(!s->fp_access_checked);
- s->fp_access_checked = true;
+ s->fp_access_checked = -1;
gen_exception_insn_el(s, 0, EXCP_UDEF,
syn_fp_access_trap(1, 0xe, false, 0),
s->fp_excp_el);
return false;
}
- s->fp_access_checked = true;
+ s->fp_access_checked = 1;
return true;
}
@@ -1224,6 +1395,49 @@ static bool fp_access_check(DisasContext *s)
}
/*
+ * Return <0 for unsupported element sizes (MO_16 requires FEAT_FP16);
+ * return 0 if FP access is disabled (an exception has been raised);
+ * otherwise return >0 for success.
+ */
+static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
+{
+ switch (esz) {
+ case MO_64:
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ }
+ return fp_access_check(s);
+}
+
+/* Likewise, but vector MO_64 must have two elements. */
+static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
+{
+ switch (esz) {
+ case MO_64:
+ if (!is_q) {
+ return -1;
+ }
+ break;
+ case MO_32:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ }
+ return fp_access_check(s);
+}
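
The tri-state return is consumed with a two-line idiom; this is the
pattern used by callers later in this patch:

    int check = fp_access_check_vector_hsd(s, a->q, esz);

    if (check <= 0) {
        /* 0: FP disabled, exception already raised -> insn "succeeds";
         * <0: unsupported size -> undecoded, caller returns false. */
        return check == 0;
    }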
+
+/*
* Check that SVE access is enabled. If it is, return true.
* If not, emit code to generate an appropriate exception and return false.
* This function corresponds to CheckSVEEnabled().
@@ -1231,23 +1445,23 @@ static bool fp_access_check(DisasContext *s)
bool sve_access_check(DisasContext *s)
{
if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
+ bool ret;
+
assert(dc_isar_feature(aa64_sme, s));
- if (!sme_sm_enabled_check(s)) {
- goto fail_exit;
- }
- } else if (s->sve_excp_el) {
+ ret = sme_sm_enabled_check(s);
+ s->sve_access_checked = (ret ? 1 : -1);
+ return ret;
+ }
+ if (s->sve_excp_el) {
+ /* Assert that we only raise one exception per instruction. */
+ assert(!s->sve_access_checked);
gen_exception_insn_el(s, 0, EXCP_UDEF,
syn_sve_access_trap(), s->sve_excp_el);
- goto fail_exit;
+ s->sve_access_checked = -1;
+ return false;
}
- s->sve_access_checked = true;
+ s->sve_access_checked = 1;
return fp_access_check(s);
-
- fail_exit:
- /* Assert that we only raise one exception per instruction. */
- assert(!s->sve_access_checked);
- s->sve_access_checked = true;
- return false;
}
/*
@@ -1275,8 +1489,9 @@ bool sme_enabled_check(DisasContext *s)
* sme_excp_el by itself for cpregs access checks.
*/
if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
- s->fp_access_checked = true;
- return sme_access_check(s);
+ bool ret = sme_access_check(s);
+ s->fp_access_checked = (ret ? 1 : -1);
+ return ret;
}
return fp_access_check_only(s);
}
@@ -1398,31 +1613,6 @@ static inline void gen_check_sp_alignment(DisasContext *s)
}
/*
- * This provides a simple table based table lookup decoder. It is
- * intended to be used when the relevant bits for decode are too
- * awkwardly placed and switch/if based logic would be confusing and
- * deeply nested. Since it's a linear search through the table, tables
- * should be kept small.
- *
- * It returns the first handler where insn & mask == pattern, or
- * NULL if there is no match.
- * The table is terminated by an empty mask (i.e. 0)
- */
-static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
- uint32_t insn)
-{
- const AArch64DecodeTable *tptr = table;
-
- while (tptr->mask) {
- if ((insn & tptr->mask) == tptr->pattern) {
- return tptr->disas_fn;
- }
- tptr++;
- }
- return NULL;
-}
-
-/*
* The instruction disassembly implemented here matches
* the instruction encoding classifications in chapter C4
* of the ARM Architecture Reference Manual (DDI0487B_a);
@@ -1507,7 +1697,14 @@ static void set_btype_for_br(DisasContext *s, int rn)
{
if (dc_isar_feature(aa64_bti, s)) {
/* BR to {x16,x17} or !guard -> 1, else 3. */
- set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
+ if (rn == 16 || rn == 17) {
+ set_btype(s, 1);
+ } else {
+ TCGv_i64 pc = tcg_temp_new_i64();
+ gen_pc_plus_diff(s, pc, 0);
+ gen_helper_guarded_page_br(tcg_env, pc);
+ s->btype = -1;
+ }
}
}
@@ -1521,8 +1718,8 @@ static void set_btype_for_blr(DisasContext *s)
static bool trans_BR(DisasContext *s, arg_r *a)
{
- gen_a64_set_pc(s, cpu_reg(s, a->rn));
set_btype_for_br(s, a->rn);
+ gen_a64_set_pc(s, cpu_reg(s, a->rn));
s->base.is_jmp = DISAS_JUMP;
return true;
}
@@ -1581,8 +1778,8 @@ static bool trans_BRAZ(DisasContext *s, arg_braz *a)
}
dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
- gen_a64_set_pc(s, dst);
set_btype_for_br(s, a->rn);
+ gen_a64_set_pc(s, dst);
s->base.is_jmp = DISAS_JUMP;
return true;
}
@@ -1613,6 +1810,10 @@ static bool trans_RETA(DisasContext *s, arg_reta *a)
{
TCGv_i64 dst;
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
+ }
+
dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
gen_a64_set_pc(s, dst);
s->base.is_jmp = DISAS_JUMP;
@@ -1936,6 +2137,15 @@ static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
return true;
}
+static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
+{
+ if (!dc_isar_feature(aa64_xs, s)) {
+ return false;
+ }
+ tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+ return true;
+}
+
static bool trans_ISB(DisasContext *s, arg_ISB *a)
{
/*
@@ -3543,7 +3753,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- mop = check_atomic_align(s, a->rn, a->sz);
+ mop = check_ordered_align(s, a->rn, 0, false, a->sz);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
a->rn != 31, mop);
/*
@@ -4657,6 +4867,88 @@ static bool trans_EXTR(DisasContext *s, arg_extract *a)
return true;
}
+static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
+{
+ if (fp_access_check(s)) {
+ int len = (a->len + 1) * 16;
+
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rm), tcg_env,
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ (len << 6) | (a->tbx << 5) | a->rn,
+ gen_helper_simd_tblx);
+ }
+ return true;
+}
+
+typedef int simd_permute_idx_fn(int i, int part, int elements);
+
+static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
+ simd_permute_idx_fn *fn, int part)
+{
+ MemOp esz = a->esz;
+ int datasize = a->q ? 16 : 8;
+ int elements = datasize >> esz;
+ TCGv_i64 tcg_res[2], tcg_ele;
+
+ if (esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
+ tcg_ele = tcg_temp_new_i64();
+
+ for (int i = 0; i < elements; i++) {
+ int o, w, idx;
+
+ idx = fn(i, part, elements);
+ read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
+ idx & (elements - 1), esz);
+
+ w = (i << (esz + 3)) / 64;
+ o = (i << (esz + 3)) % 64;
+ if (o == 0) {
+ tcg_gen_mov_i64(tcg_res[w], tcg_ele);
+ } else {
+ tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
+ }
+ }
+
+ for (int i = a->q; i >= 0; --i) {
+ write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
+ }
+ clear_vec_high(s, a->q, a->rd);
+ return true;
+}
+
+static int permute_load_uzp(int i, int part, int elements)
+{
+ return 2 * i + part;
+}
+
+TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
+TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
+
+static int permute_load_trn(int i, int part, int elements)
+{
+ return (i & 1) * elements + (i & ~1) + part;
+}
+
+TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
+TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
+
+static int permute_load_zip(int i, int part, int elements)
+{
+ return (i & 1) * elements + ((part * elements + i) >> 1);
+}
+
+TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
+TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
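
Index tables generated by the three loader functions, for elements == 8
(an index >= 8 selects from rm, via idx & elements):

/*
 *   UZP1 (part=0): 0  2  4  6  8 10 12 14   even elements of rn:rm
 *   UZP2 (part=1): 1  3  5  7  9 11 13 15   odd elements
 *   TRN1 (part=0): 0  8  2 10  4 12  6 14
 *   ZIP1 (part=0): 0  8  1  9  2 10  3 11   interleave low halves
 */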
+
/*
* Cryptographic AES, SHA, SHA512
*/
@@ -4703,7 +4995,6 @@ static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
TCGv_i32 tcg_op3 = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
- unsigned vsz, dofs;
read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
@@ -4715,9 +5006,7 @@ static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
/* Clear the whole register first, then store bits [127:96]. */
- vsz = vec_full_reg_size(s);
- dofs = vec_full_reg_offset(s, a->rd);
- tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
+ clear_vec(s, a->rd);
write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
}
return true;
@@ -4898,23 +5187,25 @@ typedef struct FPScalar {
void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
} FPScalar;
-static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
+static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *f, int mergereg,
+ ARMFPStatusFlavour fpsttype)
{
switch (a->esz) {
case MO_64:
if (fp_access_check(s)) {
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
TCGv_i64 t1 = read_fp_dreg(s, a->rm);
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
- write_fp_dreg(s, a->rd, t0);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
+ write_fp_dreg_merging(s, a->rd, mergereg, t0);
}
break;
case MO_32:
if (fp_access_check(s)) {
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
TCGv_i32 t1 = read_fp_sreg(s, a->rm);
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
- write_fp_sreg(s, a->rd, t0);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
+ write_fp_sreg_merging(s, a->rd, mergereg, t0);
}
break;
case MO_16:
@@ -4924,8 +5215,8 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
if (fp_access_check(s)) {
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
TCGv_i32 t1 = read_fp_hreg(s, a->rm);
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
- write_fp_sreg(s, a->rd, t0);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
+ write_fp_hreg_merging(s, a->rd, mergereg, t0);
}
break;
default:
@@ -4934,68 +5225,103 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
return true;
}
+static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
+ int mergereg)
+{
+ return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
+ a->esz == MO_16 ?
+ FPST_A64_F16 : FPST_A64);
+}
+
+static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *fnormal, const FPScalar *fah,
+ int mergereg)
+{
+ return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
+ mergereg, select_ah_fpst(s, a->esz));
+}
+
+/* Some insns need to call different helpers when FPCR.AH == 1 */
+static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *fnormal,
+ const FPScalar *fah,
+ int mergereg)
+{
+ return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
+}
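
To make the split between the two AH-aware wrappers explicit:

/*
 * do_fp3_scalar_2fn only swaps in the FPCR.AH helper set and keeps the
 * usual FPST_A64/FPST_A64_F16 status; do_fp3_scalar_ah_2fn additionally
 * switches to the AH float_status via select_ah_fpst(), which FRECPS
 * and FRSQRTS below require.
 */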
+
static const FPScalar f_scalar_fadd = {
gen_helper_vfp_addh,
gen_helper_vfp_adds,
gen_helper_vfp_addd,
};
-TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
+TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
static const FPScalar f_scalar_fsub = {
gen_helper_vfp_subh,
gen_helper_vfp_subs,
gen_helper_vfp_subd,
};
-TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
+TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
static const FPScalar f_scalar_fdiv = {
gen_helper_vfp_divh,
gen_helper_vfp_divs,
gen_helper_vfp_divd,
};
-TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
+TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
static const FPScalar f_scalar_fmul = {
gen_helper_vfp_mulh,
gen_helper_vfp_muls,
gen_helper_vfp_muld,
};
-TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
+TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
static const FPScalar f_scalar_fmax = {
- gen_helper_advsimd_maxh,
+ gen_helper_vfp_maxh,
gen_helper_vfp_maxs,
gen_helper_vfp_maxd,
};
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
+static const FPScalar f_scalar_fmax_ah = {
+ gen_helper_vfp_ah_maxh,
+ gen_helper_vfp_ah_maxs,
+ gen_helper_vfp_ah_maxd,
+};
+TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
static const FPScalar f_scalar_fmin = {
- gen_helper_advsimd_minh,
+ gen_helper_vfp_minh,
gen_helper_vfp_mins,
gen_helper_vfp_mind,
};
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
+static const FPScalar f_scalar_fmin_ah = {
+ gen_helper_vfp_ah_minh,
+ gen_helper_vfp_ah_mins,
+ gen_helper_vfp_ah_mind,
+};
+TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
static const FPScalar f_scalar_fmaxnm = {
- gen_helper_advsimd_maxnumh,
+ gen_helper_vfp_maxnumh,
gen_helper_vfp_maxnums,
gen_helper_vfp_maxnumd,
};
-TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
+TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
static const FPScalar f_scalar_fminnm = {
- gen_helper_advsimd_minnumh,
+ gen_helper_vfp_minnumh,
gen_helper_vfp_minnums,
gen_helper_vfp_minnumd,
};
-TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
static const FPScalar f_scalar_fmulx = {
gen_helper_advsimd_mulxh,
gen_helper_vfp_mulxs,
gen_helper_vfp_mulxd,
};
-TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
+TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
@@ -5015,47 +5341,70 @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
gen_vfp_negd(d, d);
}
+static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_mulh(d, n, m, s);
+ gen_vfp_ah_negh(d, d);
+}
+
+static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muls(d, n, m, s);
+ gen_vfp_ah_negs(d, d);
+}
+
+static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_muld(d, n, m, s);
+ gen_vfp_ah_negd(d, d);
+}
+
static const FPScalar f_scalar_fnmul = {
gen_fnmul_h,
gen_fnmul_s,
gen_fnmul_d,
};
-TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
+static const FPScalar f_scalar_ah_fnmul = {
+ gen_fnmul_ah_h,
+ gen_fnmul_ah_s,
+ gen_fnmul_ah_d,
+};
+TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
static const FPScalar f_scalar_fcmeq = {
gen_helper_advsimd_ceq_f16,
gen_helper_neon_ceq_f32,
gen_helper_neon_ceq_f64,
};
-TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
+TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
static const FPScalar f_scalar_fcmge = {
gen_helper_advsimd_cge_f16,
gen_helper_neon_cge_f32,
gen_helper_neon_cge_f64,
};
-TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
+TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
static const FPScalar f_scalar_fcmgt = {
gen_helper_advsimd_cgt_f16,
gen_helper_neon_cgt_f32,
gen_helper_neon_cgt_f64,
};
-TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
+TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
static const FPScalar f_scalar_facge = {
gen_helper_advsimd_acge_f16,
gen_helper_neon_acge_f32,
gen_helper_neon_acge_f64,
};
-TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
+TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
static const FPScalar f_scalar_facgt = {
gen_helper_advsimd_acgt_f16,
gen_helper_neon_acgt_f32,
gen_helper_neon_acgt_f64,
};
-TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
+TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
@@ -5075,26 +5424,116 @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
gen_vfp_absd(d, d);
}
+static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subh(d, n, m, s);
+ gen_vfp_ah_absh(d, d);
+}
+
+static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subs(d, n, m, s);
+ gen_vfp_ah_abss(d, d);
+}
+
+static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subd(d, n, m, s);
+ gen_vfp_ah_absd(d, d);
+}
+
static const FPScalar f_scalar_fabd = {
gen_fabd_h,
gen_fabd_s,
gen_fabd_d,
};
-TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
+static const FPScalar f_scalar_ah_fabd = {
+ gen_fabd_ah_h,
+ gen_fabd_ah_s,
+ gen_fabd_ah_d,
+};
+TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
static const FPScalar f_scalar_frecps = {
gen_helper_recpsf_f16,
gen_helper_recpsf_f32,
gen_helper_recpsf_f64,
};
-TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
+static const FPScalar f_scalar_ah_frecps = {
+ gen_helper_recpsf_ah_f16,
+ gen_helper_recpsf_ah_f32,
+ gen_helper_recpsf_ah_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
static const FPScalar f_scalar_frsqrts = {
gen_helper_rsqrtsf_f16,
gen_helper_rsqrtsf_f32,
gen_helper_rsqrtsf_f64,
};
-TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
+static const FPScalar f_scalar_ah_frsqrts = {
+ gen_helper_rsqrtsf_ah_f16,
+ gen_helper_rsqrtsf_ah_f32,
+ gen_helper_rsqrtsf_ah_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
+
+static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
+ const FPScalar *f, bool swap)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = read_fp_dreg(s, a->rn);
+ TCGv_i64 t1 = tcg_constant_i64(0);
+ if (swap) {
+ f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
+ } else {
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
+ }
+ write_fp_dreg(s, a->rd, t0);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_sreg(s, a->rn);
+ TCGv_i32 t1 = tcg_constant_i32(0);
+ if (swap) {
+ f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
+ } else {
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
+ }
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = read_fp_hreg(s, a->rn);
+ TCGv_i32 t1 = tcg_constant_i32(0);
+ if (swap) {
+ f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
+ } else {
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
+ }
+ write_fp_sreg(s, a->rd, t0);
+ }
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
+TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
+TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
+TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
+TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
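
A note on the operand swap in the last two lines:

/*
 * There are no dedicated less-than helpers: FCMLT0 (rn < 0) is emitted
 * as (0 > rn) by passing swap=true to reuse the FCMGT helper, and
 * FCMLE0 (rn <= 0) as (0 >= rn) via FCMGE.
 */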
static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
MemOp sgn_n, MemOp sgn_m,
@@ -5235,6 +5674,43 @@ static const ENVScalar2 f_scalar_sqrdmulh = {
};
TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
+typedef struct ENVScalar3 {
+ NeonGenThreeOpEnvFn *gen_hs[2];
+} ENVScalar3;
+
+static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
+ const ENVScalar3 *f)
+{
+ TCGv_i32 t0, t1, t2;
+
+ if (a->esz != MO_16 && a->esz != MO_32) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ read_vec_element_i32(s, t0, a->rn, 0, a->esz);
+ read_vec_element_i32(s, t1, a->rm, 0, a->esz);
+ read_vec_element_i32(s, t2, a->rd, 0, a->esz);
+ f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
+ write_fp_sreg(s, a->rd, t0);
+ return true;
+}
+
+static const ENVScalar3 f_scalar_sqrdmlah = {
+ { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
+};
+TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
+
+static const ENVScalar3 f_scalar_sqrdmlsh = {
+ { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
+};
+TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
+
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
{
if (fp_access_check(s)) {
@@ -5253,201 +5729,253 @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
-static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
- gen_helper_gvec_3_ptr * const fns[3])
+static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
+ int data,
+ gen_helper_gvec_3_ptr * const fns[3],
+ ARMFPStatusFlavour fpsttype)
{
MemOp esz = a->esz;
+ int check = fp_access_check_vector_hsd(s, a->q, esz);
- switch (esz) {
- case MO_64:
- if (!a->q) {
- return false;
- }
- break;
- case MO_32:
- break;
- case MO_16:
- if (!dc_isar_feature(aa64_fp16, s)) {
- return false;
- }
- break;
- default:
- return false;
- }
- if (fp_access_check(s)) {
- gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
- esz == MO_16, 0, fns[esz - 1]);
+ if (check <= 0) {
+ return check == 0;
}
+
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
+ data, fns[esz - 1]);
return true;
}
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
+ gen_helper_gvec_3_ptr * const fns[3])
+{
+ return do_fp3_vector_with_fpsttype(s, a, data, fns,
+ a->esz == MO_16 ?
+ FPST_A64_F16 : FPST_A64);
+}
+
+static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
+ gen_helper_gvec_3_ptr * const fnormal[3],
+ gen_helper_gvec_3_ptr * const fah[3])
+{
+ return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
+}
+
+static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
+ gen_helper_gvec_3_ptr * const fnormal[3],
+ gen_helper_gvec_3_ptr * const fah[3])
+{
+ return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
+ select_ah_fpst(s, a->esz));
+}
+
static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
gen_helper_gvec_fadd_h,
gen_helper_gvec_fadd_s,
gen_helper_gvec_fadd_d,
};
-TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd)
+TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
gen_helper_gvec_fsub_h,
gen_helper_gvec_fsub_s,
gen_helper_gvec_fsub_d,
};
-TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub)
+TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
gen_helper_gvec_fdiv_h,
gen_helper_gvec_fdiv_s,
gen_helper_gvec_fdiv_d,
};
-TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv)
+TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
gen_helper_gvec_fmul_h,
gen_helper_gvec_fmul_s,
gen_helper_gvec_fmul_d,
};
-TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul)
+TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
gen_helper_gvec_fmax_h,
gen_helper_gvec_fmax_s,
gen_helper_gvec_fmax_d,
};
-TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax)
+static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
+ gen_helper_gvec_ah_fmax_h,
+ gen_helper_gvec_ah_fmax_s,
+ gen_helper_gvec_ah_fmax_d,
+};
+TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
gen_helper_gvec_fmin_h,
gen_helper_gvec_fmin_s,
gen_helper_gvec_fmin_d,
};
-TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin)
+static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
+ gen_helper_gvec_ah_fmin_h,
+ gen_helper_gvec_ah_fmin_s,
+ gen_helper_gvec_ah_fmin_d,
+};
+TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
gen_helper_gvec_fmaxnum_h,
gen_helper_gvec_fmaxnum_s,
gen_helper_gvec_fmaxnum_d,
};
-TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm)
+TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
gen_helper_gvec_fminnum_h,
gen_helper_gvec_fminnum_s,
gen_helper_gvec_fminnum_d,
};
-TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm)
+TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
gen_helper_gvec_fmulx_h,
gen_helper_gvec_fmulx_s,
gen_helper_gvec_fmulx_d,
};
-TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx)
+TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
gen_helper_gvec_vfma_h,
gen_helper_gvec_vfma_s,
gen_helper_gvec_vfma_d,
};
-TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla)
+TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
gen_helper_gvec_vfms_h,
gen_helper_gvec_vfms_s,
gen_helper_gvec_vfms_d,
};
-TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls)
+static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
+ gen_helper_gvec_ah_vfms_h,
+ gen_helper_gvec_ah_vfms_s,
+ gen_helper_gvec_ah_vfms_d,
+};
+TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
gen_helper_gvec_fceq_h,
gen_helper_gvec_fceq_s,
gen_helper_gvec_fceq_d,
};
-TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq)
+TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
gen_helper_gvec_fcge_h,
gen_helper_gvec_fcge_s,
gen_helper_gvec_fcge_d,
};
-TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge)
+TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
gen_helper_gvec_fcgt_h,
gen_helper_gvec_fcgt_s,
gen_helper_gvec_fcgt_d,
};
-TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt)
+TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
gen_helper_gvec_facge_h,
gen_helper_gvec_facge_s,
gen_helper_gvec_facge_d,
};
-TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge)
+TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
gen_helper_gvec_facgt_h,
gen_helper_gvec_facgt_s,
gen_helper_gvec_facgt_d,
};
-TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)
+TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
gen_helper_gvec_fabd_h,
gen_helper_gvec_fabd_s,
gen_helper_gvec_fabd_d,
};
-TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
+static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
+ gen_helper_gvec_ah_fabd_h,
+ gen_helper_gvec_ah_fabd_s,
+ gen_helper_gvec_ah_fabd_d,
+};
+TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
gen_helper_gvec_recps_h,
gen_helper_gvec_recps_s,
gen_helper_gvec_recps_d,
};
-TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
+static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
+ gen_helper_gvec_ah_recps_h,
+ gen_helper_gvec_ah_recps_s,
+ gen_helper_gvec_ah_recps_d,
+};
+TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
gen_helper_gvec_rsqrts_h,
gen_helper_gvec_rsqrts_s,
gen_helper_gvec_rsqrts_d,
};
-TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
+static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
+ gen_helper_gvec_ah_rsqrts_h,
+ gen_helper_gvec_ah_rsqrts_s,
+ gen_helper_gvec_ah_rsqrts_d,
+};
+TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
gen_helper_gvec_faddp_h,
gen_helper_gvec_faddp_s,
gen_helper_gvec_faddp_d,
};
-TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp)
+TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
gen_helper_gvec_fmaxp_h,
gen_helper_gvec_fmaxp_s,
gen_helper_gvec_fmaxp_d,
};
-TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp)
+static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
+ gen_helper_gvec_ah_fmaxp_h,
+ gen_helper_gvec_ah_fmaxp_s,
+ gen_helper_gvec_ah_fmaxp_d,
+};
+TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
gen_helper_gvec_fminp_h,
gen_helper_gvec_fminp_s,
gen_helper_gvec_fminp_d,
};
-TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp)
+static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
+ gen_helper_gvec_ah_fminp_h,
+ gen_helper_gvec_ah_fminp_s,
+ gen_helper_gvec_ah_fminp_d,
+};
+TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
gen_helper_gvec_fmaxnump_h,
gen_helper_gvec_fmaxnump_s,
gen_helper_gvec_fmaxnump_d,
};
-TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp)
+TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
gen_helper_gvec_fminnump_h,
gen_helper_gvec_fminnump_s,
gen_helper_gvec_fminnump_d,
};
-TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp)
+TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
{
@@ -5552,6 +6080,438 @@ TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
+TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
+TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
+
+static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
+ gen_helper_gvec_4 *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+ }
+ return true;
+}
+
+static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+ }
+ return true;
+}
+
+TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
+TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
+TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
+TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
+TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
+TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
+TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
+TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
+
+static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
+{
+ if (!dc_isar_feature(aa64_bf16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ /* Q bit selects BFMLALB vs BFMLALT. */
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
+ s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
+ gen_helper_gvec_bfmlal);
+ }
+ return true;
+}
+
+static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
+ gen_helper_gvec_fcaddh,
+ gen_helper_gvec_fcadds,
+ gen_helper_gvec_fcaddd,
+};
+/*
+ * Encode FPCR.AH into the data so the helper knows whether the
+ * negations it does should avoid flipping the sign bit on a NaN.
+ */
+TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
+ f_vector_fcadd)
+TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
+ f_vector_fcadd)
+
+static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
+{
+ static gen_helper_gvec_4_ptr * const fn[] = {
+ [MO_16] = gen_helper_gvec_fcmlah,
+ [MO_32] = gen_helper_gvec_fcmlas,
+ [MO_64] = gen_helper_gvec_fcmlad,
+ };
+ int check;
+
+ if (!dc_isar_feature(aa64_fcma, s)) {
+ return false;
+ }
+
+ check = fp_access_check_vector_hsd(s, a->q, a->esz);
+ if (check <= 0) {
+ return check == 0;
+ }
+
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
+ a->rot | (s->fpcr_ah << 2), fn[a->esz]);
+ return true;
+}
+
+/*
+ * Widening vector x vector/indexed.
+ *
+ * These read from the top or bottom half of a 128-bit vector.
+ * After widening, optionally accumulate with a 128-bit vector.
+ * Implement these inline, as the number of elements is limited
+ * and the related SVE and SME operations on larger vectors use
+ * even/odd elements instead of top/bottom half.
+ *
+ * If idx >= 0, operand 2 is indexed, otherwise vector.
+ * If acc, operand 0 is loaded with rd.
+ */
+
+/* For low half, iterating up. */
+static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
+ int rd, int rn, int rm, int idx,
+ NeonGenTwo64OpFn *fn, bool acc)
+{
+ TCGv_i64 tcg_op0 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ MemOp esz = memop & MO_SIZE;
+ int half = 8 >> esz;
+ int top_swap, top_half;
+
+ /* There are no 64x64->128 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ if (idx >= 0) {
+ read_vec_element(s, tcg_op2, rm, idx, memop);
+ }
+
+ /*
+ * For top half inputs, iterate forward; backward for bottom half.
+ * This means the store to the destination will not occur until
+ * overlapping inputs have been consumed.
+ * Use top_swap to conditionally invert the forward iteration index.
+ */
+ top_swap = top ? 0 : half - 1;
+ top_half = top ? half : 0;
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
+ if (idx < 0) {
+ read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
+ }
+ if (acc) {
+ read_vec_element(s, tcg_op0, rd, elt, memop + 1);
+ }
+ fn(tcg_op0, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_op0, rd, elt, esz + 1);
+ }
+ clear_vec_high(s, 1, rd);
+ return true;
+}
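
Iteration order for the top_swap trick, spelled out for esz == MO_16
(half == 4):

/*
 * top == 0: top_swap == 3, so elt runs 3,2,1,0 -- each 32-bit result
 *   is written to rd element 'elt' only after its 16-bit inputs (the
 *   same vector positions) have been read.
 * top == 1: top_swap == 0, elt runs 0..3, reading inputs from
 *   elements 4..7 (elt + top_half) while writing rd elements 0..3.
 */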
+
+static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t, n, m);
+ tcg_gen_sub_i64(d, d, t);
+}
+
+TRANS(SMULL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_mul_i64, false)
+TRANS(UMULL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_mul_i64, false)
+TRANS(SMLAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_muladd_i64, true)
+TRANS(UMLAL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_muladd_i64, true)
+TRANS(SMLSL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_mulsub_i64, true)
+TRANS(UMLSL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_mulsub_i64, true)
+
+TRANS(SMULL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ tcg_gen_mul_i64, false)
+TRANS(UMULL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ tcg_gen_mul_i64, false)
+TRANS(SMLAL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_muladd_i64, true)
+TRANS(UMLAL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_muladd_i64, true)
+TRANS(SMLSL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_mulsub_i64, true)
+TRANS(UMLSL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_mulsub_i64, true)
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t1, n, m);
+ tcg_gen_sub_i64(t2, m, n);
+ tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t1, n, m);
+ tcg_gen_sub_i64(t2, m, n);
+ tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+TRANS(SADDL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_add_i64, false)
+TRANS(UADDL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_add_i64, false)
+TRANS(SSUBL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_sub_i64, false)
+TRANS(USUBL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_sub_i64, false)
+TRANS(SABDL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_sabd_i64, false)
+TRANS(UABDL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_uabd_i64, false)
+TRANS(SABAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_saba_i64, true)
+TRANS(UABAL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_uaba_i64, true)
+
+static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_mul_i64(d, n, m);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
+}
+
+static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_mul_i64(d, n, m);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
+}
+
+static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
+ tcg_gen_neg_i64(t, t);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
+ tcg_gen_neg_i64(t, t);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
+}
+
+TRANS(SQDMULL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
+TRANS(SQDMULL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
+static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
+ MemOp sign, bool sub)
+{
+ TCGv_i64 tcg_op0, tcg_op1;
+ MemOp esz = a->esz;
+ int half = 8 >> esz;
+ bool top = a->q;
+ int top_swap = top ? 0 : half - 1;
+ int top_half = top ? half : 0;
+
+ /* There are no 64x64->128 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ tcg_op0 = tcg_temp_new_i64();
+ tcg_op1 = tcg_temp_new_i64();
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
+ read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+ if (sub) {
+ tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+ } else {
+ tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+ }
+ write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
+ }
+ clear_vec_high(s, 1, a->rd);
+ return true;
+}
+
+TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
+TRANS(UADDW, do_addsub_wide, a, 0, false)
+TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
+TRANS(USUBW, do_addsub_wide, a, 0, true)
+
+static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
+ bool sub, bool round)
+{
+ TCGv_i64 tcg_op0, tcg_op1;
+ MemOp esz = a->esz;
+ int half = 8 >> esz;
+ bool top = a->q;
+ int ebits = 8 << esz;
+ uint64_t rbit = 1ull << (ebits - 1);
+ int top_swap, top_half;
+
+ /* There are no 128x128->64 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ tcg_op0 = tcg_temp_new_i64();
+ tcg_op1 = tcg_temp_new_i64();
+
+ /*
+ * For top half inputs, iterate backward; forward for bottom half.
+ * This means the store to the destination will not occur until
+ * overlapping inputs have been consumed.
+ */
+ top_swap = top ? half - 1 : 0;
+ top_half = top ? half : 0;
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
+ read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+ if (sub) {
+ tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+ } else {
+ tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+ }
+ if (round) {
+ tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
+ }
+ tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
+ write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
+ }
+ clear_vec_high(s, top, a->rd);
+ return true;
+}
+
+TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
+TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
+TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
+TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
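
Worked rounding values for the high-narrowing pair, with esz == MO_8
(ebits == 8, rbit == 0x80):

/*
 * sum = 0x0180: RADDHN -> (0x0180 + 0x80) >> 8 == 0x02,
 *               ADDHN  ->  0x0180         >> 8 == 0x01;
 * sum = 0x017f: both narrow to 0x01.
 */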
+
+static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
+{
+ if (fp_access_check(s)) {
+ /* The Q field specifies lo/hi half input for these insns. */
+ gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
+ }
+ return true;
+}
+
+TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
+TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
/*
* Advanced SIMD scalar/vector x indexed element
@@ -5566,8 +6526,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i64 t1 = tcg_temp_new_i64();
read_vec_element(s, t1, a->rm, a->idx, MO_64);
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
- write_fp_dreg(s, a->rd, t0);
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
+ write_fp_dreg_merging(s, a->rd, a->rn, t0);
}
break;
case MO_32:
@@ -5576,8 +6536,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i32 t1 = tcg_temp_new_i32();
read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
- write_fp_sreg(s, a->rd, t0);
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
+ write_fp_sreg_merging(s, a->rd, a->rn, t0);
}
break;
case MO_16:
@@ -5589,8 +6549,8 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i32 t1 = tcg_temp_new_i32();
read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
- write_fp_sreg(s, a->rd, t0);
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
+ write_fp_hreg_merging(s, a->rd, a->rn, t0);
}
break;
default:
@@ -5613,10 +6573,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element(s, t2, a->rm, a->idx, MO_64);
if (neg) {
- gen_vfp_negd(t1, t1);
+ gen_vfp_maybe_ah_negd(s, t1, t1);
}
- gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
- write_fp_dreg(s, a->rd, t0);
+ gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
+ write_fp_dreg_merging(s, a->rd, a->rd, t0);
}
break;
case MO_32:
@@ -5627,10 +6587,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
if (neg) {
- gen_vfp_negs(t1, t1);
+ gen_vfp_maybe_ah_negs(s, t1, t1);
}
- gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
- write_fp_sreg(s, a->rd, t0);
+ gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
+ write_fp_sreg_merging(s, a->rd, a->rd, t0);
}
break;
case MO_16:
@@ -5644,11 +6604,11 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
if (neg) {
- gen_vfp_negh(t1, t1);
+ gen_vfp_maybe_ah_negh(s, t1, t1);
}
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
- fpstatus_ptr(FPST_FPCR_F16));
- write_fp_sreg(s, a->rd, t0);
+ fpstatus_ptr(FPST_A64_F16));
+ write_fp_hreg_merging(s, a->rd, a->rd, t0);
}
break;
default:
@@ -5681,31 +6641,71 @@ static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
+static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
+ const ENVScalar3 *f)
+{
+ if (a->esz < MO_16 || a->esz > MO_32) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, a->esz);
+ read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
+ read_vec_element_i32(s, t2, a->rd, 0, a->esz);
+ f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
+ write_fp_sreg(s, a->rd, t0);
+ }
+ return true;
+}
+
+TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
+TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
+
+static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
+ NeonGenTwo64OpFn *fn, bool acc)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ if (acc) {
+ read_vec_element(s, t0, a->rd, 0, a->esz + 1);
+ }
+ read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
+ read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
+ fn(t0, t1, t2);
+
+ /* Clear the whole register first, then store scalar. */
+ clear_vec(s, a->rd);
+ write_vec_element(s, t0, a->rd, 0, a->esz + 1);
+ }
+ return true;
+}
+
+TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
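
The widening helpers above implement the saturating doubling multiplies. A sketch of the 16-bit lane arithmetic (sqdmull16 is illustrative; the real gen_sqdmull_h path also sets FPSR.QC on saturation):

    #include <stdint.h>

    /* SQDMULL lane arithmetic, 16x16 -> 32: multiply, double, saturate. */
    static int32_t sqdmull16(int16_t n, int16_t m)
    {
        int32_t product = (int32_t)n * m;   /* exact in 32 bits */
        if (product == 0x40000000) {        /* only INT16_MIN * INT16_MIN */
            return INT32_MAX;               /* saturate; hardware sets QC */
        }
        return product * 2;
    }

SQDMLAL/SQDMLSL then accumulate this doubled product into the destination element, which is why do_scalar_muladd_widening_idx pre-loads t0 from rd when acc is set.
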
static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
MemOp esz = a->esz;
+ int check = fp_access_check_vector_hsd(s, a->q, esz);
- switch (esz) {
- case MO_64:
- if (!a->q) {
- return false;
- }
- break;
- case MO_32:
- break;
- case MO_16:
- if (!dc_isar_feature(aa64_fp16, s)) {
- return false;
- }
- break;
- default:
- g_assert_not_reached();
- }
- if (fp_access_check(s)) {
- gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
- esz == MO_16, a->idx, fns[esz - 1]);
+ if (check <= 0) {
+ return check == 0;
}
+
+ gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
+ a->idx, fns[esz - 1]);
return true;
}
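
The "check <= 0 ... return check == 0" idiom recurs throughout this patch. The tri-state helper distinguishes an unallocated encoding (negative: the caller returns false and the insn UNDEFs) from a trapped FP access (zero: the exception is already raised, so the insn counts as handled) from permission to translate (positive). A sketch of its presumed shape, reconstructed from the switch it replaces (the _sketch name is hypothetical):

    /* Sketch: < 0 unallocated, == 0 trapped, > 0 ok to translate. */
    static int fp_access_check_vector_hsd_sketch(DisasContext *s,
                                                 bool q, MemOp esz)
    {
        if (esz == MO_64 && !q) {
            return -1;              /* no half-width double-precision vector */
        }
        if (esz == MO_16 && !dc_isar_feature(aa64_fp16, s)) {
            return -1;              /* FEAT_FP16 required for MO_16 */
        }
        return fp_access_check(s);  /* bool: 0 if trapped, 1 if allowed */
    }
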
@@ -5725,34 +6725,27 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
{
- static gen_helper_gvec_4_ptr * const fns[3] = {
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
+ static gen_helper_gvec_4_ptr * const fns[3][3] = {
+ { gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d },
+ { gen_helper_gvec_fmls_idx_h,
+ gen_helper_gvec_fmls_idx_s,
+ gen_helper_gvec_fmls_idx_d },
+ { gen_helper_gvec_ah_fmls_idx_h,
+ gen_helper_gvec_ah_fmls_idx_s,
+ gen_helper_gvec_ah_fmls_idx_d },
};
MemOp esz = a->esz;
+ int check = fp_access_check_vector_hsd(s, a->q, esz);
- switch (esz) {
- case MO_64:
- if (!a->q) {
- return false;
- }
- break;
- case MO_32:
- break;
- case MO_16:
- if (!dc_isar_feature(aa64_fp16, s)) {
- return false;
- }
- break;
- default:
- g_assert_not_reached();
- }
- if (fp_access_check(s)) {
- gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- esz == MO_16, (a->idx << 1) | neg,
- fns[esz - 1]);
+ if (check <= 0) {
+ return check == 0;
}
+
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
+ a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
return true;
}
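
The fns table above grew a second dimension: row 0 is plain FMLA, row 1 the standard FMLS (negated product operand), and row 2 the FMLS variant used when FPCR.AH is set, where FEAT_AFP prescribes alternate handling of the negation (notably around NaN operands). The row selection, spelled out as a trivial illustration:

    static int fmls_table_row(bool neg, bool fpcr_ah)
    {
        return neg ? 1 + fpcr_ah : 0;   /* 0: fmla, 1: fmls, 2: ah_fmls */
    }

Note also that the neg bit is no longer folded into the gvec index word (the old (a->idx << 1) | neg disappears); the sign handling moved entirely into the helper choice.
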
@@ -5838,6 +6831,90 @@ static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
};
TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
+static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
+ gen_helper_neon_sqrdmlah_idx_h,
+ gen_helper_neon_sqrdmlah_idx_s,
+};
+TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
+ f_vector_idx_sqrdmlah)
+
+static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
+ gen_helper_neon_sqrdmlsh_idx_h,
+ gen_helper_neon_sqrdmlsh_idx_s,
+};
+TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
+ f_vector_idx_sqrdmlsh)
+
+static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
+ gen_helper_gvec_4 *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
+ }
+ return true;
+}
+
+static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
+ }
+ return true;
+}
+
+TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
+TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
+TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
+ gen_helper_gvec_sudot_idx_b)
+TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
+ gen_helper_gvec_usdot_idx_b)
+TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
+ gen_helper_gvec_bfdot_idx)
+
+static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
+{
+ if (!dc_isar_feature(aa64_bf16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ /* Q bit selects BFMLALB vs BFMLALT. */
+ gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
+ s->fpcr_ah ? FPST_AH : FPST_A64,
+ (a->idx << 1) | a->q,
+ gen_helper_gvec_bfmlal_idx);
+ }
+ return true;
+}
+
+static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
+{
+ gen_helper_gvec_4_ptr *fn;
+
+ if (!dc_isar_feature(aa64_fcma, s)) {
+ return false;
+ }
+ switch (a->esz) {
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ fn = gen_helper_gvec_fcmlah_idx;
+ break;
+ case MO_32:
+ fn = gen_helper_gvec_fcmlas_idx;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
+ (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
+ }
+ return true;
+}
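
FCMLA packs three decode fields into the gvec immediate. A sketch of how the other side could unpack them (fcmla_unpack is hypothetical; the real helpers pull these bits out of simd_data(desc)):

    #include <stdbool.h>
    #include <stdint.h>

    static void fcmla_unpack(uint32_t data, bool *fpcr_ah, int *idx, int *rot)
    {
        *rot = data & 3;             /* rotation: 0, 90, 180, 270 degrees */
        *idx = (data >> 2) & 3;      /* index of the complex element pair */
        *fpcr_ah = (data >> 4) & 1;  /* FPCR.AH alternate NaN handling */
    }
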
+
/*
* Advanced SIMD scalar pairwise
*/
@@ -5852,7 +6929,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element(s, t0, a->rn, 0, MO_64);
read_vec_element(s, t1, a->rn, 1, MO_64);
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_dreg(s, a->rd, t0);
}
break;
@@ -5863,7 +6940,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element_i32(s, t0, a->rn, 0, MO_32);
read_vec_element_i32(s, t1, a->rn, 1, MO_32);
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_sreg(s, a->rd, t0);
}
break;
@@ -5877,7 +6954,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element_i32(s, t0, a->rn, 0, MO_16);
read_vec_element_i32(s, t1, a->rn, 1, MO_16);
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
write_fp_sreg(s, a->rd, t0);
}
break;
@@ -5887,9 +6964,16 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
return true;
}
+static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
+ const FPScalar *fnormal,
+ const FPScalar *fah)
+{
+ return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
+}
+
TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
-TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
-TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
+TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
+TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
@@ -5915,22 +6999,10 @@ static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
{
TCGv_i64 t_true, t_false;
DisasCompare64 c;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- switch (a->esz) {
- case MO_32:
- case MO_64:
- break;
- case MO_16:
- if (!dc_isar_feature(aa64_fp16, s)) {
- return false;
- }
- break;
- default:
- return false;
- }
-
- if (!fp_access_check(s)) {
- return true;
+ if (check <= 0) {
+ return check == 0;
}
/* Zero extend sreg & hreg inputs to 64 bits now. */
@@ -5952,6 +7024,54 @@ static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
}
/*
+ * Advanced SIMD Extract
+ */
+
+static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 lo = read_fp_dreg(s, a->rn);
+ if (a->imm != 0) {
+ TCGv_i64 hi = read_fp_dreg(s, a->rm);
+ tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
+ }
+ write_fp_dreg(s, a->rd, lo);
+ }
+ return true;
+}
+
+static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
+{
+ TCGv_i64 lo, hi;
+ int pos = (a->imm & 7) * 8;
+ int elt = a->imm >> 3;
+
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ lo = tcg_temp_new_i64();
+ hi = tcg_temp_new_i64();
+
+ read_vec_element(s, lo, a->rn, elt, MO_64);
+ elt++;
+ read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
+ elt++;
+
+ if (pos != 0) {
+ TCGv_i64 hh = tcg_temp_new_i64();
+ tcg_gen_extract2_i64(lo, lo, hi, pos);
+ read_vec_element(s, hh, a->rm, elt & 1, MO_64);
+ tcg_gen_extract2_i64(hi, hi, hh, pos);
+ }
+
+ write_vec_element(s, lo, a->rd, 0, MO_64);
+ write_vec_element(s, hi, a->rd, 1, MO_64);
+ clear_vec_high(s, true, a->rd);
+ return true;
+}
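
The extract2 decomposition above is compact but not self-evident. Byte-level reference semantics of EXT, as a sketch (ext_q_ref is hypothetical): the result is a sliding 16-byte window into the concatenation Vm:Vn, starting at byte imm.

    #include <stdint.h>
    #include <string.h>

    static void ext_q_ref(uint8_t rd[16], const uint8_t rn[16],
                          const uint8_t rm[16], unsigned imm /* 0..15 */)
    {
        uint8_t concat[32];
        memcpy(concat, rn, 16);        /* low bytes come from Vn */
        memcpy(concat + 16, rm, 16);   /* high bytes from Vm */
        memcpy(rd, concat + imm, 16);  /* take 16 bytes starting at imm */
    }

The translated version achieves the same with at most three 64-bit reads and two extract2 operations; the "elt & 2 ? a->rm : a->rn" test picks which source register the second 64-bit chunk lives in.
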
+
+/*
* Floating-point data-processing (3 source)
*/
@@ -5972,14 +7092,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i64 ta = read_fp_dreg(s, a->ra);
if (neg_a) {
- gen_vfp_negd(ta, ta);
+ gen_vfp_maybe_ah_negd(s, ta, ta);
}
if (neg_n) {
- gen_vfp_negd(tn, tn);
+ gen_vfp_maybe_ah_negd(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
- write_fp_dreg(s, a->rd, ta);
+ write_fp_dreg_merging(s, a->rd, a->ra, ta);
}
break;
@@ -5990,14 +7110,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i32 ta = read_fp_sreg(s, a->ra);
if (neg_a) {
- gen_vfp_negs(ta, ta);
+ gen_vfp_maybe_ah_negs(s, ta, ta);
}
if (neg_n) {
- gen_vfp_negs(tn, tn);
+ gen_vfp_maybe_ah_negs(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
- write_fp_sreg(s, a->rd, ta);
+ write_fp_sreg_merging(s, a->rd, a->ra, ta);
}
break;
@@ -6011,14 +7131,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i32 ta = read_fp_hreg(s, a->ra);
if (neg_a) {
- gen_vfp_negh(ta, ta);
+ gen_vfp_maybe_ah_negh(s, ta, ta);
}
if (neg_n) {
- gen_vfp_negh(tn, tn);
+ gen_vfp_maybe_ah_negh(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A64_F16);
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
- write_fp_sreg(s, a->rd, ta);
+ write_fp_hreg_merging(s, a->rd, a->ra, ta);
}
break;
@@ -6033,6543 +7153,2879 @@ TRANS(FNMADD, do_fmadd, a, true, true)
TRANS(FMSUB, do_fmadd, a, false, true)
TRANS(FNMSUB, do_fmadd, a, true, false)
-/* Shift a TCGv src by TCGv shift_amount, put result in dst.
- * Note that it is the caller's responsibility to ensure that the
- * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
- * mandated semantics for out of range shifts.
- */
-static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
- enum a64_shift_type shift_type, TCGv_i64 shift_amount)
-{
- switch (shift_type) {
- case A64_SHIFT_TYPE_LSL:
- tcg_gen_shl_i64(dst, src, shift_amount);
- break;
- case A64_SHIFT_TYPE_LSR:
- tcg_gen_shr_i64(dst, src, shift_amount);
- break;
- case A64_SHIFT_TYPE_ASR:
- if (!sf) {
- tcg_gen_ext32s_i64(dst, src);
- }
- tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
- break;
- case A64_SHIFT_TYPE_ROR:
- if (sf) {
- tcg_gen_rotr_i64(dst, src, shift_amount);
- } else {
- TCGv_i32 t0, t1;
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t0, src);
- tcg_gen_extrl_i64_i32(t1, shift_amount);
- tcg_gen_rotr_i32(t0, t0, t1);
- tcg_gen_extu_i32_i64(dst, t0);
- }
- break;
- default:
- assert(FALSE); /* all shift types should be handled */
- break;
- }
-
- if (!sf) { /* zero extend final result */
- tcg_gen_ext32u_i64(dst, dst);
- }
-}
-
-/* Shift a TCGv src by immediate, put result in dst.
- * The shift amount must be in range (this should always be true as the
- * relevant instructions will UNDEF on bad shift immediates).
+/*
+ * Advanced SIMD Across Lanes
*/
-static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
- enum a64_shift_type shift_type, unsigned int shift_i)
-{
- assert(shift_i < (sf ? 64 : 32));
- if (shift_i == 0) {
- tcg_gen_mov_i64(dst, src);
- } else {
- shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
- }
-}
-
-/* Logical (shifted register)
- * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
- * +----+-----+-----------+-------+---+------+--------+------+------+
- * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
- * +----+-----+-----------+-------+---+------+--------+------+------+
- */
-static void disas_logic_reg(DisasContext *s, uint32_t insn)
+static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
+ MemOp src_sign, NeonGenTwo64OpFn *fn)
{
- TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
- unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
-
- sf = extract32(insn, 31, 1);
- opc = extract32(insn, 29, 2);
- shift_type = extract32(insn, 22, 2);
- invert = extract32(insn, 21, 1);
- rm = extract32(insn, 16, 5);
- shift_amount = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
-
- if (!sf && (shift_amount & (1 << 5))) {
- unallocated_encoding(s);
- return;
- }
-
- tcg_rd = cpu_reg(s, rd);
+ TCGv_i64 tcg_res, tcg_elt;
+ MemOp src_mop = a->esz | src_sign;
+ int elements = (a->q ? 16 : 8) >> a->esz;
- if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
- /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
- * register-register MOV and MVN, so it is worth special casing.
- */
- tcg_rm = cpu_reg(s, rm);
- if (invert) {
- tcg_gen_not_i64(tcg_rd, tcg_rm);
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
- }
- } else {
- if (sf) {
- tcg_gen_mov_i64(tcg_rd, tcg_rm);
- } else {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
- }
- }
- return;
+ /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
+ if (elements < 4) {
+ return false;
}
-
- tcg_rm = read_cpu_reg(s, rm, sf);
-
- if (shift_amount) {
- shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
+ if (!fp_access_check(s)) {
+ return true;
}
- tcg_rn = cpu_reg(s, rn);
-
- switch (opc | (invert << 2)) {
- case 0: /* AND */
- case 3: /* ANDS */
- tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- case 1: /* ORR */
- tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- case 2: /* EOR */
- tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- case 4: /* BIC */
- case 7: /* BICS */
- tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- case 5: /* ORN */
- tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- case 6: /* EON */
- tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
- break;
- default:
- assert(FALSE);
- break;
- }
+ tcg_res = tcg_temp_new_i64();
+ tcg_elt = tcg_temp_new_i64();
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ read_vec_element(s, tcg_res, a->rn, 0, src_mop);
+ for (int i = 1; i < elements; i++) {
+ read_vec_element(s, tcg_elt, a->rn, i, src_mop);
+ fn(tcg_res, tcg_res, tcg_elt);
}
- if (opc == 3) {
- gen_logic_CC(sf, tcg_rd);
- }
+ tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
+ write_fp_dreg(s, a->rd, tcg_res);
+ return true;
}
+TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
+TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
+TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
+TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
+TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
+TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
+TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
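
For reference, the integer across-lanes reductions are a plain left-to-right fold over the source lanes, exactly as the element loop above performs it. A hypothetical 4-lane ADDV in C:

    #include <stdint.h>

    static uint32_t addv_s(const uint32_t v[4])
    {
        uint64_t acc = v[0];           /* fold in lane order */
        for (int i = 1; i < 4; i++) {
            acc += v[i];
        }
        return (uint32_t)acc;          /* single scalar result lane */
    }

SADDLV/UADDLV pass widen=true so the result is one element size wider than the source, which is also why the MO_SIGN extension is applied on the element loads rather than after the fold.
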
+
/*
- * Add/subtract (extended register)
- *
- * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
- * +--+--+--+-----------+-----+--+-------+------+------+----+----+
- * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
- * +--+--+--+-----------+-----+--+-------+------+------+----+----+
+ * do_fp_reduction helper
*
- * sf: 0 -> 32bit, 1 -> 64bit
- * op: 0 -> add , 1 -> sub
- * S: 1 -> set flags
- * opt: 00
- * option: extension type (see DecodeRegExtend)
- * imm3: optional shift to Rm
+ * This mirrors the Reduce() pseudocode in the ARM ARM. It is
+ * important for correct NaN propagation that we do these
+ * operations in exactly the order specified by the pseudocode.
*
- * Rd = Rn + LSL(extend(Rm), amount)
+ * This is a recursive function.
*/
-static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int imm3 = extract32(insn, 10, 3);
- int option = extract32(insn, 13, 3);
- int rm = extract32(insn, 16, 5);
- int opt = extract32(insn, 22, 2);
- bool setflags = extract32(insn, 29, 1);
- bool sub_op = extract32(insn, 30, 1);
- bool sf = extract32(insn, 31, 1);
-
- TCGv_i64 tcg_rm, tcg_rn; /* temps */
- TCGv_i64 tcg_rd;
- TCGv_i64 tcg_result;
-
- if (imm3 > 4 || opt != 0) {
- unallocated_encoding(s);
- return;
- }
-
- /* non-flag setting ops may use SP */
- if (!setflags) {
- tcg_rd = cpu_reg_sp(s, rd);
+static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
+ int ebase, int ecount, TCGv_ptr fpst,
+ NeonGenTwoSingleOpFn *fn)
+{
+ if (ecount == 1) {
+ TCGv_i32 tcg_elem = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
+ return tcg_elem;
} else {
- tcg_rd = cpu_reg(s, rd);
- }
- tcg_rn = read_cpu_reg_sp(s, rn, sf);
-
- tcg_rm = read_cpu_reg(s, rm, sf);
- ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
+ int half = ecount >> 1;
+ TCGv_i32 tcg_hi, tcg_lo, tcg_res;
- tcg_result = tcg_temp_new_i64();
+ tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
+ tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
+ tcg_res = tcg_temp_new_i32();
- if (!setflags) {
- if (sub_op) {
- tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
- } else {
- tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
- }
- } else {
- if (sub_op) {
- gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
- } else {
- gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
- }
+ fn(tcg_res, tcg_lo, tcg_hi, fpst);
+ return tcg_res;
}
+}
- if (sf) {
- tcg_gen_mov_i64(tcg_rd, tcg_result);
- } else {
- tcg_gen_ext32u_i64(tcg_rd, tcg_result);
+static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
+ NeonGenTwoSingleOpFn *fnormal,
+ NeonGenTwoSingleOpFn *fah)
+{
+ if (fp_access_check(s)) {
+ MemOp esz = a->esz;
+ int elts = (a->q ? 16 : 8) >> esz;
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+ TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
+ s->fpcr_ah ? fah : fnormal);
+ write_fp_sreg(s, a->rd, res);
}
+ return true;
}
-/*
- * Add/subtract (shifted register)
- *
- * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
- * +--+--+--+-----------+-----+--+-------+---------+------+------+
- * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
- * +--+--+--+-----------+-----+--+-------+---------+------+------+
- *
- * sf: 0 -> 32bit, 1 -> 64bit
- * op: 0 -> add , 1 -> sub
- * S: 1 -> set flags
- * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
- * imm6: Shift amount to apply to Rm before the add/sub
- */
-static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int imm6 = extract32(insn, 10, 6);
- int rm = extract32(insn, 16, 5);
- int shift_type = extract32(insn, 22, 2);
- bool setflags = extract32(insn, 29, 1);
- bool sub_op = extract32(insn, 30, 1);
- bool sf = extract32(insn, 31, 1);
-
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_rn, tcg_rm;
- TCGv_i64 tcg_result;
-
- if ((shift_type == 3) || (!sf && (imm6 > 31))) {
- unallocated_encoding(s);
- return;
- }
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
+ gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
+ gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
+ gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
+ gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
- tcg_rn = read_cpu_reg(s, rn, sf);
- tcg_rm = read_cpu_reg(s, rm, sf);
+TRANS(FMAXNMV_s, do_fp_reduction, a,
+ gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
+TRANS(FMINNMV_s, do_fp_reduction, a,
+ gen_helper_vfp_minnums, gen_helper_vfp_minnums)
+TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
+TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
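
The recursion combines halves as fn(lo, hi), producing the same evaluation tree as Reduce() in the pseudocode rather than a linear fold. For a 4-lane FMAXV the shape is fmax(fmax(v0, v1), fmax(v2, v3)), which matters for NaN propagation. A sketch of the tree shape only (fmaxf stands in for the Arm helper here; their NaN rules differ):

    #include <math.h>

    static float fmaxv4_shape(const float v[4])
    {
        return fmaxf(fmaxf(v[0], v[1]), fmaxf(v[2], v[3]));
    }
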
- shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
+/*
+ * Floating-point Immediate
+ */
- tcg_result = tcg_temp_new_i64();
+static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
+{
+ int check = fp_access_check_scalar_hsd(s, a->esz);
+ uint64_t imm;
- if (!setflags) {
- if (sub_op) {
- tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
- } else {
- tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
- }
- } else {
- if (sub_op) {
- gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
- } else {
- gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
- }
+ if (check <= 0) {
+ return check == 0;
}
- if (sf) {
- tcg_gen_mov_i64(tcg_rd, tcg_result);
- } else {
- tcg_gen_ext32u_i64(tcg_rd, tcg_result);
- }
+ imm = vfp_expand_imm(a->esz, a->imm);
+ write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
+ return true;
}
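
vfp_expand_imm implements VFPExpandImm, mapping the 8-bit abcdefgh field to one of a small set of constants of the form +/-(16..31)/16 x 2^r. A sketch for single precision (bit layout a : NOT(b) : bbbbb : cdefgh : 19 zeros):

    #include <stdint.h>

    static uint32_t vfp_expand_imm_s(uint8_t imm8)
    {
        uint32_t a = (imm8 >> 7) & 1;
        uint32_t b = (imm8 >> 6) & 1;
        uint32_t cdefgh = imm8 & 0x3f;

        return (a << 31) | ((b ^ 1) << 30)
               | (b ? 0x3e000000 : 0)       /* bits 29..25 replicate b */
               | (cdefgh << 19);
    }

    /* Example: 0x70 expands to 0x3f800000, i.e. 1.0f. */
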
-/* Data-processing (3 source)
- *
- * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
- * +--+------+-----------+------+------+----+------+------+------+
- * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
- * +--+------+-----------+------+------+----+------+------+------+
+/*
+ * Floating point compare, conditional compare
*/
-static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int ra = extract32(insn, 10, 5);
- int rm = extract32(insn, 16, 5);
- int op_id = (extract32(insn, 29, 3) << 4) |
- (extract32(insn, 21, 3) << 1) |
- extract32(insn, 15, 1);
- bool sf = extract32(insn, 31, 1);
- bool is_sub = extract32(op_id, 0, 1);
- bool is_high = extract32(op_id, 2, 1);
- bool is_signed = false;
- TCGv_i64 tcg_op1;
- TCGv_i64 tcg_op2;
- TCGv_i64 tcg_tmp;
-
- /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
- switch (op_id) {
- case 0x42: /* SMADDL */
- case 0x43: /* SMSUBL */
- case 0x44: /* SMULH */
- is_signed = true;
- break;
- case 0x0: /* MADD (32bit) */
- case 0x1: /* MSUB (32bit) */
- case 0x40: /* MADD (64bit) */
- case 0x41: /* MSUB (64bit) */
- case 0x4a: /* UMADDL */
- case 0x4b: /* UMSUBL */
- case 0x4c: /* UMULH */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- if (is_high) {
- TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_rn = cpu_reg(s, rn);
- TCGv_i64 tcg_rm = cpu_reg(s, rm);
+static void handle_fp_compare(DisasContext *s, int size,
+ unsigned int rn, unsigned int rm,
+ bool cmp_with_zero, bool signal_all_nans)
+{
+ TCGv_i64 tcg_flags = tcg_temp_new_i64();
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
- if (is_signed) {
- tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
+ if (size == MO_64) {
+ TCGv_i64 tcg_vn, tcg_vm;
+
+ tcg_vn = read_fp_dreg(s, rn);
+ if (cmp_with_zero) {
+ tcg_vm = tcg_constant_i64(0);
} else {
- tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
+ tcg_vm = read_fp_dreg(s, rm);
}
- return;
- }
-
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_tmp = tcg_temp_new_i64();
-
- if (op_id < 0x42) {
- tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
- tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
- } else {
- if (is_signed) {
- tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
- tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
+ if (signal_all_nans) {
+ gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
} else {
- tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
- tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
+ gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
}
- }
-
- if (ra == 31 && !is_sub) {
- /* Special-case MADD with rA == XZR; it is the standard MUL alias */
- tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
} else {
- tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
- if (is_sub) {
- tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
+ TCGv_i32 tcg_vn = tcg_temp_new_i32();
+ TCGv_i32 tcg_vm = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_vn, rn, 0, size);
+ if (cmp_with_zero) {
+ tcg_gen_movi_i32(tcg_vm, 0);
} else {
- tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
+ read_vec_element_i32(s, tcg_vm, rm, 0, size);
}
- }
- if (!sf) {
- tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
+ switch (size) {
+ case MO_32:
+ if (signal_all_nans) {
+ gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ } else {
+ gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ }
+ break;
+ case MO_16:
+ if (signal_all_nans) {
+ gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ } else {
+ gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-}
-/* Add/subtract (with carry)
- * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
- * +--+--+--+------------------------+------+-------------+------+-----+
- * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
- * +--+--+--+------------------------+------+-------------+------+-----+
- */
+ gen_set_nzcv(tcg_flags);
+}
-static void disas_adc_sbc(DisasContext *s, uint32_t insn)
+/* FCMP, FCMPE */
+static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
{
- unsigned int sf, op, setflags, rm, rn, rd;
- TCGv_i64 tcg_y, tcg_rn, tcg_rd;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- sf = extract32(insn, 31, 1);
- op = extract32(insn, 30, 1);
- setflags = extract32(insn, 29, 1);
- rm = extract32(insn, 16, 5);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
-
- tcg_rd = cpu_reg(s, rd);
- tcg_rn = cpu_reg(s, rn);
-
- if (op) {
- tcg_y = tcg_temp_new_i64();
- tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
- } else {
- tcg_y = cpu_reg(s, rm);
+ if (check <= 0) {
+ return check == 0;
}
- if (setflags) {
- gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
- } else {
- gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
- }
+ handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
+ return true;
}
-/*
- * Rotate right into flags
- * 31 30 29 21 15 10 5 4 0
- * +--+--+--+-----------------+--------+-----------+------+--+------+
- * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
- * +--+--+--+-----------------+--------+-----------+------+--+------+
- */
-static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
+/* FCCMP, FCCMPE */
+static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
{
- int mask = extract32(insn, 0, 4);
- int o2 = extract32(insn, 4, 1);
- int rn = extract32(insn, 5, 5);
- int imm6 = extract32(insn, 15, 6);
- int sf_op_s = extract32(insn, 29, 3);
- TCGv_i64 tcg_rn;
- TCGv_i32 nzcv;
+ TCGLabel *label_continue = NULL;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
- unallocated_encoding(s);
- return;
+ if (check <= 0) {
+ return check == 0;
}
- tcg_rn = read_cpu_reg(s, rn, 1);
- tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
+ if (a->cond < 0x0e) { /* not always */
+ TCGLabel *label_match = gen_new_label();
+ label_continue = gen_new_label();
+ arm_gen_test_cc(a->cond, label_match);
+ /* nomatch: */
+ gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
+ tcg_gen_br(label_continue);
+ gen_set_label(label_match);
+ }
- nzcv = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
+ handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
- if (mask & 8) { /* N */
- tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
- }
- if (mask & 4) { /* Z */
- tcg_gen_not_i32(cpu_ZF, nzcv);
- tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
- }
- if (mask & 2) { /* C */
- tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
- }
- if (mask & 1) { /* V */
- tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
+ if (label_continue) {
+ gen_set_label(label_continue);
}
+ return true;
}
/*
- * Evaluate into flags
- * 31 30 29 21 15 14 10 5 4 0
- * +--+--+--+-----------------+---------+----+---------+------+--+------+
- * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
- * +--+--+--+-----------------+---------+----+---------+------+--+------+
+ * Advanced SIMD Modified Immediate
*/
-static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
-{
- int o3_mask = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int o2 = extract32(insn, 15, 6);
- int sz = extract32(insn, 14, 1);
- int sf_op_s = extract32(insn, 29, 3);
- TCGv_i32 tmp;
- int shift;
- if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
- !dc_isar_feature(aa64_condm_4, s)) {
- unallocated_encoding(s);
- return;
+static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
+{
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
}
- shift = sz ? 16 : 24; /* SETF16 or SETF8 */
+ if (fp_access_check(s)) {
+ tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ vfp_expand_imm(MO_16, a->abcdefgh));
+ }
+ return true;
+}
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
- tcg_gen_shli_i32(cpu_NF, tmp, shift);
- tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
+static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}
-/* Conditional compare (immediate / register)
- * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
- * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
- * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
- * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
- * [1] y [0] [0]
- */
-static void disas_cc(DisasContext *s, uint32_t insn)
+static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
{
- unsigned int sf, op, y, cond, rn, nzcv, is_imm;
- TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
- TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
- DisasCompare c;
+ GVecGen2iFn *fn;
- if (!extract32(insn, 29, 1)) {
- unallocated_encoding(s);
- return;
+ /* Handle decode of cmode/op here between ORR/BIC/MOVI */
+ if ((a->cmode & 1) && a->cmode < 12) {
+ /* For op=1, the imm will be inverted, so BIC becomes AND. */
+ fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
+ } else {
+ /* There is one unallocated cmode/op combination in this space */
+ if (a->cmode == 15 && a->op == 1 && a->q == 0) {
+ return false;
+ }
+ fn = gen_movi;
}
- if (insn & (1 << 10 | 1 << 4)) {
- unallocated_encoding(s);
- return;
+
+ if (fp_access_check(s)) {
+ uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
+ gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
}
- sf = extract32(insn, 31, 1);
- op = extract32(insn, 30, 1);
- is_imm = extract32(insn, 11, 1);
- y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
- cond = extract32(insn, 12, 4);
- rn = extract32(insn, 5, 5);
- nzcv = extract32(insn, 0, 4);
+ return true;
+}
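
asimd_imm_const computes AdvSIMDExpandImm. A partial sketch of that decode, covering only the 32-bit shifted group and the per-byte mask form; the 16-bit, shifting-ones, byte-replicate and FP groups are elided, so treat this as an outline rather than the full table:

    #include <stdint.h>

    static uint64_t expand_imm_sketch(uint8_t abcdefgh, int cmode, int op)
    {
        uint64_t imm = abcdefgh;

        if (cmode < 8) {                  /* 32-bit lanes: imm8 << (0,8,16,24) */
            imm <<= 8 * ((cmode >> 1) & 3);
            imm |= imm << 32;             /* replicate across both lanes */
            return op ? ~imm : imm;       /* op selects the inverted form */
        }
        if (cmode == 14 && op) {          /* each imm8 bit -> one 0xff byte */
            uint64_t mask = 0;
            for (int i = 0; i < 8; i++) {
                if (abcdefgh & (1 << i)) {
                    mask |= 0xffull << (i * 8);
                }
            }
            return mask;                  /* this form is never inverted */
        }
        return 0;                         /* remaining groups elided */
    }
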
- /* Set T0 = !COND. */
- tcg_t0 = tcg_temp_new_i32();
- arm_test_cc(&c, cond);
- tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
+/*
+ * Advanced SIMD Shift by Immediate
+ */
- /* Load the arguments for the new comparison. */
- if (is_imm) {
- tcg_y = tcg_temp_new_i64();
- tcg_gen_movi_i64(tcg_y, y);
- } else {
- tcg_y = cpu_reg(s, y);
+static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
}
- tcg_rn = cpu_reg(s, rn);
+ return true;
+}
- /* Set the flags for the new comparison. */
- tcg_tmp = tcg_temp_new_i64();
- if (op) {
- gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
- } else {
- gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
+TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
+TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
+TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
+TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
+TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
+TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
+TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
+TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
+TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
+TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
+TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
+TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
+TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
+TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
+
+static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
+{
+ TCGv_i64 tcg_rn, tcg_rd;
+ int esz = a->esz;
+ int esize;
+
+ if (!fp_access_check(s)) {
+ return true;
}
- /* If COND was false, force the flags to #nzcv. Compute two masks
- * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
- * For tcg hosts that support ANDC, we can make do with just T1.
- * In either case, allow the tcg optimizer to delete any unused mask.
+ /*
+ * For the LL variants the store is larger than the load,
+ * so if rd == rn we would overwrite parts of our input.
+ * So load everything right now and use shifts in the main loop.
*/
- tcg_t1 = tcg_temp_new_i32();
- tcg_t2 = tcg_temp_new_i32();
- tcg_gen_neg_i32(tcg_t1, tcg_t0);
- tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
+ tcg_rd = tcg_temp_new_i64();
+ tcg_rn = tcg_temp_new_i64();
+ read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
- if (nzcv & 8) { /* N */
- tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
- } else {
- if (TCG_TARGET_HAS_andc_i32) {
- tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
- } else {
- tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
- }
- }
- if (nzcv & 4) { /* Z */
- if (TCG_TARGET_HAS_andc_i32) {
- tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
- } else {
- tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
- }
- } else {
- tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
- }
- if (nzcv & 2) { /* C */
- tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
- } else {
- if (TCG_TARGET_HAS_andc_i32) {
- tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
- } else {
- tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
- }
- }
- if (nzcv & 1) { /* V */
- tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
- } else {
- if (TCG_TARGET_HAS_andc_i32) {
- tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
+ esize = 8 << esz;
+ for (int i = 0, elements = 8 >> esz; i < elements; i++) {
+ if (is_u) {
+ tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
} else {
- tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
+ tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
}
+ tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
+ write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
}
+ clear_vec_high(s, true, a->rd);
+ return true;
}
-/* Conditional select
- * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
- * +----+----+---+-----------------+------+------+-----+------+------+
- * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
- * +----+----+---+-----------------+------+------+-----+------+------+
- */
-static void disas_cond_select(DisasContext *s, uint32_t insn)
+TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
+TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
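
Reference semantics for the widening shifts, as a sketch: each narrow lane of the selected half is zero- or sign-extended and then shifted left, landing in a lane of twice the width (ushll8 is illustrative):

    #include <stdint.h>

    static void ushll8(uint16_t d[8], const uint8_t n[8],
                       unsigned shift /* 0..7 */)
    {
        for (int i = 0; i < 8; i++) {
            d[i] = (uint16_t)n[i] << shift;   /* widened first: no bits lost */
        }
    }

The a->q bit picks which 64-bit half of the source supplies the narrow lanes (SSHLL vs SSHLL2), which is the read_vec_element(s, tcg_rn, a->rn, a->q, MO_64) above.
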
+
+static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
- TCGv_i64 tcg_rd, zero;
- DisasCompare64 c;
+ assert(shift >= 0 && shift <= 64);
+ tcg_gen_sari_i64(dst, src, MIN(shift, 63));
+}
- if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
- /* S == 1 or op2<1> == 1 */
- unallocated_encoding(s);
- return;
+static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 64) {
+ tcg_gen_movi_i64(dst, 0);
+ } else {
+ tcg_gen_shri_i64(dst, src, shift);
}
- sf = extract32(insn, 31, 1);
- else_inv = extract32(insn, 30, 1);
- rm = extract32(insn, 16, 5);
- cond = extract32(insn, 12, 4);
- else_inc = extract32(insn, 10, 1);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
+}
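
The explicit shift == 64 cases exist because shifting a 64-bit value by its full width is undefined in C and likewise undefined for TCG's shift ops, while the Arm immediate encodings do reach 64. Arithmetic shifts can simply clamp, since shifting by 63 already yields the same all-zeros/all-ones result, hence MIN(shift, 63) in gen_sshr_d; logical shifts must produce 0 explicitly. In plain C:

    #include <stdint.h>

    static int64_t sshr64(int64_t x, unsigned shift /* 1..64 */)
    {
        return x >> (shift > 63 ? 63 : shift);   /* sign fill: 0 or -1 */
    }

    static uint64_t ushr64(uint64_t x, unsigned shift /* 1..64 */)
    {
        return shift == 64 ? 0 : x >> shift;
    }
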
- tcg_rd = cpu_reg(s, rd);
+static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_sshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
- a64_test_cc(&c, cond);
- zero = tcg_constant_i64(0);
+static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_ushr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
- if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
- /* CSET & CSETM. */
- if (else_inv) {
- tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
- tcg_rd, c.value, zero);
- } else {
- tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
- tcg_rd, c.value, zero);
- }
+static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 32);
+ if (shift) {
+ TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
+ tcg_gen_add_i64(dst, src, rnd);
+ tcg_gen_sari_i64(dst, dst, shift);
} else {
- TCGv_i64 t_true = cpu_reg(s, rn);
- TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
- if (else_inv && else_inc) {
- tcg_gen_neg_i64(t_false, t_false);
- } else if (else_inv) {
- tcg_gen_not_i64(t_false, t_false);
- } else if (else_inc) {
- tcg_gen_addi_i64(t_false, t_false, 1);
- }
- tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
- }
-
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ tcg_gen_mov_i64(dst, src);
}
}
-static void handle_clz(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- TCGv_i64 tcg_rd, tcg_rn;
- tcg_rd = cpu_reg(s, rd);
- tcg_rn = cpu_reg(s, rn);
-
- if (sf) {
- tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
+ assert(shift >= 0 && shift <= 32);
+ if (shift) {
+ TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
+ tcg_gen_add_i64(dst, src, rnd);
+ tcg_gen_shri_i64(dst, dst, shift);
} else {
- TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
- tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
+ tcg_gen_mov_i64(dst, src);
}
}
-static void handle_cls(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- TCGv_i64 tcg_rd, tcg_rn;
- tcg_rd = cpu_reg(s, rd);
- tcg_rn = cpu_reg(s, rn);
-
- if (sf) {
- tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 0) {
+ tcg_gen_mov_i64(dst, src);
+ } else if (shift == 64) {
+ /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
+ tcg_gen_movi_i64(dst, 0);
} else {
- TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
- tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
+ TCGv_i64 rnd = tcg_temp_new_i64();
+ tcg_gen_extract_i64(rnd, src, shift - 1, 1);
+ tcg_gen_sari_i64(dst, src, shift);
+ tcg_gen_add_i64(dst, dst, rnd);
}
}
-static void handle_rbit(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- TCGv_i64 tcg_rd, tcg_rn;
- tcg_rd = cpu_reg(s, rd);
- tcg_rn = cpu_reg(s, rn);
-
- if (sf) {
- gen_helper_rbit64(tcg_rd, tcg_rn);
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 0) {
+ tcg_gen_mov_i64(dst, src);
+ } else if (shift == 64) {
+ /* Rounding will propagate bit 63 into bit 64. */
+ tcg_gen_shri_i64(dst, src, 63);
} else {
- TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
- gen_helper_rbit(tcg_tmp32, tcg_tmp32);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
+ TCGv_i64 rnd = tcg_temp_new_i64();
+ tcg_gen_extract_i64(rnd, src, shift - 1, 1);
+ tcg_gen_shri_i64(dst, src, shift);
+ tcg_gen_add_i64(dst, dst, rnd);
}
}
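
The rounding shifts add the would-be carry-out bit (bit shift-1 of the source) before shifting. The _bhs helpers can do the addition directly in 64 bits because an 8/16/32-bit input leaves headroom; the _d helpers extract the rounding bit separately to avoid 64-bit overflow. Reference arithmetic for the 32-bit case, as a sketch:

    #include <stdint.h>

    static int32_t srshr32(int32_t x, unsigned shift /* 1..32 */)
    {
        int64_t wide = (int64_t)x + (1ll << (shift - 1)); /* cannot overflow */
        return (int32_t)(wide >> shift);
    }
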
-/* REV with sf==1, opcode==3 ("REV64") */
-static void handle_rev64(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- if (!sf) {
- unallocated_encoding(s);
- return;
- }
- tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
+ gen_srshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
}
-/* REV with sf==0, opcode==2
- * REV32 (sf==1, opcode==2)
- */
-static void handle_rev32(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_rn = cpu_reg(s, rn);
+ gen_urshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
- if (sf) {
- tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
- tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
- } else {
- tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
+static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ /* If shift is 64, dst is unchanged. */
+ if (shift != 64) {
+ tcg_gen_shri_i64(src, src, shift);
+ tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
}
}
-/* REV16 (opcode==1) */
-static void handle_rev16(DisasContext *s, unsigned int sf,
- unsigned int rn, unsigned int rd)
+static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
- TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
-
- tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
- tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
- tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
- tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
- tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
+ tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
}
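
SRI and SLI shift the source and insert it into the destination, leaving the bits outside the deposited field untouched, which is what the deposit operations above express. Reference semantics as a sketch:

    #include <stdint.h>

    static uint64_t sri64(uint64_t dst, uint64_t src, unsigned shift /* 1..64 */)
    {
        if (shift == 64) {
            return dst;                        /* nothing inserted */
        }
        uint64_t mask = ~0ull >> shift;        /* low 64-shift bits */
        return (dst & ~mask) | ((src >> shift) & mask);
    }

    static uint64_t sli64(uint64_t dst, uint64_t src, unsigned shift /* 0..63 */)
    {
        uint64_t mask = ~0ull << shift;        /* high 64-shift bits */
        return (dst & ~mask) | ((src << shift) & mask);
    }
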
-/* Data-processing (1 source)
- * 31 30 29 28 21 20 16 15 10 9 5 4 0
- * +----+---+---+-----------------+---------+--------+------+------+
- * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
- * +----+---+---+-----------------+---------+--------+------+------+
- */
-static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
+static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
+ WideShiftImmFn * const fns[3], MemOp sign)
{
- unsigned int sf, opcode, opcode2, rn, rd;
- TCGv_i64 tcg_rd;
+ TCGv_i64 tcg_rn, tcg_rd;
+ int esz = a->esz;
+ int esize;
+ WideShiftImmFn *fn;
- if (extract32(insn, 29, 1)) {
- unallocated_encoding(s);
- return;
- }
+ tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
- sf = extract32(insn, 31, 1);
- opcode = extract32(insn, 10, 6);
- opcode2 = extract32(insn, 16, 5);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
+ if (!fp_access_check(s)) {
+ return true;
+ }
-#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ tcg_gen_movi_i64(tcg_rd, 0);
- switch (MAP(sf, opcode2, opcode)) {
- case MAP(0, 0x00, 0x00): /* RBIT */
- case MAP(1, 0x00, 0x00):
- handle_rbit(s, sf, rn, rd);
- break;
- case MAP(0, 0x00, 0x01): /* REV16 */
- case MAP(1, 0x00, 0x01):
- handle_rev16(s, sf, rn, rd);
- break;
- case MAP(0, 0x00, 0x02): /* REV/REV32 */
- case MAP(1, 0x00, 0x02):
- handle_rev32(s, sf, rn, rd);
- break;
- case MAP(1, 0x00, 0x03): /* REV64 */
- handle_rev64(s, sf, rn, rd);
- break;
- case MAP(0, 0x00, 0x04): /* CLZ */
- case MAP(1, 0x00, 0x04):
- handle_clz(s, sf, rn, rd);
- break;
- case MAP(0, 0x00, 0x05): /* CLS */
- case MAP(1, 0x00, 0x05):
- handle_cls(s, sf, rn, rd);
- break;
- case MAP(1, 0x01, 0x00): /* PACIA */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x01): /* PACIB */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x02): /* PACDA */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x03): /* PACDB */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x04): /* AUTIA */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x05): /* AUTIB */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x06): /* AUTDA */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x07): /* AUTDB */
- if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
- } else if (!dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- break;
- case MAP(1, 0x01, 0x08): /* PACIZA */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x09): /* PACIZB */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0a): /* PACDZA */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0b): /* PACDZB */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0c): /* AUTIZA */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0d): /* AUTIZB */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0e): /* AUTDZA */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x0f): /* AUTDZB */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
- }
- break;
- case MAP(1, 0x01, 0x10): /* XPACI */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
- }
- break;
- case MAP(1, 0x01, 0x11): /* XPACD */
- if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
- goto do_unallocated;
- } else if (s->pauth_active) {
- tcg_rd = cpu_reg(s, rd);
- gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
- }
- break;
- default:
- do_unallocated:
- unallocated_encoding(s);
- break;
+ fn = fns[esz];
+ esize = 8 << esz;
+ for (int i = 0, elements = 8 >> esz; i < elements; i++) {
+ read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
+ fn(tcg_rn, tcg_rn, a->imm);
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
}
-#undef MAP
+ write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
+ clear_vec_high(s, a->q, a->rd);
+ return true;
}
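
Each table entry below composes a (possibly rounding) shift in the wide type with a saturating narrow; the deposit loop above then packs the narrow results into the selected half of the destination. The lane arithmetic for SQSHRN on a 32-bit source element, as a sketch (QC flag handling omitted):

    #include <stdint.h>

    static int16_t sqshrn32(int32_t x, unsigned shift /* 1..16 */)
    {
        int32_t shifted = x >> shift;          /* arithmetic shift */
        if (shifted > INT16_MAX) {
            return INT16_MAX;                  /* saturate; hardware sets QC */
        }
        if (shifted < INT16_MIN) {
            return INT16_MIN;
        }
        return (int16_t)shifted;
    }
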
-static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
- unsigned int rm, unsigned int rn, unsigned int rd)
+static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- TCGv_i64 tcg_n, tcg_m, tcg_rd;
- tcg_rd = cpu_reg(s, rd);
-
- if (!sf && is_signed) {
- tcg_n = tcg_temp_new_i64();
- tcg_m = tcg_temp_new_i64();
- tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
- tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
- } else {
- tcg_n = read_cpu_reg(s, rn, sf);
- tcg_m = read_cpu_reg(s, rm, sf);
- }
-
- if (is_signed) {
- gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
- } else {
- gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
- }
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
+}
- if (!sf) { /* zero extend final result */
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
- }
+static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
}
-/* LSLV, LSRV, ASRV, RORV */
-static void handle_shift_reg(DisasContext *s,
- enum a64_shift_type shift_type, unsigned int sf,
- unsigned int rm, unsigned int rn, unsigned int rd)
+static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- TCGv_i64 tcg_shift = tcg_temp_new_i64();
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
+ gen_sshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
+}
- tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
- shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
+static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_shri_i64(d, s, i);
+ gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
}
-/* CRC32[BHWX], CRC32C[BHWX] */
-static void handle_crc32(DisasContext *s,
- unsigned int sf, unsigned int sz, bool crc32c,
- unsigned int rm, unsigned int rn, unsigned int rd)
+static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- TCGv_i64 tcg_acc, tcg_val;
- TCGv_i32 tcg_bytes;
+ tcg_gen_shri_i64(d, s, i);
+ gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
+}
- if (!dc_isar_feature(aa64_crc32, s)
- || (sf == 1 && sz != 3)
- || (sf == 0 && sz == 3)) {
- unallocated_encoding(s);
- return;
- }
+static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_ushr_d(d, s, i);
+ gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
+}
- if (sz == 3) {
- tcg_val = cpu_reg(s, rm);
- } else {
- uint64_t mask;
- switch (sz) {
- case 0:
- mask = 0xFF;
- break;
- case 1:
- mask = 0xFFFF;
- break;
- case 2:
- mask = 0xFFFFFFFF;
- break;
- default:
- g_assert_not_reached();
- }
- tcg_val = tcg_temp_new_i64();
- tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
- }
+static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_unarrow_sat8(d, tcg_env, d);
+}
- tcg_acc = cpu_reg(s, rn);
- tcg_bytes = tcg_constant_i32(1 << sz);
+static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_unarrow_sat16(d, tcg_env, d);
+}
- if (crc32c) {
- gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
- } else {
- gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
- }
+static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_sshr_d(d, s, i);
+ gen_helper_neon_unarrow_sat32(d, tcg_env, d);
}
-/* Data-processing (2 source)
- * 31 30 29 28 21 20 16 15 10 9 5 4 0
- * +----+---+---+-----------------+------+--------+------+------+
- * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
- * +----+---+---+-----------------+------+--------+------+------+
- */
-static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
+static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- unsigned int sf, rm, opcode, rn, rd, setflag;
- sf = extract32(insn, 31, 1);
- setflag = extract32(insn, 29, 1);
- rm = extract32(insn, 16, 5);
- opcode = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
+}
- if (setflag && opcode != 0) {
- unallocated_encoding(s);
- return;
- }
+static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
+}
- switch (opcode) {
- case 0: /* SUBP(S) */
- if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
- goto do_unallocated;
- } else {
- TCGv_i64 tcg_n, tcg_m, tcg_d;
+static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
+}
- tcg_n = read_cpu_reg_sp(s, rn, true);
- tcg_m = read_cpu_reg_sp(s, rm, true);
- tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
- tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
- tcg_d = cpu_reg(s, rd);
+static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_bhs(d, s, i);
+ gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
+}
- if (setflag) {
- gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
- } else {
- tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
- }
- }
- break;
- case 2: /* UDIV */
- handle_div(s, false, sf, rm, rn, rd);
- break;
- case 3: /* SDIV */
- handle_div(s, true, sf, rm, rn, rd);
- break;
- case 4: /* IRG */
- if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
- goto do_unallocated;
- }
- if (s->ata[0]) {
- gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
- cpu_reg_sp(s, rn), cpu_reg(s, rm));
- } else {
- gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
- cpu_reg_sp(s, rn));
- }
- break;
- case 5: /* GMI */
- if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
- goto do_unallocated;
- } else {
- TCGv_i64 t = tcg_temp_new_i64();
+static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_bhs(d, s, i);
+ gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
+}
- tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
- tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
- tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
- }
- break;
- case 8: /* LSLV */
- handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
- break;
- case 9: /* LSRV */
- handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
- break;
- case 10: /* ASRV */
- handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
- break;
- case 11: /* RORV */
- handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
- break;
- case 12: /* PACGA */
- if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
- goto do_unallocated;
- }
- gen_helper_pacga(cpu_reg(s, rd), tcg_env,
- cpu_reg(s, rn), cpu_reg_sp(s, rm));
- break;
- case 16:
- case 17:
- case 18:
- case 19:
- case 20:
- case 21:
- case 22:
- case 23: /* CRC32 */
- {
- int sz = extract32(opcode, 0, 2);
- bool crc32c = extract32(opcode, 2, 1);
- handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
- break;
- }
- default:
- do_unallocated:
- unallocated_encoding(s);
- break;
- }
+static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
}
-/*
- * Data processing - register
- * 31 30 29 28 25 21 20 16 10 0
- * +--+---+--+---+-------+-----+-------+-------+---------+
- * | |op0| |op1| 1 0 1 | op2 | | op3 | |
- * +--+---+--+---+-------+-----+-------+-------+---------+
- */
-static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
-{
- int op0 = extract32(insn, 30, 1);
- int op1 = extract32(insn, 28, 1);
- int op2 = extract32(insn, 21, 4);
- int op3 = extract32(insn, 10, 6);
-
- if (!op1) {
- if (op2 & 8) {
- if (op2 & 1) {
- /* Add/sub (extended register) */
- disas_add_sub_ext_reg(s, insn);
- } else {
- /* Add/sub (shifted register) */
- disas_add_sub_reg(s, insn);
- }
- } else {
- /* Logical (shifted register) */
- disas_logic_reg(s, insn);
- }
- return;
- }
+static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_unarrow_sat8(d, tcg_env, d);
+}
- switch (op2) {
- case 0x0:
- switch (op3) {
- case 0x00: /* Add/subtract (with carry) */
- disas_adc_sbc(s, insn);
- break;
+static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_unarrow_sat16(d, tcg_env, d);
+}
- case 0x01: /* Rotate right into flags */
- case 0x21:
- disas_rotate_right_into_flags(s, insn);
- break;
+static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_d(d, s, i);
+ gen_helper_neon_unarrow_sat32(d, tcg_env, d);
+}
- case 0x02: /* Evaluate into flags */
- case 0x12:
- case 0x22:
- case 0x32:
- disas_evaluate_into_flags(s, insn);
- break;
+static WideShiftImmFn * const shrn_fns[] = {
+ tcg_gen_shri_i64,
+ tcg_gen_shri_i64,
+ gen_ushr_d,
+};
+TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
- default:
- goto do_unallocated;
- }
- break;
+static WideShiftImmFn * const rshrn_fns[] = {
+ gen_urshr_bhs,
+ gen_urshr_bhs,
+ gen_urshr_d,
+};
+TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
- case 0x2: /* Conditional compare */
- disas_cc(s, insn); /* both imm and reg forms */
- break;
+static WideShiftImmFn * const sqshrn_fns[] = {
+ gen_sqshrn_b,
+ gen_sqshrn_h,
+ gen_sqshrn_s,
+};
+TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
- case 0x4: /* Conditional select */
- disas_cond_select(s, insn);
- break;
+static WideShiftImmFn * const uqshrn_fns[] = {
+ gen_uqshrn_b,
+ gen_uqshrn_h,
+ gen_uqshrn_s,
+};
+TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
- case 0x6: /* Data-processing */
- if (op0) { /* (1 source) */
- disas_data_proc_1src(s, insn);
- } else { /* (2 source) */
- disas_data_proc_2src(s, insn);
- }
- break;
- case 0x8 ... 0xf: /* (3 source) */
- disas_data_proc_3src(s, insn);
- break;
+static WideShiftImmFn * const sqshrun_fns[] = {
+ gen_sqshrun_b,
+ gen_sqshrun_h,
+ gen_sqshrun_s,
+};
+TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
- default:
- do_unallocated:
- unallocated_encoding(s);
- break;
- }
-}
+static WideShiftImmFn * const sqrshrn_fns[] = {
+ gen_sqrshrn_b,
+ gen_sqrshrn_h,
+ gen_sqrshrn_s,
+};
+TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
-static void handle_fp_compare(DisasContext *s, int size,
- unsigned int rn, unsigned int rm,
- bool cmp_with_zero, bool signal_all_nans)
-{
- TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+static WideShiftImmFn * const uqrshrn_fns[] = {
+ gen_uqrshrn_b,
+ gen_uqrshrn_h,
+ gen_uqrshrn_s,
+};
+TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
- if (size == MO_64) {
- TCGv_i64 tcg_vn, tcg_vm;
+static WideShiftImmFn * const sqrshrun_fns[] = {
+ gen_sqrshrun_b,
+ gen_sqrshrun_h,
+ gen_sqrshrun_s,
+};
+TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
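+/*
+ * Editorial note, not in the original patch: each fns[] table above is
+ * indexed by a->esz, the narrowed element size (MO_8..MO_32). E.g.
+ * UQRSHRN with esz == MO_16 dispatches to gen_uqrshrn_h(), which
+ * rounds and shifts via gen_urshr_bhs() and then saturates through the
+ * neon_narrow_sat_u16 helper, as defined above.
+ */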
- tcg_vn = read_fp_dreg(s, rn);
- if (cmp_with_zero) {
- tcg_vm = tcg_constant_i64(0);
- } else {
- tcg_vm = read_fp_dreg(s, rm);
- }
- if (signal_all_nans) {
- gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- } else {
- gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- }
- } else {
- TCGv_i32 tcg_vn = tcg_temp_new_i32();
- TCGv_i32 tcg_vm = tcg_temp_new_i32();
+/*
+ * Advanced SIMD Scalar Shift by Immediate
+ */
- read_vec_element_i32(s, tcg_vn, rn, 0, size);
- if (cmp_with_zero) {
- tcg_gen_movi_i32(tcg_vm, 0);
- } else {
- read_vec_element_i32(s, tcg_vm, rm, 0, size);
- }
+static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
+ WideShiftImmFn *fn, bool accumulate,
+ MemOp sign)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 rd = tcg_temp_new_i64();
+ TCGv_i64 rn = tcg_temp_new_i64();
- switch (size) {
- case MO_32:
- if (signal_all_nans) {
- gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- } else {
- gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- }
- break;
- case MO_16:
- if (signal_all_nans) {
- gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- } else {
- gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- }
- break;
- default:
- g_assert_not_reached();
+ read_vec_element(s, rn, a->rn, 0, a->esz | sign);
+ if (accumulate) {
+ read_vec_element(s, rd, a->rd, 0, a->esz | sign);
}
+ fn(rd, rn, a->imm);
+ write_fp_dreg(s, a->rd, rd);
}
-
- gen_set_nzcv(tcg_flags);
+ return true;
}
-/* Floating point compare
- * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
- * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
- */
-static void disas_fp_compare(DisasContext *s, uint32_t insn)
-{
- unsigned int mos, type, rm, op, rn, opc, op2r;
- int size;
+TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
+TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
+TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
+TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
+TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
+TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
+TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
+TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
+TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
- mos = extract32(insn, 29, 3);
- type = extract32(insn, 22, 2);
- rm = extract32(insn, 16, 5);
- op = extract32(insn, 14, 2);
- rn = extract32(insn, 5, 5);
- opc = extract32(insn, 3, 2);
- op2r = extract32(insn, 0, 3);
+TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
+TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
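+/*
+ * Editorial sketch, not in the original patch: for SSRA_s the
+ * accumulate flag is true, so do_scalar_shift_imm() preloads D[rd];
+ * gen_ssra_d(rd, rn, imm) (by its naming, a signed shift right that
+ * accumulates into rd) produces the result, and write_fp_dreg()
+ * stores it back, clearing the high bits of the vector register.
+ */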
- if (mos || op || op2r) {
- unallocated_encoding(s);
- return;
- }
+static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
+ NeonGenTwoOpEnvFn *fn)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t, s);
+ fn(t, tcg_env, t, tcg_constant_i32(i));
+ tcg_gen_extu_i32_i64(d, t);
+}
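+/*
+ * Editorial sketch of the pattern used below, not in the original
+ * patch: the sub-64-bit saturating helpers take i32 operands, so
+ * trunc_i64_env_imm() narrows the 64-bit scalar, invokes the env
+ * helper with the immediate as an i32 constant, and zero-extends the
+ * result back to 64 bits, e.g.:
+ *
+ *   gen_sqshli_b(d, s, 3)
+ *     => t = extrl(s); neon_qshl_s8(t, env, t, 3); d = extu(t);
+ */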
- switch (type) {
- case 0:
- size = MO_32;
- break;
- case 1:
- size = MO_64;
- break;
- case 3:
- size = MO_16;
- if (dc_isar_feature(aa64_fp16, s)) {
- break;
- }
- /* fallthru */
- default:
- unallocated_encoding(s);
- return;
- }
+static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
+}
- if (!fp_access_check(s)) {
- return;
- }
+static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
+}
- handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
+static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
}
-/* Floating point conditional compare
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
- * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
- * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
- */
-static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
+static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- unsigned int mos, type, rm, cond, rn, op, nzcv;
- TCGLabel *label_continue = NULL;
- int size;
+ gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
+}
- mos = extract32(insn, 29, 3);
- type = extract32(insn, 22, 2);
- rm = extract32(insn, 16, 5);
- cond = extract32(insn, 12, 4);
- rn = extract32(insn, 5, 5);
- op = extract32(insn, 4, 1);
- nzcv = extract32(insn, 0, 4);
+static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
+}
- if (mos) {
- unallocated_encoding(s);
- return;
- }
+static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
+}
- switch (type) {
- case 0:
- size = MO_32;
- break;
- case 1:
- size = MO_64;
- break;
- case 3:
- size = MO_16;
- if (dc_isar_feature(aa64_fp16, s)) {
- break;
- }
- /* fallthru */
- default:
- unallocated_encoding(s);
- return;
- }
+static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
+}
- if (!fp_access_check(s)) {
- return;
- }
+static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
+}
- if (cond < 0x0e) { /* not always */
- TCGLabel *label_match = gen_new_label();
- label_continue = gen_new_label();
- arm_gen_test_cc(cond, label_match);
- /* nomatch: */
- gen_set_nzcv(tcg_constant_i64(nzcv << 28));
- tcg_gen_br(label_continue);
- gen_set_label(label_match);
- }
+static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
+}
- handle_fp_compare(s, size, rn, rm, false, op);
+static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
+}
- if (cond < 0x0e) {
- gen_set_label(label_continue);
- }
+static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
}
-/* Floating-point data-processing (1 source) - half precision */
-static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
+static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
{
- TCGv_ptr fpst = NULL;
- TCGv_i32 tcg_op = read_fp_hreg(s, rn);
- TCGv_i32 tcg_res = tcg_temp_new_i32();
+ gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
+}
- switch (opcode) {
- case 0x0: /* FMOV */
- tcg_gen_mov_i32(tcg_res, tcg_op);
- break;
- case 0x1: /* FABS */
- gen_vfp_absh(tcg_res, tcg_op);
- break;
- case 0x2: /* FNEG */
- gen_vfp_negh(tcg_res, tcg_op);
- break;
- case 0x3: /* FSQRT */
- fpst = fpstatus_ptr(FPST_FPCR_F16);
- gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
- break;
- case 0x8: /* FRINTN */
- case 0x9: /* FRINTP */
- case 0xa: /* FRINTM */
- case 0xb: /* FRINTZ */
- case 0xc: /* FRINTA */
- {
- TCGv_i32 tcg_rmode;
+static WideShiftImmFn * const f_scalar_sqshli[] = {
+ gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
+};
- fpst = fpstatus_ptr(FPST_FPCR_F16);
- tcg_rmode = gen_set_rmode(opcode & 7, fpst);
- gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
- gen_restore_rmode(tcg_rmode, fpst);
- break;
- }
- case 0xe: /* FRINTX */
- fpst = fpstatus_ptr(FPST_FPCR_F16);
- gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
- break;
- case 0xf: /* FRINTI */
- fpst = fpstatus_ptr(FPST_FPCR_F16);
- gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
- break;
- default:
- g_assert_not_reached();
- }
+static WideShiftImmFn * const f_scalar_uqshli[] = {
+ gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
+};
- write_fp_sreg(s, rd, tcg_res);
-}
+static WideShiftImmFn * const f_scalar_sqshlui[] = {
+ gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
+};
-/* Floating-point data-processing (1 source) - single precision */
-static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
-{
- void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
- TCGv_i32 tcg_op, tcg_res;
- TCGv_ptr fpst;
- int rmode = -1;
-
- tcg_op = read_fp_sreg(s, rn);
- tcg_res = tcg_temp_new_i32();
-
- switch (opcode) {
- case 0x0: /* FMOV */
- tcg_gen_mov_i32(tcg_res, tcg_op);
- goto done;
- case 0x1: /* FABS */
- gen_vfp_abss(tcg_res, tcg_op);
- goto done;
- case 0x2: /* FNEG */
- gen_vfp_negs(tcg_res, tcg_op);
- goto done;
- case 0x3: /* FSQRT */
- gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
- goto done;
- case 0x6: /* BFCVT */
- gen_fpst = gen_helper_bfcvt;
- break;
- case 0x8: /* FRINTN */
- case 0x9: /* FRINTP */
- case 0xa: /* FRINTM */
- case 0xb: /* FRINTZ */
- case 0xc: /* FRINTA */
- rmode = opcode & 7;
- gen_fpst = gen_helper_rints;
- break;
- case 0xe: /* FRINTX */
- gen_fpst = gen_helper_rints_exact;
- break;
- case 0xf: /* FRINTI */
- gen_fpst = gen_helper_rints;
- break;
- case 0x10: /* FRINT32Z */
- rmode = FPROUNDING_ZERO;
- gen_fpst = gen_helper_frint32_s;
- break;
- case 0x11: /* FRINT32X */
- gen_fpst = gen_helper_frint32_s;
- break;
- case 0x12: /* FRINT64Z */
- rmode = FPROUNDING_ZERO;
- gen_fpst = gen_helper_frint64_s;
- break;
- case 0x13: /* FRINT64X */
- gen_fpst = gen_helper_frint64_s;
- break;
- default:
- g_assert_not_reached();
- }
+/* Note that the helpers sign-extend their inputs, so don't do it here. */
+TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
+TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
+TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
- fpst = fpstatus_ptr(FPST_FPCR);
- if (rmode >= 0) {
- TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
- gen_fpst(tcg_res, tcg_op, fpst);
- gen_restore_rmode(tcg_rmode, fpst);
- } else {
- gen_fpst(tcg_res, tcg_op, fpst);
- }
+static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
+ WideShiftImmFn * const fns[3],
+ MemOp sign, bool zext)
+{
+ MemOp esz = a->esz;
- done:
- write_fp_sreg(s, rd, tcg_res);
-}
+ tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
-/* Floating-point data-processing (1 source) - double precision */
-static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
-{
- void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
- TCGv_i64 tcg_op, tcg_res;
- TCGv_ptr fpst;
- int rmode = -1;
+ if (fp_access_check(s)) {
+ TCGv_i64 rd = tcg_temp_new_i64();
+ TCGv_i64 rn = tcg_temp_new_i64();
- switch (opcode) {
- case 0x0: /* FMOV */
- gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
- return;
+ read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
+ fns[esz](rd, rn, a->imm);
+ if (zext) {
+ tcg_gen_ext_i64(rd, rd, esz);
+ }
+ write_fp_dreg(s, a->rd, rd);
}
+ return true;
+}
- tcg_op = read_fp_dreg(s, rn);
- tcg_res = tcg_temp_new_i64();
+TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
+TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
+TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
+TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
+TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
+TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
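+/*
+ * Editorial note, not in the original patch: the zext flag marks the
+ * narrows whose helpers evidently return sign-extended values
+ * (SQSHRN/SQRSHRN); for those, do_scalar_shift_imm_narrow() re-zeroes
+ * the bits above esz with tcg_gen_ext_i64() before write_fp_dreg().
+ * The unsigned forms, including SQSHRUN/SQRSHRUN whose results are
+ * unsigned, already come back zero-extended.
+ */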
- switch (opcode) {
- case 0x1: /* FABS */
- gen_vfp_absd(tcg_res, tcg_op);
- goto done;
- case 0x2: /* FNEG */
- gen_vfp_negd(tcg_res, tcg_op);
- goto done;
- case 0x3: /* FSQRT */
- gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
- goto done;
- case 0x8: /* FRINTN */
- case 0x9: /* FRINTP */
- case 0xa: /* FRINTM */
- case 0xb: /* FRINTZ */
- case 0xc: /* FRINTA */
- rmode = opcode & 7;
- gen_fpst = gen_helper_rintd;
- break;
- case 0xe: /* FRINTX */
- gen_fpst = gen_helper_rintd_exact;
- break;
- case 0xf: /* FRINTI */
- gen_fpst = gen_helper_rintd;
- break;
- case 0x10: /* FRINT32Z */
- rmode = FPROUNDING_ZERO;
- gen_fpst = gen_helper_frint32_d;
- break;
- case 0x11: /* FRINT32X */
- gen_fpst = gen_helper_frint32_d;
- break;
- case 0x12: /* FRINT64Z */
- rmode = FPROUNDING_ZERO;
- gen_fpst = gen_helper_frint64_d;
- break;
- case 0x13: /* FRINT64X */
- gen_fpst = gen_helper_frint64_d;
- break;
- default:
- g_assert_not_reached();
+static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
+{
+ TCGv_i64 tcg_n, tcg_m, tcg_rd;
+ tcg_rd = cpu_reg(s, a->rd);
+
+ if (!a->sf && is_signed) {
+ tcg_n = tcg_temp_new_i64();
+ tcg_m = tcg_temp_new_i64();
+ tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
+ tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
+ } else {
+ tcg_n = read_cpu_reg(s, a->rn, a->sf);
+ tcg_m = read_cpu_reg(s, a->rm, a->sf);
}
- fpst = fpstatus_ptr(FPST_FPCR);
- if (rmode >= 0) {
- TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
- gen_fpst(tcg_res, tcg_op, fpst);
- gen_restore_rmode(tcg_rmode, fpst);
+ if (is_signed) {
+ gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
} else {
- gen_fpst(tcg_res, tcg_op, fpst);
+ gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
}
- done:
- write_fp_dreg(s, rd, tcg_res);
+ if (!a->sf) { /* zero extend final result */
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+ return true;
}
-static void handle_fp_fcvt(DisasContext *s, int opcode,
- int rd, int rn, int dtype, int ntype)
-{
- switch (ntype) {
- case 0x0:
- {
- TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
- if (dtype == 1) {
- /* Single to double */
- TCGv_i64 tcg_rd = tcg_temp_new_i64();
- gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
- write_fp_dreg(s, rd, tcg_rd);
- } else {
- /* Single to half */
- TCGv_i32 tcg_rd = tcg_temp_new_i32();
- TCGv_i32 ahp = get_ahp_flag();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
+TRANS(SDIV, do_div, a, true)
+TRANS(UDIV, do_div, a, false)
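+/*
+ * Editorial note, not in the original patch: a 32-bit SDIV (sf == 0,
+ * is_signed) cannot use read_cpu_reg()'s zero extension, because the
+ * operands must be treated as signed 32-bit values; hence the explicit
+ * tcg_gen_ext32s_i64() above, followed by the 64-bit divide helper and
+ * a final zero extension of the 32-bit result.
+ */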
- gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
- /* write_fp_sreg is OK here because top half of tcg_rd is zero */
- write_fp_sreg(s, rd, tcg_rd);
- }
+/*
+ * Shift a TCGv src by TCGv shift_amount, put the result in dst.
+ * Note that it is the caller's responsibility to ensure that the
+ * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
+ * ARM-mandated semantics for out-of-range shifts.
+ */
+static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
+ enum a64_shift_type shift_type, TCGv_i64 shift_amount)
+{
+ switch (shift_type) {
+ case A64_SHIFT_TYPE_LSL:
+ tcg_gen_shl_i64(dst, src, shift_amount);
break;
- }
- case 0x1:
- {
- TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
- TCGv_i32 tcg_rd = tcg_temp_new_i32();
- if (dtype == 0) {
- /* Double to single */
- gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
- } else {
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
- TCGv_i32 ahp = get_ahp_flag();
- /* Double to half */
- gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
- /* write_fp_sreg is OK here because top half of tcg_rd is zero */
+ case A64_SHIFT_TYPE_LSR:
+ tcg_gen_shr_i64(dst, src, shift_amount);
+ break;
+ case A64_SHIFT_TYPE_ASR:
+ if (!sf) {
+ tcg_gen_ext32s_i64(dst, src);
}
- write_fp_sreg(s, rd, tcg_rd);
+ tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
break;
- }
- case 0x3:
- {
- TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
- TCGv_i32 tcg_ahp = get_ahp_flag();
- tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
- if (dtype == 0) {
- /* Half to single */
- TCGv_i32 tcg_rd = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
- write_fp_sreg(s, rd, tcg_rd);
+ case A64_SHIFT_TYPE_ROR:
+ if (sf) {
+ tcg_gen_rotr_i64(dst, src, shift_amount);
} else {
- /* Half to double */
- TCGv_i64 tcg_rd = tcg_temp_new_i64();
- gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
- write_fp_dreg(s, rd, tcg_rd);
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t0, src);
+ tcg_gen_extrl_i64_i32(t1, shift_amount);
+ tcg_gen_rotr_i32(t0, t0, t1);
+ tcg_gen_extu_i32_i64(dst, t0);
}
break;
- }
default:
- g_assert_not_reached();
+        g_assert_not_reached(); /* all shift types are handled above */
+ }
+
+ if (!sf) { /* zero extend final result */
+ tcg_gen_ext32u_i64(dst, dst);
}
}
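+/*
+ * Editorial note, not in the original patch: the 32-bit ROR case takes
+ * the i32 path above (truncate both operands, tcg_gen_rotr_i32(),
+ * widen) because a 64-bit rotate would move bits between the two
+ * halves of the register.
+ */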
-/* Floating point data-processing (1 source)
- * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
- * +---+---+---+-----------+------+---+--------+-----------+------+------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+--------+-----------+------+------+
+/* Shift a TCGv src by immediate, put result in dst.
+ * The shift amount must be in range (this should always be true as the
+ * relevant instructions will UNDEF on bad shift immediates).
*/
-static void disas_fp_1src(DisasContext *s, uint32_t insn)
+static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
+ enum a64_shift_type shift_type, unsigned int shift_i)
{
- int mos = extract32(insn, 29, 3);
- int type = extract32(insn, 22, 2);
- int opcode = extract32(insn, 15, 6);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- if (mos) {
- goto do_unallocated;
- }
-
- switch (opcode) {
- case 0x4: case 0x5: case 0x7:
- {
- /* FCVT between half, single and double precision */
- int dtype = extract32(opcode, 0, 2);
- if (type == 2 || dtype == type) {
- goto do_unallocated;
- }
- if (!fp_access_check(s)) {
- return;
- }
+ assert(shift_i < (sf ? 64 : 32));
- handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
- break;
+ if (shift_i == 0) {
+ tcg_gen_mov_i64(dst, src);
+ } else {
+ shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
}
+}
- case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
- if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
- goto do_unallocated;
- }
- /* fall through */
- case 0x0 ... 0x3:
- case 0x8 ... 0xc:
- case 0xe ... 0xf:
- /* 32-to-32 and 64-to-64 ops */
- switch (type) {
- case 0:
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_1src_single(s, opcode, rd, rn);
- break;
- case 1:
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_1src_double(s, opcode, rd, rn);
- break;
- case 3:
- if (!dc_isar_feature(aa64_fp16, s)) {
- goto do_unallocated;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_1src_half(s, opcode, rd, rn);
- break;
- default:
- goto do_unallocated;
- }
- break;
-
- case 0x6:
- switch (type) {
- case 1: /* BFCVT */
- if (!dc_isar_feature(aa64_bf16, s)) {
- goto do_unallocated;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_fp_1src_single(s, opcode, rd, rn);
- break;
- default:
- goto do_unallocated;
- }
- break;
+static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
+ enum a64_shift_type shift_type)
+{
+ TCGv_i64 tcg_shift = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
- default:
- do_unallocated:
- unallocated_encoding(s);
- break;
- }
+ tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
+ shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
+ return true;
}
-/* Floating point immediate
- * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
- * +---+---+---+-----------+------+---+------------+-------+------+------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
- * +---+---+---+-----------+------+---+------------+-------+------+------+
- */
-static void disas_fp_imm(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int imm5 = extract32(insn, 5, 5);
- int imm8 = extract32(insn, 13, 8);
- int type = extract32(insn, 22, 2);
- int mos = extract32(insn, 29, 3);
- uint64_t imm;
- MemOp sz;
+TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
+TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
+TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
+TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
- if (mos || imm5) {
- unallocated_encoding(s);
- return;
- }
+static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
+{
+ TCGv_i64 tcg_acc, tcg_val, tcg_rd;
+ TCGv_i32 tcg_bytes;
- switch (type) {
- case 0:
- sz = MO_32;
+ switch (a->esz) {
+ case MO_8:
+ case MO_16:
+ case MO_32:
+ tcg_val = tcg_temp_new_i64();
+ tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
break;
- case 1:
- sz = MO_64;
+ case MO_64:
+ tcg_val = cpu_reg(s, a->rm);
break;
- case 3:
- sz = MO_16;
- if (dc_isar_feature(aa64_fp16, s)) {
- break;
- }
- /* fallthru */
default:
- unallocated_encoding(s);
- return;
+ g_assert_not_reached();
}
+ tcg_acc = cpu_reg(s, a->rn);
+ tcg_bytes = tcg_constant_i32(1 << a->esz);
+ tcg_rd = cpu_reg(s, a->rd);
- if (!fp_access_check(s)) {
- return;
+ if (crc32c) {
+ gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
+ } else {
+ gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
}
-
- imm = vfp_expand_imm(sz, imm8);
- write_fp_dreg(s, rd, tcg_constant_i64(imm));
+ return true;
}
-/* Handle floating point <=> fixed point conversions. Note that we can
- * also deal with fp <=> integer conversions as a special case (scale == 64)
- * OPTME: consider handling that special case specially or at least skipping
- * the call to scalbn in the helpers for zero shifts.
- */
-static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
- bool itof, int rmode, int scale, int sf, int type)
-{
- bool is_signed = !(opcode & 1);
- TCGv_ptr tcg_fpstatus;
- TCGv_i32 tcg_shift, tcg_single;
- TCGv_i64 tcg_double;
+TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
+TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
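+/*
+ * Editorial example, not in the original patch: CRC32CW decodes with
+ * a->esz == MO_32, so do_crc32() extracts the low 32 bits of Xm into
+ * tcg_val, passes tcg_bytes = 4 to gen_helper_crc32c_64(), and
+ * combines with the running CRC read from Wn.
+ */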
- tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
-
- tcg_shift = tcg_constant_i32(64 - scale);
-
- if (itof) {
- TCGv_i64 tcg_int = cpu_reg(s, rn);
- if (!sf) {
- TCGv_i64 tcg_extend = tcg_temp_new_i64();
-
- if (is_signed) {
- tcg_gen_ext32s_i64(tcg_extend, tcg_int);
- } else {
- tcg_gen_ext32u_i64(tcg_extend, tcg_int);
- }
-
- tcg_int = tcg_extend;
- }
-
- switch (type) {
- case 1: /* float64 */
- tcg_double = tcg_temp_new_i64();
- if (is_signed) {
- gen_helper_vfp_sqtod(tcg_double, tcg_int,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_uqtod(tcg_double, tcg_int,
- tcg_shift, tcg_fpstatus);
- }
- write_fp_dreg(s, rd, tcg_double);
- break;
-
- case 0: /* float32 */
- tcg_single = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_sqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_uqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpstatus);
- }
- write_fp_sreg(s, rd, tcg_single);
- break;
+static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
+{
+ TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
+ TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
+ TCGv_i64 tcg_d = cpu_reg(s, a->rd);
- case 3: /* float16 */
- tcg_single = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_sqtoh(tcg_single, tcg_int,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_uqtoh(tcg_single, tcg_int,
- tcg_shift, tcg_fpstatus);
- }
- write_fp_sreg(s, rd, tcg_single);
- break;
+ tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
+ tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
- default:
- g_assert_not_reached();
- }
+ if (setflag) {
+ gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
} else {
- TCGv_i64 tcg_int = cpu_reg(s, rd);
- TCGv_i32 tcg_rmode;
-
- if (extract32(opcode, 2, 1)) {
- /* There are too many rounding modes to all fit into rmode,
- * so FCVTA[US] is a special case.
- */
- rmode = FPROUNDING_TIEAWAY;
- }
-
- tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
-
- switch (type) {
- case 1: /* float64 */
- tcg_double = read_fp_dreg(s, rn);
- if (is_signed) {
- if (!sf) {
- gen_helper_vfp_tosld(tcg_int, tcg_double,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_tosqd(tcg_int, tcg_double,
- tcg_shift, tcg_fpstatus);
- }
- } else {
- if (!sf) {
- gen_helper_vfp_tould(tcg_int, tcg_double,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_touqd(tcg_int, tcg_double,
- tcg_shift, tcg_fpstatus);
- }
- }
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_int, tcg_int);
- }
- break;
+ tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
+ }
+ return true;
+}
- case 0: /* float32 */
- tcg_single = read_fp_sreg(s, rn);
- if (sf) {
- if (is_signed) {
- gen_helper_vfp_tosqs(tcg_int, tcg_single,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_touqs(tcg_int, tcg_single,
- tcg_shift, tcg_fpstatus);
- }
- } else {
- TCGv_i32 tcg_dest = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_tosls(tcg_dest, tcg_single,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_touls(tcg_dest, tcg_single,
- tcg_shift, tcg_fpstatus);
- }
- tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
- }
- break;
+TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
+TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
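+/*
+ * Editorial note, not in the original patch: SUBP/SUBPS compare 56-bit
+ * pointers, so both operands are sign-extended from bit 55 above; this
+ * strips the MTE allocation tag (and the rest of the TBI byte) before
+ * the subtraction.
+ */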
- case 3: /* float16 */
- tcg_single = read_fp_sreg(s, rn);
- if (sf) {
- if (is_signed) {
- gen_helper_vfp_tosqh(tcg_int, tcg_single,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_touqh(tcg_int, tcg_single,
- tcg_shift, tcg_fpstatus);
- }
- } else {
- TCGv_i32 tcg_dest = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_toslh(tcg_dest, tcg_single,
- tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_toulh(tcg_dest, tcg_single,
- tcg_shift, tcg_fpstatus);
- }
- tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
- }
- break;
+static bool trans_IRG(DisasContext *s, arg_rrr *a)
+{
+ if (dc_isar_feature(aa64_mte_insn_reg, s)) {
+ TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
- default:
- g_assert_not_reached();
+ if (s->ata[0]) {
+ gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
+ } else {
+ gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
}
-
- gen_restore_rmode(tcg_rmode, tcg_fpstatus);
+ return true;
}
+ return false;
}
-/* Floating point <-> fixed point conversions
- * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
- * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
- * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
- * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
- */
-static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int scale = extract32(insn, 10, 6);
- int opcode = extract32(insn, 16, 3);
- int rmode = extract32(insn, 19, 2);
- int type = extract32(insn, 22, 2);
- bool sbit = extract32(insn, 29, 1);
- bool sf = extract32(insn, 31, 1);
- bool itof;
-
- if (sbit || (!sf && scale < 32)) {
- unallocated_encoding(s);
- return;
- }
+static bool trans_GMI(DisasContext *s, arg_rrr *a)
+{
+ if (dc_isar_feature(aa64_mte_insn_reg, s)) {
+ TCGv_i64 t = tcg_temp_new_i64();
- switch (type) {
- case 0: /* float32 */
- case 1: /* float64 */
- break;
- case 3: /* float16 */
- if (dc_isar_feature(aa64_fp16, s)) {
- break;
- }
- /* fallthru */
- default:
- unallocated_encoding(s);
- return;
+ tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
+ tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
+ tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
+ return true;
}
+ return false;
+}
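+/*
+ * Editorial sketch, not in the original patch: GMI reads the
+ * allocation tag from bits [59:56] of Xn (or SP), forms the single-bit
+ * mask (1 << tag), and ORs it into the exclusion mask from Xm;
+ * exactly the three tcg ops above.
+ */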
- switch ((rmode << 3) | opcode) {
- case 0x2: /* SCVTF */
- case 0x3: /* UCVTF */
- itof = true;
- break;
- case 0x18: /* FCVTZS */
- case 0x19: /* FCVTZU */
- itof = false;
- break;
- default:
- unallocated_encoding(s);
- return;
+static bool trans_PACGA(DisasContext *s, arg_rrr *a)
+{
+ if (dc_isar_feature(aa64_pauth, s)) {
+ gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
+ cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
+ return true;
}
+ return false;
+}
- if (!fp_access_check(s)) {
- return;
- }
+typedef void ArithOneOp(TCGv_i64, TCGv_i64);
- handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
+static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
+{
+ fn(cpu_reg(s, rd), cpu_reg(s, rn));
+ return true;
}
-static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
+static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
- /* FMOV: gpr to or from float, double, or top half of quad fp reg,
- * without conversion.
- */
+ TCGv_i32 t32 = tcg_temp_new_i32();
- if (itof) {
- TCGv_i64 tcg_rn = cpu_reg(s, rn);
- TCGv_i64 tmp;
+ tcg_gen_extrl_i64_i32(t32, tcg_rn);
+ gen_helper_rbit(t32, t32);
+ tcg_gen_extu_i32_i64(tcg_rd, t32);
+}
- switch (type) {
- case 0:
- /* 32 bit */
- tmp = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(tmp, tcg_rn);
- write_fp_dreg(s, rd, tmp);
- break;
- case 1:
- /* 64 bit */
- write_fp_dreg(s, rd, tcg_rn);
- break;
- case 2:
- /* 64 bit to top half. */
- tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
- clear_vec_high(s, true, rd);
- break;
- case 3:
- /* 16 bit */
- tmp = tcg_temp_new_i64();
- tcg_gen_ext16u_i64(tmp, tcg_rn);
- write_fp_dreg(s, rd, tmp);
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
+static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
+{
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- switch (type) {
- case 0:
- /* 32 bit */
- tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
- break;
- case 1:
- /* 64 bit */
- tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
- break;
- case 2:
- /* 64 bits from top half */
- tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
- break;
- case 3:
- /* 16 bit */
- tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
- break;
- default:
- g_assert_not_reached();
- }
- }
+ tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
+ tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
+ tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
+ tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
+ tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
}
-static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
+static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
- TCGv_i64 t = read_fp_dreg(s, rn);
- TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
+ gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
+}
- gen_helper_fjcvtzs(t, t, fpstatus);
+static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+ gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
+}
- tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
- tcg_gen_extrh_i64_i32(cpu_ZF, t);
- tcg_gen_movi_i32(cpu_CF, 0);
- tcg_gen_movi_i32(cpu_NF, 0);
- tcg_gen_movi_i32(cpu_VF, 0);
+static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+ tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
}
-/* Floating point <-> integer conversions
- * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
- * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
- * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
- * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
- */
-static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 16, 3);
- int rmode = extract32(insn, 19, 2);
- int type = extract32(insn, 22, 2);
- bool sbit = extract32(insn, 29, 1);
- bool sf = extract32(insn, 31, 1);
- bool itof = false;
-
- if (sbit) {
- goto do_unallocated;
- }
-
- switch (opcode) {
- case 2: /* SCVTF */
- case 3: /* UCVTF */
- itof = true;
- /* fallthru */
- case 4: /* FCVTAS */
- case 5: /* FCVTAU */
- if (rmode != 0) {
- goto do_unallocated;
- }
- /* fallthru */
- case 0: /* FCVT[NPMZ]S */
- case 1: /* FCVT[NPMZ]U */
- switch (type) {
- case 0: /* float32 */
- case 1: /* float64 */
- break;
- case 3: /* float16 */
- if (!dc_isar_feature(aa64_fp16, s)) {
- goto do_unallocated;
- }
- break;
- default:
- goto do_unallocated;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
- break;
+static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+ tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
+ tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
+}
- default:
- switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
- case 0b01100110: /* FMOV half <-> 32-bit int */
- case 0b01100111:
- case 0b11100110: /* FMOV half <-> 64-bit int */
- case 0b11100111:
- if (!dc_isar_feature(aa64_fp16, s)) {
- goto do_unallocated;
- }
- /* fallthru */
- case 0b00000110: /* FMOV 32-bit */
- case 0b00000111:
- case 0b10100110: /* FMOV 64-bit */
- case 0b10100111:
- case 0b11001110: /* FMOV top half of 128-bit */
- case 0b11001111:
- if (!fp_access_check(s)) {
- return;
- }
- itof = opcode & 1;
- handle_fmov(s, rd, rn, type, itof);
- break;
+TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
+TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
+TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
+TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
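+/*
+ * Editorial note, not in the original patch: REV32 on a 64-bit
+ * register byte-swaps each 32-bit half independently; gen_rev32() gets
+ * there with a full bswap64 followed by a 32-bit rotate, while the
+ * 32-bit form (gen_rev_32) is a plain zero-extending bswap32.
+ */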
- case 0b00111110: /* FJCVTZS */
- if (!dc_isar_feature(aa64_jscvt, s)) {
- goto do_unallocated;
- } else if (fp_access_check(s)) {
- handle_fjcvtzs(s, rd, rn);
- }
- break;
+static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+ TCGv_i32 t32 = tcg_temp_new_i32();
- default:
- do_unallocated:
- unallocated_encoding(s);
- return;
- }
- break;
- }
+ tcg_gen_extrl_i64_i32(t32, tcg_rn);
+ tcg_gen_clzi_i32(t32, t32, 32);
+ tcg_gen_extu_i32_i64(tcg_rd, t32);
}
-/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
- * 31 30 29 28 25 24 0
- * +---+---+---+---------+-----------------------------+
- * | | 0 | | 1 1 1 1 | |
- * +---+---+---+---------+-----------------------------+
- */
-static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
+static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
- if (extract32(insn, 24, 1)) {
- unallocated_encoding(s); /* in decodetree */
- } else if (extract32(insn, 21, 1) == 0) {
- /* Floating point to fixed point conversions */
- disas_fp_fixed_conv(s, insn);
- } else {
- switch (extract32(insn, 10, 2)) {
- case 1:
- /* Floating point conditional compare */
- disas_fp_ccomp(s, insn);
- break;
- case 2:
- /* Floating point data-processing (2 source) */
- unallocated_encoding(s); /* in decodetree */
- break;
- case 3:
- /* Floating point conditional select */
- unallocated_encoding(s); /* in decodetree */
- break;
- case 0:
- switch (ctz32(extract32(insn, 12, 4))) {
- case 0: /* [15:12] == xxx1 */
- /* Floating point immediate */
- disas_fp_imm(s, insn);
- break;
- case 1: /* [15:12] == xx10 */
- /* Floating point compare */
- disas_fp_compare(s, insn);
- break;
- case 2: /* [15:12] == x100 */
- /* Floating point data-processing (1 source) */
- disas_fp_1src(s, insn);
- break;
- case 3: /* [15:12] == 1000 */
- unallocated_encoding(s);
- break;
- default: /* [15:12] == 0000 */
- /* Floating point <-> integer conversions */
- disas_fp_int_conv(s, insn);
- break;
- }
- break;
- }
- }
+ tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
}
-static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
- int pos)
+static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
- /* Extract 64 bits from the middle of two concatenated 64 bit
- * vector register slices left:right. The extracted bits start
- * at 'pos' bits into the right (least significant) side.
- * We return the result in tcg_right, and guarantee not to
- * trash tcg_left.
- */
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- assert(pos > 0 && pos < 64);
+ TCGv_i32 t32 = tcg_temp_new_i32();
- tcg_gen_shri_i64(tcg_right, tcg_right, pos);
- tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
- tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
+ tcg_gen_extrl_i64_i32(t32, tcg_rn);
+ tcg_gen_clrsb_i32(t32, t32);
+ tcg_gen_extu_i32_i64(tcg_rd, t32);
}
-/* EXT
- * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
- * +---+---+-------------+-----+---+------+---+------+---+------+------+
- * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
- * +---+---+-------------+-----+---+------+---+------+---+------+------+
- */
-static void disas_simd_ext(DisasContext *s, uint32_t insn)
-{
- int is_q = extract32(insn, 30, 1);
- int op2 = extract32(insn, 22, 2);
- int imm4 = extract32(insn, 11, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- int pos = imm4 << 3;
- TCGv_i64 tcg_resl, tcg_resh;
-
- if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
+TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
+TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
- tcg_resh = tcg_temp_new_i64();
- tcg_resl = tcg_temp_new_i64();
+static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
+{
+ TCGv_i64 tcg_rd, tcg_rn;
- /* Vd gets bits starting at pos bits into Vm:Vn. This is
- * either extracting 128 bits from a 128:128 concatenation, or
- * extracting 64 bits from a 64:64 concatenation.
- */
- if (!is_q) {
- read_vec_element(s, tcg_resl, rn, 0, MO_64);
- if (pos != 0) {
- read_vec_element(s, tcg_resh, rm, 0, MO_64);
- do_ext64(s, tcg_resh, tcg_resl, pos);
+ if (a->z) {
+ if (a->rn != 31) {
+ return false;
}
+ tcg_rn = tcg_constant_i64(0);
} else {
- TCGv_i64 tcg_hh;
- typedef struct {
- int reg;
- int elt;
- } EltPosns;
- EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
- EltPosns *elt = eltposns;
-
- if (pos >= 64) {
- elt++;
- pos -= 64;
- }
-
- read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
- elt++;
- read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
- elt++;
- if (pos != 0) {
- do_ext64(s, tcg_resh, tcg_resl, pos);
- tcg_hh = tcg_temp_new_i64();
- read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
- do_ext64(s, tcg_hh, tcg_resh, pos);
- }
+ tcg_rn = cpu_reg_sp(s, a->rn);
}
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- if (is_q) {
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, a->rd);
+ fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
}
- clear_vec_high(s, is_q, rd);
+ return true;
}
-/* TBL/TBX
- * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
- * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
- * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
- */
-static void disas_simd_tb(DisasContext *s, uint32_t insn)
-{
- int op2 = extract32(insn, 22, 2);
- int is_q = extract32(insn, 30, 1);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- int is_tbx = extract32(insn, 12, 1);
- int len = (extract32(insn, 13, 2) + 1) * 16;
+TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
+TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
+TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
+TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
- if (op2 != 0) {
- unallocated_encoding(s);
- return;
- }
+TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
+TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
+TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
+TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
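+/*
+ * Editorial note, not in the original patch: the z == 1 forms (PACIZA
+ * and friends) require rn == 31 and use a zero modifier, which
+ * gen_pacaut() enforces above; when pauth is not active the encoding
+ * is still validated but the instruction is a NOP.
+ */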
- if (!fp_access_check(s)) {
- return;
+static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
+{
+ if (s->pauth_active) {
+ TCGv_i64 tcg_rd = cpu_reg(s, rd);
+ fn(tcg_rd, tcg_env, tcg_rd);
}
-
- tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rm), tcg_env,
- is_q ? 16 : 8, vec_full_reg_size(s),
- (len << 6) | (is_tbx << 5) | rn,
- gen_helper_simd_tblx);
+ return true;
}
-/* ZIP/UZP/TRN
- * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
- * +---+---+-------------+------+---+------+---+------------------+------+
- * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
- * +---+---+-------------+------+---+------+---+------------------+------+
- */
-static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- /* opc field bits [1:0] indicate ZIP/UZP/TRN;
- * bit 2 indicates 1 vs 2 variant of the insn.
- */
- int opcode = extract32(insn, 12, 2);
- bool part = extract32(insn, 14, 1);
- bool is_q = extract32(insn, 30, 1);
- int esize = 8 << size;
- int i;
- int datasize = is_q ? 128 : 64;
- int elements = datasize / esize;
- TCGv_i64 tcg_res[2], tcg_ele;
+TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
+TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
- if (opcode == 0 || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
+static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
+ ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
+{
+ TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
+
+ if (!a->sf && (a->sa & (1 << 5))) {
+ return false;
}
- if (!fp_access_check(s)) {
- return;
+ tcg_rd = cpu_reg(s, a->rd);
+ tcg_rn = cpu_reg(s, a->rn);
+
+ tcg_rm = read_cpu_reg(s, a->rm, a->sf);
+ if (a->sa) {
+ shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
}
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
- tcg_ele = tcg_temp_new_i64();
+ (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+ if (setflags) {
+ gen_logic_CC(a->sf, tcg_rd);
+ }
+ return true;
+}
- for (i = 0; i < elements; i++) {
- int o, w;
+static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
+{
+    /*
+     * Unshifted ORR and ORN with WZR/XZR are the standard encodings for
+     * register-register MOV and MVN, so they are worth special-casing.
+     */
+ if (a->sa == 0 && a->st == 0 && a->rn == 31) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
- switch (opcode) {
- case 1: /* UZP1/2 */
- {
- int midpoint = elements / 2;
- if (i < midpoint) {
- read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
- } else {
- read_vec_element(s, tcg_ele, rm,
- 2 * (i - midpoint) + part, size);
- }
- break;
- }
- case 2: /* TRN1/2 */
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
- } else {
- read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
+ if (a->n) {
+ tcg_gen_not_i64(tcg_rd, tcg_rm);
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
- break;
- case 3: /* ZIP1/2 */
- {
- int base = part * elements / 2;
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
+ } else {
+ if (a->sf) {
+ tcg_gen_mov_i64(tcg_rd, tcg_rm);
} else {
- read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
}
- break;
- }
- default:
- g_assert_not_reached();
- }
-
- w = (i * esize) / 64;
- o = (i * esize) % 64;
- if (o == 0) {
- tcg_gen_mov_i64(tcg_res[w], tcg_ele);
- } else {
- tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
- tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
}
+ return true;
}
- for (i = 0; i <= is_q; ++i) {
- write_vec_element(s, tcg_res[i], rd, i, MO_64);
- }
- clear_vec_high(s, is_q, rd);
+ return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
}
-/*
- * do_reduction_op helper
- *
- * This mirrors the Reduce() pseudocode in the ARM ARM. It is
- * important for correct NaN propagation that we do these
- * operations in exactly the order specified by the pseudocode.
- *
- * This is a recursive function, TCG temps should be freed by the
- * calling function once it is done with the values.
- */
-static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
- int esize, int size, int vmap, TCGv_ptr fpst)
+TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
+TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
+TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
+
+static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
+ bool sub_op, bool setflags)
{
- if (esize == size) {
- int element;
- MemOp msize = esize == 16 ? MO_16 : MO_32;
- TCGv_i32 tcg_elem;
+ TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
- /* We should have one register left here */
- assert(ctpop8(vmap) == 1);
- element = ctz32(vmap);
- assert(element < 8);
+ if (a->sa > 4) {
+ return false;
+ }
- tcg_elem = tcg_temp_new_i32();
- read_vec_element_i32(s, tcg_elem, rn, element, msize);
- return tcg_elem;
+ /* non-flag setting ops may use SP */
+ if (!setflags) {
+ tcg_rd = cpu_reg_sp(s, a->rd);
} else {
- int bits = size / 2;
- int shift = ctpop8(vmap) / 2;
- int vmap_lo = (vmap >> shift) & vmap;
- int vmap_hi = (vmap & ~vmap_lo);
- TCGv_i32 tcg_hi, tcg_lo, tcg_res;
-
- tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
- tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
- tcg_res = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x0c: /* fmaxnmv half-precision */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x0f: /* fmaxv half-precision */
- gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1c: /* fminnmv half-precision */
- gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1f: /* fminv half-precision */
- gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2c: /* fmaxnmv */
- gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2f: /* fmaxv */
- gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3c: /* fminnmv */
- gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3f: /* fminv */
- gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- return tcg_res;
+ tcg_rd = cpu_reg(s, a->rd);
}
-}
+ tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
-/* AdvSIMD across lanes
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- bool is_q = extract32(insn, 30, 1);
- bool is_u = extract32(insn, 29, 1);
- bool is_fp = false;
- bool is_min = false;
- int esize;
- int elements;
- int i;
- TCGv_i64 tcg_res, tcg_elt;
+ tcg_rm = read_cpu_reg(s, a->rm, a->sf);
+ ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
- switch (opcode) {
- case 0x1b: /* ADDV */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x3: /* SADDLV, UADDLV */
- case 0xa: /* SMAXV, UMAXV */
- case 0x1a: /* SMINV, UMINV */
- if (size == 3 || (size == 2 && !is_q)) {
- unallocated_encoding(s);
- return;
+ tcg_result = tcg_temp_new_i64();
+ if (!setflags) {
+ if (sub_op) {
+ tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
+ } else {
+ tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
}
- break;
- case 0xc: /* FMAXNMV, FMINNMV */
- case 0xf: /* FMAXV, FMINV */
- /* Bit 1 of size field encodes min vs max and the actual size
- * depends on the encoding of the U bit. If not set (and FP16
- * enabled) then we do half-precision float instead of single
- * precision.
- */
- is_min = extract32(size, 1, 1);
- is_fp = true;
- if (!is_u && dc_isar_feature(aa64_fp16, s)) {
- size = 1;
- } else if (!is_u || !is_q || extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
+ } else {
+ if (sub_op) {
+ gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
} else {
- size = 2;
+ gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
}
- break;
- default:
- unallocated_encoding(s);
- return;
}
- if (!fp_access_check(s)) {
- return;
+ if (a->sf) {
+ tcg_gen_mov_i64(tcg_rd, tcg_result);
+ } else {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_result);
}
+ return true;
+}
- esize = 8 << size;
- elements = (is_q ? 128 : 64) / esize;
-
- tcg_res = tcg_temp_new_i64();
- tcg_elt = tcg_temp_new_i64();
-
- /* These instructions operate across all lanes of a vector
- * to produce a single result. We can guarantee that a 64
- * bit intermediate is sufficient:
- * + for [US]ADDLV the maximum element size is 32 bits, and
- * the result type is 64 bits
- * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
- * same as the element size, which is 32 bits at most
- * For the integer operations we can choose to work at 64
- * or 32 bits and truncate at the end; for simplicity
- * we use 64 bits always. The floating point
- * ops do require 32 bit intermediates, though.
- */
- if (!is_fp) {
- read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
-
- for (i = 1; i < elements; i++) {
- read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
-
- switch (opcode) {
- case 0x03: /* SADDLV / UADDLV */
- case 0x1b: /* ADDV */
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
- break;
- case 0x0a: /* SMAXV / UMAXV */
- if (is_u) {
- tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- case 0x1a: /* SMINV / UMINV */
- if (is_u) {
- tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- default:
- g_assert_not_reached();
- }
+TRANS(ADD_ext, do_addsub_ext, a, false, false)
+TRANS(SUB_ext, do_addsub_ext, a, true, false)
+TRANS(ADDS_ext, do_addsub_ext, a, false, true)
+TRANS(SUBS_ext, do_addsub_ext, a, true, true)
- }
- } else {
- /* Floating point vector reduction ops which work across 32
- * bit (single) or 16 bit (half-precision) intermediates.
- * Note that correct NaN propagation requires that we do these
- * operations in exactly the order specified by the pseudocode.
- */
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- int fpopcode = opcode | is_min << 4 | is_u << 5;
- int vmap = (1 << elements) - 1;
- TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
- (is_q ? 128 : 64), vmap, fpst);
- tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
- }
+static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
+ bool sub_op, bool setflags)
+{
+ TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
- /* Now truncate the result to the width required for the final output */
- if (opcode == 0x03) {
- /* SADDLV, UADDLV: result is 2*esize */
- size++;
+ if (a->st == 3 || (!a->sf && (a->sa & 32))) {
+ return false;
}
- switch (size) {
- case 0:
- tcg_gen_ext8u_i64(tcg_res, tcg_res);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_res, tcg_res);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- break;
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
+ tcg_rd = cpu_reg(s, a->rd);
+ tcg_rn = read_cpu_reg(s, a->rn, a->sf);
+ tcg_rm = read_cpu_reg(s, a->rm, a->sf);
- write_fp_dreg(s, rd, tcg_res);
-}
+ shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
-/* AdvSIMD modified immediate
- * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
- * +---+---+----+---------------------+-----+-------+----+---+-------+------+
- * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
- * +---+---+----+---------------------+-----+-------+----+---+-------+------+
- *
- * There are a number of operations that can be carried out here:
- * MOVI - move (shifted) imm into register
- * MVNI - move inverted (shifted) imm into register
- * ORR - bitwise OR of (shifted) imm with register
- * BIC - bitwise clear of (shifted) imm with register
- * With ARMv8.2 we also have:
- * FMOV half-precision
- */
-static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int cmode = extract32(insn, 12, 4);
- int o2 = extract32(insn, 11, 1);
- uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
- bool is_neg = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
- uint64_t imm = 0;
-
- if (o2) {
- if (cmode != 0xf || is_neg) {
- unallocated_encoding(s);
- return;
- }
- /* FMOV (vector, immediate) - half-precision */
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
+ tcg_result = tcg_temp_new_i64();
+ if (!setflags) {
+ if (sub_op) {
+ tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
+ } else {
+ tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
}
- imm = vfp_expand_imm(MO_16, abcdefgh);
- /* now duplicate across the lanes */
- imm = dup_const(MO_16, imm);
} else {
- if (cmode == 0xf && is_neg && !is_q) {
- unallocated_encoding(s);
- return;
+ if (sub_op) {
+ gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
+ } else {
+ gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
}
- imm = asimd_imm_const(abcdefgh, cmode, is_neg);
- }
-
- if (!fp_access_check(s)) {
- return;
}
- if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
- /* MOVI or MVNI, with MVNI negation handled above. */
- tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
- vec_full_reg_size(s), imm);
+ if (a->sf) {
+ tcg_gen_mov_i64(tcg_rd, tcg_result);
} else {
- /* ORR or BIC, with BIC negation to AND handled above. */
- if (is_neg) {
- gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
- }
+ tcg_gen_ext32u_i64(tcg_rd, tcg_result);
}
+ return true;
}
-/*
- * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
- *
- * This code is handles the common shifting code and is used by both
- * the vector and scalar code.
- */
-static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
- TCGv_i64 tcg_rnd, bool accumulate,
- bool is_u, int size, int shift)
-{
- bool extended_result = false;
- bool round = tcg_rnd != NULL;
- int ext_lshift = 0;
- TCGv_i64 tcg_src_hi;
-
- if (round && size == 3) {
- extended_result = true;
- ext_lshift = 64 - shift;
- tcg_src_hi = tcg_temp_new_i64();
- } else if (shift == 64) {
- if (!accumulate && is_u) {
- /* result is zero */
- tcg_gen_movi_i64(tcg_res, 0);
- return;
- }
- }
+TRANS(ADD_r, do_addsub_reg, a, false, false)
+TRANS(SUB_r, do_addsub_reg, a, true, false)
+TRANS(ADDS_r, do_addsub_reg, a, false, true)
+TRANS(SUBS_r, do_addsub_reg, a, true, true)
- /* Deal with the rounding step */
- if (round) {
- if (extended_result) {
- TCGv_i64 tcg_zero = tcg_constant_i64(0);
- if (!is_u) {
- /* take care of sign extending tcg_res */
- tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
- tcg_gen_add2_i64(tcg_src, tcg_src_hi,
- tcg_src, tcg_src_hi,
- tcg_rnd, tcg_zero);
- } else {
- tcg_gen_add2_i64(tcg_src, tcg_src_hi,
- tcg_src, tcg_zero,
- tcg_rnd, tcg_zero);
- }
- } else {
- tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
- }
+static bool do_mulh(DisasContext *s, arg_rrr *a,
+ void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 discard = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+ TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
+
+ fn(discard, tcg_rd, tcg_rn, tcg_rm);
+ return true;
+}
+
+TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
+TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
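+/*
+ * Editorial note, not in the original patch: tcg_gen_muls2_i64() and
+ * tcg_gen_mulu2_i64() produce both halves of the 128-bit product;
+ * SMULH/UMULH need only the high half, so do_mulh() writes the low
+ * half to a discarded temporary.
+ */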
+
+static bool do_muladd(DisasContext *s, arg_rrrr *a,
+ bool sf, bool is_sub, MemOp mop)
+{
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_op1, tcg_op2;
+
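+    /*
+     * For the [SU]MADDL/[SU]MSUBL forms, 'mop' selects sign- or
+     * zero-extension of the 32-bit operands before the multiply.
+     */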
+ if (mop == MO_64) {
+ tcg_op1 = cpu_reg(s, a->rn);
+ tcg_op2 = cpu_reg(s, a->rm);
+ } else {
+ tcg_op1 = tcg_temp_new_i64();
+ tcg_op2 = tcg_temp_new_i64();
+ tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
+ tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
}
- /* Now do the shift right */
- if (round && extended_result) {
- /* extended case, >64 bit precision required */
- if (ext_lshift == 0) {
- /* special case, only high bits matter */
- tcg_gen_mov_i64(tcg_src, tcg_src_hi);
- } else {
- tcg_gen_shri_i64(tcg_src, tcg_src, shift);
- tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
- tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
- }
+ if (a->ra == 31 && !is_sub) {
+ /* Special-case MADD with rA == XZR; it is the standard MUL alias */
+ tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
} else {
- if (is_u) {
- if (shift == 64) {
- /* essentially shifting in 64 zeros */
- tcg_gen_movi_i64(tcg_src, 0);
- } else {
- tcg_gen_shri_i64(tcg_src, tcg_src, shift);
- }
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+ TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
+
+ tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
+ if (is_sub) {
+ tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
} else {
- if (shift == 64) {
- /* effectively extending the sign-bit */
- tcg_gen_sari_i64(tcg_src, tcg_src, 63);
- } else {
- tcg_gen_sari_i64(tcg_src, tcg_src, shift);
- }
+ tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
}
}
- if (accumulate) {
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
- } else {
- tcg_gen_mov_i64(tcg_res, tcg_src);
+ if (!sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
+ return true;
}
-/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
-static void handle_scalar_simd_shri(DisasContext *s,
- bool is_u, int immh, int immb,
- int opcode, int rn, int rd)
-{
- const int size = 3;
- int immhb = immh << 3 | immb;
- int shift = 2 * (8 << size) - immhb;
- bool accumulate = false;
- bool round = false;
- bool insert = false;
- TCGv_i64 tcg_rn;
- TCGv_i64 tcg_rd;
- TCGv_i64 tcg_round;
+TRANS(MADD_w, do_muladd, a, false, false, MO_64)
+TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
+TRANS(MADD_x, do_muladd, a, true, false, MO_64)
+TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
- if (!extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
+TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
+TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
+TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
+TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
- if (!fp_access_check(s)) {
- return;
- }
+static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
+ bool is_sub, bool setflags)
+{
+ TCGv_i64 tcg_y, tcg_rn, tcg_rd;
- switch (opcode) {
- case 0x02: /* SSRA / USRA (accumulate) */
- accumulate = true;
- break;
- case 0x04: /* SRSHR / URSHR (rounding) */
- round = true;
- break;
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- accumulate = round = true;
- break;
- case 0x08: /* SRI */
- insert = true;
- break;
- }
+ tcg_rd = cpu_reg(s, a->rd);
+ tcg_rn = cpu_reg(s, a->rn);
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
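+    /* SBC/SBCS compute rn - rm - !C as rn + ~rm + C. */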
+ if (is_sub) {
+ tcg_y = tcg_temp_new_i64();
+ tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
} else {
- tcg_round = NULL;
+ tcg_y = cpu_reg(s, a->rm);
}
- tcg_rn = read_fp_dreg(s, rn);
- tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
-
- if (insert) {
- /* shift count same as element size is valid but does nothing;
- * special case to avoid potential shift by 64.
- */
- int esize = 8 << size;
- if (shift != esize) {
- tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
- }
+ if (setflags) {
+ gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
} else {
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
+ gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
}
-
- write_fp_dreg(s, rd, tcg_rd);
+ return true;
}
-/* SHL/SLI - Scalar shift left */
-static void handle_scalar_simd_shli(DisasContext *s, bool insert,
- int immh, int immb, int opcode,
- int rn, int rd)
+TRANS(ADC, do_adc_sbc, a, false, false)
+TRANS(SBC, do_adc_sbc, a, true, false)
+TRANS(ADCS, do_adc_sbc, a, false, true)
+TRANS(SBCS, do_adc_sbc, a, true, true)
+
+static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
+ int mask = a->mask;
TCGv_i64 tcg_rn;
- TCGv_i64 tcg_rd;
+ TCGv_i32 nzcv;
- if (!extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
+ if (!dc_isar_feature(aa64_condm_4, s)) {
+ return false;
}
- if (!fp_access_check(s)) {
- return;
+ tcg_rn = read_cpu_reg(s, a->rn, 1);
+ tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
+
+ nzcv = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
+
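+    /*
+     * QEMU keeps N in bit 31 of cpu_NF, Z as "cpu_ZF == 0", C in
+     * bit 0 of cpu_CF and V in bit 31 of cpu_VF; move each selected
+     * bit of the rotated value into place accordingly.
+     */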
+ if (mask & 8) { /* N */
+ tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
}
+ if (mask & 4) { /* Z */
+ tcg_gen_not_i32(cpu_ZF, nzcv);
+ tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
+ }
+ if (mask & 2) { /* C */
+ tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
+ }
+ if (mask & 1) { /* V */
+ tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
+ }
+ return true;
+}
- tcg_rn = read_fp_dreg(s, rn);
- tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+static bool do_setf(DisasContext *s, int rn, int shift)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
- if (insert) {
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
- } else {
- tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
- }
-
- write_fp_dreg(s, rd, tcg_rd);
-}
-
-/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
- * (signed/unsigned) narrowing */
-static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
- bool is_u_shift, bool is_u_narrow,
- int immh, int immb, int opcode,
- int rn, int rd)
-{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int esize = 8 << size;
- int shift = (2 * esize) - immhb;
- int elements = is_scalar ? 1 : (64 / esize);
- bool round = extract32(opcode, 0, 1);
- MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
- TCGv_i64 tcg_rn, tcg_rd, tcg_round;
- TCGv_i32 tcg_rd_narrowed;
- TCGv_i64 tcg_final;
-
- static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
- { gen_helper_neon_narrow_sat_s8,
- gen_helper_neon_unarrow_sat8 },
- { gen_helper_neon_narrow_sat_s16,
- gen_helper_neon_unarrow_sat16 },
- { gen_helper_neon_narrow_sat_s32,
- gen_helper_neon_unarrow_sat32 },
- { NULL, NULL },
- };
- static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
- gen_helper_neon_narrow_sat_u8,
- gen_helper_neon_narrow_sat_u16,
- gen_helper_neon_narrow_sat_u32,
- NULL
- };
- NeonGenNarrowEnvFn *narrowfn;
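+    /*
+     * SETF8 (shift 24) / SETF16 (shift 16): N and Z are set from the
+     * low 8 or 16 bits of rn, and V to bit 8 (resp. 16) XOR bit 7
+     * (resp. 15); C is left unchanged.
+     */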
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
+ tcg_gen_shli_i32(cpu_NF, tmp, shift);
+ tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
+ return true;
+}
- int i;
+TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
+TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
- assert(size < 4);
+/* CCMP, CCMN */
+static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
+{
+ TCGv_i32 tcg_t0 = tcg_temp_new_i32();
+ TCGv_i32 tcg_t1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_t2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+ TCGv_i64 tcg_rn, tcg_y;
+ DisasCompare c;
+ unsigned nzcv;
+ bool has_andc;
- if (extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
+ /* Set T0 = !COND. */
+ arm_test_cc(&c, a->cond);
+ tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
- if (!fp_access_check(s)) {
- return;
+ /* Load the arguments for the new comparison. */
+ if (a->imm) {
+ tcg_y = tcg_constant_i64(a->y);
+ } else {
+ tcg_y = cpu_reg(s, a->y);
}
+ tcg_rn = cpu_reg(s, a->rn);
- if (is_u_shift) {
- narrowfn = unsigned_narrow_fns[size];
+ /* Set the flags for the new comparison. */
+ if (a->op) {
+ gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
} else {
- narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
+ gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
}
- tcg_rn = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- tcg_rd_narrowed = tcg_temp_new_i32();
- tcg_final = tcg_temp_new_i64();
+ /*
+ * If COND was false, force the flags to #nzcv. Compute two masks
+ * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
+ * For tcg hosts that support ANDC, we can make do with just T1.
+ * In either case, allow the tcg optimizer to delete any unused mask.
+ */
+ tcg_gen_neg_i32(tcg_t1, tcg_t0);
+ tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
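+    /*
+     * When COND is true, T1 = 0 and T2 = -1, so the OR/AND(C) ops
+     * below leave each flag unchanged; when COND is false they force
+     * each flag to its #nzcv bit (cpu_ZF has inverted sense: Z is
+     * set iff cpu_ZF == 0).
+     */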
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
+ nzcv = a->nzcv;
+ has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0);
+ if (nzcv & 8) { /* N */
+ tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
} else {
- tcg_round = NULL;
+ if (has_andc) {
+ tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
+ } else {
+ tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
+ }
}
-
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, ldop);
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- false, is_u_shift, size+1, shift);
- narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
- if (i == 0) {
- tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
+ if (nzcv & 4) { /* Z */
+ if (has_andc) {
+ tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
} else {
- tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+ tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
}
+ } else {
+ tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
}
-
- if (!is_q) {
- write_vec_element(s, tcg_final, rd, 0, MO_64);
+ if (nzcv & 2) { /* C */
+ tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
+ } else {
+ if (has_andc) {
+ tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
+ } else {
+ tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
+ }
+ }
+ if (nzcv & 1) { /* V */
+ tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
} else {
- write_vec_element(s, tcg_final, rd, 1, MO_64);
+ if (has_andc) {
+ tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
+ } else {
+ tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
+ }
}
- clear_vec_high(s, is_q, rd);
+ return true;
}
-/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
-static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
- bool src_unsigned, bool dst_unsigned,
- int immh, int immb, int rn, int rd)
+static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int shift = immhb - (8 << size);
- int pass;
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 zero = tcg_constant_i64(0);
+ DisasCompare64 c;
- assert(immh != 0);
- assert(!(scalar && is_q));
+ a64_test_cc(&c, a->cond);
- if (!scalar) {
- if (!is_q && extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
+ if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
+ /* CSET & CSETM. */
+ if (a->else_inv) {
+ tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
+ tcg_rd, c.value, zero);
+ } else {
+ tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
+ tcg_rd, c.value, zero);
}
+ } else {
+ TCGv_i64 t_true = cpu_reg(s, a->rn);
+ TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
- /* Since we use the variable-shift helpers we must
- * replicate the shift count into each element of
- * the tcg_shift value.
- */
- switch (size) {
- case 0:
- shift |= shift << 8;
- /* fall through */
- case 1:
- shift |= shift << 16;
- break;
- case 2:
- case 3:
- break;
- default:
- g_assert_not_reached();
+ if (a->else_inv && a->else_inc) {
+ tcg_gen_neg_i64(t_false, t_false);
+ } else if (a->else_inv) {
+ tcg_gen_not_i64(t_false, t_false);
+ } else if (a->else_inc) {
+ tcg_gen_addi_i64(t_false, t_false, 1);
}
+ tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
}
- if (!fp_access_check(s)) {
- return;
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
+ return true;
+}
- if (size == 3) {
- TCGv_i64 tcg_shift = tcg_constant_i64(shift);
- static NeonGenTwo64OpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
- { NULL, gen_helper_neon_qshl_u64 },
- };
- NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
- int maxpass = is_q ? 2 : 1;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
+typedef struct FPScalar1Int {
+ void (*gen_h)(TCGv_i32, TCGv_i32);
+ void (*gen_s)(TCGv_i32, TCGv_i32);
+ void (*gen_d)(TCGv_i64, TCGv_i64);
+} FPScalar1Int;
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- write_vec_element(s, tcg_op, rd, pass, MO_64);
- }
- clear_vec_high(s, is_q, rd);
- } else {
- TCGv_i32 tcg_shift = tcg_constant_i32(shift);
- static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
- {
- { gen_helper_neon_qshl_s8,
- gen_helper_neon_qshl_s16,
- gen_helper_neon_qshl_s32 },
- { gen_helper_neon_qshlu_s8,
- gen_helper_neon_qshlu_s16,
- gen_helper_neon_qshlu_s32 }
- }, {
- { NULL, NULL, NULL },
- { gen_helper_neon_qshl_u8,
- gen_helper_neon_qshl_u16,
- gen_helper_neon_qshl_u32 }
+static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
+ const FPScalar1Int *f,
+ bool merging)
+{
+ switch (a->esz) {
+ case MO_64:
+ if (fp_access_check(s)) {
+ TCGv_i64 t = read_fp_dreg(s, a->rn);
+ f->gen_d(t, t);
+ if (merging) {
+ write_fp_dreg_merging(s, a->rd, a->rd, t);
+ } else {
+ write_fp_dreg(s, a->rd, t);
}
- };
- NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
- MemOp memop = scalar ? size : MO_32;
- int maxpass = scalar ? 1 : is_q ? 4 : 2;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, memop);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- if (scalar) {
- switch (size) {
- case 0:
- tcg_gen_ext8u_i32(tcg_op, tcg_op);
- break;
- case 1:
- tcg_gen_ext16u_i32(tcg_op, tcg_op);
- break;
- case 2:
- break;
- default:
- g_assert_not_reached();
- }
- write_fp_sreg(s, rd, tcg_op);
+ }
+ break;
+ case MO_32:
+ if (fp_access_check(s)) {
+ TCGv_i32 t = read_fp_sreg(s, a->rn);
+ f->gen_s(t, t);
+ if (merging) {
+ write_fp_sreg_merging(s, a->rd, a->rd, t);
} else {
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ write_fp_sreg(s, a->rd, t);
}
}
-
- if (!scalar) {
- clear_vec_high(s, is_q, rd);
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
}
+ if (fp_access_check(s)) {
+ TCGv_i32 t = read_fp_hreg(s, a->rn);
+ f->gen_h(t, t);
+ if (merging) {
+ write_fp_hreg_merging(s, a->rd, a->rd, t);
+ } else {
+ write_fp_sreg(s, a->rd, t);
+ }
+ }
+ break;
+ default:
+ return false;
}
+ return true;
}
-/* Common vector code for handling integer to FP conversion */
-static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
- int elements, int is_signed,
- int fracbits, int size)
+static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
+ const FPScalar1Int *fnormal,
+ const FPScalar1Int *fah)
{
- TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- TCGv_i32 tcg_shift = NULL;
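+    /*
+     * FPCR.AH == 1 selects the FEAT_AFP alternate-handling variants,
+     * which differ in their treatment of NaN inputs.
+     */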
+ return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
+}
- MemOp mop = size | (is_signed ? MO_SIGN : 0);
- int pass;
+static const FPScalar1Int f_scalar_fmov = {
+ tcg_gen_mov_i32,
+ tcg_gen_mov_i32,
+ tcg_gen_mov_i64,
+};
+TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
- if (fracbits || size == MO_64) {
- tcg_shift = tcg_constant_i32(fracbits);
- }
+static const FPScalar1Int f_scalar_fabs = {
+ gen_vfp_absh,
+ gen_vfp_abss,
+ gen_vfp_absd,
+};
+static const FPScalar1Int f_scalar_ah_fabs = {
+ gen_vfp_ah_absh,
+ gen_vfp_ah_abss,
+ gen_vfp_ah_absd,
+};
+TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
- if (size == MO_64) {
- TCGv_i64 tcg_int64 = tcg_temp_new_i64();
- TCGv_i64 tcg_double = tcg_temp_new_i64();
+static const FPScalar1Int f_scalar_fneg = {
+ gen_vfp_negh,
+ gen_vfp_negs,
+ gen_vfp_negd,
+};
+static const FPScalar1Int f_scalar_ah_fneg = {
+ gen_vfp_ah_negh,
+ gen_vfp_ah_negs,
+ gen_vfp_ah_negd,
+};
+TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
- for (pass = 0; pass < elements; pass++) {
- read_vec_element(s, tcg_int64, rn, pass, mop);
+typedef struct FPScalar1 {
+ void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
+ void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
+ void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
+} FPScalar1;
- if (is_signed) {
- gen_helper_vfp_sqtod(tcg_double, tcg_int64,
- tcg_shift, tcg_fpst);
- } else {
- gen_helper_vfp_uqtod(tcg_double, tcg_int64,
- tcg_shift, tcg_fpst);
- }
- if (elements == 1) {
- write_fp_dreg(s, rd, tcg_double);
- } else {
- write_vec_element(s, tcg_double, rd, pass, MO_64);
- }
- }
- } else {
- TCGv_i32 tcg_int32 = tcg_temp_new_i32();
- TCGv_i32 tcg_float = tcg_temp_new_i32();
-
- for (pass = 0; pass < elements; pass++) {
- read_vec_element_i32(s, tcg_int32, rn, pass, mop);
-
- switch (size) {
- case MO_32:
- if (fracbits) {
- if (is_signed) {
- gen_helper_vfp_sltos(tcg_float, tcg_int32,
- tcg_shift, tcg_fpst);
- } else {
- gen_helper_vfp_ultos(tcg_float, tcg_int32,
- tcg_shift, tcg_fpst);
- }
- } else {
- if (is_signed) {
- gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
- } else {
- gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
- }
- }
- break;
- case MO_16:
- if (fracbits) {
- if (is_signed) {
- gen_helper_vfp_sltoh(tcg_float, tcg_int32,
- tcg_shift, tcg_fpst);
- } else {
- gen_helper_vfp_ultoh(tcg_float, tcg_int32,
- tcg_shift, tcg_fpst);
- }
- } else {
- if (is_signed) {
- gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
- } else {
- gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
+static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
+ const FPScalar1 *f, int rmode,
+ ARMFPStatusFlavour fpsttype)
+{
+ TCGv_i32 tcg_rmode = NULL;
+ TCGv_ptr fpst;
+ TCGv_i64 t64;
+ TCGv_i32 t32;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- if (elements == 1) {
- write_fp_sreg(s, rd, tcg_float);
- } else {
- write_vec_element_i32(s, tcg_float, rd, pass, size);
- }
- }
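+    /* <0: invalid esz (undecoded); 0: fp access trap raised; >0: proceed */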
+ if (check <= 0) {
+ return check == 0;
}
- clear_vec_high(s, elements << size == 16, rd);
-}
-
-/* UCVTF/SCVTF - Integer to FP conversion */
-static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
- bool is_q, bool is_u,
- int immh, int immb, int opcode,
- int rn, int rd)
-{
- int size, elements, fracbits;
- int immhb = immh << 3 | immb;
-
- if (immh & 8) {
- size = MO_64;
- if (!is_scalar && !is_q) {
- unallocated_encoding(s);
- return;
- }
- } else if (immh & 4) {
- size = MO_32;
- } else if (immh & 2) {
- size = MO_16;
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
- } else {
- /* immh == 0 would be a failure of the decode logic */
- g_assert(immh == 1);
- unallocated_encoding(s);
- return;
+ fpst = fpstatus_ptr(fpsttype);
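+    /* rmode < 0 means "use the rounding mode already in FPCR". */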
+ if (rmode >= 0) {
+ tcg_rmode = gen_set_rmode(rmode, fpst);
}
- if (is_scalar) {
- elements = 1;
- } else {
- elements = (8 << is_q) >> size;
+ switch (a->esz) {
+ case MO_64:
+ t64 = read_fp_dreg(s, a->rn);
+ f->gen_d(t64, t64, fpst);
+ write_fp_dreg_merging(s, a->rd, a->rd, t64);
+ break;
+ case MO_32:
+ t32 = read_fp_sreg(s, a->rn);
+ f->gen_s(t32, t32, fpst);
+ write_fp_sreg_merging(s, a->rd, a->rd, t32);
+ break;
+ case MO_16:
+ t32 = read_fp_hreg(s, a->rn);
+ f->gen_h(t32, t32, fpst);
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
+ break;
+ default:
+ g_assert_not_reached();
}
- fracbits = (16 << size) - immhb;
- if (!fp_access_check(s)) {
- return;
+ if (rmode >= 0) {
+ gen_restore_rmode(tcg_rmode, fpst);
}
+ return true;
+}
- handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
+static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
+ const FPScalar1 *f, int rmode)
+{
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
+ a->esz == MO_16 ?
+ FPST_A64_F16 : FPST_A64);
}
-/* FCVTZS, FCVTZU - FP to fixed-point conversion */
-static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
- bool is_q, bool is_u,
- int immh, int immb, int rn, int rd)
+static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
+ const FPScalar1 *f, int rmode)
{
- int immhb = immh << 3 | immb;
- int pass, size, fracbits;
- TCGv_ptr tcg_fpstatus;
- TCGv_i32 tcg_rmode, tcg_shift;
+ return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
+}
- if (immh & 0x8) {
- size = MO_64;
- if (!is_scalar && !is_q) {
- unallocated_encoding(s);
- return;
- }
- } else if (immh & 0x4) {
- size = MO_32;
- } else if (immh & 0x2) {
- size = MO_16;
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
- } else {
- /* Should have split out AdvSIMD modified immediate earlier. */
- assert(immh == 1);
- unallocated_encoding(s);
- return;
- }
+static const FPScalar1 f_scalar_fsqrt = {
+ gen_helper_vfp_sqrth,
+ gen_helper_vfp_sqrts,
+ gen_helper_vfp_sqrtd,
+};
+TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
- if (!fp_access_check(s)) {
- return;
- }
+static const FPScalar1 f_scalar_frint = {
+ gen_helper_advsimd_rinth,
+ gen_helper_rints,
+ gen_helper_rintd,
+};
+TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
+TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
+TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
+TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
+TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
+TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
+
+static const FPScalar1 f_scalar_frintx = {
+ gen_helper_advsimd_rinth_exact,
+ gen_helper_rints_exact,
+ gen_helper_rintd_exact,
+};
+TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
- assert(!(is_scalar && is_q));
+static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
+{
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
+ TCGv_i32 t32;
+ int check;
- tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
- fracbits = (16 << size) - immhb;
- tcg_shift = tcg_constant_i32(fracbits);
+ if (!dc_isar_feature(aa64_bf16, s)) {
+ return false;
+ }
- if (size == MO_64) {
- int maxpass = is_scalar ? 1 : 2;
+ check = fp_access_check_scalar_hsd(s, a->esz);
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
+ if (check <= 0) {
+ return check == 0;
+ }
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- if (is_u) {
- gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
- } else {
- gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
- }
- write_vec_element(s, tcg_op, rd, pass, MO_64);
- }
- clear_vec_high(s, is_q, rd);
- } else {
- void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
- int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
+ t32 = read_fp_sreg(s, a->rn);
+ gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
+ write_fp_hreg_merging(s, a->rd, a->rd, t32);
+ return true;
+}
- switch (size) {
- case MO_16:
- if (is_u) {
- fn = gen_helper_vfp_touhh;
- } else {
- fn = gen_helper_vfp_toshh;
- }
- break;
- case MO_32:
- if (is_u) {
- fn = gen_helper_vfp_touls;
- } else {
- fn = gen_helper_vfp_tosls;
- }
- break;
- default:
- g_assert_not_reached();
- }
+static const FPScalar1 f_scalar_frint32 = {
+ NULL,
+ gen_helper_frint32_s,
+ gen_helper_frint32_d,
+};
+TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
+ &f_scalar_frint32, FPROUNDING_ZERO)
+TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
+
+static const FPScalar1 f_scalar_frint64 = {
+ NULL,
+ gen_helper_frint64_s,
+ gen_helper_frint64_d,
+};
+TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
+ &f_scalar_frint64, FPROUNDING_ZERO)
+TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
+
+static const FPScalar1 f_scalar_frecpe = {
+ gen_helper_recpe_f16,
+ gen_helper_recpe_f32,
+ gen_helper_recpe_f64,
+};
+static const FPScalar1 f_scalar_frecpe_rpres = {
+ gen_helper_recpe_f16,
+ gen_helper_recpe_rpres_f32,
+ gen_helper_recpe_f64,
+};
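+/*
+ * FEAT_RPRES: with FPCR.AH == 1 the single-precision reciprocal
+ * estimate is performed with increased (12-bit) precision.
+ */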
+TRANS(FRECPE_s, do_fp1_scalar_ah, a,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
+ &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
+
+static const FPScalar1 f_scalar_frecpx = {
+ gen_helper_frecpx_f16,
+ gen_helper_frecpx_f32,
+ gen_helper_frecpx_f64,
+};
+TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
+static const FPScalar1 f_scalar_frsqrte = {
+ gen_helper_rsqrte_f16,
+ gen_helper_rsqrte_f32,
+ gen_helper_rsqrte_f64,
+};
+static const FPScalar1 f_scalar_frsqrte_rpres = {
+ gen_helper_rsqrte_f16,
+ gen_helper_rsqrte_rpres_f32,
+ gen_helper_rsqrte_f64,
+};
+TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
+ &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
- read_vec_element_i32(s, tcg_op, rn, pass, size);
- fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
- if (is_scalar) {
- if (size == MO_16 && !is_u) {
- tcg_gen_ext16u_i32(tcg_op, tcg_op);
- }
- write_fp_sreg(s, rd, tcg_op);
- } else {
- write_vec_element_i32(s, tcg_op, rd, pass, size);
- }
- }
- if (!is_scalar) {
- clear_vec_high(s, is_q, rd);
- }
- }
+static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
- gen_restore_rmode(tcg_rmode, tcg_fpstatus);
+ gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
+ }
+ return true;
}
-/* AdvSIMD scalar shift by immediate
- * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
- * +-----+---+-------------+------+------+--------+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
- * +-----+---+-------------+------+------+--------+---+------+------+
- *
- * This is the scalar version, so it works on fixed-size registers
- */
-static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
+static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 5);
- int immb = extract32(insn, 16, 3);
- int immh = extract32(insn, 19, 4);
- bool is_u = extract32(insn, 29, 1);
+ if (fp_access_check(s)) {
+ TCGv_i32 tmp = read_fp_sreg(s, a->rn);
+ TCGv_i32 ahp = get_ahp_flag();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
- if (immh == 0) {
- unallocated_encoding(s);
- return;
- }
-
- switch (opcode) {
- case 0x08: /* SRI */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x00: /* SSHR / USHR */
- case 0x02: /* SSRA / USRA */
- case 0x04: /* SRSHR / URSHR */
- case 0x06: /* SRSRA / URSRA */
- handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x0a: /* SHL / SLI */
- handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x1c: /* SCVTF, UCVTF */
- handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
- opcode, rn, rd);
- break;
- case 0x10: /* SQSHRUN, SQSHRUN2 */
- case 0x11: /* SQRSHRUN, SQRSHRUN2 */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_vec_simd_sqshrn(s, true, false, false, true,
- immh, immb, opcode, rn, rd);
- break;
- case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
- case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
- handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
- immh, immb, opcode, rn, rd);
- break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
- break;
- case 0x1f: /* FCVTZS, FCVTZU */
- handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
- break;
- default:
- unallocated_encoding(s);
- break;
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+ /* write_fp_hreg_merging is OK here because top half of result is zero */
+ write_fp_hreg_merging(s, a->rd, a->rd, tmp);
}
+ return true;
}
-/* AdvSIMD scalar three different
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +-----+---+-----------+------+---+------+--------+-----+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
- * +-----+---+-----------+------+---+------+--------+-----+------+------+
- */
-static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
+static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
{
- bool is_u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
+ TCGv_i32 tcg_rd = tcg_temp_new_i32();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
- if (is_u) {
- unallocated_encoding(s);
- return;
+ gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
}
+ return true;
+}
- switch (opcode) {
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- case 0xd: /* SQDMULL, SQDMULL2 */
- if (size == 0 || size == 3) {
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- unallocated_encoding(s);
- return;
- }
+static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
+ TCGv_i32 tcg_rd = tcg_temp_new_i32();
+ TCGv_i32 ahp = get_ahp_flag();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
- if (!fp_access_check(s)) {
- return;
+ gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
+ /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
+ write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
}
+ return true;
+}
- if (size == 2) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
- read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
-
- tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
-
- switch (opcode) {
- case 0xd: /* SQDMULL, SQDMULL2 */
- break;
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- tcg_gen_neg_i64(tcg_res, tcg_res);
- /* fall through */
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- read_vec_element(s, tcg_op1, rd, 0, MO_64);
- gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
- tcg_res, tcg_op1);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
- } else {
- TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
- TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
-
- switch (opcode) {
- case 0xd: /* SQDMULL, SQDMULL2 */
- break;
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- gen_helper_neon_negl_u32(tcg_res, tcg_res);
- /* fall through */
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- {
- TCGv_i64 tcg_op3 = tcg_temp_new_i64();
- read_vec_element(s, tcg_op3, rd, 0, MO_32);
- gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
- tcg_res, tcg_op3);
- break;
- }
- default:
- g_assert_not_reached();
- }
+static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
+ TCGv_i32 tcg_rd = tcg_temp_new_i32();
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
+ TCGv_i32 tcg_ahp = get_ahp_flag();
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- write_fp_dreg(s, rd, tcg_res);
+ gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
+ write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
}
+ return true;
}
-/* AdvSIMD scalar three same extra
- * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
- * +-----+---+-----------+------+---+------+---+--------+---+----+----+
- * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
- * +-----+---+-----------+------+---+------+---+--------+---+----+----+
- */
-static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
- uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 4);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- TCGv_i32 ele1, ele2, ele3;
- TCGv_i64 res;
- bool feature;
-
- switch (u * 16 + opcode) {
- case 0x10: /* SQRDMLAH (vector) */
- case 0x11: /* SQRDMLSH (vector) */
- if (size != 1 && size != 2) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_rdm, s);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
+static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
+ TCGv_i32 tcg_ahp = get_ahp_flag();
- /* Do a single operation on the lowest element in the vector.
- * We use the standard Neon helpers and rely on 0 OP 0 == 0
- * with no side effects for all these operations.
- * OPTME: special-purpose helpers would avoid doing some
- * unnecessary work in the helper for the 16 bit cases.
- */
- ele1 = tcg_temp_new_i32();
- ele2 = tcg_temp_new_i32();
- ele3 = tcg_temp_new_i32();
-
- read_vec_element_i32(s, ele1, rn, 0, size);
- read_vec_element_i32(s, ele2, rm, 0, size);
- read_vec_element_i32(s, ele3, rd, 0, size);
-
- switch (opcode) {
- case 0x0: /* SQRDMLAH */
- if (size == 1) {
- gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3);
- } else {
- gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3);
- }
- break;
- case 0x1: /* SQRDMLSH */
- if (size == 1) {
- gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3);
- } else {
- gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3);
- }
- break;
- default:
- g_assert_not_reached();
+ gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
+ write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
}
-
- res = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(res, ele3);
- write_fp_dreg(s, rd, res);
+ return true;
}
-static void handle_2misc_64(DisasContext *s, int opcode, bool u,
- TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
- TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
+static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
+ TCGv_i64 tcg_int, bool is_signed)
{
- /* Handle 64->64 opcodes which are shared between the scalar and
- * vector 2-reg-misc groups. We cover every integer opcode where size == 3
- * is valid in either group and also the double-precision fp ops.
- * The caller need only provide tcg_rmode and tcg_fpstatus if the op
- * requires them.
- */
- TCGCond cond;
+ TCGv_ptr tcg_fpstatus;
+ TCGv_i32 tcg_shift, tcg_single;
+ TCGv_i64 tcg_double;
+
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+ tcg_shift = tcg_constant_i32(shift);
- switch (opcode) {
- case 0x4: /* CLS, CLZ */
- if (u) {
- tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
+ switch (esz) {
+ case MO_64:
+ tcg_double = tcg_temp_new_i64();
+ if (is_signed) {
+ gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
} else {
- tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
+ gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
}
+ write_fp_dreg_merging(s, rd, rd, tcg_double);
break;
- case 0x5: /* NOT */
- /* This opcode is shared with CNT and RBIT but we have earlier
- * enforced that size == 3 if and only if this is the NOT insn.
- */
- tcg_gen_not_i64(tcg_rd, tcg_rn);
- break;
- case 0x7: /* SQABS, SQNEG */
- if (u) {
- gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
+
+ case MO_32:
+ tcg_single = tcg_temp_new_i32();
+ if (is_signed) {
+ gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
} else {
- gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
+ gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
}
+ write_fp_sreg_merging(s, rd, rd, tcg_single);
break;
- case 0xa: /* CMLT */
- cond = TCG_COND_LT;
- do_cmop:
- /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
- tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
- break;
- case 0x8: /* CMGT, CMGE */
- cond = u ? TCG_COND_GE : TCG_COND_GT;
- goto do_cmop;
- case 0x9: /* CMEQ, CMLE */
- cond = u ? TCG_COND_LE : TCG_COND_EQ;
- goto do_cmop;
- case 0xb: /* ABS, NEG */
- if (u) {
- tcg_gen_neg_i64(tcg_rd, tcg_rn);
+
+ case MO_16:
+ tcg_single = tcg_temp_new_i32();
+ if (is_signed) {
+ gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
} else {
- tcg_gen_abs_i64(tcg_rd, tcg_rn);
+ gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
}
+ write_fp_hreg_merging(s, rd, rd, tcg_single);
break;
- case 0x2f: /* FABS */
- gen_vfp_absd(tcg_rd, tcg_rn);
- break;
- case 0x6f: /* FNEG */
- gen_vfp_negd(tcg_rd, tcg_rn);
- break;
- case 0x7f: /* FSQRT */
- gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
- break;
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
- break;
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
- break;
- case 0x18: /* FRINTN */
- case 0x19: /* FRINTM */
- case 0x38: /* FRINTP */
- case 0x39: /* FRINTZ */
- case 0x58: /* FRINTA */
- case 0x79: /* FRINTI */
- gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
- break;
- case 0x59: /* FRINTX */
- gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
- break;
- case 0x1e: /* FRINT32Z */
- case 0x5e: /* FRINT32X */
- gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
- break;
- case 0x1f: /* FRINT64Z */
- case 0x5f: /* FRINT64X */
- gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
- break;
+
default:
g_assert_not_reached();
}
+ return true;
}
-static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
- bool is_scalar, bool is_u, bool is_q,
- int size, int rn, int rd)
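+/* [US]CVTF - convert from a general register to fp. */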
+static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
{
- bool is_double = (size == MO_64);
- TCGv_ptr fpst;
+ TCGv_i64 tcg_int;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- if (!fp_access_check(s)) {
- return;
+ if (check <= 0) {
+ return check == 0;
}
- fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-
- if (is_double) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_zero = tcg_constant_i64(0);
- TCGv_i64 tcg_res = tcg_temp_new_i64();
- NeonGenTwoDoubleOpFn *genfn;
- bool swap = false;
- int pass;
-
- switch (opcode) {
- case 0x2e: /* FCMLT (zero) */
- swap = true;
- /* fallthrough */
- case 0x2c: /* FCMGT (zero) */
- genfn = gen_helper_neon_cgt_f64;
- break;
- case 0x2d: /* FCMEQ (zero) */
- genfn = gen_helper_neon_ceq_f64;
- break;
- case 0x6d: /* FCMLE (zero) */
- swap = true;
- /* fall through */
- case 0x6c: /* FCMGE (zero) */
- genfn = gen_helper_neon_cge_f64;
- break;
- default:
- g_assert_not_reached();
- }
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- if (swap) {
- genfn(tcg_res, tcg_zero, tcg_op, fpst);
- } else {
- genfn(tcg_res, tcg_op, tcg_zero, fpst);
- }
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- }
-
- clear_vec_high(s, !is_scalar, rd);
+ if (a->sf) {
+ tcg_int = cpu_reg(s, a->rn);
} else {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_zero = tcg_constant_i32(0);
- TCGv_i32 tcg_res = tcg_temp_new_i32();
- NeonGenTwoSingleOpFn *genfn;
- bool swap = false;
- int pass, maxpasses;
-
- if (size == MO_16) {
- switch (opcode) {
- case 0x2e: /* FCMLT (zero) */
- swap = true;
- /* fall through */
- case 0x2c: /* FCMGT (zero) */
- genfn = gen_helper_advsimd_cgt_f16;
- break;
- case 0x2d: /* FCMEQ (zero) */
- genfn = gen_helper_advsimd_ceq_f16;
- break;
- case 0x6d: /* FCMLE (zero) */
- swap = true;
- /* fall through */
- case 0x6c: /* FCMGE (zero) */
- genfn = gen_helper_advsimd_cge_f16;
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- switch (opcode) {
- case 0x2e: /* FCMLT (zero) */
- swap = true;
- /* fall through */
- case 0x2c: /* FCMGT (zero) */
- genfn = gen_helper_neon_cgt_f32;
- break;
- case 0x2d: /* FCMEQ (zero) */
- genfn = gen_helper_neon_ceq_f32;
- break;
- case 0x6d: /* FCMLE (zero) */
- swap = true;
- /* fall through */
- case 0x6c: /* FCMGE (zero) */
- genfn = gen_helper_neon_cge_f32;
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- if (is_scalar) {
- maxpasses = 1;
+ tcg_int = read_cpu_reg(s, a->rn, true);
+ if (is_signed) {
+ tcg_gen_ext32s_i64(tcg_int, tcg_int);
} else {
- int vector_size = 8 << is_q;
- maxpasses = vector_size >> size;
- }
-
- for (pass = 0; pass < maxpasses; pass++) {
- read_vec_element_i32(s, tcg_op, rn, pass, size);
- if (swap) {
- genfn(tcg_res, tcg_zero, tcg_op, fpst);
- } else {
- genfn(tcg_res, tcg_op, tcg_zero, fpst);
- }
- if (is_scalar) {
- write_fp_sreg(s, rd, tcg_res);
- } else {
- write_vec_element_i32(s, tcg_res, rd, pass, size);
- }
- }
-
- if (!is_scalar) {
- clear_vec_high(s, is_q, rd);
+ tcg_gen_ext32u_i64(tcg_int, tcg_int);
}
}
+ return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
}
-static void handle_2misc_reciprocal(DisasContext *s, int opcode,
- bool is_scalar, bool is_u, bool is_q,
- int size, int rn, int rd)
-{
- bool is_double = (size == 3);
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
+TRANS(SCVTF_g, do_cvtf_g, a, true)
+TRANS(UCVTF_g, do_cvtf_g, a, false)
- if (is_double) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
- int pass;
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- switch (opcode) {
- case 0x3d: /* FRECPE */
- gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
- break;
- case 0x3f: /* FRECPX */
- gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
- break;
- case 0x7d: /* FRSQRTE */
- gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- write_vec_element(s, tcg_res, rd, pass, MO_64);
- }
- clear_vec_high(s, !is_scalar, rd);
- } else {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
- int pass, maxpasses;
-
- if (is_scalar) {
- maxpasses = 1;
- } else {
- maxpasses = is_q ? 4 : 2;
- }
-
- for (pass = 0; pass < maxpasses; pass++) {
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
-
- switch (opcode) {
- case 0x3c: /* URECPE */
- gen_helper_recpe_u32(tcg_res, tcg_op);
- break;
- case 0x3d: /* FRECPE */
- gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
- break;
- case 0x3f: /* FRECPX */
- gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
- break;
- case 0x7d: /* FRSQRTE */
- gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
- break;
- default:
- g_assert_not_reached();
- }
+/*
+ * [US]CVTF (vector), scalar version.
+ * The name sounds odd, but it simply means the input comes from an fp
+ * register instead of a general register. Input and output element
+ * size are always equal.
+ */
+static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
+{
+ TCGv_i64 tcg_int;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- if (is_scalar) {
- write_fp_sreg(s, rd, tcg_res);
- } else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- }
- }
- if (!is_scalar) {
- clear_vec_high(s, is_q, rd);
- }
+ if (check <= 0) {
+ return check == 0;
}
-}
-static void handle_2misc_narrow(DisasContext *s, bool scalar,
- int opcode, bool u, bool is_q,
- int size, int rn, int rd)
-{
- /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
- * in the source becomes a size element in the destination).
- */
- int pass;
- TCGv_i32 tcg_res[2];
- int destelt = is_q ? 2 : 0;
- int passes = scalar ? 1 : 2;
+ tcg_int = tcg_temp_new_i64();
+ read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
+ return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
+}
- if (scalar) {
- tcg_res[1] = tcg_constant_i32(0);
- }
+TRANS(SCVTF_f, do_cvtf_f, a, true)
+TRANS(UCVTF_f, do_cvtf_f, a, false)
- for (pass = 0; pass < passes; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenNarrowFn *genfn = NULL;
- NeonGenNarrowEnvFn *genenvfn = NULL;
+static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
+ TCGv_i64 tcg_out, int shift, int rn,
+ ARMFPRounding rmode)
+{
+ TCGv_ptr tcg_fpstatus;
+ TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
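+    /*
+     * 'out' is the destination integer MemOp, including MO_SIGN for
+     * the signed conversions; 'esz' is the fp source element size.
+     */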
- if (scalar) {
- read_vec_element(s, tcg_op, rn, pass, size + 1);
- } else {
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- }
- tcg_res[pass] = tcg_temp_new_i32();
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+ tcg_shift = tcg_constant_i32(shift);
+ tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
- switch (opcode) {
- case 0x12: /* XTN, SQXTUN */
- {
- static NeonGenNarrowFn * const xtnfns[3] = {
- gen_helper_neon_narrow_u8,
- gen_helper_neon_narrow_u16,
- tcg_gen_extrl_i64_i32,
- };
- static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
- gen_helper_neon_unarrow_sat8,
- gen_helper_neon_unarrow_sat16,
- gen_helper_neon_unarrow_sat32,
- };
- if (u) {
- genenvfn = sqxtunfns[size];
- } else {
- genfn = xtnfns[size];
- }
- break;
- }
- case 0x14: /* SQXTN, UQXTN */
- {
- static NeonGenNarrowEnvFn * const fns[3][2] = {
- { gen_helper_neon_narrow_sat_s8,
- gen_helper_neon_narrow_sat_u8 },
- { gen_helper_neon_narrow_sat_s16,
- gen_helper_neon_narrow_sat_u16 },
- { gen_helper_neon_narrow_sat_s32,
- gen_helper_neon_narrow_sat_u32 },
- };
- genenvfn = fns[size][u];
+ switch (esz) {
+ case MO_64:
+ read_vec_element(s, tcg_out, rn, 0, MO_64);
+ switch (out) {
+ case MO_64 | MO_SIGN:
+ gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
break;
- }
- case 0x16: /* FCVTN, FCVTN2 */
- /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
- if (size == 2) {
- gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
- } else {
- TCGv_i32 tcg_lo = tcg_temp_new_i32();
- TCGv_i32 tcg_hi = tcg_temp_new_i32();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
- TCGv_i32 ahp = get_ahp_flag();
-
- tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
- gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
- gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
- tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
- }
+ case MO_64:
+ gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
break;
- case 0x36: /* BFCVTN, BFCVTN2 */
- {
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
- gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
- }
+ case MO_32 | MO_SIGN:
+ gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
break;
- case 0x56: /* FCVTXN, FCVTXN2 */
- /* 64 bit to 32 bit float conversion
- * with von Neumann rounding (round to odd)
- */
- assert(size == 2);
- gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
+ case MO_32:
+ gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
break;
default:
g_assert_not_reached();
}
-
- if (genfn) {
- genfn(tcg_res[pass], tcg_op);
- } else if (genenvfn) {
- genenvfn(tcg_res[pass], tcg_env, tcg_op);
- }
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
- }
- clear_vec_high(s, is_q, rd);
-}
-
-/* AdvSIMD scalar two reg misc
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 12, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- bool is_fcvt = false;
- int rmode;
- TCGv_i32 tcg_rmode;
- TCGv_ptr tcg_fpstatus;
-
- switch (opcode) {
- case 0x7: /* SQABS / SQNEG */
- break;
- case 0xa: /* CMLT */
- if (u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x8: /* CMGT, CMGE */
- case 0x9: /* CMEQ, CMLE */
- case 0xb: /* ABS, NEG */
- if (size != 3) {
- unallocated_encoding(s);
- return;
- }
break;
- case 0x12: /* SQXTUN */
- if (!u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x14: /* SQXTN, UQXTN */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
- return;
- case 0xc ... 0xf:
- case 0x16 ... 0x1d:
- case 0x1f:
- /* Floating point: U, size[1] and opcode indicate operation;
- * size[0] indicates single or double precision.
- */
- opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
- size = extract32(size, 0, 1) ? 3 : 2;
- switch (opcode) {
- case 0x2c: /* FCMGT (zero) */
- case 0x2d: /* FCMEQ (zero) */
- case 0x2e: /* FCMLT (zero) */
- case 0x6c: /* FCMGE (zero) */
- case 0x6d: /* FCMLE (zero) */
- handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
- return;
- case 0x1d: /* SCVTF */
- case 0x5d: /* UCVTF */
- {
- bool is_signed = (opcode == 0x1d);
- if (!fp_access_check(s)) {
- return;
- }
- handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
- return;
- }
- case 0x3d: /* FRECPE */
- case 0x3f: /* FRECPX */
- case 0x7d: /* FRSQRTE */
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
- return;
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- is_fcvt = true;
- rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
+
+ case MO_32:
+ tcg_single = read_fp_sreg(s, rn);
+ switch (out) {
+ case MO_64 | MO_SIGN:
+ gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
break;
- case 0x1c: /* FCVTAS */
- case 0x5c: /* FCVTAU */
- /* TIEAWAY doesn't fit in the usual rounding mode encoding */
- is_fcvt = true;
- rmode = FPROUNDING_TIEAWAY;
+ case MO_64:
+ gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
+ break;
+ case MO_32 | MO_SIGN:
+ gen_helper_vfp_tosls(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
+ break;
+ case MO_32:
+ gen_helper_vfp_touls(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
break;
- case 0x56: /* FCVTXN, FCVTXN2 */
- if (size == 2) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
- return;
default:
- unallocated_encoding(s);
- return;
+ g_assert_not_reached();
}
break;
- default:
- case 0x3: /* USQADD / SUQADD */
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- if (is_fcvt) {
- tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
- tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
- } else {
- tcg_fpstatus = NULL;
- tcg_rmode = NULL;
- }
-
- if (size == 3) {
- TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
- TCGv_i64 tcg_rd = tcg_temp_new_i64();
-
- handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
- write_fp_dreg(s, rd, tcg_rd);
- } else {
- TCGv_i32 tcg_rn = tcg_temp_new_i32();
- TCGv_i32 tcg_rd = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_rn, rn, 0, size);
-
- switch (opcode) {
- case 0x7: /* SQABS, SQNEG */
- {
- NeonGenOneOpEnvFn *genfn;
- static NeonGenOneOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
- { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
- { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
- };
- genfn = fns[size][u];
- genfn(tcg_rd, tcg_env, tcg_rn);
+ case MO_16:
+ tcg_single = read_fp_hreg(s, rn);
+ switch (out) {
+ case MO_64 | MO_SIGN:
+ gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
break;
- }
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
- tcg_fpstatus);
+ case MO_64:
+ gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
+ break;
+ case MO_32 | MO_SIGN:
+ gen_helper_vfp_toslh(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
+ break;
+ case MO_32:
+ gen_helper_vfp_toulh(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
break;
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
- tcg_fpstatus);
+ case MO_16 | MO_SIGN:
+ gen_helper_vfp_toshh(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
+ break;
+ case MO_16:
+ gen_helper_vfp_touhh(tcg_single, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ tcg_gen_extu_i32_i64(tcg_out, tcg_single);
break;
default:
g_assert_not_reached();
}
+ break;
- write_fp_sreg(s, rd, tcg_rd);
+ default:
+ g_assert_not_reached();
}
- if (is_fcvt) {
- gen_restore_rmode(tcg_rmode, tcg_fpstatus);
- }
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
-/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
-static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
- int immh, int immb, int opcode, int rn, int rd)
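+/* FCVT{N,P,M,Z,A}[SU] - fp to general register conversion. */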
+static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
+ ARMFPRounding rmode, bool is_signed)
{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = 2 * (8 << size) - immhb;
- GVecGen2iFn *gvec_fn;
+ TCGv_i64 tcg_int;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- if (extract32(immh, 3, 1) && !is_q) {
- unallocated_encoding(s);
- return;
+ if (check <= 0) {
+ return check == 0;
}
- tcg_debug_assert(size <= 3);
- if (!fp_access_check(s)) {
- return;
- }
+ tcg_int = cpu_reg(s, a->rd);
+ do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
+ a->esz, tcg_int, a->shift, a->rn, rmode);
- switch (opcode) {
- case 0x02: /* SSRA / USRA (accumulate) */
- gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
- break;
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_int, tcg_int);
+ }
+ return true;
+}
- case 0x08: /* SRI */
- gvec_fn = gen_gvec_sri;
- break;
+TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
+TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
+TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
+TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
+TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
+TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
+TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
+TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
+TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
+TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
- case 0x00: /* SSHR / USHR */
- if (is_u) {
- if (shift == 8 << size) {
- /* Shift count the same size as element size produces zero. */
- tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
- is_q ? 16 : 8, vec_full_reg_size(s), 0);
- return;
- }
- gvec_fn = tcg_gen_gvec_shri;
- } else {
- /* Shift count the same size as element size produces all sign. */
- if (shift == 8 << size) {
- shift -= 1;
- }
- gvec_fn = tcg_gen_gvec_sari;
- }
- break;
+/*
+ * FCVT* (vector), scalar version.
+ * The name sounds odd, but it simply means the output goes to an fp
+ * register instead of a general register. Input and output element
+ * size are always equal.
+ */
+static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
+ ARMFPRounding rmode, bool is_signed)
+{
+ TCGv_i64 tcg_int;
+ int check = fp_access_check_scalar_hsd(s, a->esz);
- case 0x04: /* SRSHR / URSHR (rounding) */
- gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
- break;
+ if (check <= 0) {
+ return check == 0;
+ }
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
- break;
+ tcg_int = tcg_temp_new_i64();
+ do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
+ a->esz, tcg_int, a->shift, a->rn, rmode);
- default:
- g_assert_not_reached();
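+    /*
+     * Unless FPCR.NEP merging is in effect, a scalar result clears
+     * the remainder of the destination vector register.
+     */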
+ if (!s->fpcr_nep) {
+ clear_vec(s, a->rd);
}
-
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
+ write_vec_element(s, tcg_int, a->rd, 0, a->esz);
+ return true;
}
-/* SHL/SLI - Vector shift left */
-static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
- int immh, int immb, int opcode, int rn, int rd)
-{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
-
- /* Range of size is limited by decode: immh is a non-zero 4 bit field */
- assert(size >= 0 && size <= 3);
+TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
+TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
+TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
+TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
+TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
+TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
+TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
+TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
+TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
+TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
- if (extract32(immh, 3, 1) && !is_q) {
- unallocated_encoding(s);
- return;
+static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
+{
+ if (!dc_isar_feature(aa64_jscvt, s)) {
+ return false;
}
+ if (fp_access_check(s)) {
+ TCGv_i64 t = read_fp_dreg(s, a->rn);
+ TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
- if (!fp_access_check(s)) {
- return;
- }
+ gen_helper_fjcvtzs(t, t, fpstatus);
- if (insert) {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
+ tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
+ tcg_gen_extrh_i64_i32(cpu_ZF, t);
+ tcg_gen_movi_i32(cpu_CF, 0);
+ tcg_gen_movi_i32(cpu_NF, 0);
+ tcg_gen_movi_i32(cpu_VF, 0);
}
+ return true;
}
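The helper presumably packs the FJCVTZS result into the low 32 bits and the value for Z into the high 32 bits, which is why the code above extracts cpu_ZF from the upper half (QEMU's Z flag reads as set when cpu_ZF is zero) and clears N, C and V. A hedged model of the architectural conversion itself, illustrative only and not the QEMU helper:

    #include <math.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* FJCVTZS, roughly: JavaScript-style double -> int32. Round toward
     * zero, wrap out-of-range results modulo 2^32, map NaN/Inf to 0,
     * and report whether the conversion was exact (this drives Z). */
    static uint32_t fjcvtzs_model(double d, bool *exact)
    {
        if (isnan(d) || isinf(d)) {
            *exact = false;
            return 0;
        }
        double t = trunc(d);                /* round toward zero */
        double w = fmod(t, 4294967296.0);   /* wrap modulo 2^32 */
        if (w < 0) {
            w += 4294967296.0;
        }
        *exact = (t == d && d >= -2147483648.0 && d <= 2147483647.0);
        return (uint32_t)w;
    }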
-/* USHLL/SHLL - Vector shift left with widening */
-static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
- int immh, int immb, int opcode, int rn, int rd)
+static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
- int dsize = 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- TCGv_i64 tcg_rn = tcg_temp_new_i64();
- TCGv_i64 tcg_rd = tcg_temp_new_i64();
- int i;
-
- if (size >= 3) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
}
-
- /* For the LL variants the store is larger than the load,
- * so if rd == rn we would overwrite parts of our input.
- * So load everything right now and use shifts in the main loop.
- */
- read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
-
- for (i = 0; i < elements; i++) {
- tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
- ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
- tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
- write_vec_element(s, tcg_rd, rd, i, size + 1);
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_ext16u_i64(tmp, tcg_rn);
+ write_fp_dreg(s, a->rd, tmp);
}
+ return true;
}
-/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
-static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
- int immh, int immb, int opcode, int rn, int rd)
+static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int dsize = 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- int shift = (2 * esize) - immhb;
- bool round = extract32(opcode, 0, 1);
- TCGv_i64 tcg_rn, tcg_rd, tcg_final;
- TCGv_i64 tcg_round;
- int i;
-
- if (extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(tmp, tcg_rn);
+ write_fp_dreg(s, a->rd, tmp);
}
+ return true;
+}
- if (!fp_access_check(s)) {
- return;
+static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+ write_fp_dreg(s, a->rd, tcg_rn);
}
+ return true;
+}
- tcg_rn = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- tcg_final = tcg_temp_new_i64();
- read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
-
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
- } else {
- tcg_round = NULL;
+static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+ tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
+ clear_vec_high(s, true, a->rd);
}
+ return true;
+}
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, size+1);
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- false, true, size+1, shift);
-
- tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
+{
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
}
-
- if (!is_q) {
- write_vec_element(s, tcg_final, rd, 0, MO_64);
- } else {
- write_vec_element(s, tcg_final, rd, 1, MO_64);
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
}
-
- clear_vec_high(s, is_q, rd);
+ return true;
}
-
-/* AdvSIMD shift by immediate
- * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
- * +---+---+---+-------------+------+------+--------+---+------+------+
- * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
- * +---+---+---+-------------+------+------+--------+---+------+------+
- */
-static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 5);
- int immb = extract32(insn, 16, 3);
- int immh = extract32(insn, 19, 4);
- bool is_u = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
-
- /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
- assert(immh != 0);
-
- switch (opcode) {
- case 0x08: /* SRI */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x00: /* SSHR / USHR */
- case 0x02: /* SSRA / USRA (accumulate) */
- case 0x04: /* SRSHR / URSHR (rounding) */
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x0a: /* SHL / SLI */
- handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x10: /* SHRN */
- case 0x11: /* RSHRN / SQRSHRUN */
- if (is_u) {
- handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
- opcode, rn, rd);
- } else {
- handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
- }
- break;
- case 0x12: /* SQSHRN / UQSHRN */
- case 0x13: /* SQRSHRN / UQRSHRN */
- handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
- opcode, rn, rd);
- break;
- case 0x14: /* SSHLL / USHLL */
- handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x1c: /* SCVTF / UCVTF */
- handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
- opcode, rn, rd);
- break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
- break;
- case 0x1f: /* FCVTZS/ FCVTZU */
- handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
- return;
- default:
- unallocated_encoding(s);
- return;
+static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
}
+ return true;
}
-/* Generate code to do a "long" addition or subtraction, ie one done in
- * TCGv_i64 on vector lanes twice the width specified by size.
- */
-static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
- TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
+static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
{
- static NeonGenTwo64OpFn * const fns[3][2] = {
- { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
- { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
- { tcg_gen_add_i64, tcg_gen_sub_i64 },
- };
- NeonGenTwo64OpFn *genfn;
- assert(size < 3);
-
- genfn = fns[size][is_sub];
- genfn(tcg_res, tcg_op1, tcg_op2);
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
+ }
+ return true;
}
-static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
+static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
{
- /* 3-reg-different widening insns: 64 x 64 -> 128 */
- TCGv_i64 tcg_res[2];
- int pass, accop;
+ if (fp_access_check(s)) {
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
+ }
+ return true;
+}
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
+typedef struct ENVScalar1 {
+ NeonGenOneOpEnvFn *gen_bhs[3];
+ NeonGenOne64OpEnvFn *gen_d;
+} ENVScalar1;
- /* Does this op do an adding accumulate, a subtracting accumulate,
- * or no accumulate at all?
- */
- switch (opcode) {
- case 5:
- case 8:
- case 9:
- accop = 1;
- break;
- case 10:
- case 11:
- accop = -1;
- break;
- default:
- accop = 0;
- break;
+static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
+{
+ if (!fp_access_check(s)) {
+ return true;
}
+ if (a->esz == MO_64) {
+ TCGv_i64 t = read_fp_dreg(s, a->rn);
+ f->gen_d(t, tcg_env, t);
+ write_fp_dreg(s, a->rd, t);
+ } else {
+ TCGv_i32 t = tcg_temp_new_i32();
- if (accop != 0) {
- read_vec_element(s, tcg_res[0], rd, 0, MO_64);
- read_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ read_vec_element_i32(s, t, a->rn, 0, a->esz);
+ f->gen_bhs[a->esz](t, tcg_env, t);
+ write_fp_sreg(s, a->rd, t);
}
+ return true;
+}
- /* size == 2 means two 32x32->64 operations; this is worth special
- * casing because we can generally handle it inline.
- */
- if (size == 2) {
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_passres;
- MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
-
- int elt = pass + is_q * 2;
-
- read_vec_element(s, tcg_op1, rn, elt, memop);
- read_vec_element(s, tcg_op2, rm, elt, memop);
-
- if (accop == 0) {
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- switch (opcode) {
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- {
- TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
- TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
-
- tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
- tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
- tcg_passres,
- tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
- break;
- }
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
- tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
+static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
+{
+ if (a->esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ if (a->esz == MO_64) {
+ TCGv_i64 t = tcg_temp_new_i64();
- if (opcode == 9 || opcode == 11) {
- /* saturating accumulate ops */
- if (accop < 0) {
- tcg_gen_neg_i64(tcg_passres, tcg_passres);
- }
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
- tcg_res[pass], tcg_passres);
- } else if (accop > 0) {
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- } else if (accop < 0) {
- tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- }
+ for (int i = 0; i < 2; ++i) {
+ read_vec_element(s, t, a->rn, i, MO_64);
+ f->gen_d(t, tcg_env, t);
+ write_vec_element(s, t, a->rd, i, MO_64);
}
} else {
- /* size 0 or 1, generally helper functions */
- for (pass = 0; pass < 2; pass++) {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i64 tcg_passres;
- int elt = pass + is_q * 2;
-
- read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
- read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
-
- if (accop == 0) {
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- switch (opcode) {
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- {
- TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
- static NeonGenWidenFn * const widenfns[2][2] = {
- { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
- { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
- };
- NeonGenWidenFn *widenfn = widenfns[size][is_u];
-
- widenfn(tcg_op2_64, tcg_op2);
- widenfn(tcg_passres, tcg_op1);
- gen_neon_addl(size, (opcode == 2), tcg_passres,
- tcg_passres, tcg_op2_64);
- break;
- }
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- if (size == 0) {
- if (is_u) {
- gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
- }
- } else {
- if (is_u) {
- gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
- }
- }
- break;
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
- if (size == 0) {
- if (is_u) {
- gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
- }
- } else {
- if (is_u) {
- gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
- }
- }
- break;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- assert(size == 1);
- gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
+ TCGv_i32 t = tcg_temp_new_i32();
+ int n = (a->q ? 16 : 8) >> a->esz;
- if (accop != 0) {
- if (opcode == 9 || opcode == 11) {
- /* saturating accumulate ops */
- if (accop < 0) {
- gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
- }
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- } else {
- gen_neon_addl(size, (accop < 0), tcg_res[pass],
- tcg_res[pass], tcg_passres);
- }
- }
+ for (int i = 0; i < n; ++i) {
+ read_vec_element_i32(s, t, a->rn, i, a->esz);
+ f->gen_bhs[a->esz](t, tcg_env, t);
+ write_vec_element_i32(s, t, a->rd, i, a->esz);
}
}
-
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ clear_vec_high(s, a->q, a->rd);
+ return true;
}
-static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
-{
- TCGv_i64 tcg_res[2];
- int part = is_q ? 2 : 0;
- int pass;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
- static NeonGenWidenFn * const widenfns[3][2] = {
- { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
- { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
- { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
- };
- NeonGenWidenFn *widenfn = widenfns[size][is_u];
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
- widenfn(tcg_op2_wide, tcg_op2);
- tcg_res[pass] = tcg_temp_new_i64();
- gen_neon_addl(size, (opcode == 3),
- tcg_res[pass], tcg_op1, tcg_op2_wide);
- }
+static const ENVScalar1 f_scalar_sqabs = {
+ { gen_helper_neon_qabs_s8,
+ gen_helper_neon_qabs_s16,
+ gen_helper_neon_qabs_s32 },
+ gen_helper_neon_qabs_s64,
+};
+TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
+TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
+
+static const ENVScalar1 f_scalar_sqneg = {
+ { gen_helper_neon_qneg_s8,
+ gen_helper_neon_qneg_s16,
+ gen_helper_neon_qneg_s32 },
+ gen_helper_neon_qneg_s64,
+};
+TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
+TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 t = read_fp_dreg(s, a->rn);
+ f(t, t);
+ write_fp_dreg(s, a->rd, t);
}
+ return true;
}
-static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
- tcg_gen_addi_i64(in, in, 1U << 31);
- tcg_gen_extrh_i64_i32(res, in);
-}
+TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
+TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
-static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
+static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
{
- TCGv_i32 tcg_res[2];
- int part = is_q ? 2 : 0;
- int pass;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_wideres = tcg_temp_new_i64();
- static NeonGenNarrowFn * const narrowfns[3][2] = {
- { gen_helper_neon_narrow_high_u8,
- gen_helper_neon_narrow_round_high_u8 },
- { gen_helper_neon_narrow_high_u16,
- gen_helper_neon_narrow_round_high_u16 },
- { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
- };
- NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
-
- tcg_res[pass] = tcg_temp_new_i32();
- gennarrow(tcg_res[pass], tcg_wideres);
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
+ if (fp_access_check(s)) {
+ TCGv_i64 t = read_fp_dreg(s, a->rn);
+ tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
+ write_fp_dreg(s, a->rd, t);
}
- clear_vec_high(s, is_q, rd);
+ return true;
}
-/* AdvSIMD three different
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- */
-static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
-{
- /* Instructions in this group fall into three basic classes
- * (in each case with the operation working on each element in
- * the input vectors):
- * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
- * 128 bit input)
- * (2) wide 64 x 128 -> 128
- * (3) narrowing 128 x 128 -> 64
- * Here we do initial decode, catch unallocated cases and
- * dispatch to separate functions for each class.
- */
- int is_q = extract32(insn, 30, 1);
- int is_u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- switch (opcode) {
- case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
- case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
- /* 64 x 128 -> 128 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
- case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
- /* 128 x 128 -> 64 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- case 14: /* PMULL, PMULL2 */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- switch (size) {
- case 0: /* PMULL.P8 */
- if (!fp_access_check(s)) {
- return;
- }
- /* The Q field specifies lo/hi half input for this insn. */
- gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
- gen_helper_neon_pmull_h);
- break;
+TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
+TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
+TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
+TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
+TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
- case 3: /* PMULL.P64 */
- if (!dc_isar_feature(aa64_pmull, s)) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- /* The Q field specifies lo/hi half input for this insn. */
- gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
- gen_helper_gvec_pmull_q);
- break;
-
- default:
- unallocated_encoding(s);
- break;
- }
- return;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- if (is_u || size == 0) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
- /* 64 x 64 -> 128 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- default:
- /* opcode 15 not allocated */
- unallocated_encoding(s);
- break;
+static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
+ ArithOneOp * const fn[3])
+{
+ if (a->esz == MO_64) {
+ return false;
}
-}
+ if (fp_access_check(s)) {
+ TCGv_i64 t = tcg_temp_new_i64();
-/* AdvSIMD three same extra
- * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
- * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
- */
-static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 4);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
- bool feature;
- int rot;
-
- switch (u * 16 + opcode) {
- case 0x10: /* SQRDMLAH (vector) */
- case 0x11: /* SQRDMLSH (vector) */
- if (size != 1 && size != 2) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_rdm, s);
- break;
- case 0x02: /* SDOT (vector) */
- case 0x12: /* UDOT (vector) */
- if (size != MO_32) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_dp, s);
- break;
- case 0x03: /* USDOT */
- if (size != MO_32) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_i8mm, s);
- break;
- case 0x04: /* SMMLA */
- case 0x14: /* UMMLA */
- case 0x05: /* USMMLA */
- if (!is_q || size != MO_32) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_i8mm, s);
- break;
- case 0x18: /* FCMLA, #0 */
- case 0x19: /* FCMLA, #90 */
- case 0x1a: /* FCMLA, #180 */
- case 0x1b: /* FCMLA, #270 */
- case 0x1c: /* FCADD, #90 */
- case 0x1e: /* FCADD, #270 */
- if (size == 0
- || (size == 1 && !dc_isar_feature(aa64_fp16, s))
- || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_fcma, s);
- break;
- case 0x1d: /* BFMMLA */
- if (size != MO_16 || !is_q) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_bf16, s);
- break;
- case 0x1f:
- switch (size) {
- case 1: /* BFDOT */
- case 3: /* BFMLAL{B,T} */
- feature = dc_isar_feature(aa64_bf16, s);
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
+ read_vec_element(s, t, a->rn, 0, a->esz + 1);
+ fn[a->esz](t, t);
+ clear_vec(s, a->rd);
+ write_vec_element(s, t, a->rd, 0, a->esz);
}
+ return true;
+}
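Note the asymmetric sizes in do_2misc_narrow_scalar: the source lane is read at a->esz + 1, i.e. twice the width of the destination element, since MemOp sizes are log2. An illustrative comment:

    /* E.g. SQXTN_s with a->esz == MO_8 reads one 16-bit lane from Rn,
     * saturates it to 8 bits, clears Rd, and writes the result to
     * lane 0. */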
- switch (opcode) {
- case 0x0: /* SQRDMLAH (vector) */
- gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
- return;
-
- case 0x1: /* SQRDMLSH (vector) */
- gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
- return;
+#define WRAP_ENV(NAME) \
+ static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
+ { gen_helper_##NAME(d, tcg_env, n); }
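WRAP_ENV only adapts a helper's calling convention so that env-taking saturating helpers fit the two-operand ArithOneOp tables below. For instance, WRAP_ENV(neon_unarrow_sat8) expands to roughly:

    static void gen_neon_unarrow_sat8(TCGv_i64 d, TCGv_i64 n)
    { gen_helper_neon_unarrow_sat8(d, tcg_env, n); }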
- case 0x2: /* SDOT / UDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
- u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
- return;
+WRAP_ENV(neon_unarrow_sat8)
+WRAP_ENV(neon_unarrow_sat16)
+WRAP_ENV(neon_unarrow_sat32)
- case 0x3: /* USDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
- return;
+static ArithOneOp * const f_scalar_sqxtun[] = {
+ gen_neon_unarrow_sat8,
+ gen_neon_unarrow_sat16,
+ gen_neon_unarrow_sat32,
+};
+TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
- case 0x04: /* SMMLA, UMMLA */
- gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
- u ? gen_helper_gvec_ummla_b
- : gen_helper_gvec_smmla_b);
- return;
- case 0x05: /* USMMLA */
- gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
- return;
+WRAP_ENV(neon_narrow_sat_s8)
+WRAP_ENV(neon_narrow_sat_s16)
+WRAP_ENV(neon_narrow_sat_s32)
- case 0x8: /* FCMLA, #0 */
- case 0x9: /* FCMLA, #90 */
- case 0xa: /* FCMLA, #180 */
- case 0xb: /* FCMLA, #270 */
- rot = extract32(opcode, 0, 2);
- switch (size) {
- case 1:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
- gen_helper_gvec_fcmlah);
- break;
- case 2:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
- gen_helper_gvec_fcmlas);
- break;
- case 3:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
- gen_helper_gvec_fcmlad);
- break;
- default:
- g_assert_not_reached();
- }
- return;
+static ArithOneOp * const f_scalar_sqxtn[] = {
+ gen_neon_narrow_sat_s8,
+ gen_neon_narrow_sat_s16,
+ gen_neon_narrow_sat_s32,
+};
+TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
- case 0xc: /* FCADD, #90 */
- case 0xe: /* FCADD, #270 */
- rot = extract32(opcode, 1, 1);
- switch (size) {
- case 1:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
- gen_helper_gvec_fcaddh);
- break;
- case 2:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
- gen_helper_gvec_fcadds);
- break;
- case 3:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
- gen_helper_gvec_fcaddd);
- break;
- default:
- g_assert_not_reached();
- }
- return;
+WRAP_ENV(neon_narrow_sat_u8)
+WRAP_ENV(neon_narrow_sat_u16)
+WRAP_ENV(neon_narrow_sat_u32)
- case 0xd: /* BFMMLA */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
- return;
- case 0xf:
- switch (size) {
- case 1: /* BFDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
- break;
- case 3: /* BFMLAL{B,T} */
- gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
- gen_helper_gvec_bfmlal);
- break;
- default:
- g_assert_not_reached();
- }
- return;
+static ArithOneOp * const f_scalar_uqxtn[] = {
+ gen_neon_narrow_sat_u8,
+ gen_neon_narrow_sat_u16,
+ gen_neon_narrow_sat_u32,
+};
+TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
- default:
- g_assert_not_reached();
+static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
+{
+ if (fp_access_check(s)) {
+ /*
+ * 64-bit to 32-bit float conversion
+ * with von Neumann rounding (round to odd)
+ */
+ TCGv_i64 src = read_fp_dreg(s, a->rn);
+ TCGv_i32 dst = tcg_temp_new_i32();
+ gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
+ write_fp_sreg_merging(s, a->rd, a->rd, dst);
}
+ return true;
}
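Round-to-odd keeps enough information for a later, second rounding to come out right: truncate, then force the low result bit on if anything nonzero was discarded. A minimal sketch of the rule on raw bits, illustrative only and not the gen_helper_fcvtx_f64_to_f32 implementation:

    #include <stdint.h>

    /* Round-to-odd ("von Neumann rounding"): drop the k low bits of x
     * (0 < k < 64) and set the result's LSB if any discarded bit was
     * nonzero, so a subsequent rounding behaves as if it had seen the
     * full-precision value. */
    static uint64_t round_to_odd(uint64_t x, unsigned k)
    {
        uint64_t kept = x >> k;
        uint64_t discarded = x & ((1ULL << k) - 1);
        return kept | (discarded != 0);
    }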
-static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
- int size, int rn, int rd)
-{
- /* Handle 2-reg-misc ops which are widening (so each size element
- * in the source becomes a 2*size element in the destination.
- * The only instruction like this is FCVTL.
- */
- int pass;
-
- if (size == 3) {
- /* 32 -> 64 bit fp conversion */
- TCGv_i64 tcg_res[2];
- int srcelt = is_q ? 2 : 0;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- tcg_res[pass] = tcg_temp_new_i64();
+#undef WRAP_ENV
- read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
- gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
- }
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
- } else {
- /* 16 -> 32 bit fp conversion */
- int srcelt = is_q ? 4 : 0;
- TCGv_i32 tcg_res[4];
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
- TCGv_i32 ahp = get_ahp_flag();
+static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
+{
+ if (!a->q && a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
+ }
+ return true;
+}
- for (pass = 0; pass < 4; pass++) {
- tcg_res[pass] = tcg_temp_new_i32();
+TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
+TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
+TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
+TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
+TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
+TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
+TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
+TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
+TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
+TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
+TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
+TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
+TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
+TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
- read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
- gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
- fpst, ahp);
- }
- for (pass = 0; pass < 4; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
- }
+static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
+{
+ if (a->esz == MO_64) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
}
+ return true;
}
-static void handle_rev(DisasContext *s, int opcode, bool u,
- bool is_q, int size, int rn, int rd)
-{
- int op = (opcode << 1) | u;
- int opsz = op + size;
- int grp_size = 3 - opsz;
- int dsize = is_q ? 128 : 64;
- int i;
+TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
+TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
+TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
+TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
+TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
+TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
+TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
- if (opsz >= 3) {
- unallocated_encoding(s);
- return;
+static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
+ ArithOneOp * const fn[3])
+{
+ if (a->esz == MO_64) {
+ return false;
}
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
- if (!fp_access_check(s)) {
- return;
+ read_vec_element(s, t0, a->rn, 0, MO_64);
+ read_vec_element(s, t1, a->rn, 1, MO_64);
+ fn[a->esz](t0, t0);
+ fn[a->esz](t1, t1);
+ write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
+ write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
+ clear_vec_high(s, a->q, a->rd);
}
+ return true;
+}
- if (size == 0) {
- /* Special case bytes, use bswap op on each group of elements */
- int groups = dsize / (8 << grp_size);
-
- for (i = 0; i < groups; i++) {
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_tmp, rn, i, grp_size);
- switch (grp_size) {
- case MO_16:
- tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
- break;
- case MO_32:
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
- break;
- case MO_64:
- tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
- break;
- default:
- g_assert_not_reached();
- }
- write_vec_element(s, tcg_tmp, rd, i, grp_size);
- }
- clear_vec_high(s, is_q, rd);
- } else {
- int revmask = (1 << grp_size) - 1;
- int esize = 8 << size;
- int elements = dsize / esize;
- TCGv_i64 tcg_rn = tcg_temp_new_i64();
- TCGv_i64 tcg_rd[2];
+static ArithOneOp * const f_scalar_xtn[] = {
+ gen_helper_neon_narrow_u8,
+ gen_helper_neon_narrow_u16,
+ tcg_gen_ext32u_i64,
+};
+TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
+TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
+TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
+TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
- for (i = 0; i < 2; i++) {
- tcg_rd[i] = tcg_temp_new_i64();
- tcg_gen_movi_i64(tcg_rd[i], 0);
- }
+static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i32 tcg_lo = tcg_temp_new_i32();
+ TCGv_i32 tcg_hi = tcg_temp_new_i32();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
+ TCGv_i32 ahp = get_ahp_flag();
- for (i = 0; i < elements; i++) {
- int e_rev = (i & 0xf) ^ revmask;
- int w = (e_rev * esize) / 64;
- int o = (e_rev * esize) % 64;
+ tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
+ tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
+ tcg_gen_extu_i32_i64(d, tcg_lo);
+}
- read_vec_element(s, tcg_rn, rn, i, size);
- tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
- }
+static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
- for (i = 0; i < 2; i++) {
- write_vec_element(s, tcg_rd[i], rd, i, MO_64);
- }
- clear_vec_high(s, true, rd);
- }
+ gen_helper_vfp_fcvtsd(tmp, n, fpst);
+ tcg_gen_extu_i32_i64(d, tmp);
}
-static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
- bool is_q, int size, int rn, int rd)
+static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
{
- /* Implement the pairwise operations from 2-misc:
- * SADDLP, UADDLP, SADALP, UADALP.
- * These all add pairs of elements in the input to produce a
- * double-width result element in the output (possibly accumulating).
+ /*
+ * 64-bit to 32-bit float conversion
+ * with von Neumann rounding (round to odd)
*/
- bool accum = (opcode == 0x6);
- int maxpass = is_q ? 2 : 1;
- int pass;
- TCGv_i64 tcg_res[2];
-
- if (size == 2) {
- /* 32 + 32 -> 64 op */
- MemOp memop = size + (u ? 0 : MO_SIGN);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
+ tcg_gen_extu_i32_i64(d, tmp);
+}
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+static ArithOneOp * const f_vector_fcvtn[] = {
+ NULL,
+ gen_fcvtn_hs,
+ gen_fcvtn_sd,
+};
+static ArithOneOp * const f_scalar_fcvtxn[] = {
+ NULL,
+ NULL,
+ gen_fcvtxn_sd,
+};
+TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
+TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
- tcg_res[pass] = tcg_temp_new_i64();
+static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_bfcvt_pair(tmp, n, fpst);
+ tcg_gen_extu_i32_i64(d, tmp);
+}
- read_vec_element(s, tcg_op1, rn, pass * 2, memop);
- read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
- tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
- if (accum) {
- read_vec_element(s, tcg_op1, rd, pass, MO_64);
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- }
- } else {
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenOne64OpFn *genfn;
- static NeonGenOne64OpFn * const fns[2][2] = {
- { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
- { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
- };
+static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_bfcvt_pair(tmp, n, fpst);
+ tcg_gen_extu_i32_i64(d, tmp);
+}
- genfn = fns[size][u];
+static ArithOneOp * const f_vector_bfcvtn[2][3] = {
+ {
+ NULL,
+ gen_bfcvtn_hs,
+ NULL,
+ }, {
+ NULL,
+ gen_bfcvtn_ah_hs,
+ NULL,
+ }
+};
+TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
+ f_vector_bfcvtn[s->fpcr_ah])
- tcg_res[pass] = tcg_temp_new_i64();
+static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
+{
+ static NeonGenWidenFn * const widenfns[3] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ };
+ NeonGenWidenFn *widenfn;
+ TCGv_i64 tcg_res[2];
+ TCGv_i32 tcg_op;
+ int part, pass;
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_res[pass], tcg_op);
-
- if (accum) {
- read_vec_element(s, tcg_op, rd, pass, MO_64);
- if (size == 0) {
- gen_helper_neon_addl_u16(tcg_res[pass],
- tcg_res[pass], tcg_op);
- } else {
- gen_helper_neon_addl_u32(tcg_res[pass],
- tcg_res[pass], tcg_op);
- }
- }
- }
- }
- if (!is_q) {
- tcg_res[1] = tcg_constant_i64(0);
+ if (a->esz == MO_64) {
+ return false;
}
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ if (!fp_access_check(s)) {
+ return true;
}
-}
-static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
-{
- /* Implement SHLL and SHLL2 */
- int pass;
- int part = is_q ? 2 : 0;
- TCGv_i64 tcg_res[2];
+ tcg_op = tcg_temp_new_i32();
+ widenfn = widenfns[a->esz];
+ part = a->q ? 2 : 0;
for (pass = 0; pass < 2; pass++) {
- static NeonGenWidenFn * const widenfns[3] = {
- gen_helper_neon_widen_u8,
- gen_helper_neon_widen_u16,
- tcg_gen_extu_i32_i64,
- };
- NeonGenWidenFn *widenfn = widenfns[size];
- TCGv_i32 tcg_op = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
+ read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
tcg_res[pass] = tcg_temp_new_i64();
widenfn(tcg_res[pass], tcg_op);
- tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
+ tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
}
for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
}
+ return true;
}
-/* AdvSIMD two reg misc
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
-{
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- bool u = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- bool need_fpstatus = false;
- int rmode = -1;
- TCGv_i32 tcg_rmode;
- TCGv_ptr tcg_fpstatus;
-
- switch (opcode) {
- case 0x0: /* REV64, REV32 */
- case 0x1: /* REV16 */
- handle_rev(s, opcode, u, is_q, size, rn, rd);
- return;
- case 0x5: /* CNT, NOT, RBIT */
- if (u && size == 0) {
- /* NOT */
- break;
- } else if (u && size == 1) {
- /* RBIT */
- break;
- } else if (!u && size == 0) {
- /* CNT */
- break;
- }
- unallocated_encoding(s);
- return;
- case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
- case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
- return;
- case 0x4: /* CLS, CLZ */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x2: /* SADDLP, UADDLP */
- case 0x6: /* SADALP, UADALP */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
- return;
- case 0x13: /* SHLL, SHLL2 */
- if (u == 0 || size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_shll(s, is_q, size, rn, rd);
- return;
- case 0xa: /* CMLT */
- if (u == 1) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x8: /* CMGT, CMGE */
- case 0x9: /* CMEQ, CMLE */
- case 0xb: /* ABS, NEG */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x7: /* SQABS, SQNEG */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0xc ... 0xf:
- case 0x16 ... 0x1f:
- {
- /* Floating point: U, size[1] and opcode indicate operation;
- * size[0] indicates single or double precision.
- */
- int is_double = extract32(size, 0, 1);
- opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
- size = is_double ? 3 : 2;
- switch (opcode) {
- case 0x2f: /* FABS */
- case 0x6f: /* FNEG */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x1d: /* SCVTF */
- case 0x5d: /* UCVTF */
- {
- bool is_signed = (opcode == 0x1d) ? true : false;
- int elements = is_double ? 2 : is_q ? 4 : 2;
- if (is_double && !is_q) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
- return;
- }
- case 0x2c: /* FCMGT (zero) */
- case 0x2d: /* FCMEQ (zero) */
- case 0x2e: /* FCMLT (zero) */
- case 0x6c: /* FCMGE (zero) */
- case 0x6d: /* FCMLE (zero) */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
- return;
- case 0x7f: /* FSQRT */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- need_fpstatus = true;
- rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x5c: /* FCVTAU */
- case 0x1c: /* FCVTAS */
- need_fpstatus = true;
- rmode = FPROUNDING_TIEAWAY;
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x3c: /* URECPE */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x3d: /* FRECPE */
- case 0x7d: /* FRSQRTE */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
- return;
- case 0x56: /* FCVTXN, FCVTXN2 */
- if (size == 2) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x16: /* FCVTN, FCVTN2 */
- /* handle_2misc_narrow does a 2*size -> size operation, but these
- * instructions encode the source size rather than dest size.
- */
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
- return;
- case 0x36: /* BFCVTN, BFCVTN2 */
- if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
- return;
- case 0x17: /* FCVTL, FCVTL2 */
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_widening(s, opcode, is_q, size, rn, rd);
- return;
- case 0x18: /* FRINTN */
- case 0x19: /* FRINTM */
- case 0x38: /* FRINTP */
- case 0x39: /* FRINTZ */
- rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
- /* fall through */
- case 0x59: /* FRINTX */
- case 0x79: /* FRINTI */
- need_fpstatus = true;
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x58: /* FRINTA */
- rmode = FPROUNDING_TIEAWAY;
- need_fpstatus = true;
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x7c: /* URSQRTE */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x1e: /* FRINT32Z */
- case 0x1f: /* FRINT64Z */
- rmode = FPROUNDING_ZERO;
- /* fall through */
- case 0x5e: /* FRINT32X */
- case 0x5f: /* FRINT64X */
- need_fpstatus = true;
- if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
- }
- default:
- case 0x3: /* SUQADD, USQADD */
- unallocated_encoding(s);
- return;
- }
+static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
+{
+ int check = fp_access_check_vector_hsd(s, a->q, a->esz);
- if (!fp_access_check(s)) {
- return;
+ if (check <= 0) {
+ return check == 0;
}
- if (need_fpstatus || rmode >= 0) {
- tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
- } else {
- tcg_fpstatus = NULL;
- }
- if (rmode >= 0) {
- tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
- } else {
- tcg_rmode = NULL;
- }
+ gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
+ return true;
+}
- switch (opcode) {
- case 0x5:
- if (u && size == 0) { /* NOT */
- gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
- return;
- }
- break;
- case 0x8: /* CMGT, CMGE */
- if (u) {
- gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
- } else {
- gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
- }
- return;
- case 0x9: /* CMEQ, CMLE */
- if (u) {
- gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
- } else {
- gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
- }
- return;
- case 0xa: /* CMLT */
- gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
- return;
- case 0xb:
- if (u) { /* ABS, NEG */
- gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
- } else {
- gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
- }
- return;
- }
+TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
+TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
- if (size == 3) {
- /* All 64-bit element operations can be shared with scalar 2misc */
- int pass;
+static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
+ const FPScalar1 *f, int rmode)
+{
+ TCGv_i32 tcg_rmode = NULL;
+ TCGv_ptr fpst;
+ int check = fp_access_check_vector_hsd(s, a->q, a->esz);
- /* Coverity claims (size == 3 && !is_q) has been eliminated
- * from all paths leading to here.
- */
- tcg_debug_assert(is_q);
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
+ if (check <= 0) {
+ return check == 0;
+ }
- read_vec_element(s, tcg_op, rn, pass, MO_64);
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+ if (rmode >= 0) {
+ tcg_rmode = gen_set_rmode(rmode, fpst);
+ }
- handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
- tcg_rmode, tcg_fpstatus);
+ if (a->esz == MO_64) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
- write_vec_element(s, tcg_res, rd, pass, MO_64);
+ for (int pass = 0; pass < 2; ++pass) {
+ read_vec_element(s, t64, a->rn, pass, MO_64);
+ f->gen_d(t64, t64, fpst);
+ write_vec_element(s, t64, a->rd, pass, MO_64);
}
} else {
- int pass;
-
- for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
-
- if (size == 2) {
- /* Special cases for 32 bit elements */
- switch (opcode) {
- case 0x4: /* CLS */
- if (u) {
- tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
- } else {
- tcg_gen_clrsb_i32(tcg_res, tcg_op);
- }
- break;
- case 0x7: /* SQABS, SQNEG */
- if (u) {
- gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
- } else {
- gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
- }
- break;
- case 0x2f: /* FABS */
- gen_vfp_abss(tcg_res, tcg_op);
- break;
- case 0x6f: /* FNEG */
- gen_vfp_negs(tcg_res, tcg_op);
- break;
- case 0x7f: /* FSQRT */
- gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
- break;
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- gen_helper_vfp_tosls(tcg_res, tcg_op,
- tcg_constant_i32(0), tcg_fpstatus);
- break;
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- gen_helper_vfp_touls(tcg_res, tcg_op,
- tcg_constant_i32(0), tcg_fpstatus);
- break;
- case 0x18: /* FRINTN */
- case 0x19: /* FRINTM */
- case 0x38: /* FRINTP */
- case 0x39: /* FRINTZ */
- case 0x58: /* FRINTA */
- case 0x79: /* FRINTI */
- gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x59: /* FRINTX */
- gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x7c: /* URSQRTE */
- gen_helper_rsqrte_u32(tcg_res, tcg_op);
- break;
- case 0x1e: /* FRINT32Z */
- case 0x5e: /* FRINT32X */
- gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x1f: /* FRINT64Z */
- case 0x5f: /* FRINT64X */
- gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- /* Use helpers for 8 and 16 bit elements */
- switch (opcode) {
- case 0x5: /* CNT, RBIT */
- /* For these two insns size is part of the opcode specifier
- * (handled earlier); they always operate on byte elements.
- */
- if (u) {
- gen_helper_neon_rbit_u8(tcg_res, tcg_op);
- } else {
- gen_helper_neon_cnt_u8(tcg_res, tcg_op);
- }
- break;
- case 0x7: /* SQABS, SQNEG */
- {
- NeonGenOneOpEnvFn *genfn;
- static NeonGenOneOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
- { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
- };
- genfn = fns[size][u];
- genfn(tcg_res, tcg_env, tcg_op);
- break;
- }
- case 0x4: /* CLS, CLZ */
- if (u) {
- if (size == 0) {
- gen_helper_neon_clz_u8(tcg_res, tcg_op);
- } else {
- gen_helper_neon_clz_u16(tcg_res, tcg_op);
- }
- } else {
- if (size == 0) {
- gen_helper_neon_cls_s8(tcg_res, tcg_op);
- } else {
- gen_helper_neon_cls_s16(tcg_res, tcg_op);
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
+ TCGv_i32 t32 = tcg_temp_new_i32();
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
+ = (a->esz == MO_16 ? f->gen_h : f->gen_s);
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
+ read_vec_element_i32(s, t32, a->rn, pass, a->esz);
+ gen(t32, t32, fpst);
+ write_vec_element_i32(s, t32, a->rd, pass, a->esz);
}
}
- clear_vec_high(s, is_q, rd);
+ clear_vec_high(s, a->q, a->rd);
- if (tcg_rmode) {
- gen_restore_rmode(tcg_rmode, tcg_fpstatus);
+ if (rmode >= 0) {
+ gen_restore_rmode(tcg_rmode, fpst);
}
+ return true;
}
-/* AdvSIMD [scalar] two register miscellaneous (FP16)
- *
- * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
- * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
- * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
- * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
- *
- * This actually covers two groups where scalar access is governed by
- * bit 28. A bunch of the instructions (float to integral) only exist
- * in the vector form and are un-allocated for the scalar decode. Also
- * in the scalar decode Q is always 1.
- */
-static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
-{
- int fpop, opcode, a, u;
- int rn, rd;
- bool is_q;
- bool is_scalar;
- bool only_in_vector = false;
+TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
- int pass;
- TCGv_i32 tcg_rmode = NULL;
- TCGv_ptr tcg_fpstatus = NULL;
- bool need_fpst = true;
- int rmode = -1;
-
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
+TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
+TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
+TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
+TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
+TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
+TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
+TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
- rd = extract32(insn, 0, 5);
- rn = extract32(insn, 5, 5);
+TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
+ &f_scalar_frint32, FPROUNDING_ZERO)
+TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
+TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
+ &f_scalar_frint64, FPROUNDING_ZERO)
+TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
- a = extract32(insn, 23, 1);
- u = extract32(insn, 29, 1);
- is_scalar = extract32(insn, 28, 1);
- is_q = extract32(insn, 30, 1);
+static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
+ bool is_q, int rd, int rn, int data,
+ gen_helper_gvec_2_ptr * const fns[3],
+ ARMFPStatusFlavour fpsttype)
+{
+ int check = fp_access_check_vector_hsd(s, is_q, esz);
+ TCGv_ptr fpst;
- opcode = extract32(insn, 12, 5);
- fpop = deposit32(opcode, 5, 1, a);
- fpop = deposit32(fpop, 6, 1, u);
+ if (check <= 0) {
+ return check == 0;
+ }
- switch (fpop) {
- case 0x1d: /* SCVTF */
- case 0x5d: /* UCVTF */
- {
- int elements;
+ fpst = fpstatus_ptr(fpsttype);
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), fpst,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ data, fns[esz - 1]);
+ return true;
+}
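The fns[esz - 1] indexing relies on MemOp encoding log2 sizes (MO_16 == 1, MO_32 == 2, MO_64 == 3), so the three-entry tables that follow are ordered half, single, double; fp_access_check_vector_hsd() has presumably already rejected every other size. As a comment-sketch:

    /* esz: MO_16 -> fns[0] (f16 helper), MO_32 -> fns[1] (f32),
     *      MO_64 -> fns[2] (f64); no other size reaches this point. */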
- if (is_scalar) {
- elements = 1;
- } else {
- elements = (is_q ? 8 : 4);
- }
+static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
+ int rd, int rn, int data,
+ gen_helper_gvec_2_ptr * const fns[3])
+{
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
+ esz == MO_16 ? FPST_A64_F16 :
+ FPST_A64);
+}
- if (!fp_access_check(s)) {
- return;
- }
- handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
- return;
- }
- break;
- case 0x2c: /* FCMGT (zero) */
- case 0x2d: /* FCMEQ (zero) */
- case 0x2e: /* FCMLT (zero) */
- case 0x6c: /* FCMGE (zero) */
- case 0x6d: /* FCMLE (zero) */
- handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
- return;
- case 0x3d: /* FRECPE */
- case 0x3f: /* FRECPX */
- break;
- case 0x18: /* FRINTN */
- only_in_vector = true;
- rmode = FPROUNDING_TIEEVEN;
- break;
- case 0x19: /* FRINTM */
- only_in_vector = true;
- rmode = FPROUNDING_NEGINF;
- break;
- case 0x38: /* FRINTP */
- only_in_vector = true;
- rmode = FPROUNDING_POSINF;
- break;
- case 0x39: /* FRINTZ */
- only_in_vector = true;
- rmode = FPROUNDING_ZERO;
- break;
- case 0x58: /* FRINTA */
- only_in_vector = true;
- rmode = FPROUNDING_TIEAWAY;
- break;
- case 0x59: /* FRINTX */
- case 0x79: /* FRINTI */
- only_in_vector = true;
- /* current rounding mode */
- break;
- case 0x1a: /* FCVTNS */
- rmode = FPROUNDING_TIEEVEN;
- break;
- case 0x1b: /* FCVTMS */
- rmode = FPROUNDING_NEGINF;
- break;
- case 0x1c: /* FCVTAS */
- rmode = FPROUNDING_TIEAWAY;
- break;
- case 0x3a: /* FCVTPS */
- rmode = FPROUNDING_POSINF;
- break;
- case 0x3b: /* FCVTZS */
- rmode = FPROUNDING_ZERO;
- break;
- case 0x5a: /* FCVTNU */
- rmode = FPROUNDING_TIEEVEN;
- break;
- case 0x5b: /* FCVTMU */
- rmode = FPROUNDING_NEGINF;
- break;
- case 0x5c: /* FCVTAU */
- rmode = FPROUNDING_TIEAWAY;
- break;
- case 0x7a: /* FCVTPU */
- rmode = FPROUNDING_POSINF;
- break;
- case 0x7b: /* FCVTZU */
- rmode = FPROUNDING_ZERO;
- break;
- case 0x2f: /* FABS */
- case 0x6f: /* FNEG */
- need_fpst = false;
- break;
- case 0x7d: /* FRSQRTE */
- case 0x7f: /* FSQRT (vector) */
- break;
- default:
- unallocated_encoding(s);
- return;
- }
+static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
+ int rd, int rn, int data,
+ gen_helper_gvec_2_ptr * const fns[3])
+{
+ return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
+ fns, select_ah_fpst(s, esz));
+}
+static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
+ gen_helper_gvec_vcvt_sh,
+ gen_helper_gvec_vcvt_sf,
+ gen_helper_gvec_vcvt_sd,
+};
+TRANS(SCVTF_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
+TRANS(SCVTF_vf, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
+
+static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
+ gen_helper_gvec_vcvt_uh,
+ gen_helper_gvec_vcvt_uf,
+ gen_helper_gvec_vcvt_ud,
+};
+TRANS(UCVTF_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
+TRANS(UCVTF_vf, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
+
+static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
+ gen_helper_gvec_vcvt_rz_hs,
+ gen_helper_gvec_vcvt_rz_fs,
+ gen_helper_gvec_vcvt_rz_ds,
+};
+TRANS(FCVTZS_vf, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
- /* Check additional constraints for the scalar encoding */
- if (is_scalar) {
- if (!is_q) {
- unallocated_encoding(s);
- return;
- }
- /* FRINTxx is only in the vector form */
- if (only_in_vector) {
- unallocated_encoding(s);
- return;
- }
- }
+static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
+ gen_helper_gvec_vcvt_rz_hu,
+ gen_helper_gvec_vcvt_rz_fu,
+ gen_helper_gvec_vcvt_rz_du,
+};
+TRANS(FCVTZU_vf, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
- if (!fp_access_check(s)) {
- return;
- }
+static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
+ gen_helper_gvec_vcvt_rm_sh,
+ gen_helper_gvec_vcvt_rm_ss,
+ gen_helper_gvec_vcvt_rm_sd,
+};
- if (rmode >= 0 || need_fpst) {
- tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
- }
+static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
+ gen_helper_gvec_vcvt_rm_uh,
+ gen_helper_gvec_vcvt_rm_us,
+ gen_helper_gvec_vcvt_rm_ud,
+};
- if (rmode >= 0) {
- tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
- }
+TRANS(FCVTNS_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
+TRANS(FCVTNU_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
+TRANS(FCVTPS_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
+TRANS(FCVTPU_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
+TRANS(FCVTMS_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
+TRANS(FCVTMU_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
+TRANS(FCVTZS_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
+TRANS(FCVTZU_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
+TRANS(FCVTAS_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
+TRANS(FCVTAU_vi, do_gvec_op2_fpst,
+ a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
+
+static gen_helper_gvec_2_ptr * const f_fceq0[] = {
+ gen_helper_gvec_fceq0_h,
+ gen_helper_gvec_fceq0_s,
+ gen_helper_gvec_fceq0_d,
+};
+TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
- if (is_scalar) {
- TCGv_i32 tcg_op = read_fp_hreg(s, rn);
- TCGv_i32 tcg_res = tcg_temp_new_i32();
+static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
+ gen_helper_gvec_fcgt0_h,
+ gen_helper_gvec_fcgt0_s,
+ gen_helper_gvec_fcgt0_d,
+};
+TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
- switch (fpop) {
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x3d: /* FRECPE */
- gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x3f: /* FRECPX */
- gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x6f: /* FNEG */
- tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
- break;
- case 0x7d: /* FRSQRTE */
- gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- default:
- g_assert_not_reached();
- }
+static gen_helper_gvec_2_ptr * const f_fcge0[] = {
+ gen_helper_gvec_fcge0_h,
+ gen_helper_gvec_fcge0_s,
+ gen_helper_gvec_fcge0_d,
+};
+TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
- /* limit any sign extension going on */
- tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
- write_fp_sreg(s, rd, tcg_res);
- } else {
- for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
-
- switch (fpop) {
- case 0x1a: /* FCVTNS */
- case 0x1b: /* FCVTMS */
- case 0x1c: /* FCVTAS */
- case 0x3a: /* FCVTPS */
- case 0x3b: /* FCVTZS */
- gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x3d: /* FRECPE */
- gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x5a: /* FCVTNU */
- case 0x5b: /* FCVTMU */
- case 0x5c: /* FCVTAU */
- case 0x7a: /* FCVTPU */
- case 0x7b: /* FCVTZU */
- gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x18: /* FRINTN */
- case 0x19: /* FRINTM */
- case 0x38: /* FRINTP */
- case 0x39: /* FRINTZ */
- case 0x58: /* FRINTA */
- case 0x79: /* FRINTI */
- gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x59: /* FRINTX */
- gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x2f: /* FABS */
- tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
- break;
- case 0x6f: /* FNEG */
- tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
- break;
- case 0x7d: /* FRSQRTE */
- gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- case 0x7f: /* FSQRT */
- gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
- break;
- default:
- g_assert_not_reached();
- }
+static gen_helper_gvec_2_ptr * const f_fclt0[] = {
+ gen_helper_gvec_fclt0_h,
+ gen_helper_gvec_fclt0_s,
+ gen_helper_gvec_fclt0_d,
+};
+TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
- write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- }
+static gen_helper_gvec_2_ptr * const f_fcle0[] = {
+ gen_helper_gvec_fcle0_h,
+ gen_helper_gvec_fcle0_s,
+ gen_helper_gvec_fcle0_d,
+};
+TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
- clear_vec_high(s, is_q, rd);
- }
+static gen_helper_gvec_2_ptr * const f_frecpe[] = {
+ gen_helper_gvec_frecpe_h,
+ gen_helper_gvec_frecpe_s,
+ gen_helper_gvec_frecpe_d,
+};
+static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
+ gen_helper_gvec_frecpe_h,
+ gen_helper_gvec_frecpe_rpres_s,
+ gen_helper_gvec_frecpe_d,
+};
+TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
- if (tcg_rmode) {
- gen_restore_rmode(tcg_rmode, tcg_fpstatus);
- }
-}
+static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
+ gen_helper_gvec_frsqrte_h,
+ gen_helper_gvec_frsqrte_s,
+ gen_helper_gvec_frsqrte_d,
+};
+static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
+ gen_helper_gvec_frsqrte_h,
+ gen_helper_gvec_frsqrte_rpres_s,
+ gen_helper_gvec_frsqrte_d,
+};
+TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
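The ternary in the two TRANS() invocations above is legal because TRANS()
expands its arguments inside the generated trans_*() function, where both
s and a are in scope. Roughly, as a sketch (the real macro lives in
translate.h; the argument type is shown by analogy with trans_FCVTL_v
below):

    static bool trans_FRECPE_v(DisasContext *s, arg_qrr_e *a)
    {
        /* Use the FEAT_RPRES increased-precision helper table only
         * when FPCR.AH is set and the CPU implements the feature. */
        return do_gvec_op2_ah_fpst(s, a->esz, a->q, a->rd, a->rn, 0,
                                   s->fpcr_ah && dc_isar_feature(aa64_rpres, s)
                                   ? f_frecpe_rpres : f_frecpe);
    }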
-/* AdvSIMD scalar x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * AdvSIMD vector x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_indexed(DisasContext *s, uint32_t insn)
-{
- /* This encoding has two kinds of instruction:
- * normal, where we perform elt x idxelt => elt for each
- * element in the vector
- * long, where we perform elt x idxelt and generate a result of
- * double the width of the input element
- * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
+static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
+{
+ /* Handle 2-reg-misc ops which are widening (so each size element
+     * in the source becomes a 2*size element in the destination).
+ * The only instruction like this is FCVTL.
*/
- bool is_scalar = extract32(insn, 28, 1);
- bool is_q = extract32(insn, 30, 1);
- bool u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int l = extract32(insn, 21, 1);
- int m = extract32(insn, 20, 1);
- /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
- int rm = extract32(insn, 16, 4);
- int opcode = extract32(insn, 12, 4);
- int h = extract32(insn, 11, 1);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- bool is_long = false;
- int is_fp = 0;
- bool is_fp16 = false;
- int index;
+ int pass;
TCGv_ptr fpst;
- switch (16 * u + opcode) {
- case 0x02: /* SMLAL, SMLAL2 */
- case 0x12: /* UMLAL, UMLAL2 */
- case 0x06: /* SMLSL, SMLSL2 */
- case 0x16: /* UMLSL, UMLSL2 */
- case 0x0a: /* SMULL, SMULL2 */
- case 0x1a: /* UMULL, UMULL2 */
- if (is_scalar) {
- unallocated_encoding(s);
- return;
- }
- is_long = true;
- break;
- case 0x03: /* SQDMLAL, SQDMLAL2 */
- case 0x07: /* SQDMLSL, SQDMLSL2 */
- case 0x0b: /* SQDMULL, SQDMULL2 */
- is_long = true;
- break;
- case 0x1d: /* SQRDMLAH */
- case 0x1f: /* SQRDMLSH */
- if (!dc_isar_feature(aa64_rdm, s)) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x0e: /* SDOT */
- case 0x1e: /* UDOT */
- if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x0f:
- switch (size) {
- case 0: /* SUDOT */
- case 2: /* USDOT */
- if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
- unallocated_encoding(s);
- return;
- }
- size = MO_32;
- break;
- case 1: /* BFDOT */
- if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
- unallocated_encoding(s);
- return;
- }
- size = MO_32;
- break;
- case 3: /* BFMLAL{B,T} */
- if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
- unallocated_encoding(s);
- return;
- }
- /* can't set is_fp without other incorrect size checks */
- size = MO_16;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x11: /* FCMLA #0 */
- case 0x13: /* FCMLA #90 */
- case 0x15: /* FCMLA #180 */
- case 0x17: /* FCMLA #270 */
- if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
- unallocated_encoding(s);
- return;
- }
- is_fp = 2;
- break;
- default:
- case 0x00: /* FMLAL */
- case 0x01: /* FMLA */
- case 0x04: /* FMLSL */
- case 0x05: /* FMLS */
- case 0x08: /* MUL */
- case 0x09: /* FMUL */
- case 0x0c: /* SQDMULH */
- case 0x0d: /* SQRDMULH */
- case 0x10: /* MLA */
- case 0x14: /* MLS */
- case 0x18: /* FMLAL2 */
- case 0x19: /* FMULX */
- case 0x1c: /* FMLSL2 */
- unallocated_encoding(s);
- return;
- }
-
- switch (is_fp) {
- case 1: /* normal fp */
- unallocated_encoding(s); /* in decodetree */
- return;
-
- case 2: /* complex fp */
- /* Each indexable element is a complex pair. */
- size += 1;
- switch (size) {
- case MO_32:
- if (h && !is_q) {
- unallocated_encoding(s);
- return;
- }
- is_fp16 = true;
- break;
- case MO_64:
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
-
- default: /* integer */
- switch (size) {
- case MO_8:
- case MO_64:
- unallocated_encoding(s);
- return;
- }
- break;
- }
- if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
-
- /* Given MemOp size, adjust register and indexing. */
- switch (size) {
- case MO_16:
- index = h << 2 | l << 1 | m;
- break;
- case MO_32:
- index = h << 1 | l;
- rm |= m << 4;
- break;
- case MO_64:
- if (l || !is_q) {
- unallocated_encoding(s);
- return;
- }
- index = h;
- rm |= m << 4;
- break;
- default:
- g_assert_not_reached();
- }
-
if (!fp_access_check(s)) {
- return;
- }
-
- if (is_fp) {
- fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
- } else {
- fpst = NULL;
+ return true;
}
- switch (16 * u + opcode) {
- case 0x0e: /* SDOT */
- case 0x1e: /* UDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- u ? gen_helper_gvec_udot_idx_b
- : gen_helper_gvec_sdot_idx_b);
- return;
- case 0x0f:
- switch (extract32(insn, 22, 2)) {
- case 0: /* SUDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_sudot_idx_b);
- return;
- case 1: /* BFDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_bfdot_idx);
- return;
- case 2: /* USDOT */
- gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_usdot_idx_b);
- return;
- case 3: /* BFMLAL{B,T} */
- gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
- gen_helper_gvec_bfmlal_idx);
- return;
- }
- g_assert_not_reached();
- case 0x11: /* FCMLA #0 */
- case 0x13: /* FCMLA #90 */
- case 0x15: /* FCMLA #180 */
- case 0x17: /* FCMLA #270 */
- {
- int rot = extract32(insn, 13, 2);
- int data = (index << 2) | rot;
- tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- vec_full_reg_offset(s, rd), fpst,
- is_q ? 16 : 8, vec_full_reg_size(s), data,
- size == MO_64
- ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
- }
- return;
- }
+ if (a->esz == MO_64) {
+ /* 32 -> 64 bit fp conversion */
+ TCGv_i64 tcg_res[2];
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ int srcelt = a->q ? 2 : 0;
- if (size == 3) {
- g_assert_not_reached();
- } else if (!is_long) {
- /* 32 bit floating point, or 16 or 32 bit integer.
- * For the 16 bit scalar case we use the usual Neon helpers and
- * rely on the fact that 0 op 0 == 0 with no side effects.
- */
- TCGv_i32 tcg_idx = tcg_temp_new_i32();
- int pass, maxpasses;
+ fpst = fpstatus_ptr(FPST_A64);
- if (is_scalar) {
- maxpasses = 1;
- } else {
- maxpasses = is_q ? 4 : 2;
+ for (pass = 0; pass < 2; pass++) {
+ tcg_res[pass] = tcg_temp_new_i64();
+ read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
+ gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
}
-
- read_vec_element_i32(s, tcg_idx, rm, index, size);
-
- if (size == 1 && !is_scalar) {
- /* The simplest way to handle the 16x16 indexed ops is to duplicate
- * the index into both halves of the 32 bit tcg_idx and then use
- * the usual Neon helpers.
- */
- tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
- }
-
- for (pass = 0; pass < maxpasses; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
-
- switch (16 * u + opcode) {
- case 0x10: /* MLA */
- case 0x14: /* MLS */
- {
- static NeonGenTwoOpFn * const fns[2][2] = {
- { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
- { tcg_gen_add_i32, tcg_gen_sub_i32 },
- };
- NeonGenTwoOpFn *genfn;
- bool is_sub = opcode == 0x4;
-
- if (size == 1) {
- gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
- } else {
- tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
- }
- if (opcode == 0x8) {
- break;
- }
- read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
- genfn = fns[size - 1][is_sub];
- genfn(tcg_res, tcg_op, tcg_res);
- break;
- }
- case 0x0c: /* SQDMULH */
- if (size == 1) {
- gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
- tcg_op, tcg_idx);
- } else {
- gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
- tcg_op, tcg_idx);
- }
- break;
- case 0x0d: /* SQRDMULH */
- if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
- tcg_op, tcg_idx);
- } else {
- gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
- tcg_op, tcg_idx);
- }
- break;
- case 0x1d: /* SQRDMLAH */
- read_vec_element_i32(s, tcg_res, rd, pass,
- is_scalar ? size : MO_32);
- if (size == 1) {
- gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env,
- tcg_op, tcg_idx, tcg_res);
- } else {
- gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env,
- tcg_op, tcg_idx, tcg_res);
- }
- break;
- case 0x1f: /* SQRDMLSH */
- read_vec_element_i32(s, tcg_res, rd, pass,
- is_scalar ? size : MO_32);
- if (size == 1) {
- gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env,
- tcg_op, tcg_idx, tcg_res);
- } else {
- gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env,
- tcg_op, tcg_idx, tcg_res);
- }
- break;
- default:
- case 0x01: /* FMLA */
- case 0x05: /* FMLS */
- case 0x09: /* FMUL */
- case 0x19: /* FMULX */
- g_assert_not_reached();
- }
-
- if (is_scalar) {
- write_fp_sreg(s, rd, tcg_res);
- } else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- }
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
}
-
- clear_vec_high(s, is_q, rd);
} else {
- /* long ops: 16x16->32 or 32x32->64 */
- TCGv_i64 tcg_res[2];
- int pass;
- bool satop = extract32(opcode, 0, 1);
- MemOp memop = MO_32;
-
- if (satop || !u) {
- memop |= MO_SIGN;
- }
-
- if (size == 2) {
- TCGv_i64 tcg_idx = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_idx, rm, index, memop);
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_passres;
- int passelt;
-
- if (is_scalar) {
- passelt = 0;
- } else {
- passelt = pass + (is_q * 2);
- }
-
- read_vec_element(s, tcg_op, rn, passelt, memop);
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- if (opcode == 0xa || opcode == 0xb) {
- /* Non-accumulating ops */
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
-
- if (satop) {
- /* saturating, doubling */
- gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- }
-
- if (opcode == 0xa || opcode == 0xb) {
- continue;
- }
-
- /* Accumulating op: handle accumulate step */
- read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
-
- switch (opcode) {
- case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- break;
- case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- break;
- case 0x7: /* SQDMLSL, SQDMLSL2 */
- tcg_gen_neg_i64(tcg_passres, tcg_passres);
- /* fall through */
- case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- clear_vec_high(s, !is_scalar, rd);
- } else {
- TCGv_i32 tcg_idx = tcg_temp_new_i32();
-
- assert(size == 1);
- read_vec_element_i32(s, tcg_idx, rm, index, size);
-
- if (!is_scalar) {
- /* The simplest way to handle the 16x16 indexed ops is to
- * duplicate the index into both halves of the 32 bit tcg_idx
- * and then use the usual Neon helpers.
- */
- tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
- }
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i64 tcg_passres;
-
- if (is_scalar) {
- read_vec_element_i32(s, tcg_op, rn, pass, size);
- } else {
- read_vec_element_i32(s, tcg_op, rn,
- pass + (is_q * 2), MO_32);
- }
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- if (opcode == 0xa || opcode == 0xb) {
- /* Non-accumulating ops */
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- if (memop & MO_SIGN) {
- gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
- } else {
- gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
- }
- if (satop) {
- gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- }
-
- if (opcode == 0xa || opcode == 0xb) {
- continue;
- }
-
- /* Accumulating op: handle accumulate step */
- read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
-
- switch (opcode) {
- case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- break;
- case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- break;
- case 0x7: /* SQDMLSL, SQDMLSL2 */
- gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
- /* fall through */
- case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
- }
+ /* 16 -> 32 bit fp conversion */
+ int srcelt = a->q ? 4 : 0;
+ TCGv_i32 tcg_res[4];
+ TCGv_i32 ahp = get_ahp_flag();
- if (is_scalar) {
- tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
- }
- }
+ fpst = fpstatus_ptr(FPST_A64_F16);
- if (is_scalar) {
- tcg_res[1] = tcg_constant_i64(0);
+ for (pass = 0; pass < 4; pass++) {
+ tcg_res[pass] = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
+ gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
+ fpst, ahp);
}
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ for (pass = 0; pass < 4; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
}
}
-}
-
-/* C3.6 Data processing - SIMD, inc Crypto
- *
- * As the decode gets a little complex we are using a table based
- * approach for this part of the decode.
- */
-static const AArch64DecodeTable data_proc_simd[] = {
- /* pattern , mask , fn */
- { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
- { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
- { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
- { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
- { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
- /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
- { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
- { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
- { 0x0e000000, 0xbf208c00, disas_simd_tb },
- { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
- { 0x2e000000, 0xbf208400, disas_simd_ext },
- { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
- { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
- { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
- { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
- { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
- { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
- { 0x00000000, 0x00000000, NULL }
-};
-
-static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
-{
- /* Note that this is called with all non-FP cases from
- * table C3-6 so it must UNDEF for entries not specifically
- * allocated to instructions in that table.
- */
- AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
- if (fn) {
- fn(s, insn);
- } else {
- unallocated_encoding(s);
- }
-}
-
-/* C3.6 Data processing - SIMD and floating point */
-static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
-{
- if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
- disas_data_proc_fp(s, insn);
- } else {
- /* SIMD, including crypto */
- disas_data_proc_simd(s, insn);
- }
+ clear_vec_high(s, true, a->rd);
+ return true;
}
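A compact way to read the srcelt arithmetic in trans_FCVTL_v: the Q bit
selects whether the low or high half of the source supplies the inputs
(FCVTL vs FCVTL2). A standalone sketch, with an illustrative name:

    /* First source element for FCVTL (q == 0) vs FCVTL2 (q == 1). */
    static int fcvtl_srcelt(bool q, MemOp dst_esz)
    {
        int nelem = (dst_esz == MO_64) ? 2 : 4;  /* 32->64 : 16->32 */
        return q ? nelem : 0;         /* FCVTL2 reads the high half */
    }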
static bool trans_OK(DisasContext *s, arg_OK *a)
@@ -12584,37 +10040,6 @@ static bool trans_FAIL(DisasContext *s, arg_OK *a)
}
/**
- * is_guarded_page:
- * @env: The cpu environment
- * @s: The DisasContext
- *
- * Return true if the page is guarded.
- */
-static bool is_guarded_page(CPUARMState *env, DisasContext *s)
-{
- uint64_t addr = s->base.pc_first;
-#ifdef CONFIG_USER_ONLY
- return page_get_flags(addr) & PAGE_BTI;
-#else
- CPUTLBEntryFull *full;
- void *host;
- int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
- int flags;
-
- /*
- * We test this immediately after reading an insn, which means
- * that the TLB entry must be present and valid, and thus this
- * access will never raise an exception.
- */
- flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
- false, &host, &full, 0);
- assert(!(flags & TLB_INVALID_MASK));
-
- return full->extra.arm.guarded;
-#endif
-}
-
-/**
* btype_destination_ok:
* @insn: The instruction at the branch destination
* @bt: SCTLR_ELx.BT
@@ -12666,24 +10091,6 @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
return false;
}
-/* C3.1 A64 instruction index by encoding */
-static void disas_a64_legacy(DisasContext *s, uint32_t insn)
-{
- switch (extract32(insn, 25, 4)) {
- case 0x5:
- case 0xd: /* Data processing - register */
- disas_data_proc_reg(s, insn);
- break;
- case 0x7:
- case 0xf: /* Data processing - SIMD and floating point */
- disas_data_proc_simd_fp(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
-}
-
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
CPUState *cpu)
{
@@ -12738,6 +10145,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
+ dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
+ dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
@@ -12831,7 +10240,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
* start of the TB.
*/
assert(s->base.num_insns == 1);
- gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
+ gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc));
s->base.is_jmp = DISAS_NORETURN;
s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
return;
@@ -12842,8 +10251,8 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
s->insn = insn;
s->base.pc_next = pc + 4;
- s->fp_access_checked = false;
- s->sve_access_checked = false;
+ s->fp_access_checked = 0;
+ s->sve_access_checked = 0;
if (s->pstate_il) {
/*
@@ -12856,19 +10265,6 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
if (dc_isar_feature(aa64_bti, s)) {
if (s->base.num_insns == 1) {
- /*
- * At the first insn of the TB, compute s->guarded_page.
- * We delayed computing this until successfully reading
- * the first insn of the TB, above. This (mostly) ensures
- * that the softmmu tlb entry has been populated, and the
- * page table GP bit is available.
- *
- * Note that we need to compute this even if btype == 0,
- * because this value is used for BR instructions later
- * where ENV is not available.
- */
- s->guarded_page = is_guarded_page(env, s);
-
/* First insn can have btype set to non-zero. */
tcg_debug_assert(s->btype >= 0);
@@ -12877,12 +10273,13 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
* priority -- below debugging exceptions but above most
* everything else. This allows us to handle this now
* instead of waiting until the insn is otherwise decoded.
+ *
+         * We can perform all of the checks here except the
+         * guarded-page check, which is deferred to a helper.
*/
if (s->btype != 0
- && s->guarded_page
&& !btype_destination_ok(insn, s->bt, s->btype)) {
- gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
- return;
+ gen_helper_guarded_page_check(tcg_env);
}
} else {
/* Not the first insn: btype must be 0. */
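The helper's body is added in another file of this patch and is not shown
in this hunk. A plausible sketch of the runtime check, built from the
removed is_guarded_page() logic above; is_guarded_page_now() is
hypothetical shorthand for the probe_access_full()-based GP-bit lookup:

    void HELPER(guarded_page_check)(CPUARMState *env)
    {
        int btype = extract32(env->pstate, 10, 2);   /* PSTATE.BTYPE */

        if (btype && is_guarded_page_now(env)) {
            raise_exception(env, EXCP_UDEF, syn_btitrap(btype),
                            exception_target_el(env));
        }
    }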
@@ -12898,7 +10295,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
if (!disas_a64(s, insn) &&
!disas_sme(s, insn) &&
!disas_sve(s, insn)) {
- disas_a64_legacy(s, insn);
+ unallocated_encoding(s);
}
/*
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index 0fcf7cb..b2420f5 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -65,7 +65,7 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
- if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
+ if (unlikely(s->fp_access_checked <= 0)) {
fprintf(stderr, "target-arm: FP access check missing for "
"instruction 0x%08x\n", s->insn);
abort();
@@ -185,6 +185,19 @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
return ret;
}
+/*
+ * Return the ARMFPStatusFlavour to use based on element size and
+ * whether FPCR.AH is set.
+ */
+static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz)
+{
+ if (s->fpcr_ah) {
+ return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
+ } else {
+ return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
+ }
+}
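A typical caller materialises the selected flavour with fpstatus_ptr()
and hands it to a gvec helper. A sketch (emit_fp16_unary is illustrative,
not part of this patch):

    static void emit_fp16_unary(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                bool is_q, int rd, int rn)
    {
        /* FPST_AH_F16 when FPCR.AH is set, FPST_A64_F16 otherwise. */
        TCGv_ptr fpst = fpstatus_ptr(select_ah_fpst(s, MO_16));
        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn), fpst,
                           is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    }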
+
bool disas_sve(DisasContext *, uint32_t);
bool disas_sme(DisasContext *, uint32_t);
diff --git a/target/arm/tcg/translate-m-nocp.c b/target/arm/tcg/translate-m-nocp.c
index f564d06..b92773b 100644
--- a/target/arm/tcg/translate-m-nocp.c
+++ b/target/arm/tcg/translate-m-nocp.c
@@ -332,7 +332,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
if (dc_isar_feature(aa32_mve, s)) {
/* QC is only present for MVE; otherwise RES0 */
TCGv_i32 qc = tcg_temp_new_i32();
- tcg_gen_andi_i32(qc, tmp, FPCR_QC);
+ tcg_gen_andi_i32(qc, tmp, FPSR_QC);
/*
* The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
* here writing the same value into all elements is simplest.
@@ -340,11 +340,11 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
16, 16, qc);
}
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
- fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
+ fpscr = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(fpscr, fpscr, ~FPSR_NZCV_MASK);
tcg_gen_or_i32(fpscr, fpscr, tmp);
- store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
+ store_cpu_field_low32(fpscr, vfp.fpsr);
break;
}
case ARM_VFP_FPCXT_NS:
@@ -390,7 +390,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
tcg_gen_deposit_i32(control, control, sfpa,
R_V7M_CONTROL_SFPA_SHIFT, 1);
store_cpu_field(control, v7m.control[M_REG_S]);
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
gen_helper_vfp_set_fpscr(tcg_env, tmp);
s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
break;
@@ -457,7 +457,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
case ARM_VFP_FPSCR_NZCVQC:
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCVQC_MASK);
storefn(s, opaque, tmp, true);
break;
case QEMU_VFP_FPSCR_NZCV:
@@ -465,8 +465,8 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
* Read just NZCV; this is a special case to avoid the
* helper call for the "VMRS to CPSR.NZCV" insn.
*/
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ tmp = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
storefn(s, opaque, tmp, true);
break;
case ARM_VFP_FPCXT_S:
@@ -476,7 +476,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
tmp = tcg_temp_new_i32();
sfpa = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
@@ -529,7 +529,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
sfpa = tcg_temp_new_i32();
fpscr = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(fpscr, tcg_env);
- tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, fpscr, ~FPSR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 915c9e5..c4fecb8 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -148,6 +148,37 @@ static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
return true;
}
+static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
+ int data, gen_helper_gvec_4_ptr *fn_gvec)
+{
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+ return false;
+ }
+
+ /*
+     * UNDEF accesses to odd registers for each set bit of Q: each bit
+     * flags an operand that must be a Q register, i.e. an even register
+     * number. Q will be 0b111 for all-Q-reg instructions and a subset
+     * of that when we have mixed Q- and D-reg inputs.
+ */
+ if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ int opr_sz = q ? 16 : 8;
+ tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
+ vfp_reg_offset(1, vn),
+ vfp_reg_offset(1, vm),
+ vfp_reg_offset(1, vd),
+ tcg_env,
+ opr_sz, opr_sz, data, fn_gvec);
+ return true;
+}
+
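The register check above, unpacked as plain arithmetic (a model, not
patch code): bit 2 of q guards vd, bit 1 guards vn, bit 0 guards vm.

    static bool neon_q_regs_invalid(int q, int vd, int vn, int vm)
    {
        /* A set bit in q means that operand must be a Q register,
         * which is encoded as an even register number. */
        int odd_bits = (vd & 1) * 4 | (vn & 1) * 2 | (vm & 1);
        return (odd_bits & q) != 0;   /* true => UNDEF */
    }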
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
int data, ARMFPStatusFlavour fp_flavour,
gen_helper_gvec_4_ptr *fn_gvec_ptr)
@@ -266,8 +297,8 @@ static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
if (!dc_isar_feature(aa32_bf16, s)) {
return false;
}
- return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_bfdot);
+ return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfdot);
}
static bool trans_VFML(DisasContext *s, arg_VFML *a)
@@ -360,8 +391,8 @@ static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
if (!dc_isar_feature(aa32_bf16, s)) {
return false;
}
- return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
- gen_helper_gvec_bfdot_idx);
+ return do_neon_ddda_env(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_bfdot_idx);
}
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
@@ -1068,144 +1099,18 @@ DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
-
-static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
-{
- /* Signed shift out of range results in all-sign-bits */
- a->shift = MIN(a->shift, (8 << a->size) - 1);
- return do_vector_2sh(s, a, tcg_gen_gvec_sari);
-}
-
-static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t oprsz, uint32_t maxsz)
-{
- tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
-}
-
-static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
-{
- /* Shift out of range is architecturally valid and results in zero. */
- if (a->shift >= (8 << a->size)) {
- return do_vector_2sh(s, a, gen_zero_rd_2sh);
- } else {
- return do_vector_2sh(s, a, tcg_gen_gvec_shri);
- }
-}
-
-static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
- NeonGenTwo64OpEnvFn *fn)
-{
- /*
- * 2-reg-and-shift operations, size == 3 case, where the
- * function needs to be passed tcg_env.
- */
- TCGv_i64 constimm;
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vm | a->vd) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- /*
- * To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- constimm = tcg_constant_i64(dup_const(a->size, a->shift));
-
- for (pass = 0; pass < a->q + 1; pass++) {
- TCGv_i64 tmp = tcg_temp_new_i64();
-
- read_neon_element64(tmp, a->vm, pass, MO_64);
- fn(tmp, tcg_env, tmp, constimm);
- write_neon_element64(tmp, a->vd, pass, MO_64);
- }
- return true;
-}
-
-static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
- NeonGenTwoOpEnvFn *fn)
-{
- /*
- * 2-reg-and-shift operations, size < 3 case, where the
- * helper needs to be passed tcg_env.
- */
- TCGv_i32 constimm, tmp;
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vm | a->vd) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- /*
- * To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- constimm = tcg_constant_i32(dup_const(a->size, a->shift));
- tmp = tcg_temp_new_i32();
-
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- read_neon_element32(tmp, a->vm, pass, MO_32);
- fn(tmp, tcg_env, tmp, constimm);
- write_neon_element32(tmp, a->vd, pass, MO_32);
- }
- return true;
-}
-
-#define DO_2SHIFT_ENV(INSN, FUNC) \
- static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
- { \
- return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
- } \
- static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
- { \
- static NeonGenTwoOpEnvFn * const fns[] = { \
- gen_helper_neon_##FUNC##8, \
- gen_helper_neon_##FUNC##16, \
- gen_helper_neon_##FUNC##32, \
- }; \
- assert(a->size < ARRAY_SIZE(fns)); \
- return do_2shift_env_32(s, a, fns[a->size]); \
- }
-
-DO_2SHIFT_ENV(VQSHLU, qshlu_s)
-DO_2SHIFT_ENV(VQSHL_U, qshl_u)
-DO_2SHIFT_ENV(VQSHL_S, qshl_s)
+DO_2SH(VSHR_S, gen_gvec_sshr)
+DO_2SH(VSHR_U, gen_gvec_ushr)
+DO_2SH(VQSHLU, gen_neon_sqshlui)
+DO_2SH(VQSHL_U, gen_neon_uqshli)
+DO_2SH(VQSHL_S, gen_neon_sqshli)
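The per-instruction special casing that previously handled out-of-range
shift immediates is now folded into the gvec expanders. The semantics
they must preserve, modelled on one 32-bit lane (a sketch, ignoring the
other element sizes):

    static int32_t vshr_s32(int32_t x, int sh)
    {
        return x >> (sh > 31 ? 31 : sh);   /* out of range: all sign bits */
    }

    static uint32_t vshr_u32(uint32_t x, int sh)
    {
        return sh > 31 ? 0 : x >> sh;      /* out of range: zero */
    }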
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
NeonGenTwo64OpFn *shiftfn,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
/* 2-reg-and-shift narrowing-shift operations, size == 3 case */
- TCGv_i64 constimm, rm1, rm2;
- TCGv_i32 rd;
+ TCGv_i64 constimm, rm1, rm2, rd;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -1232,7 +1137,7 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
constimm = tcg_constant_i64(-a->shift);
rm1 = tcg_temp_new_i64();
rm2 = tcg_temp_new_i64();
- rd = tcg_temp_new_i32();
+ rd = tcg_temp_new_i64();
/* Load both inputs first to avoid potential overwrite if rm == rd */
read_neon_element64(rm1, a->vm, 0, MO_64);
@@ -1240,18 +1145,18 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
shiftfn(rm1, rm1, constimm);
narrowfn(rd, tcg_env, rm1);
- write_neon_element32(rd, a->vd, 0, MO_32);
+ write_neon_element64(rd, a->vd, 0, MO_32);
shiftfn(rm2, rm2, constimm);
narrowfn(rd, tcg_env, rm2);
- write_neon_element32(rd, a->vd, 1, MO_32);
+ write_neon_element64(rd, a->vd, 1, MO_32);
return true;
}
static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
NeonGenTwoOpFn *shiftfn,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
/* 2-reg-and-shift narrowing-shift operations, size < 3 case */
TCGv_i32 constimm, rm1, rm2, rm3, rm4;
@@ -1306,16 +1211,16 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
- narrowfn(rm1, tcg_env, rtmp);
- write_neon_element32(rm1, a->vd, 0, MO_32);
+ narrowfn(rtmp, tcg_env, rtmp);
+ write_neon_element64(rtmp, a->vd, 0, MO_32);
shiftfn(rm3, rm3, constimm);
shiftfn(rm4, rm4, constimm);
tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
- narrowfn(rm3, tcg_env, rtmp);
- write_neon_element32(rm3, a->vd, 1, MO_32);
+ narrowfn(rtmp, tcg_env, rtmp);
+ write_neon_element64(rtmp, a->vd, 1, MO_32);
return true;
}
@@ -1330,17 +1235,17 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
}
-static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u32(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
- tcg_gen_extrl_i64_i32(dest, src);
+ tcg_gen_ext32u_i64(dest, src);
}
-static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u16(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
gen_helper_neon_narrow_u16(dest, src);
}
-static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u8(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
gen_helper_neon_narrow_u8(dest, src);
}
@@ -1504,13 +1409,13 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
-DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
-DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
+DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_rz_fs)
+DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_rz_fu)
DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
-DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
-DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
+DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_rz_hs)
+DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_rz_hu)
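The _rz_ in the renamed helpers marks these as the round-to-zero
fixed-point conversions, distinct from the _rm_ variants used earlier in
this patch that carry an explicit rounding mode. One lane, modelled
without the saturation and NaN handling the real helpers get from
float_status (a sketch):

    /* float -> signed fixed-point with frac_bits fractional bits;
     * the C cast truncates toward zero, matching round-to-zero. */
    static int32_t vcvt_rz_fs_model(float x, int frac_bits)
    {
        return (int32_t)(x * (float)(1u << frac_bits));
    }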
static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
GVecGen2iFn *fn)
@@ -1655,8 +1560,8 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
NULL, NULL, \
}; \
static NeonGenTwo64OpFn * const addfn[] = { \
- gen_helper_neon_##OP##l_u16, \
- gen_helper_neon_##OP##l_u32, \
+ tcg_gen_vec_##OP##16_i64, \
+ tcg_gen_vec_##OP##32_i64, \
tcg_gen_##OP##_i64, \
NULL, \
}; \
@@ -1734,8 +1639,8 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
{ \
static NeonGenTwo64OpFn * const addfn[] = { \
- gen_helper_neon_##OP##l_u16, \
- gen_helper_neon_##OP##l_u32, \
+ tcg_gen_vec_##OP##16_i64, \
+ tcg_gen_vec_##OP##32_i64, \
tcg_gen_##OP##_i64, \
NULL, \
}; \
@@ -1856,8 +1761,8 @@ static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
NULL,
};
static NeonGenTwo64OpFn * const addfn[] = {
- gen_helper_neon_addl_u16,
- gen_helper_neon_addl_u32,
+ tcg_gen_vec_add16_i64,
+ tcg_gen_vec_add32_i64,
tcg_gen_add_i64,
NULL,
};
@@ -1874,8 +1779,8 @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
NULL,
};
static NeonGenTwo64OpFn * const addfn[] = {
- gen_helper_neon_addl_u16,
- gen_helper_neon_addl_u32,
+ tcg_gen_vec_add16_i64,
+ tcg_gen_vec_add32_i64,
tcg_gen_add_i64,
NULL,
};
@@ -1935,8 +1840,8 @@ static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
NULL, \
}; \
static NeonGenTwo64OpFn * const accfn[] = { \
- gen_helper_neon_##ACC##l_u16, \
- gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_vec_##ACC##16_i64, \
+ tcg_gen_vec_##ACC##32_i64, \
tcg_gen_##ACC##_i64, \
NULL, \
}; \
@@ -2466,7 +2371,7 @@ static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
}; \
static NeonGenTwo64OpFn * const accfn[] = { \
NULL, \
- gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_vec_##ACC##32_i64, \
tcg_gen_##ACC##_i64, \
NULL, \
}; \
@@ -2660,204 +2565,6 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
return true;
}
-static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
-{
- int pass, half;
- TCGv_i32 tmp[2];
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (a->size == 3) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- tmp[0] = tcg_temp_new_i32();
- tmp[1] = tcg_temp_new_i32();
-
- for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
- for (half = 0; half < 2; half++) {
- read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
- switch (a->size) {
- case 0:
- tcg_gen_bswap32_i32(tmp[half], tmp[half]);
- break;
- case 1:
- gen_swap_half(tmp[half], tmp[half]);
- break;
- case 2:
- break;
- default:
- g_assert_not_reached();
- }
- }
- write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
- write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
- }
- return true;
-}
-
-static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
- NeonGenWidenFn *widenfn,
- NeonGenTwo64OpFn *opfn,
- NeonGenTwo64OpFn *accfn)
-{
- /*
- * Pairwise long operations: widen both halves of the pair,
- * combine the pairs with the opfn, and then possibly accumulate
- * into the destination with the accfn.
- */
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vd | a->vm) & a->q) {
- return false;
- }
-
- if (!widenfn) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- for (pass = 0; pass < a->q + 1; pass++) {
- TCGv_i32 tmp;
- TCGv_i64 rm0_64, rm1_64, rd_64;
-
- rm0_64 = tcg_temp_new_i64();
- rm1_64 = tcg_temp_new_i64();
- rd_64 = tcg_temp_new_i64();
-
- tmp = tcg_temp_new_i32();
- read_neon_element32(tmp, a->vm, pass * 2, MO_32);
- widenfn(rm0_64, tmp);
- read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
- widenfn(rm1_64, tmp);
-
- opfn(rd_64, rm0_64, rm1_64);
-
- if (accfn) {
- TCGv_i64 tmp64 = tcg_temp_new_i64();
- read_neon_element64(tmp64, a->vd, pass, MO_64);
- accfn(rd_64, tmp64, rd_64);
- }
- write_neon_element64(rd_64, a->vd, pass, MO_64);
- }
- return true;
-}
-
-static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
-{
- static NeonGenWidenFn * const widenfn[] = {
- gen_helper_neon_widen_s8,
- gen_helper_neon_widen_s16,
- tcg_gen_ext_i32_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const opfn[] = {
- gen_helper_neon_paddl_u16,
- gen_helper_neon_paddl_u32,
- tcg_gen_add_i64,
- NULL,
- };
-
- return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
-}
-
-static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
-{
- static NeonGenWidenFn * const widenfn[] = {
- gen_helper_neon_widen_u8,
- gen_helper_neon_widen_u16,
- tcg_gen_extu_i32_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const opfn[] = {
- gen_helper_neon_paddl_u16,
- gen_helper_neon_paddl_u32,
- tcg_gen_add_i64,
- NULL,
- };
-
- return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
-}
-
-static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
-{
- static NeonGenWidenFn * const widenfn[] = {
- gen_helper_neon_widen_s8,
- gen_helper_neon_widen_s16,
- tcg_gen_ext_i32_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const opfn[] = {
- gen_helper_neon_paddl_u16,
- gen_helper_neon_paddl_u32,
- tcg_gen_add_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const accfn[] = {
- gen_helper_neon_addl_u16,
- gen_helper_neon_addl_u32,
- tcg_gen_add_i64,
- NULL,
- };
-
- return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
- accfn[a->size]);
-}
-
-static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
-{
- static NeonGenWidenFn * const widenfn[] = {
- gen_helper_neon_widen_u8,
- gen_helper_neon_widen_u16,
- tcg_gen_extu_i32_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const opfn[] = {
- gen_helper_neon_paddl_u16,
- gen_helper_neon_paddl_u32,
- tcg_gen_add_i64,
- NULL,
- };
- static NeonGenTwo64OpFn * const accfn[] = {
- gen_helper_neon_addl_u16,
- gen_helper_neon_addl_u32,
- tcg_gen_add_i64,
- NULL,
- };
-
- return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
- accfn[a->size]);
-}
-
typedef void ZipFn(TCGv_ptr, TCGv_ptr);
static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
@@ -2931,10 +2638,9 @@ static bool trans_VZIP(DisasContext *s, arg_2misc *a)
}
static bool do_vmovn(DisasContext *s, arg_2misc *a,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
- TCGv_i64 rm;
- TCGv_i32 rd0, rd1;
+ TCGv_i64 rm, rd0, rd1;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -2959,22 +2665,22 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
}
rm = tcg_temp_new_i64();
- rd0 = tcg_temp_new_i32();
- rd1 = tcg_temp_new_i32();
+ rd0 = tcg_temp_new_i64();
+ rd1 = tcg_temp_new_i64();
read_neon_element64(rm, a->vm, 0, MO_64);
narrowfn(rd0, tcg_env, rm);
read_neon_element64(rm, a->vm, 1, MO_64);
narrowfn(rd1, tcg_env, rm);
- write_neon_element32(rd0, a->vd, 0, MO_32);
- write_neon_element32(rd1, a->vd, 1, MO_32);
+ write_neon_element64(rd0, a->vd, 0, MO_32);
+ write_neon_element64(rd1, a->vd, 1, MO_32);
return true;
}
#define DO_VMOVN(INSN, FUNC) \
static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
{ \
- static NeonGenNarrowEnvFn * const narrowfn[] = { \
+ static NeonGenOne64OpEnvFn * const narrowfn[] = { \
FUNC##8, \
FUNC##16, \
FUNC##32, \
@@ -3216,6 +2922,13 @@ DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
+DO_2MISC_VEC(VCLS, gen_gvec_cls)
+DO_2MISC_VEC(VCLZ, gen_gvec_clz)
+DO_2MISC_VEC(VREV64, gen_gvec_rev64)
+DO_2MISC_VEC(VPADDL_S, gen_gvec_saddlp)
+DO_2MISC_VEC(VPADDL_U, gen_gvec_uaddlp)
+DO_2MISC_VEC(VPADAL_S, gen_gvec_sadalp)
+DO_2MISC_VEC(VPADAL_U, gen_gvec_uadalp)
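VPADDL and VPADAL now go through shared gvec expanders instead of the
per-pass widen/combine loop removed above. Their per-lane semantics for
the signed byte case, as a model (not patch code):

    /* One 16-bit destination lane from two adjacent 8-bit source lanes. */
    static int16_t saddlp_lane(int8_t lo, int8_t hi)
    {
        return (int16_t)lo + hi;            /* VPADDL.S8 */
    }

    static int16_t sadalp_lane(int16_t acc, int8_t lo, int8_t hi)
    {
        return acc + (int16_t)lo + hi;      /* VPADAL.S8 accumulates */
    }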
static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
@@ -3225,6 +2938,30 @@ static bool trans_VMVN(DisasContext *s, arg_2misc *a)
return do_2misc_vec(s, a, tcg_gen_gvec_not);
}
+static bool trans_VCNT(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_2misc_vec(s, a, gen_gvec_cnt);
+}
+
+static bool trans_VREV16(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_2misc_vec(s, a, gen_gvec_rev16);
+}
+
+static bool trans_VREV32(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0 && a->size != 1) {
+ return false;
+ }
+ return do_2misc_vec(s, a, gen_gvec_rev32);
+}
+
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
uint32_t rm_ofs, uint32_t oprsz, \
@@ -3304,68 +3041,6 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
return true;
}
-static bool trans_VREV32(DisasContext *s, arg_2misc *a)
-{
- static NeonGenOneOpFn * const fn[] = {
- tcg_gen_bswap32_i32,
- gen_swap_half,
- NULL,
- NULL,
- };
- return do_2misc(s, a, fn[a->size]);
-}
-
-static bool trans_VREV16(DisasContext *s, arg_2misc *a)
-{
- if (a->size != 0) {
- return false;
- }
- return do_2misc(s, a, gen_rev16);
-}
-
-static bool trans_VCLS(DisasContext *s, arg_2misc *a)
-{
- static NeonGenOneOpFn * const fn[] = {
- gen_helper_neon_cls_s8,
- gen_helper_neon_cls_s16,
- gen_helper_neon_cls_s32,
- NULL,
- };
- return do_2misc(s, a, fn[a->size]);
-}
-
-static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
-{
- tcg_gen_clzi_i32(rd, rm, 32);
-}
-
-static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
-{
- static NeonGenOneOpFn * const fn[] = {
- gen_helper_neon_clz_u8,
- gen_helper_neon_clz_u16,
- do_VCLZ_32,
- NULL,
- };
- return do_2misc(s, a, fn[a->size]);
-}
-
-static bool trans_VCNT(DisasContext *s, arg_2misc *a)
-{
- if (a->size != 0) {
- return false;
- }
- return do_2misc(s, a, gen_helper_neon_cnt_u8);
-}
-
-static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- uint32_t oprsz, uint32_t maxsz)
-{
- tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
- vece == MO_16 ? 0x7fff : 0x7fffffff,
- oprsz, maxsz);
-}
-
static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
if (a->size == MO_16) {
@@ -3375,15 +3050,7 @@ static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
} else if (a->size != MO_32) {
return false;
}
- return do_2misc_vec(s, a, gen_VABS_F);
-}
-
-static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- uint32_t oprsz, uint32_t maxsz)
-{
- tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
- vece == MO_16 ? 0x8000 : 0x80000000,
- oprsz, maxsz);
+ return do_2misc_vec(s, a, gen_gvec_fabs);
}
static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
@@ -3395,7 +3062,7 @@ static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
} else if (a->size != MO_32) {
return false;
}
- return do_2misc_vec(s, a, gen_VNEG_F);
+ return do_2misc_vec(s, a, gen_gvec_fneg);
}
static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
@@ -3403,7 +3070,7 @@ static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
if (a->size != 2) {
return false;
}
- return do_2misc(s, a, gen_helper_recpe_u32);
+ return do_2misc_vec(s, a, gen_gvec_urecpe);
}
static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
@@ -3411,7 +3078,7 @@ static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
if (a->size != 2) {
return false;
}
- return do_2misc(s, a, gen_helper_rsqrte_u32);
+ return do_2misc_vec(s, a, gen_gvec_ursqrte);
}
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
@@ -3699,8 +3366,8 @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
if (!dc_isar_feature(aa32_bf16, s)) {
return false;
}
- return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_bfmmla);
+ return do_neon_ddda_env(s, 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfmmla);
}
static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 46c7fce..fcbb350 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -49,7 +49,15 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
/* Prepare a power-of-two modulo via extraction of @len bits. */
len = ctz32(streaming_vec_reg_size(s)) - esz;
- if (vertical) {
+ if (!len) {
+ /*
+ * SVL is 128 and the element size is 128. There is exactly
+ * one 128x128 tile in the ZA storage, and so we calculate
+ * (Rs + imm) MOD 1, which is always 0. We need to special case
+ * this because TCG doesn't allow deposit ops with len 0.
+ */
+ tcg_gen_movi_i32(tmp, 0);
+ } else if (vertical) {
/*
* Compute the byte offset of the index within the tile:
* (index % (svl / size)) * size
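The new branch guards the power-of-two modulo that this code builds from
a zero-backed deposit. The pattern, as a sketch (not patch code):

    static void gen_mod_pow2(TCGv_i32 tmp, TCGv_i32 x, int len)
    {
        /* tmp = x % (1 << len): keep the low len bits, zero the rest.
         * TCG asserts len > 0 for deposit ops, hence the special case. */
        tcg_gen_deposit_z_i32(tmp, x, 0, len);
    }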
@@ -304,6 +312,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
}
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+ ARMFPStatusFlavour e_fpst,
gen_helper_gvec_5_ptr *fn)
{
int svl = streaming_vec_reg_size(s);
@@ -319,18 +328,41 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(e_fpst);
fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
return true;
}
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
+static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
+ gen_helper_gvec_5_ptr *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, a->sub);
+ TCGv_ptr za, zn, zm, pn, pm;
+
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ za = get_tile(s, esz, a->zad);
+ zn = vec_full_reg_ptr(s, a->zn);
+ zm = vec_full_reg_ptr(s, a->zm);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+
+ fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
+ MO_32, gen_helper_sme_fmopa_h)
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
+ MO_32, FPST_A64, gen_helper_sme_fmopa_s)
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
+ MO_64, FPST_A64, gen_helper_sme_fmopa_d)
-/* TODO: FEAT_EBF16 */
-TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 798ab2b..f3cf028 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -50,13 +50,27 @@ static int tszimm_esz(DisasContext *s, int x)
static int tszimm_shr(DisasContext *s, int x)
{
- return (16 << tszimm_esz(s, x)) - x;
+ /*
+ * We won't use the tszimm_shr() value if tszimm_esz() returns -1 (the
+ * trans function will check for esz < 0), so we can return any
+ * value we like from here in that case as long as we avoid UB.
+ */
+ int esz = tszimm_esz(s, x);
+ if (esz < 0) {
+ return esz;
+ }
+ return (16 << esz) - x;
}
/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
- return x - (8 << tszimm_esz(s, x));
+ /* As with tszimm_shr(), value will be unused if esz < 0 */
+ int esz = tszimm_esz(s, x);
+ if (esz < 0) {
+ return esz;
+ }
+ return x - (8 << esz);
}
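A worked example of the guarded arithmetic (values only, not patch code):
for esz == 0 (bytes) the tsz:imm field x ranges over 8..15, so a
right-shift immediate decodes as 16 - x and a left-shift as x - 8. With
the reserved encoding x == 0, tszimm_esz() goes negative and, without the
early return, 16 << esz would be undefined behaviour.

    #include <assert.h>

    static int shr_amount(int esz, int x) { return (16 << esz) - x; }
    static int shl_amount(int esz, int x) { return x - (8 << esz); }

    int main(void)
    {
        /* esz == 0, x == 9: shift right by 7, or shift left by 1 */
        assert(shr_amount(0, 9) == 7);
        assert(shl_amount(0, 9) == 1);
        return 0;
    }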
/* The SH bit is in bit 8. Extract the low 8 and shift. */
@@ -123,11 +137,11 @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
return true;
}
-static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
- arg_rr_esz *a, int data)
+static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
+ arg_rr_esz *a, int data)
{
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ select_ah_fpst(s, a->esz));
}
/* Invoke an out-of-line helper on 3 Zregs. */
@@ -177,7 +191,14 @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
arg_rrr_esz *a, int data)
{
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
+static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+ arg_rrr_esz *a, int data)
+{
+ return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
+ select_ah_fpst(s, a->esz));
}
/* Invoke an out-of-line helper on 4 Zregs. */
@@ -238,6 +259,25 @@ static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
return ret;
}
+static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int ra,
+ int data)
+{
+ return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
+}
+
+static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ arg_rrrr_esz *a, int data)
+{
+ return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+}
+
+static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ arg_rrxr_esz *a)
+{
+ return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
+}
+
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
int rd, int rn, int rm, int ra, int pg,
@@ -364,7 +404,7 @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
arg_rprr_esz *a)
{
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
/* Invoke a vector expander on two Zregs and an immediate. */
@@ -563,14 +603,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
- if (TCG_TARGET_HAS_bitsel_vec) {
- tcg_gen_not_vec(vece, n, n);
- tcg_gen_bitsel_vec(vece, d, k, n, m);
- } else {
- tcg_gen_andc_vec(vece, n, k, n);
- tcg_gen_andc_vec(vece, m, m, k);
- tcg_gen_or_vec(vece, d, n, m);
- }
+ tcg_gen_not_vec(vece, n, n);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
}
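Aside: the dropped TCG_TARGET_HAS_bitsel_vec branch was redundant because tcg_gen_bitsel_vec() already expands itself on hosts without a native vector bitsel. As a worked identity:

/* tcg_gen_bitsel_vec(vece, d, a, b, c) computes d = (b & a) | (c & ~a);
 * with a = k, b = ~n, c = m that is (~n & k) | (m & ~k), i.e. BSL1N,
 * which is exactly what the removed andc/andc/or sequence produced. */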
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
@@ -595,7 +629,7 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
* = (n & k) | ~(m | k)
*/
tcg_gen_and_i64(n, n, k);
- if (TCG_TARGET_HAS_orc_i64) {
+ if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) {
tcg_gen_or_i64(m, m, k);
tcg_gen_orc_i64(d, n, m);
} else {
@@ -607,14 +641,8 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
- if (TCG_TARGET_HAS_bitsel_vec) {
- tcg_gen_not_vec(vece, m, m);
- tcg_gen_bitsel_vec(vece, d, k, n, m);
- } else {
- tcg_gen_and_vec(vece, n, n, k);
- tcg_gen_or_vec(vece, m, m, k);
- tcg_gen_orc_vec(vece, d, n, m);
- }
+ tcg_gen_not_vec(vece, m, m);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
}
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
@@ -755,13 +783,23 @@ static gen_helper_gvec_3 * const fabs_fns[4] = {
NULL, gen_helper_sve_fabs_h,
gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
-TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
+static gen_helper_gvec_3 * const fabs_ah_fns[4] = {
+ NULL, gen_helper_sve_ah_fabs_h,
+ gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d,
+};
+TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz,
+ s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0)
static gen_helper_gvec_3 * const fneg_fns[4] = {
NULL, gen_helper_sve_fneg_h,
gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
-TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
+static gen_helper_gvec_3 * const fneg_ah_fns[4] = {
+ NULL, gen_helper_sve_ah_fneg_h,
+ gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d,
+};
+TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz,
+ s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0)
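Aside: the _ah_ tables exist because with FPCR.AH == 1 these operations leave a NaN input untouched instead of flipping its sign bit. A per-element sketch of the presumed helper behaviour (demo_ name illustrative, softfloat helpers as used elsewhere in the tree):

static inline float32 demo_ah_fneg_s(float32 v)
{
    return float32_is_any_nan(v) ? v : float32_chs(v);
}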
static gen_helper_gvec_3 * const sxtb_fns[4] = {
NULL, gen_helper_sve_sxtb_h,
@@ -1200,14 +1238,14 @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
- fexpa_fns[a->esz], a->rd, a->rn, 0)
+ fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah)
static gen_helper_gvec_3 * const ftssel_fns[4] = {
NULL, gen_helper_sve_ftssel_h,
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
- ftssel_fns[a->esz], a, 0)
+ ftssel_fns[a->esz], a, s->fpcr_ah)
/*
*** SVE Predicate Logical Operations Group
@@ -3486,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
*** SVE Floating Point Multiply-Add Indexed Group
*/
-static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
-{
- static gen_helper_gvec_4_ptr * const fns[4] = {
- NULL,
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
- };
- return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
- (a->index << 1) | sub,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-}
+static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
+ NULL, gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
+};
+TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
-TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
+static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
+ { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
+ { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
+};
+TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmls_idx_fns[a->esz][s->fpcr_ah],
+ a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Multiply Indexed Group
@@ -3512,7 +3553,7 @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Fast Reduction Group
@@ -3545,7 +3586,7 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
fn(temp, t_zn, t_pg, status, t_desc);
@@ -3560,11 +3601,23 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
}; \
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
+#define DO_VPZ_AH(NAME, name) \
+ static gen_helper_fp_reduce * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ static gen_helper_fp_reduce * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_ah_##name##_h, \
+ gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
+
DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
-DO_VPZ(FMINV, fminv)
-DO_VPZ(FMAXV, fmaxv)
+DO_VPZ_AH(FMINV, fminv)
+DO_VPZ_AH(FMAXV, fmaxv)
#undef DO_VPZ
@@ -3576,13 +3629,25 @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
NULL, gen_helper_gvec_frecpe_h,
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
-TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
+static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = {
+ NULL, gen_helper_gvec_frecpe_h,
+ gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d,
+};
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
+ frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0)
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
NULL, gen_helper_gvec_frsqrte_h,
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
-TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
+static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = {
+ NULL, gen_helper_gvec_frsqrte_h,
+ gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d,
+};
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
+ s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
+ frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0)
/*
*** SVE Floating Point Compare with Zero Group
@@ -3597,7 +3662,7 @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr status =
- fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
@@ -3632,8 +3697,9 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm,
+ a->imm | (s->fpcr_ah << 3),
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
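Aside: FTMAD's immediate is only three bits, so bit 3 of the simd data word is free to carry FPCR.AH to the helper; the helper-side unpacking is presumably along these lines:

int imm = simd_data(desc) & 7;              /* original FTMAD imm3 */
bool fpcr_ah = (simd_data(desc) >> 3) & 1;  /* AH flag in bit 3 */

The FCADD and FCMLA hunks below play the same trick with the bit just above their rot field.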
/*
*** SVE Floating Point Accumulating Reduction Group
@@ -3666,7 +3732,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
t_pg = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
@@ -3686,11 +3752,23 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
}; \
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
+#define DO_FP3_AH(NAME, name) \
+ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
+ NULL, gen_helper_gvec_##name##_h, \
+ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
+ }; \
+ static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_gvec_ah_##name##_h, \
+ gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0)
+
DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
-DO_FP3(FRECPS, recps)
-DO_FP3(FRSQRTS, rsqrts)
+DO_FP3_AH(FRECPS, recps)
+DO_FP3_AH(FRSQRTS, rsqrts)
#undef DO_FP3
@@ -3712,14 +3790,27 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
}; \
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
+#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
+ NULL, gen_helper_##name##_h, \
+ gen_helper_##name##_s, gen_helper_##name##_d \
+ }; \
+ static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
+ NULL, gen_helper_##ah_name##_h, \
+ gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
+ s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
+ name##_zpzz_fns[a->esz], a)
+
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
-DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
-DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
+DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
+DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
-DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
+DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
@@ -3741,7 +3832,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
- status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
+ status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fn(t_zd, t_zn, t_pg, scalar, status, desc);
}
@@ -3774,14 +3865,35 @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
name##_const[a->esz][a->imm], name##_fns[a->esz])
+#define DO_FP_AH_IMM(NAME, name, const0, const1) \
+ static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, \
+ gen_helper_sve_##name##_d \
+ }; \
+ static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_ah_##name##_h, \
+ gen_helper_sve_ah_##name##_s, \
+ gen_helper_sve_ah_##name##_d \
+ }; \
+ static uint64_t const name##_const[4][2] = { \
+ { -1, -1 }, \
+ { float16_##const0, float16_##const1 }, \
+ { float32_##const0, float32_##const1 }, \
+ { float64_##const0, float64_##const1 }, \
+ }; \
+ TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
+ name##_const[a->esz][a->imm], \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
+
DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
-DO_FP_IMM(FMAX, fmaxs, zero, one)
-DO_FP_IMM(FMIN, fmins, zero, one)
+DO_FP_AH_IMM(FMAX, fmaxs, zero, one)
+DO_FP_AH_IMM(FMIN, fmins, zero, one)
#undef DO_FP_IMM
@@ -3793,7 +3905,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
}
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
vec_full_reg_offset(s, a->rm),
@@ -3825,22 +3937,28 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
- a->rd, a->rn, a->rm, a->pg, a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-#define DO_FMLA(NAME, name) \
+#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
NULL, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
#undef DO_FMLA
@@ -3849,67 +3967,68 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
- a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Unary Operations Predicated Group
*/
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
+ gen_helper_sve_bfcvt, a, 0,
+ s->fpcr_ah ? FPST_AH : FPST_A64)
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
+ gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64)
static gen_helper_gvec_3_ptr * const frint_fns[] = {
NULL,
@@ -3918,7 +4037,7 @@ static gen_helper_gvec_3_ptr * const frint_fns[] = {
gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static gen_helper_gvec_3_ptr * const frintx_fns[] = {
NULL,
@@ -3927,7 +4046,7 @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = {
gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
@@ -3944,7 +4063,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
}
vsz = vec_full_reg_size(s);
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tmode = gen_set_rmode(mode, status);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
@@ -3972,48 +4091,48 @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a, 0, select_ah_fpst(s, a->esz))
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
NULL, gen_helper_sve_fsqrt_h,
gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
+ gen_helper_sve_scvt_ss, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
+ gen_helper_sve_scvt_ds, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
+ gen_helper_sve_scvt_sd, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
+ gen_helper_sve_scvt_dd, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
+ gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
+ gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
+ gen_helper_sve_ucvt_ds, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
+ gen_helper_sve_ucvt_sd, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
+ gen_helper_sve_ucvt_dd, a, 0, FPST_A64)
/*
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
@@ -6048,9 +6167,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
@@ -6105,18 +6224,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
}
} else {
if (shl == 0) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(0, halfbits)));
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
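Aside: this hunk and the narrowing hunks below are one mechanical transformation, contrasted here as a sketch. tcg_constant_vec_matching() hands back a cached read-only constant, so no temporary is allocated and nothing may ever write to it:

/* before: mutable temp plus a dupi to materialise the constant */
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_and_vec(vece, d, n, t);

/* after: the constant is an operand in its own right */
tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));

Note how the variants that used the temp twice (sqxtnt and friends) now bind the constant once (maxv) so it can serve both as the clamp bound and as the bitsel mask.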
@@ -6284,18 +6403,14 @@ static const TCGOpcode sqxtn_list[] = {
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, d, d, t);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}
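Aside: worked numbers for the 16-to-8 narrow (vece = MO_16, so halfbits = 8):

/* mask = 0xff, min = -0x80, max = 0x7f
 * lane 0x0123 (291)  -> smin clamps to 0x7f -> & mask -> 0x7f
 * lane 0xff80 (-128) -> within range        -> & mask -> 0x80
 * so each lane ends up with the saturated int8 value in its low byte. */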
static const GVecGen2 sqxtnb_ops[3] = {
@@ -6316,19 +6431,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const GVecGen2 sqxtnt_ops[3] = {
@@ -6356,12 +6467,10 @@ static const TCGOpcode uqxtn_list[] = {
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 uqxtnb_ops[3] = {
@@ -6382,14 +6491,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 uqxtnt_ops[3] = {
@@ -6417,14 +6525,11 @@ static const TCGOpcode sqxtun_list[] = {
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 sqxtunb_ops[3] = {
@@ -6445,16 +6550,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 sqxtunt_ops[3] = {
@@ -6518,13 +6621,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
@@ -6576,13 +6677,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
@@ -6625,14 +6724,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode sqshrunb_vec_list[] = {
@@ -6657,16 +6754,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode sqshrunt_vec_list[] = {
@@ -6709,18 +6805,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode sqshrnb_vec_list[] = {
@@ -6745,19 +6838,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode sqshrnt_vec_list[] = {
@@ -6800,12 +6890,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode uqshrnb_vec_list[] = {
@@ -6830,14 +6919,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode uqshrnt_vec_list[] = {
@@ -6925,10 +7014,10 @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_FPCR)
+ 0, FPST_A64)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_FPCR)
+ 0, FPST_A64)
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
@@ -7044,17 +7133,18 @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
gen_gvec_rax1, a)
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
+ gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
+ gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
+ gen_helper_sve_bfcvtnt, a, 0,
+ s->fpcr_ah ? FPST_AH : FPST_A64)
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
+ gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
+ gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
@@ -7066,7 +7156,7 @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = {
gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
@@ -7099,18 +7189,19 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
gen_helper_gvec_ummla_b, a, 0)
-TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
gen_helper_gvec_bfdot, a, 0)
-TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
+TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
gen_helper_gvec_bfdot_idx, a)
-TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
gen_helper_gvec_bfmmla, a, 0)
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
- a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
+ a->rd, a->rn, a->rm, a->ra, sel,
+ s->fpcr_ah ? FPST_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
@@ -7120,7 +7211,8 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
a->rd, a->rn, a->rm, a->ra,
- (a->index << 1) | sel, FPST_FPCR);
+ (a->index << 1) | sel,
+ s->fpcr_ah ? FPST_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index 39ec971..8d9d1ab 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -460,9 +460,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
}
if (sz == 1) {
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
} else {
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
}
tcg_rmode = gen_set_rmode(rounding, fpst);
@@ -527,9 +527,9 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
}
if (sz == 1) {
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
} else {
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
}
tcg_shift = tcg_constant_i32(0);
@@ -833,8 +833,8 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
break;
case ARM_VFP_FPSCR:
if (a->rt == 15) {
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ tmp = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
} else {
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
@@ -1398,7 +1398,7 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
f0 = tcg_temp_new_i32();
f1 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
vfp_load_reg32(f0, vn);
vfp_load_reg32(f1, vm);
@@ -1433,7 +1433,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
/*
* Do a half-precision operation. Functionally this is
* the same as do_vfp_3op_sp(), except:
- * - it uses the FPST_FPCR_F16
+ * - it uses the FPST_A32_F16
* - it doesn't need the VFP vector handling (fp16 is a
* v8 feature, and in v8 VFP vectors don't exist)
* - it does the aa32_fp16_arith feature test
@@ -1456,7 +1456,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
f0 = tcg_temp_new_i32();
f1 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
vfp_load_reg16(f0, vn);
vfp_load_reg16(f1, vm);
@@ -1517,7 +1517,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
f0 = tcg_temp_new_i64();
f1 = tcg_temp_new_i64();
fd = tcg_temp_new_i64();
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
vfp_load_reg64(f0, vn);
vfp_load_reg64(f1, vm);
@@ -2122,7 +2122,7 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negh(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
vfp_store_reg32(vd, a->vd);
return true;
@@ -2181,7 +2181,7 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negs(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
vfp_store_reg32(vd, a->vd);
return true;
@@ -2190,8 +2190,8 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
/*
- * VFNMA : fd = muladd(-fd, fn, fm)
- * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFNMA : fd = muladd(-fd, -fn, fm)
+ * VFNMS : fd = muladd(-fd, fn, fm)
* VFMA : fd = muladd( fd, fn, fm)
* VFMS : fd = muladd( fd, -fn, fm)
*
@@ -2246,7 +2246,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negd(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
vfp_store_reg64(vd, a->vd);
return true;
@@ -2262,8 +2262,8 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
#define MAKE_VFM_TRANS_FNS(PREC) \
MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
- MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
- MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
+ MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, false, true) \
+ MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, true, true)
MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
@@ -2424,17 +2424,17 @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
- gen_helper_vfp_sqrth(vd, vm, tcg_env);
+ gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16));
}
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
- gen_helper_vfp_sqrts(vd, vm, tcg_env);
+ gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32));
}
static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
- gen_helper_vfp_sqrtd(vd, vm, tcg_env);
+ gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32));
}
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
@@ -2565,7 +2565,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
/* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2599,7 +2599,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
/* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2623,7 +2623,7 @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
@@ -2646,7 +2646,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
@@ -2680,7 +2680,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
@@ -2706,7 +2706,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_rinth(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2727,7 +2727,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rints(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2757,7 +2757,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rintd(tmp, tmp, fpst);
vfp_store_reg64(tmp, a->vd);
return true;
@@ -2779,7 +2779,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rinth(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2803,7 +2803,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rints(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2836,7 +2836,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rintd(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2859,7 +2859,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2880,7 +2880,7 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rints_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2910,7 +2910,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rintd_exact(tmp, tmp, fpst);
vfp_store_reg64(tmp, a->vd);
return true;
@@ -2937,7 +2937,7 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
vfp_load_reg32(vm, a->vm);
- gen_helper_vfp_fcvtds(vd, vm, tcg_env);
+ gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32));
vfp_store_reg64(vd, a->vd);
return true;
}
@@ -2963,7 +2963,7 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
vfp_load_reg64(vm, a->vm);
- gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
+ gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32));
vfp_store_reg32(vd, a->vd);
return true;
}
@@ -2983,7 +2983,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
if (a->s) {
/* i32 -> f16 */
gen_helper_vfp_sitoh(vm, vm, fpst);
@@ -3010,7 +3010,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
if (a->s) {
/* i32 -> f32 */
gen_helper_vfp_sitos(vm, vm, fpst);
@@ -3044,7 +3044,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
if (a->s) {
/* i32 -> f64 */
gen_helper_vfp_sitod(vd, vm, fpst);
@@ -3105,7 +3105,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
vd = tcg_temp_new_i32();
vfp_load_reg32(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3161,7 +3161,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
vd = tcg_temp_new_i32();
vfp_load_reg32(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3223,7 +3223,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
vd = tcg_temp_new_i64();
vfp_load_reg64(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3273,7 +3273,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_F16);
+ fpst = fpstatus_ptr(FPST_A32_F16);
vm = tcg_temp_new_i32();
vfp_load_reg16(vm, a->vm);
@@ -3307,7 +3307,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
@@ -3347,7 +3347,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ fpst = fpstatus_ptr(FPST_A32);
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
vfp_load_reg64(vm, a->vm);
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index c5bc691..f7d6d8c 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -27,6 +27,7 @@
#include "semihosting/semihost.h"
#include "cpregs.h"
#include "exec/helper-proto.h"
+#include "exec/target_page.h"
#define HELPER_H "helper.h"
#include "exec/helper-info.c.inc"
@@ -228,6 +229,9 @@ static inline int get_a32_user_mem_index(DisasContext *s)
*/
switch (s->mmu_idx) {
case ARMMMUIdx_E3:
+ case ARMMMUIdx_E30_0:
+ case ARMMMUIdx_E30_3_PAN:
+ return arm_to_core_mmu_idx(ARMMMUIdx_E30_0);
case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
case ARMMMUIdx_E10_0:
case ARMMMUIdx_E10_1:
@@ -368,7 +372,7 @@ static void gen_rebuild_hflags(DisasContext *s, bool new_el)
}
}
-static void gen_exception_internal(int excp)
+void gen_exception_internal(int excp)
{
assert(excp_is_internal(excp));
gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
@@ -490,20 +494,9 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
TCGv_i32 tmp = tcg_temp_new_i32();
- if (TCG_TARGET_HAS_add2_i32) {
- tcg_gen_movi_i32(tmp, 0);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
- } else {
- TCGv_i64 q0 = tcg_temp_new_i64();
- TCGv_i64 q1 = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(q0, t0);
- tcg_gen_extu_i32_i64(q1, t1);
- tcg_gen_add_i64(q0, q0, q1);
- tcg_gen_extu_i32_i64(q1, cpu_CF);
- tcg_gen_add_i64(q0, q0, q1);
- tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
- }
+
+ tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0, t1, cpu_CF);
+
tcg_gen_mov_i32(cpu_ZF, cpu_NF);
tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
tcg_gen_xor_i32(tmp, t0, t1);
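Aside: judging by the code it replaces, tcg_gen_addcio_i32(ret, cout, a, b, cin) computes ret = a + b + cin with the carry out of bit 31 landing in cout. The same arithmetic in plain C (demo_ name illustrative):

static void demo_addcio32(uint32_t a, uint32_t b, uint32_t cin,
                          uint32_t *sum, uint32_t *cout)
{
    uint64_t t = (uint64_t)a + b + cin;   /* cin is 0 or 1 */
    *sum = (uint32_t)t;
    *cout = (uint32_t)(t >> 32);          /* carry out of bit 31 */
}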
@@ -3507,7 +3500,7 @@ static int t32_expandimm_rot(DisasContext *s, int x)
/* Return the unrotated immediate from T32ExpandImm. */
static int t32_expandimm_imm(DisasContext *s, int x)
{
- int imm = extract32(x, 0, 8);
+ uint32_t imm = extract32(x, 0, 8);
switch (extract32(x, 8, 4)) {
case 0: /* XY */
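Aside: the uint32_t change matters for the replicate and rotate cases later in this function (outside the hunk), which form values with bit 31 set; presumably e.g. the XYXYXYXY pattern multiplies by 0x01010101, and with a plain int that overflows, which is undefined behaviour, while unsigned arithmetic wraps:

uint32_t imm = 0x80;
uint32_t rep = imm * 0x01010101u;   /* 0x80808080: well defined unsigned,
                                       signed overflow as plain int */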
@@ -4938,7 +4931,7 @@ static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
}
static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
- TCGv_i32 addr, int address_offset)
+ TCGv_i32 addr)
{
if (!a->p) {
TCGv_i32 ofs = load_reg(s, a->rm);
@@ -4951,7 +4944,6 @@ static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
} else if (!a->w) {
return;
}
- tcg_gen_addi_i32(addr, addr, address_offset);
store_reg(s, a->rn, addr);
}
@@ -4971,7 +4963,7 @@ static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
* Perform base writeback before the loaded value to
* ensure correct behavior with overlapping index registers.
*/
- op_addr_rr_post(s, a, addr, 0);
+ op_addr_rr_post(s, a, addr);
store_reg_from_load(s, a->rt, tmp);
return true;
}
@@ -4996,14 +4988,53 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
disas_set_da_iss(s, mop, issinfo);
- op_addr_rr_post(s, a, addr, 0);
+ op_addr_rr_post(s, a, addr);
return true;
}
-static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
+static void do_ldrd_load(DisasContext *s, TCGv_i32 addr, int rt, int rt2)
{
+ /*
+ * LDRD is required to be an atomic 64-bit access if the
+ * address is 8-aligned, two atomic 32-bit accesses if
+ * it's only 4-aligned, and to give an alignment fault
+ * if it's not 4-aligned. This is MO_ALIGN_4 | MO_ATOM_SUBALIGN.
+ * Rt is always the word from the lower address, and Rt2 the
+ * data from the higher address, regardless of endianness.
+ * So (like gen_load_exclusive) we avoid gen_aa32_ld_i64()
+ * so we don't get its SCTLR_B check, and instead do a 64-bit access
+ * using MO_BE if appropriate and then split the two halves.
+ *
+ * For M-profile, and for A-profile before LPAE, the 64-bit
+ * atomicity is not required. We could model that using
+ * the looser MO_ATOM_IFALIGN_PAIR, but providing a higher
+ * level of atomicity than required is harmless (we would not
+ * currently generate better code for IFALIGN_PAIR here).
+ *
+ * This also gives us the correct behaviour of not updating
+ * rt if the load of rt2 faults; this is required for cases
+ * like "ldrd r2, r3, [r2]" where rt is also the base register.
+ */
int mem_idx = get_mem_index(s);
- TCGv_i32 addr, tmp;
+ MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data;
+ TCGv taddr = gen_aa32_addr(s, addr, opc);
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ TCGv_i32 tmp2 = tcg_temp_new_i32();
+
+ tcg_gen_qemu_ld_i64(t64, taddr, mem_idx, opc);
+ if (s->be_data == MO_BE) {
+ tcg_gen_extr_i64_i32(tmp2, tmp, t64);
+ } else {
+ tcg_gen_extr_i64_i32(tmp, tmp2, t64);
+ }
+ store_reg(s, rt, tmp);
+ store_reg(s, rt2, tmp2);
+}
+
+static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+ TCGv_i32 addr;
if (!ENABLE_ARCH_5TE) {
return false;
@@ -5014,25 +5045,49 @@ static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
}
addr = op_addr_rr_pre(s, a);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
- store_reg(s, a->rt, tmp);
-
- tcg_gen_addi_i32(addr, addr, 4);
-
- tmp = tcg_temp_new_i32();
- gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
- store_reg(s, a->rt + 1, tmp);
+ do_ldrd_load(s, addr, a->rt, a->rt + 1);
/* LDRD w/ base writeback is undefined if the registers overlap. */
- op_addr_rr_post(s, a, addr, -4);
+ op_addr_rr_post(s, a, addr);
return true;
}
-static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
+static void do_strd_store(DisasContext *s, TCGv_i32 addr, int rt, int rt2)
{
+ /*
+ * STRD is required to be an atomic 64-bit access if the
+ * address is 8-aligned, two atomic 32-bit accesses if
+ * it's only 4-aligned, and to give an alignment fault
+ * if it's not 4-aligned.
+ * Rt is always the word from the lower address, and Rt2 the
+ * data from the higher address, regardless of endianness.
+ * So (like gen_store_exclusive) we avoid gen_aa32_st_i64()
+ * so we don't get its SCTLR_B check, and instead do a 64-bit access
+ * using MO_BE if appropriate, using a value constructed
+ * by putting the two halves together in the right order.
+ *
+ * As with LDRD, the 64-bit atomicity is not required for
+ * M-profile, or for A-profile before LPAE, and we provide
+ * the higher guarantee always for simplicity.
+ */
int mem_idx = get_mem_index(s);
- TCGv_i32 addr, tmp;
+ MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data;
+ TCGv taddr = gen_aa32_addr(s, addr, opc);
+ TCGv_i32 t1 = load_reg(s, rt);
+ TCGv_i32 t2 = load_reg(s, rt2);
+ TCGv_i64 t64 = tcg_temp_new_i64();
+
+ if (s->be_data == MO_BE) {
+ tcg_gen_concat_i32_i64(t64, t2, t1);
+ } else {
+ tcg_gen_concat_i32_i64(t64, t1, t2);
+ }
+ tcg_gen_qemu_st_i64(t64, taddr, mem_idx, opc);
+}
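Aside: a concrete check of the concat order, taking rt = 0x11111111 and rt2 = 0x22222222 (tcg_gen_concat_i32_i64 places its second operand in the low half):

/* LE path: concat(t64, t1, t2) -> t64 = 0x2222222211111111; the LE
 *          64-bit store puts 0x11... at the lower address (Rt low).
 * BE path: concat(t64, t2, t1) -> t64 = 0x1111111122222222; the BE
 *          64-bit store again puts 0x11... at the lower address.
 * Either way Rt is the word at the lower address, as required. */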
+
+static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+ TCGv_i32 addr;
if (!ENABLE_ARCH_5TE) {
return false;
@@ -5043,15 +5098,9 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
}
addr = op_addr_rr_pre(s, a);
- tmp = load_reg(s, a->rt);
- gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ do_strd_store(s, addr, a->rt, a->rt + 1);
- tcg_gen_addi_i32(addr, addr, 4);
-
- tmp = load_reg(s, a->rt + 1);
- gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
- op_addr_rr_post(s, a, addr, -4);
+ op_addr_rr_post(s, a, addr);
return true;
}
@@ -5087,13 +5136,14 @@ static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
}
static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
- TCGv_i32 addr, int address_offset)
+ TCGv_i32 addr)
{
+ int address_offset = 0;
if (!a->p) {
if (a->u) {
- address_offset += a->imm;
+ address_offset = a->imm;
} else {
- address_offset -= a->imm;
+ address_offset = -a->imm;
}
} else if (!a->w) {
return;
@@ -5118,7 +5168,7 @@ static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
* Perform base writeback before the loaded value to
* ensure correct behavior with overlapping index registers.
*/
- op_addr_ri_post(s, a, addr, 0);
+ op_addr_ri_post(s, a, addr);
store_reg_from_load(s, a->rt, tmp);
return true;
}
@@ -5143,29 +5193,20 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
disas_set_da_iss(s, mop, issinfo);
- op_addr_ri_post(s, a, addr, 0);
+ op_addr_ri_post(s, a, addr);
return true;
}
static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
{
- int mem_idx = get_mem_index(s);
- TCGv_i32 addr, tmp;
+ TCGv_i32 addr;
addr = op_addr_ri_pre(s, a);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
- store_reg(s, a->rt, tmp);
-
- tcg_gen_addi_i32(addr, addr, 4);
-
- tmp = tcg_temp_new_i32();
- gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
- store_reg(s, rt2, tmp);
+ do_ldrd_load(s, addr, a->rt, rt2);
/* LDRD w/ base writeback is undefined if the registers overlap. */
- op_addr_ri_post(s, a, addr, -4);
+ op_addr_ri_post(s, a, addr);
return true;
}
@@ -5188,20 +5229,13 @@ static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
{
- int mem_idx = get_mem_index(s);
- TCGv_i32 addr, tmp;
+ TCGv_i32 addr;
addr = op_addr_ri_pre(s, a);
- tmp = load_reg(s, a->rt);
- gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
- tcg_gen_addi_i32(addr, addr, 4);
-
- tmp = load_reg(s, rt2);
- gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ do_strd_store(s, addr, a->rt, rt2);
- op_addr_ri_post(s, a, addr, -4);
+ op_addr_ri_post(s, a, addr);
return true;
}
@@ -7726,7 +7760,8 @@ static bool arm_check_ss_active(DisasContext *dc)
static void arm_post_translate_insn(DisasContext *dc)
{
- if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
+ if (dc->condjmp &&
+ (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY)) {
if (dc->pc_save != dc->condlabel.pc_save) {
gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
}
@@ -7756,7 +7791,7 @@ static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
* be possible after an indirect branch, at the start of the TB.
*/
assert(dc->base.num_insns == 1);
- gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
+ gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc));
dc->base.is_jmp = DISAS_NORETURN;
dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
return;
@@ -8090,9 +8125,8 @@ static const TranslatorOps thumb_translator_ops = {
.tb_stop = arm_tr_tb_stop,
};
-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
const TranslatorOps *ops = &arm_translator_ops;
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index aba21f7..0004a97 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -4,8 +4,8 @@
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
-#include "exec/exec-all.h"
#include "exec/translator.h"
+#include "exec/translation-block.h"
#include "exec/helper-gen.h"
#include "internals.h"
#include "cpu-features.h"
@@ -91,15 +91,19 @@ typedef struct DisasContext {
bool aarch64;
bool thumb;
bool lse2;
- /* Because unallocated encodings generate different exception syndrome
+ /*
+ * Because unallocated encodings generate different exception syndrome
* information from traps due to FP being disabled, we can't do a single
* "is fp access disabled" check at a high level in the decode tree.
* To help in catching bugs where the access check was forgotten in some
* code path, we set this flag when the access check is done, and assert
* that it is set at the point where we actually touch the FP regs.
+ * 0: not checked,
+ * 1: checked, access ok
+ * -1: checked, access denied
*/
- bool fp_access_checked;
- bool sve_access_checked;
+ int8_t fp_access_checked;
+ int8_t sve_access_checked;
/* ARMv8 single-step state (this is distinct from the QEMU gdbstub
* single-step support).
*/
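
/*
 * Editorial sketch, not part of the patch: widening fp_access_checked /
 * sve_access_checked from bool to int8_t turns each into a tri-state, so
 * a forgotten check (0) can be told apart from a check that denied
 * access (-1). A hypothetical assertion in the style the comment above
 * describes:
 */
#include <assert.h>
#include <stdint.h>

static void assert_fp_regs_touchable(int8_t fp_access_checked)
{
    /* 0: a code path forgot the check; -1: checked and access denied. */
    assert(fp_access_checked == 1);
}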
@@ -154,6 +158,10 @@ typedef struct DisasContext {
bool nv2_mem_e20;
/* True if NV2 enabled and NV2 RAM accesses are big-endian */
bool nv2_mem_be;
+ /* True if FPCR.AH is 1 (alternate floating point handling) */
+ bool fpcr_ah;
+ /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */
+ bool fpcr_nep;
/*
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
* < 0, set by the current instruction.
@@ -163,8 +171,6 @@ typedef struct DisasContext {
uint8_t dcz_blocksize;
/* A copy of cpu->gm_blocksize. */
uint8_t gm_blocksize;
- /* True if this page is guarded. */
- bool guarded_page;
/* True if the current insn_start has been updated. */
bool insn_start_updated;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */
@@ -341,6 +347,7 @@ void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
void arm_gen_test_cc(int cc, TCGLabel *label);
MemOp pow2_align(unsigned i);
void unallocated_encoding(DisasContext *s);
+void gen_exception_internal(int excp);
void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
uint32_t syn, uint32_t target_el);
void gen_exception_insn(DisasContext *s, target_long pc_diff,
@@ -351,8 +358,7 @@ static inline TCGv_i32 get_ahp_flag(void)
{
TCGv_i32 ret = tcg_temp_new_i32();
- tcg_gen_ld_i32(ret, tcg_env,
- offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
+ tcg_gen_ld_i32(ret, tcg_env, offsetoflow32(CPUARMState, vfp.fpcr));
tcg_gen_extract_i32(ret, ret, 26, 1);
return ret;
@@ -472,6 +478,13 @@ void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
@@ -515,6 +528,11 @@ void gen_sqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b);
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
@@ -569,6 +587,41 @@ void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+
+/* These exclusively manipulate the sign bit. */
+void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz);
+void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz);
+
+void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
@@ -594,13 +647,13 @@ typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
-typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64);
+typedef void NeonGenOne64OpEnvFn(TCGv_i64, TCGv_env, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
@@ -621,54 +674,18 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
}
-/*
- * Enum for argument to fpstatus_ptr().
- */
-typedef enum ARMFPStatusFlavour {
- FPST_FPCR,
- FPST_FPCR_F16,
- FPST_STD,
- FPST_STD_F16,
-} ARMFPStatusFlavour;
-
/**
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
*
* We have multiple softfloat float_status fields in the Arm CPU state struct
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
* been set up to point to the requested field in the CPU state struct.
- * The options are:
- *
- * FPST_FPCR
- * for non-FP16 operations controlled by the FPCR
- * FPST_FPCR_F16
- * for operations controlled by the FPCR where FPCR.FZ16 is to be used
- * FPST_STD
- * for A32/T32 Neon operations using the "standard FPSCR value"
- * FPST_STD_F16
- * as FPST_STD, but where FPCR.FZ16 is to be used
*/
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
- int offset;
-
- switch (flavour) {
- case FPST_FPCR:
- offset = offsetof(CPUARMState, vfp.fp_status);
- break;
- case FPST_FPCR_F16:
- offset = offsetof(CPUARMState, vfp.fp_status_f16);
- break;
- case FPST_STD:
- offset = offsetof(CPUARMState, vfp.standard_fp_status);
- break;
- case FPST_STD_F16:
- offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
- break;
- default:
- g_assert_not_reached();
- }
+ int offset = offsetof(CPUARMState, vfp.fp_status[flavour]);
+
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
return statusptr;
}
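
/*
 * Editorial sketch, not part of the patch: the switch disappears because
 * the formerly separate vfp.fp_status_* fields are now one array indexed
 * by ARMFPStatusFlavour, so the field offset becomes arithmetic. Minimal
 * model (names are illustrative, not the real CPUARMState layout):
 */
#include <stddef.h>

typedef struct { unsigned dummy; } FloatStatus;
enum { FLAVOUR_A32, FLAVOUR_A64, FLAVOUR_STD, NUM_FLAVOURS };
typedef struct { FloatStatus fp_status[NUM_FLAVOURS]; } Env;

static size_t fpst_offset(int flavour)
{
    return offsetof(Env, fp_status) + flavour * sizeof(FloatStatus);
}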
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index b05922b..986eaf8 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -317,10 +317,12 @@ void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm,
intptr_t i, j, opr_sz = simd_oprsz(desc);
int idx = simd_data(desc);
int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ intptr_t elements = opr_sz / 2;
+ intptr_t eltspersegment = MIN(16 / 2, elements);
- for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ for (i = 0; i < elements; i += 16 / 2) {
int16_t mm = m[i];
- for (j = 0; j < 16 / 2; ++j) {
+ for (j = 0; j < eltspersegment; ++j) {
d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq);
}
}
@@ -333,16 +335,54 @@ void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm,
intptr_t i, j, opr_sz = simd_oprsz(desc);
int idx = simd_data(desc);
int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ intptr_t elements = opr_sz / 2;
+ intptr_t eltspersegment = MIN(16 / 2, elements);
- for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ for (i = 0; i < elements; i += 16 / 2) {
int16_t mm = m[i];
- for (j = 0; j < 16 / 2; ++j) {
+ for (j = 0; j < eltspersegment; ++j) {
d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+void HELPER(neon_sqrdmlah_idx_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ intptr_t elements = opr_sz / 2;
+ intptr_t eltspersegment = MIN(16 / 2, elements);
+
+ for (i = 0; i < elements; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < eltspersegment; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], false, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmlsh_idx_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ intptr_t elements = opr_sz / 2;
+ intptr_t eltspersegment = MIN(16 / 2, elements);
+
+ for (i = 0; i < elements; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < eltspersegment; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], true, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
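
/*
 * Editorial sketch, not part of the patch: the new _idx helpers all share
 * one loop shape -- read the indexed operand once per 128-bit segment,
 * and clamp the inner loop so an 8-byte AdvSIMD vector does not run off
 * the end. Skeleton with a hypothetical per-element op() (host-endian
 * H2() fixups omitted):
 */
#include <stddef.h>
#include <stdint.h>

#define ELT_MIN(a, b) ((a) < (b) ? (a) : (b))

static void sqrdmlah_idx_shape(int16_t *d, const int16_t *n,
                               const int16_t *m, size_t opr_sz,
                               int16_t (*op)(int16_t, int16_t))
{
    size_t elements = opr_sz / 2;                   /* 16-bit lanes */
    size_t perseg = ELT_MIN(16 / 2, elements);      /* lanes per segment */

    for (size_t i = 0; i < elements; i += 16 / 2) {
        int16_t mm = m[i];          /* m is pre-offset by the index */
        for (size_t j = 0; j < perseg; ++j) {
            d[i + j] = op(n[i + j], mm);
        }
    }
}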
+
void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm,
void *va, uint32_t desc)
{
@@ -512,10 +552,12 @@ void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm,
intptr_t i, j, opr_sz = simd_oprsz(desc);
int idx = simd_data(desc);
int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
- for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ for (i = 0; i < elements; i += 16 / 4) {
int32_t mm = m[i];
- for (j = 0; j < 16 / 4; ++j) {
+ for (j = 0; j < eltspersegment; ++j) {
d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq);
}
}
@@ -528,16 +570,54 @@ void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm,
intptr_t i, j, opr_sz = simd_oprsz(desc);
int idx = simd_data(desc);
int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
- for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ for (i = 0; i < elements; i += 16 / 4) {
int32_t mm = m[i];
- for (j = 0; j < 16 / 4; ++j) {
+ for (j = 0; j < eltspersegment; ++j) {
d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+void HELPER(neon_sqrdmlah_idx_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
+
+ for (i = 0; i < elements; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < eltspersegment; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], false, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmlsh_idx_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
+
+ for (i = 0; i < elements; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < eltspersegment; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], true, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm,
void *va, uint32_t desc)
{
@@ -756,6 +836,13 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
{ \
intptr_t i = 0, opr_sz = simd_oprsz(desc); \
intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \
+ /* \
+ * Special case: opr_sz == 8 from AArch64/AArch32 AdvSIMD means the \
+ * first iteration might not be a full 16-byte segment. But \
+ * for vector lengths beyond that this must be SVE and we know \
+ * opr_sz is a multiple of 16, so we need not clamp segend \
+ * to opr_sz_n when we advance it at the end of the loop. \
+ */ \
intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \
intptr_t index = simd_data(desc); \
TYPED *d = vd, *a = va; \
@@ -773,7 +860,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
n[i * 4 + 2] * m2 + \
n[i * 4 + 3] * m3); \
} while (++i < segend); \
- segend = i + 4; \
+ segend = i + (16 / sizeof(TYPED)); \
} while (i < opr_sz_n); \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
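
/*
 * Editorial sketch, not part of the patch: the hunk above fixes the
 * segment advance -- "i + 4" is only a full 128-bit step when the
 * accumulator type is 4 bytes wide; the _h variants use 8-byte
 * accumulators, where a segment holds just 2 elements:
 */
#include <stddef.h>

static ptrdiff_t next_segend(ptrdiff_t i, size_t acc_size)
{
    return i + (ptrdiff_t)(16 / acc_size);  /* elements per 128-bit segment */
}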
@@ -786,26 +873,27 @@ DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8)
DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8)
void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd;
float16 *n = vn;
float16 *m = vm;
- float_status *fpst = vfpst;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
-
for (i = 0; i < opr_sz / 2; i += 2) {
float16 e0 = n[H2(i)];
- float16 e1 = m[H2(i + 1)] ^ neg_imag;
+ float16 e1 = m[H2(i + 1)];
float16 e2 = n[H2(i + 1)];
- float16 e3 = m[H2(i)] ^ neg_real;
+ float16 e3 = m[H2(i)];
+
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
+ }
d[H2(i)] = float16_add(e0, e1, fpst);
d[H2(i + 1)] = float16_add(e2, e3, fpst);
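
/*
 * Editorial sketch, not part of the patch: FCADD negates the Vm operand
 * feeding the real lane for one rotation and the imaginary lane for the
 * other; under FPCR.AH that negation must skip NaNs, hence the switch
 * from xor masks to float16_maybe_ah_chs(). A plain-float model of one
 * element pair (NaN/AH handling deliberately omitted):
 */
static void fcadd_pair(float e0, float e1, float e2, float e3,
                       int rot, float *d0, float *d1)
{
    /* e0, e2 come from Vn; e1 is Vm's imaginary lane, e3 its real lane. */
    if (rot) {
        e3 = -e3;
    } else {
        e1 = -e1;
    }
    *d0 = e0 + e1;
    *d1 = e2 + e3;
}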
@@ -814,26 +902,27 @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
}
void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd;
float32 *n = vn;
float32 *m = vm;
- float_status *fpst = vfpst;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
-
for (i = 0; i < opr_sz / 4; i += 2) {
float32 e0 = n[H4(i)];
- float32 e1 = m[H4(i + 1)] ^ neg_imag;
+ float32 e1 = m[H4(i + 1)];
float32 e2 = n[H4(i + 1)];
- float32 e3 = m[H4(i)] ^ neg_real;
+ float32 e3 = m[H4(i)];
+
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
+ }
d[H4(i)] = float32_add(e0, e1, fpst);
d[H4(i + 1)] = float32_add(e2, e3, fpst);
@@ -842,26 +931,27 @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
}
void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float64 *d = vd;
float64 *n = vn;
float64 *m = vm;
- float_status *fpst = vfpst;
- uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
-
for (i = 0; i < opr_sz / 8; i += 2) {
float64 e0 = n[i];
- float64 e1 = m[i + 1] ^ neg_imag;
+ float64 e1 = m[i + 1];
float64 e2 = n[i + 1];
- float64 e3 = m[i] ^ neg_real;
+ float64 e3 = m[i];
+
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
+ }
d[i] = float64_add(e0, e1, fpst);
d[i + 1] = float64_add(e2, e3, fpst);
@@ -870,152 +960,167 @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
}
void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd, *n = vn, *m = vm, *a = va;
- float_status *fpst = vfpst;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float16 negx_imag, negx_real;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 2; i += 2) {
float16 e2 = n[H2(i + flip)];
- float16 e1 = m[H2(i + flip)] ^ neg_real;
+ float16 e1 = m[H2(i + flip)] ^ negx_real;
float16 e4 = e2;
- float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
+ float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag;
- d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
- d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
+ d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst);
+ d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
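
/*
 * Editorial sketch, not part of the patch: the recurring negx/negf split
 * above means "with AH=0, pre-flip the operand's sign bit (an xor, which
 * also flips NaNs, as IEEE negation does); with AH=1, fold the negation
 * into the fused multiply-add so NaNs pass through unchanged". Schematic
 * selection logic (negate_product_flag stands in for
 * float_muladd_negate_product):
 */
#include <stdbool.h>
#include <stdint.h>

struct neg_plan {
    uint16_t negx;      /* xor mask for the f16 operand (AH=0 path) */
    int negf;           /* float_muladd_* flag (AH=1 path) */
};

static struct neg_plan plan_negation(bool want_neg, bool fpcr_ah,
                                     int negate_product_flag)
{
    struct neg_plan p = { 0, 0 };

    if (want_neg) {
        if (fpcr_ah) {
            p.negf = negate_product_flag;
        } else {
            p.negx = 0x8000;        /* float16 sign bit */
        }
    }
    return p;
}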
void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd, *n = vn, *m = vm, *a = va;
- float_status *fpst = vfpst;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float16);
- intptr_t eltspersegment = 16 / sizeof(float16);
+ intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
+ float16 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float16 mr = m[H2(i + 2 * index + 0)];
float16 mi = m[H2(i + 2 * index + 1)];
- float16 e1 = neg_real ^ (flip ? mi : mr);
- float16 e3 = neg_imag ^ (flip ? mr : mi);
+ float16 e1 = negx_real ^ (flip ? mi : mr);
+ float16 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float16 e2 = n[H2(j + flip)];
float16 e4 = e2;
- d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
- d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd, *n = vn, *m = vm, *a = va;
- float_status *fpst = vfpst;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float32 negx_imag, negx_real;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 4; i += 2) {
float32 e2 = n[H4(i + flip)];
- float32 e1 = m[H4(i + flip)] ^ neg_real;
+ float32 e1 = m[H4(i + flip)] ^ negx_real;
float32 e4 = e2;
- float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
+ float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag;
- d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
- d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
+ d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst);
+ d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd, *n = vn, *m = vm, *a = va;
- float_status *fpst = vfpst;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float32);
- intptr_t eltspersegment = 16 / sizeof(float32);
+ intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
+ float32 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float32 mr = m[H4(i + 2 * index + 0)];
float32 mi = m[H4(i + 2 * index + 1)];
- float32 e1 = neg_real ^ (flip ? mi : mr);
- float32 e3 = neg_imag ^ (flip ? mr : mi);
+ float32 e1 = negx_real ^ (flip ? mi : mr);
+ float32 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float32 e2 = n[H4(j + flip)];
float32 e4 = e2;
- d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
- d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
- void *vfpst, uint32_t desc)
+ float_status *fpst, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
float64 *d = vd, *n = vn, *m = vm, *a = va;
- float_status *fpst = vfpst;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint64_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float64 negx_real, negx_imag;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 8; i += 2) {
float64 e2 = n[i + flip];
- float64 e1 = m[i + flip] ^ neg_real;
+ float64 e1 = m[i + flip] ^ negx_real;
float64 e4 = e2;
- float64 e3 = m[i + 1 - flip] ^ neg_imag;
+ float64 e3 = m[i + 1 - flip] ^ negx_imag;
- d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
- d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
+ d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst);
+ d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -1100,9 +1205,8 @@ static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat)
return -float64_lt(float64_abs(op2), float64_abs(op1), stat);
}
-static int16_t vfp_tosszh(float16 x, void *fpstp)
+static int16_t vfp_tosszh(float16 x, float_status *fpst)
{
- float_status *fpst = fpstp;
if (float16_is_any_nan(x)) {
float_raise(float_flag_invalid, fpst);
return 0;
@@ -1110,9 +1214,8 @@ static int16_t vfp_tosszh(float16 x, void *fpstp)
return float16_to_int16_round_to_zero(x, fpst);
}
-static uint16_t vfp_touszh(float16 x, void *fpstp)
+static uint16_t vfp_touszh(float16 x, float_status *fpst)
{
- float_status *fpst = fpstp;
if (float16_is_any_nan(x)) {
float_raise(float_flag_invalid, fpst);
return 0;
@@ -1121,7 +1224,7 @@ static uint16_t vfp_touszh(float16 x, void *fpstp)
}
#define DO_2OP(NAME, FUNC, TYPE) \
-void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
TYPE *d = vd, *n = vn; \
@@ -1133,10 +1236,12 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
+DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32)
DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
+DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32)
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
@@ -1166,8 +1271,10 @@ DO_2OP(gvec_touszh, vfp_touszh, float16)
#define DO_2OP_CMP0(FN, CMPOP, DIRN) \
WRAP_CMP0_##DIRN(FN, CMPOP, float16) \
WRAP_CMP0_##DIRN(FN, CMPOP, float32) \
+ WRAP_CMP0_##DIRN(FN, CMPOP, float64) \
DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \
- DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)
+ DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32) \
+ DO_2OP(gvec_f##FN##0_d, float64_##FN##0, float64)
DO_2OP_CMP0(cgt, cgt, FWD)
DO_2OP_CMP0(cge, cge, FWD)
@@ -1223,6 +1330,25 @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
return float64_abs(float64_sub(op1, op2, stat));
}
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
+static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat)
+{
+ float16 r = float16_sub(op1, op2, stat);
+ return float16_is_any_nan(r) ? r : float16_abs(r);
+}
+
+static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat)
+{
+ float32 r = float32_sub(op1, op2, stat);
+ return float32_is_any_nan(r) ? r : float32_abs(r);
+}
+
+static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat)
+{
+ float64 r = float64_sub(op1, op2, stat);
+ return float64_is_any_nan(r) ? r : float64_abs(r);
+}
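
/*
 * Editorial sketch, not part of the patch: the AH=1 ABD rule is simply
 * "take the absolute value only when the difference is not a NaN". A
 * scalar double-precision analogue:
 */
#include <math.h>

static double fabd_ah(double a, double b)
{
    double r = a - b;
    return isnan(r) ? r : fabs(r);
}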
+
/*
* Reciprocal step. These are the AArch32 version which uses a
* non-fused multiply-and-subtract.
@@ -1279,7 +1405,8 @@ static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
}
#define DO_3OP(NAME, FUNC, TYPE) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, \
+ float_status *stat, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
TYPE *d = vd, *n = vn, *m = vm; \
@@ -1309,6 +1436,10 @@ DO_3OP(gvec_fabd_h, float16_abd, float16)
DO_3OP(gvec_fabd_s, float32_abd, float32)
DO_3OP(gvec_fabd_d, float64_abd, float64)
+DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16)
+DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32)
+DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64)
+
DO_3OP(gvec_fceq_h, float16_ceq, float16)
DO_3OP(gvec_fceq_s, float32_ceq, float32)
DO_3OP(gvec_fceq_d, float64_ceq, float64)
@@ -1368,6 +1499,22 @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
+DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16)
+DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32)
+DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64)
+
+DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16)
+DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32)
+DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64)
+
+DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
+DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
+DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
+
+DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16)
+DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32)
+DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64)
+
#endif
#undef DO_3OP
@@ -1433,8 +1580,27 @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
}
-#define DO_MULADD(NAME, FUNC, TYPE) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
+}
+
+static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat);
+}
+
+static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2,
+ float_status *stat)
+{
+ return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat);
+}
+
+#define DO_MULADD(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, \
+ float_status *stat, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
TYPE *d = vd, *n = vn, *m = vm; \
@@ -1458,6 +1624,10 @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
+DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
+DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
+DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
+
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
* For AdvSIMD, there is of course only one such vector segment.
*/
@@ -1511,7 +1681,8 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
#undef DO_MLA_IDX
#define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, \
+ float_status *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
@@ -1553,29 +1724,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
#undef DO_FMUL_IDX
-#define DO_FMLA_IDX(NAME, TYPE, H) \
+#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
- void *stat, uint32_t desc) \
+ float_status *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
- intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
+ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
- mm, a[i + j], 0, stat); \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
+ a[i + j], NEGF, stat); \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
+
+DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
+
+DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
#undef DO_FMLA_IDX
@@ -1948,135 +2125,171 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
 * as there are not yet SVE versions that might use blocking.
*/
-static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
- uint32_t desc, bool fz16)
+static void do_fmlal(float32 *d, void *vn, void *vm,
+ CPUARMState *env, uint32_t desc,
+ ARMFPStatusFlavour fpst_idx,
+ uint64_t negx, int negf)
{
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
intptr_t i, oprsz = simd_oprsz(desc);
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int is_q = oprsz == 16;
uint64_t n_4, m_4;
- /* Pre-load all of the f16 data, avoiding overlap issues. */
- n_4 = load4_f16(vn, is_q, is_2);
+ /*
+ * Pre-load all of the f16 data, avoiding overlap issues.
+ * Negate all inputs for AH=0 FMLSL at once.
+ */
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
m_4 = load4_f16(vm, is_q, is_2);
- /* Negate all inputs for FMLSL at once. */
- if (is_s) {
- n_4 ^= 0x8000800080008000ull;
- }
-
for (i = 0; i < oprsz / 4; i++) {
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
}
clear_tail(d, oprsz, simd_maxsz(desc));
}
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
- CPUARMState *env = venv;
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
+
+ do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0);
}
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
- CPUARMState *env = venv;
- do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = 0;
+ int negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000800080008000ull;
+ }
+ }
+ do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf);
}
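
/*
 * Editorial sketch, not part of the patch: because the four f16 inputs
 * are packed into one uint64_t, the AH=0 FMLSL negation can flip all
 * four sign bits with a single xor, while the AH=1 form defers to the
 * negf muladd flag instead:
 */
#include <stdint.h>

static uint64_t fmlsl_negx_mask(int want_neg, int fpcr_ah)
{
    return (want_neg && !fpcr_ah) ? 0x8000800080008000ull : 0;
}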
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
- CPUARMState *env = venv;
- float_status *status = &env->vfp.fp_status;
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
+ float_status *status = &env->vfp.fp_status[FPST_A64];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
+ int negx = 0, negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000;
+ }
+ }
for (i = 0; i < oprsz; i += sizeof(float32)) {
- float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx;
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
float32 aa = *(float32 *)(va + H1_4(i));
- *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
+ *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status);
}
}
-static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
- uint32_t desc, bool fz16)
+static void do_fmlal_idx(float32 *d, void *vn, void *vm,
+ CPUARMState *env, uint32_t desc,
+ ARMFPStatusFlavour fpst_idx,
+ uint64_t negx, int negf)
{
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
intptr_t i, oprsz = simd_oprsz(desc);
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
int is_q = oprsz == 16;
uint64_t n_4;
float32 m_1;
- /* Pre-load all of the f16 data, avoiding overlap issues. */
- n_4 = load4_f16(vn, is_q, is_2);
-
- /* Negate all inputs for FMLSL at once. */
- if (is_s) {
- n_4 ^= 0x8000800080008000ull;
- }
-
+ /*
+ * Pre-load all of the f16 data, avoiding overlap issues.
+ * Negate all inputs for AH=0 FMLSL at once.
+ */
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
for (i = 0; i < oprsz / 4; i++) {
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
}
clear_tail(d, oprsz, simd_maxsz(desc));
}
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
- CPUARMState *env = venv;
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
+
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0);
}
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
- CPUARMState *env = venv;
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = 0;
+ int negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000800080008000ull;
+ }
+ }
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf);
}
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
- void *venv, uint32_t desc)
+ CPUARMState *env, uint32_t desc)
{
intptr_t i, j, oprsz = simd_oprsz(desc);
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
- CPUARMState *env = venv;
- float_status *status = &env->vfp.fp_status;
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
+ float_status *status = &env->vfp.fp_status[FPST_A64];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
+ int negx = 0, negf = 0;
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000;
+ }
+ }
for (i = 0; i < oprsz; i += 16) {
float16 mm_16 = *(float16 *)(vm + i + idx);
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
for (j = 0; j < 16; j += sizeof(float32)) {
- float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx;
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
float32 aa = *(float32 *)(va + H1_4(i + j));
*(float32 *)(vd + H1_4(i + j)) =
- float32_muladd(nn, mm, aa, 0, status);
+ float32_muladd(nn, mm, aa, negf, status);
}
}
}
@@ -2321,7 +2534,8 @@ DO_ABA(gvec_uaba_d, uint64_t)
#undef DO_ABA
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, \
+ float_status *stat, uint32_t desc) \
{ \
ARMVectorReg scratch; \
intptr_t oprsz = simd_oprsz(desc); \
@@ -2359,6 +2573,16 @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
+#ifdef TARGET_AARCH64
+DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2)
+DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4)
+DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, )
+
+DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2)
+DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4)
+DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, )
+#endif
+
#undef DO_3OP_PAIR
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
@@ -2406,7 +2630,7 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
#undef DO_3OP_PAIR
#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
- void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
int shift = simd_data(desc); \
@@ -2418,21 +2642,25 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
+DO_VCVT_FIXED(gvec_vcvt_sd, helper_vfp_sqtod, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_ud, helper_vfp_uqtod, uint64_t)
DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t)
-DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t)
-DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t)
-DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t)
-DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
+
+DO_VCVT_FIXED(gvec_vcvt_rz_ds, helper_vfp_tosqd_round_to_zero, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_du, helper_vfp_touqd_round_to_zero, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_fs, helper_vfp_tosls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_fu, helper_vfp_touls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_hs, helper_vfp_toshh_round_to_zero, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_hu, helper_vfp_touhh_round_to_zero, uint16_t)
#undef DO_VCVT_FIXED
#define DO_VCVT_RMODE(NAME, FUNC, TYPE) \
- void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ void HELPER(NAME)(void *vd, void *vn, float_status *fpst, uint32_t desc) \
{ \
- float_status *fpst = stat; \
intptr_t i, oprsz = simd_oprsz(desc); \
uint32_t rmode = simd_data(desc); \
uint32_t prev_rmode = get_float_rounding_mode(fpst); \
@@ -2445,6 +2673,8 @@ DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
+DO_VCVT_RMODE(gvec_vcvt_rm_sd, helper_vfp_tosqd, uint64_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_ud, helper_vfp_touqd, uint64_t)
DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t)
DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t)
DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t)
@@ -2453,9 +2683,8 @@ DO_VCVT_RMODE(gvec_vcvt_rm_uh, helper_vfp_touhh, uint16_t)
#undef DO_VCVT_RMODE
#define DO_VRINT_RMODE(NAME, FUNC, TYPE) \
- void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ void HELPER(NAME)(void *vd, void *vn, float_status *fpst, uint32_t desc) \
{ \
- float_status *fpst = stat; \
intptr_t i, oprsz = simd_oprsz(desc); \
uint32_t rmode = simd_data(desc); \
uint32_t prev_rmode = get_float_rounding_mode(fpst); \
@@ -2474,10 +2703,9 @@ DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t)
#undef DO_VRINT_RMODE
#ifdef TARGET_AARCH64
-void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
+void HELPER(simd_tblx)(void *vd, void *vm, CPUARMState *env, uint32_t desc)
{
const uint8_t *indices = vm;
- CPUARMState *env = venv;
size_t oprsz = simd_oprsz(desc);
uint32_t rn = extract32(desc, SIMD_DATA_SHIFT, 5);
bool is_tbx = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
@@ -2710,44 +2938,109 @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
* BFloat16 Dot Product
*/
-float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
+bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
+{
+ /*
+ * For BFDOT, BFMMLA, etc, the behaviour depends on FPCR.EBF.
+ * For EBF = 0, we ignore the FPCR bits which determine rounding
+ * mode and denormal-flushing, and we do unfused multiplies and
+ * additions with intermediate rounding of all products and sums.
+ * For EBF = 1, we honour FPCR rounding mode and denormal-flushing bits,
+ * and we perform a fused two-way sum-of-products without intermediate
+ * rounding of the products.
+ * In either case, we don't set fp exception flags.
+ *
+ * EBF is AArch64 only, so even if it's set in the FPCR it has
+ * no effect on AArch32 instructions.
+ */
+ bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
+
+ *statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32];
+ set_default_nan_mode(true, statusp);
+
+ if (ebf) {
+ /* EBF=1 needs to do a step with round-to-odd semantics */
+ *oddstatusp = *statusp;
+ set_float_rounding_mode(float_round_to_odd, oddstatusp);
+ } else {
+ set_flush_to_zero(true, statusp);
+ set_flush_inputs_to_zero(true, statusp);
+ set_float_rounding_mode(float_round_to_odd_inf, statusp);
+ }
+ return ebf;
+}
+
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst)
{
- /* FPCR is ignored for BFDOT and BFMMLA. */
- float_status bf_status = {
- .tininess_before_rounding = float_tininess_before_rounding,
- .float_rounding_mode = float_round_to_odd_inf,
- .flush_to_zero = true,
- .flush_inputs_to_zero = true,
- .default_nan_mode = true,
- };
float32 t1, t2;
/*
* Extract each BFloat16 from the element pair, and shift
* them such that they become float32.
*/
- t1 = float32_mul(e1 << 16, e2 << 16, &bf_status);
- t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status);
- t1 = float32_add(t1, t2, &bf_status);
- t1 = float32_add(sum, t1, &bf_status);
+ t1 = float32_mul(e1 << 16, e2 << 16, fpst);
+ t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, fpst);
+ t1 = float32_add(t1, t2, fpst);
+ t1 = float32_add(sum, t1, fpst);
return t1;
}
-void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
+ float_status *fpst, float_status *fpst_odd)
+{
+ /*
+ * Compare f16_dotadd() in sme_helper.c, but here we have
+ * bfloat16 inputs. In particular that means that we do not
+ * want the FPCR.FZ16 flush semantics, so we use the normal
+ * float_status for the input handling here.
+ */
+ float64 e1r = float32_to_float64(e1 << 16, fpst);
+ float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst);
+ float64 e2r = float32_to_float64(e2 << 16, fpst);
+ float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst);
+ float64 t64;
+ float32 t32;
+
+ /*
+ * The ARM pseudocode function FPDot performs both multiplies
+ * and the add with a single rounding operation. Emulate this
+ * by performing the first multiply in round-to-odd, then doing
+ * the second multiply as fused multiply-add, and rounding to
+ * float32 all in one step.
+ */
+ t64 = float64_mul(e1r, e2r, fpst_odd);
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst);
+
+ /* This conversion is exact, because we've already rounded. */
+ t32 = float64_to_float32(t64, fpst);
+
+ /* The final accumulation step is not fused. */
+ return float32_add(sum, t32, fpst);
+}
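
/*
 * Editorial sketch, not part of the patch: the FPDot shape is "round the
 * first product to odd, fuse the second product with the add, round once
 * to float32, then accumulate unfused". A double-precision analogue of
 * that dataflow (C's fma() gives the fusion, but plain C cannot express
 * round-to-odd or the single float64r32 rounding step):
 */
#include <math.h>

static float dot2_shape(float e1r, float e1c, float e2r, float e2c,
                        float sum)
{
    double t = (double)e1r * (double)e2r;   /* round-to-odd in the real code */
    t = fma((double)e1c, (double)e2c, t);   /* fused second product + add */
    return sum + (float)t;                  /* final unfused accumulation */
}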
+
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
+ CPUARMState *env, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
float32 *d = vd, *a = va;
uint32_t *n = vn, *m = vm;
+ float_status fpst, fpst_odd;
- for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = bfdotadd(a[i], n[i], m[i]);
+ if (is_ebf(env, &fpst, &fpst_odd)) {
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd);
+ }
+ } else {
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = bfdotadd(a[i], n[i], m[i], &fpst);
+ }
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
- void *va, uint32_t desc)
+ void *va, CPUARMState *env, uint32_t desc)
{
intptr_t i, j, opr_sz = simd_oprsz(desc);
intptr_t index = simd_data(desc);
@@ -2755,59 +3048,106 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
intptr_t eltspersegment = MIN(16 / 4, elements);
float32 *d = vd, *a = va;
uint32_t *n = vn, *m = vm;
+ float_status fpst, fpst_odd;
- for (i = 0; i < elements; i += eltspersegment) {
- uint32_t m_idx = m[i + H4(index)];
+ if (is_ebf(env, &fpst, &fpst_odd)) {
+ for (i = 0; i < elements; i += eltspersegment) {
+ uint32_t m_idx = m[i + H4(index)];
- for (j = i; j < i + eltspersegment; j++) {
- d[j] = bfdotadd(a[j], n[j], m_idx);
+ for (j = i; j < i + eltspersegment; j++) {
+ d[j] = bfdotadd_ebf(a[j], n[j], m_idx, &fpst, &fpst_odd);
+ }
+ }
+ } else {
+ for (i = 0; i < elements; i += eltspersegment) {
+ uint32_t m_idx = m[i + H4(index)];
+
+ for (j = i; j < i + eltspersegment; j++) {
+ d[j] = bfdotadd(a[j], n[j], m_idx, &fpst);
+ }
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va,
+ CPUARMState *env, uint32_t desc)
{
intptr_t s, opr_sz = simd_oprsz(desc);
float32 *d = vd, *a = va;
uint32_t *n = vn, *m = vm;
+ float_status fpst, fpst_odd;
- for (s = 0; s < opr_sz / 4; s += 4) {
- float32 sum00, sum01, sum10, sum11;
-
- /*
- * Process the entire segment at once, writing back the
- * results only after we've consumed all of the inputs.
- *
- * Key to indices by column:
- * i j i k j k
- */
- sum00 = a[s + H4(0 + 0)];
- sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]);
- sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]);
-
- sum01 = a[s + H4(0 + 1)];
- sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]);
- sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]);
-
- sum10 = a[s + H4(2 + 0)];
- sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]);
- sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]);
+ if (is_ebf(env, &fpst, &fpst_odd)) {
+ for (s = 0; s < opr_sz / 4; s += 4) {
+ float32 sum00, sum01, sum10, sum11;
- sum11 = a[s + H4(2 + 1)];
- sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]);
- sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]);
+ /*
+ * Process the entire segment at once, writing back the
+ * results only after we've consumed all of the inputs.
+ *
+ * Key to indices by column:
+ * i j i k j k
+ */
+ sum00 = a[s + H4(0 + 0)];
+ sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd);
+ sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd);
+
+ sum01 = a[s + H4(0 + 1)];
+ sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd);
+ sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd);
+
+ sum10 = a[s + H4(2 + 0)];
+ sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd);
+ sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd);
+
+ sum11 = a[s + H4(2 + 1)];
+ sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd);
+ sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd);
+
+ d[s + H4(0 + 0)] = sum00;
+ d[s + H4(0 + 1)] = sum01;
+ d[s + H4(2 + 0)] = sum10;
+ d[s + H4(2 + 1)] = sum11;
+ }
+ } else {
+ for (s = 0; s < opr_sz / 4; s += 4) {
+ float32 sum00, sum01, sum10, sum11;
- d[s + H4(0 + 0)] = sum00;
- d[s + H4(0 + 1)] = sum01;
- d[s + H4(2 + 0)] = sum10;
- d[s + H4(2 + 1)] = sum11;
+ /*
+ * Process the entire segment at once, writing back the
+ * results only after we've consumed all of the inputs.
+ *
+ * Key to indices by column:
+ * i j i k j k
+ */
+ sum00 = a[s + H4(0 + 0)];
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst);
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst);
+
+ sum01 = a[s + H4(0 + 1)];
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst);
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst);
+
+ sum10 = a[s + H4(2 + 0)];
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst);
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst);
+
+ sum11 = a[s + H4(2 + 1)];
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst);
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst);
+
+ d[s + H4(0 + 0)] = sum00;
+ d[s + H4(0 + 1)] = sum01;
+ d[s + H4(2 + 0)] = sum10;
+ d[s + H4(2 + 1)] = sum11;
+ }
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
- void *stat, uint32_t desc)
+ float_status *stat, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
intptr_t sel = simd_data(desc);
@@ -2823,7 +3163,7 @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
- void *va, void *stat, uint32_t desc)
+ void *va, float_status *stat, uint32_t desc)
{
intptr_t i, j, opr_sz = simd_oprsz(desc);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
@@ -2867,3 +3207,49 @@ DO_CLAMP(gvec_uclamp_b, uint8_t)
DO_CLAMP(gvec_uclamp_h, uint16_t)
DO_CLAMP(gvec_uclamp_s, uint32_t)
DO_CLAMP(gvec_uclamp_d, uint64_t)
+
+/* Bit count in each 8-bit word. */
+void HELPER(gvec_cnt_b)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ctpop8(n[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/* Reverse bits in each 8-bit word. */
+void HELPER(gvec_rbit_b)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
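+        /*
+         * revbit64() reverses all 64 bits, which also reverses the byte
+         * order; the inner bswap64() cancels that out, so the net effect
+         * is to reverse the bits within each byte while leaving the
+         * bytes in place.
+         */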
+ d[i] = revbit64(bswap64(n[i]));
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_urecpe_s)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint32_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = helper_recpe_u32(n[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint32_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = helper_rsqrte_u32(n[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index 3ca1b94..c02f9c3 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -20,6 +20,10 @@
#ifndef TARGET_ARM_VEC_INTERNAL_H
#define TARGET_ARM_VEC_INTERNAL_H
+#include "fpu/softfloat.h"
+
+typedef struct CPUArchState CPUARMState;
+
/*
* Note that vector data is stored in host-endian 64-bit chunks,
* so addressing units smaller than that needs a host-endian fixup.
@@ -223,13 +227,79 @@ int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
* bfdotadd:
* @sum: addend
* @e1, @e2: multiplicand vectors
+ * @fpst: floating-point status to use
+ *
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
+ * slots and contain two Bfloat16 values each.
+ *
+ * Corresponds to the ARM pseudocode function BFDotAdd, specialized
+ * for the FPCR.EBF == 0 case.
+ */
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst);
+/**
+ * bfdotadd_ebf:
+ * @sum: addend
+ * @e1, @e2: multiplicand vectors
+ * @fpst: floating-point status to use
+ * @fpst_odd: floating-point status to use for round-to-odd operations
*
* BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
* The @e1 and @e2 operands correspond to the 32-bit source vector
* slots and contain two Bfloat16 values each.
*
- * Corresponds to the ARM pseudocode function BFDotAdd.
+ * Corresponds to the ARM pseudocode function BFDotAdd, specialized
+ * for the FPCR.EBF == 1 case.
+ */
+float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
+ float_status *fpst, float_status *fpst_odd);
+
+/**
+ * is_ebf:
+ * @env: CPU state
+ * @statusp: pointer to floating point status to fill in
+ * @oddstatusp: pointer to floating point status to fill in for round-to-odd
+ *
+ * Determine whether a BFDotAdd operation should use FPCR.EBF = 0
+ * or FPCR.EBF = 1 semantics. On return, has initialized *statusp
+ * and *oddstatusp to suitable float_status arguments to use with either
+ * bfdotadd() or bfdotadd_ebf().
+ * Returns true for EBF = 1, false for EBF = 0. (The caller should use this
+ * to decide whether to call bfdotadd() or bfdotadd_ebf().)
+ */
+bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
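For illustration, the intended calling pattern (mirroring the gvec_bfmmla hunk above) is to probe is_ebf() once and then dispatch; the wrapper below is hypothetical and not part of the patch. Real helpers hoist the is_ebf() call out of their per-segment loops, since it fully initializes both float_status structures:

    static float32 bfdot_one(CPUARMState *env, float32 sum,
                             uint32_t e1, uint32_t e2)
    {
        float_status fpst, fpst_odd;

        if (is_ebf(env, &fpst, &fpst_odd)) {
            /* FPCR.EBF == 1 semantics */
            return bfdotadd_ebf(sum, e1, e2, &fpst, &fpst_odd);
        }
        /* FPCR.EBF == 0 semantics */
        return bfdotadd(sum, e1, e2, &fpst);
    }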
+
+/*
+ * Negate as for FPCR.AH=1 -- do not negate NaNs.
*/
-float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
+static inline float16 float16_ah_chs(float16 a)
+{
+ return float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_ah_chs(float32 a)
+{
+ return float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_ah_chs(float64 a)
+{
+ return float64_is_any_nan(a) ? a : float64_chs(a);
+}
+
+static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
+{
+ return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
+{
+ return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
+{
+ return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
+}
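A hypothetical spot check of these helpers (not part of the patch; assumes QEMU's softfloat types and <assert.h> are in scope):

    static void ah_chs_demo(void)
    {
        float32 qnan = make_float32(0x7fc00000);  /* quiet NaN bit pattern */
        float32 one = make_float32(0x3f800000);   /* 1.0f */

        /* With fpcr_ah set, the NaN is passed through unchanged... */
        assert(float32_val(float32_maybe_ah_chs(qnan, true)) == 0x7fc00000);
        /* ...otherwise its sign bit is flipped like any other value. */
        assert(float32_val(float32_maybe_ah_chs(qnan, false)) == 0xffc00000);
        /* Non-NaN operands are negated regardless of fpcr_ah. */
        assert(float32_val(float32_maybe_ah_chs(one, true)) == 0xbf800000);
    }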
#endif /* TARGET_ARM_VEC_INTERNAL_H */
diff --git a/target/arm/tcg/vfp.decode b/target/arm/tcg/vfp.decode
index 5405e80..2dd87a2 100644
--- a/target/arm/tcg/vfp.decode
+++ b/target/arm/tcg/vfp.decode
@@ -141,18 +141,18 @@ VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
-VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
-VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
+VFNMS_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
+VFNMA_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
-VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
-VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
+VFNMS_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
+VFNMA_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
-VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
-VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+VFNMS_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
+VFNMA_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
vd=%vd_sp imm=%vmov_imm
diff --git a/target/arm/vfp_helper.c b/target/arm/tcg/vfp_helper.c
index ce26b8a..b1324c5 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/tcg/vfp_helper.c
@@ -19,94 +19,165 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/helper-proto.h"
#include "internals.h"
#include "cpu-features.h"
-#ifdef CONFIG_TCG
-#include "qemu/log.h"
#include "fpu/softfloat.h"
-#endif
+#include "qemu/log.h"
+
+#define HELPER_H "tcg/helper.h"
+#include "exec/helper-proto.h.inc"
-/* VFP support. We follow the convention used for VFP instructions:
- Single precision routines have a "s" suffix, double precision a
- "d" suffix. */
+/*
+ * Set the float_status behaviour to match the Arm defaults:
+ * * tininess-before-rounding
+ * * 2-input NaN propagation prefers SNaN over QNaN, and then
+ * operand A over operand B (see FPProcessNaNs() pseudocode)
+ * * 3-input NaN propagation prefers SNaN over QNaN, and then
+ * operand C over A over B (see FPProcessNaNs3() pseudocode,
+ * but note that for QEMU muladd is a * b + c, whereas for
+ * the pseudocode function the arguments are in the order c, a, b)
+ * * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
+ * and the input NaN if it is signalling
+ * * Default NaN has sign bit clear, msb frac bit set
+ */
+void arm_set_default_fp_behaviours(float_status *s)
+{
+ set_float_detect_tininess(float_tininess_before_rounding, s);
+ set_float_ftz_detection(float_ftz_before_rounding, s);
+ set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
+ set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
+ set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
+ set_float_default_nan_pattern(0b01000000, s);
+}
-#ifdef CONFIG_TCG
+/*
+ * Set the float_status behaviour to match the FEAT_AFP
+ * FPCR.AH=1 requirements:
+ * * tininess-after-rounding
+ * * 2-input NaN propagation prefers the first NaN
+ * * 3-input NaN propagation prefers a over b over c
+ * * 0 * Inf + NaN always returns the input NaN and doesn't
+ * set Invalid for a QNaN
+ * * default NaN has sign bit set, msb frac bit set
+ */
+void arm_set_ah_fp_behaviours(float_status *s)
+{
+ set_float_detect_tininess(float_tininess_after_rounding, s);
+ set_float_ftz_detection(float_ftz_after_rounding, s);
+ set_float_2nan_prop_rule(float_2nan_prop_ab, s);
+ set_float_3nan_prop_rule(float_3nan_prop_abc, s);
+ set_float_infzeronan_rule(float_infzeronan_dnan_never |
+ float_infzeronan_suppress_invalid, s);
+ set_float_default_nan_pattern(0b11000000, s);
+}
/* Convert host exception flags to vfp form. */
-static inline int vfp_exceptbits_from_host(int host_bits)
+static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
{
- int target_bits = 0;
+ uint32_t target_bits = 0;
if (host_bits & float_flag_invalid) {
- target_bits |= 1;
+ target_bits |= FPSR_IOC;
}
if (host_bits & float_flag_divbyzero) {
- target_bits |= 2;
+ target_bits |= FPSR_DZC;
}
if (host_bits & float_flag_overflow) {
- target_bits |= 4;
+ target_bits |= FPSR_OFC;
}
- if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
- target_bits |= 8;
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal_flushed)) {
+ target_bits |= FPSR_UFC;
}
if (host_bits & float_flag_inexact) {
- target_bits |= 0x10;
+ target_bits |= FPSR_IXC;
+ }
+ if (host_bits & float_flag_input_denormal_flushed) {
+ target_bits |= FPSR_IDC;
+ }
+ /*
+ * With FPCR.AH, IDC is set when an input denormal is used,
+ * and flushing an output denormal to zero sets both IXC and UFC.
+ */
+ if (ah && (host_bits & float_flag_input_denormal_used)) {
+ target_bits |= FPSR_IDC;
}
- if (host_bits & float_flag_input_denormal) {
- target_bits |= 0x80;
+ if (ah && (host_bits & float_flag_output_denormal_flushed)) {
+ target_bits |= FPSR_IXC;
}
return target_bits;
}
-/* Convert vfp exception flags to target form. */
-static inline int vfp_exceptbits_to_host(int target_bits)
+uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
- int host_bits = 0;
+ uint32_t a32_flags = 0, a64_flags = 0;
- if (target_bits & 1) {
- host_bits |= float_flag_invalid;
- }
- if (target_bits & 2) {
- host_bits |= float_flag_divbyzero;
- }
- if (target_bits & 4) {
- host_bits |= float_flag_overflow;
- }
- if (target_bits & 8) {
- host_bits |= float_flag_underflow;
- }
- if (target_bits & 0x10) {
- host_bits |= float_flag_inexact;
- }
- if (target_bits & 0x80) {
- host_bits |= float_flag_input_denormal;
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
+ /* FZ16 does not generate an input denormal exception. */
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
+ & ~float_flag_input_denormal_flushed);
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
+ & ~float_flag_input_denormal_flushed);
+
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
+ & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
+ /*
+ * We do not merge in flags from FPST_AH or FPST_AH_F16, because
+ * they are used for insns that must not set the cumulative exception bits.
+ */
+
+ /*
+ * Flushing an input denormal *only* because FPCR.FIZ == 1 does
+ * not set FPSR.IDC; if FPCR.FZ is also set then this takes
+ * precedence and IDC is set (see the FPUnpackBase pseudocode).
+ * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
+ * We only do this for the a64 flags because FIZ has no effect
+ * on AArch32 even if it is set.
+ */
+ if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
+ a64_flags &= ~float_flag_input_denormal_flushed;
}
- return host_bits;
+ return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
+ vfp_exceptbits_from_host(a32_flags, false);
}
-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+void vfp_clear_float_status_exc_flags(CPUARMState *env)
{
- uint32_t i;
+ /*
+ * Clear out all the exception-flag information in the float_status
+ * values. The caller should have arranged for env->vfp.fpsr to
+ * be the architecturally up-to-date exception flag information first.
+ */
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
+}
- i = get_float_exception_flags(&env->vfp.fp_status);
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
- /* FZ16 does not generate an input denormal exception. */
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
- & ~float_flag_input_denormal);
- i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
- & ~float_flag_input_denormal);
- return vfp_exceptbits_from_host(i);
+static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
+{
+ /*
+ * Synchronize any pending exception-flag information in the
+ * float_status values into env->vfp.fpsr, and then clear out
+ * the float_status data.
+ */
+ env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
+ vfp_clear_float_status_exc_flags(env);
}
-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
- int i;
- uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
+ uint64_t changed = env->vfp.fpcr;
changed ^= val;
+ changed &= mask;
if (changed & (3 << 22)) {
- i = (val >> 22) & 3;
+ int i = (val >> 22) & 3;
switch (i) {
case FPROUNDING_TIEEVEN:
i = float_round_nearest_even;
@@ -121,154 +192,88 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
i = float_round_to_zero;
break;
}
- set_float_rounding_mode(i, &env->vfp.fp_status);
- set_float_rounding_mode(i, &env->vfp.fp_status_f16);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
}
if (changed & FPCR_FZ16) {
bool ftz_enabled = val & FPCR_FZ16;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
- set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
if (changed & FPCR_FZ) {
bool ftz_enabled = val & FPCR_FZ;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
- }
- if (changed & FPCR_DN) {
- bool dnan_enabled = val & FPCR_DN;
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
- }
-
- /*
- * The exception flags are ORed together when we read fpscr so we
- * only need to preserve the current state in one of our
- * float_status values.
- */
- i = vfp_exceptbits_to_host(val);
- set_float_exception_flags(i, &env->vfp.fp_status);
- set_float_exception_flags(0, &env->vfp.fp_status_f16);
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
- set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
-}
-
-#else
-
-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
-{
- return 0;
-}
-
-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
-{
-}
-
-#endif
-
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
-{
- uint32_t i, fpscr;
-
- fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
- | (env->vfp.vec_len << 16)
- | (env->vfp.vec_stride << 20);
-
- /*
- * M-profile LTPSIZE overlaps A-profile Stride; whichever of the
- * two is not applicable to this CPU will always be zero.
- */
- fpscr |= env->v7m.ltpsize << 16;
-
- fpscr |= vfp_get_fpscr_from_host(env);
-
- i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
- fpscr |= i ? FPCR_QC : 0;
-
- return fpscr;
-}
-
-uint32_t vfp_get_fpscr(CPUARMState *env)
-{
- return HELPER(vfp_get_fpscr)(env);
-}
-
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
-{
- ARMCPU *cpu = env_archcpu(env);
-
- /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
- if (!cpu_isar_feature(any_fp16, cpu)) {
- val &= ~FPCR_FZ16;
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
+ /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
}
-
- vfp_set_fpscr_to_host(env, val);
-
- if (!arm_feature(env, ARM_FEATURE_M)) {
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
/*
- * Short-vector length and stride; on M-profile these bits
- * are used for different purposes.
- * We can't make this conditional be "if MVFR0.FPShVec != 0",
- * because in v7A no-short-vector-support cores still had to
- * allow Stride/Len to be written with the only effect that
- * some insns are required to UNDEF if the guest sets them.
+ * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
+ * both FPCR.AH = 0 and FPCR.FZ = 1.
*/
- env->vfp.vec_len = extract32(val, 16, 3);
- env->vfp.vec_stride = extract32(val, 20, 2);
- } else if (cpu_isar_feature(aa32_mve, cpu)) {
- env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
- FPCR_LTPSIZE_LENGTH);
+ bool fitz_enabled = (val & FPCR_FIZ) ||
+ (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
}
-
- if (arm_feature(env, ARM_FEATURE_NEON) ||
- cpu_isar_feature(aa32_mve, cpu)) {
- /*
- * The bit we set within fpscr_q is arbitrary; the register as a
- * whole being zero/non-zero is what counts.
- * TODO: M-profile MVE also has a QC bit.
- */
- env->vfp.qc[0] = val & FPCR_QC;
- env->vfp.qc[1] = 0;
- env->vfp.qc[2] = 0;
- env->vfp.qc[3] = 0;
+ if (changed & FPCR_DN) {
+ bool dnan_enabled = val & FPCR_DN;
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
+ if (changed & FPCR_AH) {
+ bool ah_enabled = val & FPCR_AH;
+ if (ah_enabled) {
+ /* Change behaviours for A64 FP operations */
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
+ } else {
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
+ }
+ }
/*
- * We don't implement trapped exception handling, so the
- * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
- *
- * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
- * fp_status; QC, Len and Stride are stored separately earlier.
- * Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
- * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
+ * If any bits changed that we look at in vfp_get_fpsr_from_host(),
+ * we must sync the float_status flags into vfp.fpsr now (under the
+ * old regime) before we update vfp.fpcr.
*/
- env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
-}
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
- HELPER(vfp_set_fpscr)(env, val);
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
+ vfp_sync_and_clear_float_status_exc_flags(env);
+ }
}
-#ifdef CONFIG_TCG
+/*
+ * VFP support. We follow the convention used for VFP instructions:
+ * Single precision routines have a "s" suffix, double precision a
+ * "d" suffix.
+ */
#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
#define VFP_BINOP(name) \
-dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
return float16_ ## name(a, b, fpst); \
} \
-float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
+float32 VFP_HELPER(name, s)(float32 a, float32 b, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
return float32_ ## name(a, b, fpst); \
} \
-float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
+float64 VFP_HELPER(name, d)(float64 a, float64 b, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
return float64_ ## name(a, b, fpst); \
}
VFP_BINOP(add)
@@ -281,19 +286,19 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
-dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
+dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, float_status *fpst)
{
- return float16_sqrt(a, &env->vfp.fp_status_f16);
+ return float16_sqrt(a, fpst);
}
-float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
+float32 VFP_HELPER(sqrt, s)(float32 a, float_status *fpst)
{
- return float32_sqrt(a, &env->vfp.fp_status);
+ return float32_sqrt(a, fpst);
}
-float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
+float64 VFP_HELPER(sqrt, d)(float64 a, float_status *fpst)
{
- return float64_sqrt(a, &env->vfp.fp_status);
+ return float64_sqrt(a, fpst);
}
static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
@@ -315,8 +320,7 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
default:
g_assert_not_reached();
}
- env->vfp.xregs[ARM_VFP_FPSCR] =
- deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
+ env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */
}
/* XXX: check quiet/signaling case */
@@ -324,31 +328,29 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
} \
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
+ FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
}
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
-DO_VFP_cmp(s, float32, float32, fp_status)
-DO_VFP_cmp(d, float64, float64, fp_status)
+DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
+DO_VFP_cmp(s, float32, float32, FPST_A32)
+DO_VFP_cmp(d, float64, float64, FPST_A32)
#undef DO_VFP_cmp
/* Integer to float and float to integer conversions */
#define CONV_ITOF(name, ftype, fsz, sign) \
-ftype HELPER(name)(uint32_t x, void *fpstp) \
+ftype HELPER(name)(uint32_t x, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
}
#define CONV_FTOI(name, ftype, fsz, sign, round) \
-sign##int32_t HELPER(name)(ftype x, void *fpstp) \
+sign##int32_t HELPER(name)(ftype x, float_status *fpst) \
{ \
- float_status *fpst = fpstp; \
if (float##fsz##_is_any_nan(x)) { \
float_raise(float_flag_invalid, fpst); \
return 0; \
@@ -373,22 +375,22 @@ FLOAT_CONVS(ui, d, float64, 64, u)
#undef FLOAT_CONVS
/* floating point conversion */
-float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
+float64 VFP_HELPER(fcvtd, s)(float32 x, float_status *status)
{
- return float32_to_float64(x, &env->vfp.fp_status);
+ return float32_to_float64(x, status);
}
-float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
+float32 VFP_HELPER(fcvts, d)(float64 x, float_status *status)
{
- return float64_to_float32(x, &env->vfp.fp_status);
+ return float64_to_float32(x, status);
}
-uint32_t HELPER(bfcvt)(float32 x, void *status)
+uint32_t HELPER(bfcvt)(float32 x, float_status *status)
{
return float32_to_bfloat16(x, status);
}
-uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
+uint32_t HELPER(bfcvt_pair)(uint64_t pair, float_status *status)
{
bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
@@ -404,26 +406,25 @@ uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
*/
#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
- void *fpstp) \
-{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
+ float_status *fpst) \
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpst); }
#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \
uint32_t shift, \
- void *fpstp) \
+ float_status *fpst) \
{ \
ftype ret; \
- float_status *fpst = fpstp; \
FloatRoundMode oldmode = fpst->float_rounding_mode; \
fpst->float_rounding_mode = float_round_nearest_even; \
- ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \
+ ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpst); \
fpst->float_rounding_mode = oldmode; \
return ret; \
}
#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \
- void *fpst) \
+ float_status *fpst) \
{ \
if (unlikely(float##fsz##_is_any_nan(x))) { \
float_raise(float_flag_invalid, fpst); \
@@ -463,6 +464,10 @@ VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
+VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64,
+ float_round_to_zero, _round_to_zero)
+VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64,
+ float_round_to_zero, _round_to_zero)
#undef VFP_CONV_FIX
#undef VFP_CONV_FIX_FLOAT
@@ -472,10 +477,8 @@ VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
/* Set the current fp rounding mode and return the old one.
* The argument is a softfloat float_round_ value.
*/
-uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
+uint32_t HELPER(set_rmode)(uint32_t rmode, float_status *fp_status)
{
- float_status *fp_status = fpstp;
-
uint32_t prev_rmode = get_float_rounding_mode(fp_status);
set_float_rounding_mode(rmode, fp_status);
@@ -483,12 +486,12 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
}
/* Half precision conversions. */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, float_status *fpst,
+ uint32_t ahp_mode)
{
/* Squash FZ16 to 0 for the duration of conversion. In this case,
* it would affect flushing input denormals.
*/
- float_status *fpst = fpstp;
bool save = get_flush_inputs_to_zero(fpst);
set_flush_inputs_to_zero(false, fpst);
float32 r = float16_to_float32(a, !ahp_mode, fpst);
@@ -496,12 +499,12 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
return r;
}
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, float_status *fpst,
+ uint32_t ahp_mode)
{
/* Squash FZ16 to 0 for the duration of conversion. In this case,
* it would affect flushing output denormals.
*/
- float_status *fpst = fpstp;
bool save = get_flush_to_zero(fpst);
set_flush_to_zero(false, fpst);
float16 r = float32_to_float16(a, !ahp_mode, fpst);
@@ -509,12 +512,12 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
return r;
}
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, float_status *fpst,
+ uint32_t ahp_mode)
{
/* Squash FZ16 to 0 for the duration of conversion. In this case,
* it would affect flushing input denormals.
*/
- float_status *fpst = fpstp;
bool save = get_flush_inputs_to_zero(fpst);
set_flush_inputs_to_zero(false, fpst);
float64 r = float16_to_float64(a, !ahp_mode, fpst);
@@ -522,12 +525,12 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
return r;
}
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, float_status *fpst,
+ uint32_t ahp_mode)
{
/* Squash FZ16 to 0 for the duration of conversion. In this case,
* it would affect flushing output denormals.
*/
- float_status *fpst = fpstp;
bool save = get_flush_to_zero(fpst);
set_flush_to_zero(false, fpst);
float16 r = float64_to_float16(a, !ahp_mode, fpst);
@@ -570,6 +573,33 @@ static int recip_estimate(int input)
}
/*
+ * Increased precision version:
+ * input is a 13 bit fixed point number
+ * input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
+ * result range 4096 .. 8191 for a number from 1.0 to 2.0
+ */
+static int recip_estimate_incprec(int input)
+{
+ int a, b, r;
+ assert(2048 <= input && input < 4096);
+ a = (input * 2) + 1;
+ /*
+ * The pseudocode expresses this as an operation on infinite
+ * precision reals where it calculates 2^25 / a and then looks
+ * at the error between that and the rounded-down-to-integer
+ * value to see if it should instead round up. We instead
+ * follow the same approach as the pseudocode for the 8-bit
+ * precision version, and calculate (2 * (2^25 / a)) as an
+ * integer so we can do the "add one and halve" to round it.
+ * So the 1 << 26 here is correct.
+ */
+ b = (1 << 26) / a;
+ r = (b + 1) >> 1;
+ assert(4096 <= r && r < 8192);
+ return r;
+}
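As a worked example of the "add one and halve" rounding described in the comment above, this standalone sketch (not part of the patch) evaluates the estimate for input 2048, which represents 0.5:

    #include <assert.h>
    #include <stdio.h>

    /*
     * 2^25 / a rounded to nearest, computed as 2 * (2^25 / a) truncated
     * and then "add one and halve". a is odd, so the division is never
     * exact and the half-way case never arises.
     */
    int main(void)
    {
        int input = 2048;              /* represents 0.5 */
        int a = (input * 2) + 1;       /* 4097 */
        int b = (1 << 26) / a;         /* 16380 */
        int r = (b + 1) >> 1;          /* 8190, represents ~1.9995 */

        assert(4096 <= r && r < 8192);
        printf("%d\n", r);
        return 0;
    }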
+
+/*
* Common wrapper to call recip_estimate
*
* The parameters are exponent and 64 bit fraction (without implicit
@@ -578,7 +608,8 @@ static int recip_estimate(int input)
* callee.
*/
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
+ bool increasedprecision)
{
uint32_t scaled, estimate;
uint64_t result_frac;
@@ -594,12 +625,22 @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
}
}
- /* scaled = UInt('1':fraction<51:44>) */
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
- estimate = recip_estimate(scaled);
+ if (increasedprecision) {
+ /* scaled = UInt('1':fraction<51:41>) */
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
+ estimate = recip_estimate_incprec(scaled);
+ } else {
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ estimate = recip_estimate(scaled);
+ }
result_exp = exp_off - *exp;
- result_frac = deposit64(0, 44, 8, estimate);
+ if (increasedprecision) {
+ result_frac = deposit64(0, 40, 12, estimate);
+ } else {
+ result_frac = deposit64(0, 44, 8, estimate);
+ }
if (result_exp == 0) {
result_frac = deposit64(result_frac >> 1, 51, 1, 1);
} else if (result_exp == -1) {
@@ -628,9 +669,8 @@ static bool round_to_inf(float_status *fpst, bool sign_bit)
}
}
-uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
+uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
{
- float_status *fpst = fpstp;
float16 f16 = float16_squash_input_denormal(input, fpst);
uint32_t f16_val = float16_val(f16);
uint32_t f16_sign = float16_is_neg(f16);
@@ -669,7 +709,7 @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
}
f64_frac = call_recip_estimate(&f16_exp, 29,
- ((uint64_t) f16_frac) << (52 - 10));
+ ((uint64_t) f16_frac) << (52 - 10), false);
/* result = sign : result_exp<4:0> : fraction<51:42> */
f16_val = deposit32(0, 15, 1, f16_sign);
@@ -678,9 +718,12 @@ uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
return make_float16(f16_val);
}
-float32 HELPER(recpe_f32)(float32 input, void *fpstp)
+/*
+ * FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
+ * which is used when FPCR.AH == 1.
+ */
+static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
{
- float_status *fpst = fpstp;
float32 f32 = float32_squash_input_denormal(input, fpst);
uint32_t f32_val = float32_val(f32);
bool f32_sign = float32_is_neg(f32);
@@ -719,7 +762,7 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp)
}
f64_frac = call_recip_estimate(&f32_exp, 253,
- ((uint64_t) f32_frac) << (52 - 23));
+ ((uint64_t) f32_frac) << (52 - 23), rpres);
/* result = sign : result_exp<7:0> : fraction<51:29> */
f32_val = deposit32(0, 31, 1, f32_sign);
@@ -728,9 +771,18 @@ float32 HELPER(recpe_f32)(float32 input, void *fpstp)
return make_float32(f32_val);
}
-float64 HELPER(recpe_f64)(float64 input, void *fpstp)
+float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
+{
+ return do_recpe_f32(input, fpst, false);
+}
+
+float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
+{
+ return do_recpe_f32(input, fpst, true);
+}
+
+float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
{
- float_status *fpst = fpstp;
float64 f64 = float64_squash_input_denormal(input, fpst);
uint64_t f64_val = float64_val(f64);
bool f64_sign = float64_is_neg(f64);
@@ -768,7 +820,7 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
return float64_set_sign(float64_zero, float64_is_neg(f64));
}
- f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
/* result = sign : result_exp<10:0> : fraction<51:0>; */
f64_val = deposit64(0, 63, 1, f64_sign);
@@ -802,8 +854,36 @@ static int do_recip_sqrt_estimate(int a)
return estimate;
}
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
+static int do_recip_sqrt_estimate_incprec(int a)
+{
+ /*
+ * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
+ * in terms of an infinite-precision floating point calculation of a
+ * square root. We implement this using the same kind of pure integer
+ * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
+ */
+ int64_t b, estimate;
+
+ assert(1024 <= a && a < 4096);
+ if (a < 2048) {
+ a = a * 2 + 1;
+ } else {
+ a = (a >> 1) << 1;
+ a = (a + 1) * 2;
+ }
+ b = 8192;
+ while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
+ b += 1;
+ }
+ estimate = (b + 1) / 2;
+
+ assert(4096 <= estimate && estimate < 8192);
+    return estimate;
+}
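A worked example of the integer search (standalone sketch, not part of the patch): for a == 1024, the odd-exponent case representing 0.25, the loop stops at b == 16380 and the estimate is 8190, about 1.9995 in 12-bit fixed point, close to 1/sqrt(0.25) = 2.0:

    #include <assert.h>
    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t a = 1024;
        int64_t b = 8192;

        a = a * 2 + 1;                     /* a < 2048 path: 2049 */
        while (a * (b + 1) * (b + 1) < (1LL << 39)) {
            b += 1;                        /* largest b with a*(b+1)^2 >= 2^39 */
        }
        printf("%" PRId64 "\n", (b + 1) / 2);  /* prints 8190 */
        return 0;
    }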
+
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
+ bool increasedprecision)
{
int estimate;
uint32_t scaled;
@@ -816,22 +896,36 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
frac = extract64(frac, 0, 51) << 1;
}
- if (*exp & 1) {
- /* scaled = UInt('01':fraction<51:45>) */
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+ if (increasedprecision) {
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:42>) */
+ scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
+ } else {
+ /* scaled = UInt('1':fraction<51:41>) */
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
+ }
+ estimate = do_recip_sqrt_estimate_incprec(scaled);
} else {
- /* scaled = UInt('1':fraction<51:44>) */
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:45>) */
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+ } else {
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ }
+ estimate = do_recip_sqrt_estimate(scaled);
}
- estimate = do_recip_sqrt_estimate(scaled);
*exp = (exp_off - *exp) / 2;
- return extract64(estimate, 0, 8) << 44;
+ if (increasedprecision) {
+ return extract64(estimate, 0, 12) << 40;
+ } else {
+ return extract64(estimate, 0, 8) << 44;
+ }
}
-uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
+uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
{
- float_status *s = fpstp;
float16 f16 = float16_squash_input_denormal(input, s);
uint16_t val = float16_val(f16);
bool f16_sign = float16_is_neg(f16);
@@ -844,7 +938,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
if (float16_is_signaling_nan(f16, s)) {
float_raise(float_flag_invalid, s);
if (!s->default_nan_mode) {
- nan = float16_silence_nan(f16, fpstp);
+ nan = float16_silence_nan(f16, s);
}
}
if (s->default_nan_mode) {
@@ -866,7 +960,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
val = deposit32(0, 15, 1, f16_sign);
@@ -875,9 +969,12 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
return make_float16(val);
}
-float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
+/*
+ * FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
+ * which is used when FPCR.AH == 1.
+ */
+static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
{
- float_status *s = fpstp;
float32 f32 = float32_squash_input_denormal(input, s);
uint32_t val = float32_val(f32);
uint32_t f32_sign = float32_is_neg(f32);
@@ -890,7 +987,7 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
if (float32_is_signaling_nan(f32, s)) {
float_raise(float_flag_invalid, s);
if (!s->default_nan_mode) {
- nan = float32_silence_nan(f32, fpstp);
+ nan = float32_silence_nan(f32, s);
}
}
if (s->default_nan_mode) {
@@ -912,18 +1009,35 @@ float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
f64_frac = ((uint64_t) f32_frac) << 29;
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
+ /*
+ * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
+ * or for increased precision
+ * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
+ */
val = deposit32(0, 31, 1, f32_sign);
val = deposit32(val, 23, 8, f32_exp);
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+ if (rpres) {
+ val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
+ } else {
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+ }
return make_float32(val);
}
-float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
+float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
+{
+ return do_rsqrte_f32(input, s, false);
+}
+
+float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
+{
+ return do_rsqrte_f32(input, s, true);
+}
+
+float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
{
- float_status *s = fpstp;
float64 f64 = float64_squash_input_denormal(input, s);
uint64_t val = float64_val(f64);
bool f64_sign = float64_is_neg(f64);
@@ -935,7 +1049,7 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
if (float64_is_signaling_nan(f64, s)) {
float_raise(float_flag_invalid, s);
if (!s->default_nan_mode) {
- nan = float64_silence_nan(f64, fpstp);
+ nan = float64_silence_nan(f64, s);
}
}
if (s->default_nan_mode) {
@@ -952,7 +1066,7 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
return float64_zero;
}
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
val = deposit64(0, 61, 1, f64_sign);
@@ -990,41 +1104,40 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a)
/* VFPv4 fused multiply-accumulate */
dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
- dh_ctype_f16 c, void *fpstp)
+ dh_ctype_f16 c, float_status *fpst)
{
- float_status *fpst = fpstp;
return float16_muladd(a, b, c, 0, fpst);
}
-float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
+float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c,
+ float_status *fpst)
{
- float_status *fpst = fpstp;
return float32_muladd(a, b, c, 0, fpst);
}
-float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
+float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c,
+ float_status *fpst)
{
- float_status *fpst = fpstp;
return float64_muladd(a, b, c, 0, fpst);
}
/* ARMv8 round to integral */
-dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, float_status *fp_status)
{
return float16_round_to_int(x, fp_status);
}
-float32 HELPER(rints_exact)(float32 x, void *fp_status)
+float32 HELPER(rints_exact)(float32 x, float_status *fp_status)
{
return float32_round_to_int(x, fp_status);
}
-float64 HELPER(rintd_exact)(float64 x, void *fp_status)
+float64 HELPER(rintd_exact)(float64 x, float_status *fp_status)
{
return float64_round_to_int(x, fp_status);
}
-dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, float_status *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
float16 ret;
@@ -1040,7 +1153,7 @@ dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
return ret;
}
-float32 HELPER(rints)(float32 x, void *fp_status)
+float32 HELPER(rints)(float32 x, float_status *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
float32 ret;
@@ -1056,15 +1169,13 @@ float32 HELPER(rints)(float32 x, void *fp_status)
return ret;
}
-float64 HELPER(rintd)(float64 x, void *fp_status)
+float64 HELPER(rintd)(float64 x, float_status *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;
float64 ret;
ret = float64_round_to_int(x, fp_status);
- new_flags = get_float_exception_flags(fp_status);
-
/* Suppress any inexact exceptions the conversion produced */
if (!(old_flags & float_flag_inexact)) {
new_flags = get_float_exception_flags(fp_status);
@@ -1088,11 +1199,10 @@ const FloatRoundMode arm_rmode_to_sf_map[] = {
* Implement float64 to int32_t conversion without saturation;
* the result is supplied modulo 2^32.
*/
-uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
+uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
{
- float_status *status = vstatus;
- uint32_t inexact, frac;
- uint32_t e_old, e_new;
+ uint32_t frac, e_old, e_new;
+ bool inexact;
e_old = get_float_exception_flags(status);
set_float_exception_flags(0, status);
@@ -1100,13 +1210,13 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
e_new = get_float_exception_flags(status);
set_float_exception_flags(e_old | e_new, status);
- if (value == float64_chs(float64_zero)) {
- /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
- inexact = 1;
- } else {
- /* Normal inexact or overflow or NaN */
- inexact = e_new & (float_flag_inexact | float_flag_invalid);
- }
+ /* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
+ inexact = e_new & (float_flag_inexact |
+ float_flag_input_denormal_flushed |
+ float_flag_invalid);
+
+ /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
+ inexact |= value == float64_chs(float64_zero);
/* Pack the result and the env->ZF representation of Z together. */
return deposit64(frac, 32, 32, inexact);
@@ -1114,13 +1224,12 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
{
- uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
uint32_t result = pair;
uint32_t z = (pair >> 32) == 0;
/* Store Z, clear NCV, in FPSCR.NZCV. */
- env->vfp.xregs[ARM_VFP_FPSCR]
- = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
+ env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);
return result;
}
@@ -1163,12 +1272,12 @@ static float32 frint_s(float32 f, float_status *fpst, int intsize)
return (0x100u + 126u + intsize) << 23;
}
-float32 HELPER(frint32_s)(float32 f, void *fpst)
+float32 HELPER(frint32_s)(float32 f, float_status *fpst)
{
return frint_s(f, fpst, 32);
}
-float32 HELPER(frint64_s)(float32 f, void *fpst)
+float32 HELPER(frint64_s)(float32 f, float_status *fpst)
{
return frint_s(f, fpst, 64);
}
@@ -1211,12 +1320,12 @@ static float64 frint_d(float64 f, float_status *fpst, int intsize)
return (uint64_t)(0x800 + 1022 + intsize) << 52;
}
-float64 HELPER(frint32_d)(float64 f, void *fpst)
+float64 HELPER(frint32_d)(float64 f, float_status *fpst)
{
return frint_d(f, fpst, 32);
}
-float64 HELPER(frint64_d)(float64 f, void *fpst)
+float64 HELPER(frint64_d)(float64 f, float_status *fpst)
{
return frint_d(f, fpst, 64);
}
@@ -1250,4 +1359,12 @@ void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
}
-#endif
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+ return vfp_get_fpscr(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+ vfp_set_fpscr(env, val);
+}
diff --git a/target/arm/vfp_fpscr.c b/target/arm/vfp_fpscr.c
new file mode 100644
index 0000000..92ea60e
--- /dev/null
+++ b/target/arm/vfp_fpscr.c
@@ -0,0 +1,155 @@
+/*
+ * ARM VFP floating-point: handling of FPSCR/FPCR/FPSR
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+
+uint32_t vfp_get_fpcr(CPUARMState *env)
+{
+ uint32_t fpcr = env->vfp.fpcr
+ | (env->vfp.vec_len << 16)
+ | (env->vfp.vec_stride << 20);
+
+ /*
+ * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
+ * of the two is not applicable to this CPU will always be zero.
+ */
+ fpcr |= env->v7m.ltpsize << 16;
+
+ return fpcr;
+}
+
+uint32_t vfp_get_fpsr(CPUARMState *env)
+{
+ uint32_t fpsr = env->vfp.fpsr;
+ uint32_t i;
+
+ fpsr |= vfp_get_fpsr_from_host(env);
+
+ i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
+ fpsr |= i ? FPSR_QC : 0;
+ return fpsr;
+}
+
+uint32_t vfp_get_fpscr(CPUARMState *env)
+{
+ return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
+ (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
+}
+
+void vfp_set_fpsr(CPUARMState *env, uint32_t val)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ if (arm_feature(env, ARM_FEATURE_NEON) ||
+ cpu_isar_feature(aa32_mve, cpu)) {
+ /*
+ * The bit we set within vfp.qc[] is arbitrary; the array as a
+ * whole being zero/non-zero is what counts.
+ */
+ env->vfp.qc[0] = val & FPSR_QC;
+ env->vfp.qc[1] = 0;
+ env->vfp.qc[2] = 0;
+ env->vfp.qc[3] = 0;
+ }
+
+ /*
+ * NZCV lives only in env->vfp.fpsr. The cumulative exception flags
+ * IOC|DZC|OFC|UFC|IXC|IDC also live in env->vfp.fpsr, with possible
+ * extra pending exception information that hasn't yet been folded in
+ * living in the float_status values (for TCG).
+ * Since this FPSR write gives us the up to date values of the exception
+ * flags, we want to store into vfp.fpsr the NZCV and CEXC bits, zeroing
+ * anything else. We also need to clear out the float_status exception
+ * information so that the next vfp_get_fpsr does not fold in stale data.
+ */
+ val &= FPSR_NZCV_MASK | FPSR_CEXC_MASK;
+ env->vfp.fpsr = val;
+ vfp_clear_float_status_exc_flags(env);
+}
+
+static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ /*
+ * We only set FPCR bits defined by mask, and leave the others alone.
+ * We assume the mask is sensible (e.g. doesn't try to set only
+ * part of a field)
+ */
+ ARMCPU *cpu = env_archcpu(env);
+
+ /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
+ if (!cpu_isar_feature(any_fp16, cpu)) {
+ val &= ~FPCR_FZ16;
+ }
+ if (!cpu_isar_feature(aa64_afp, cpu)) {
+ val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
+ }
+
+ if (!cpu_isar_feature(aa64_ebf16, cpu)) {
+ val &= ~FPCR_EBF;
+ }
+
+ vfp_set_fpcr_to_host(env, val, mask);
+
+ if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * Short-vector length and stride; on M-profile these bits
+ * are used for different purposes.
+ * We can't make this conditional be "if MVFR0.FPShVec != 0",
+ * because in v7A no-short-vector-support cores still had to
+ * allow Stride/Len to be written with the only effect that
+ * some insns are required to UNDEF if the guest sets them.
+ */
+ env->vfp.vec_len = extract32(val, 16, 3);
+ env->vfp.vec_stride = extract32(val, 20, 2);
+ } else if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+ FPCR_LTPSIZE_LENGTH);
+ }
+ }
+
+ /*
+ * We don't implement trapped exception handling, so the
+ * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
+ *
+ * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
+ * FIZ, AH, and NEP.
+ * Len, Stride and LTPSIZE we just handled. Store those bits
+ * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
+ * bits.
+ */
+ val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
+ FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
+ env->vfp.fpcr &= ~mask;
+ env->vfp.fpcr |= val;
+}
+
+void vfp_set_fpcr(CPUARMState *env, uint32_t val)
+{
+ vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
+}
+
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
+{
+ vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
+ vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
+}