Diffstat (limited to 'target/arm/cpu.h')
 target/arm/cpu.h | 201
 1 file changed, 96 insertions(+), 105 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index fdcf8cd..dc9b6dc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -32,6 +32,7 @@
#include "qapi/qapi-types-common.h"
#include "target/arm/multiprocessing.h"
#include "target/arm/gtimer.h"
+#include "target/arm/cpu-sysregs.h"
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
@@ -206,6 +207,8 @@ typedef struct NVICState NVICState;
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
* and the reciprocal and square root estimate/step insns);
* for half-precision
+ * ZA: the "streaming sve" fp status.
+ * ZA_F16: likewise for half-precision.
*
* Half-precision operations are governed by a separate
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
@@ -226,6 +229,12 @@ typedef struct NVICState NVICState;
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
* which means we need an FPST_AH_F16 as well.
*
+ * The "ZA" float_status are for Streaming SVE operations which use
+ * default-NaN and do not generate fp exceptions, which means that they
+ * do not accumulate exception bits back into FPCR.
+ * See e.g. FPAdd vs FPAdd_ZA pseudocode functions, and the setting
+ * of fpcr.DN and fpexec parameters.
+ *
* To avoid having to transfer exception bits around, we simply
* say that the FPSCR cumulative exception flags are the logical
* OR of the flags in the four fp statuses. This relies on the
@@ -239,10 +248,12 @@ typedef enum ARMFPStatusFlavour {
FPST_A64_F16,
FPST_AH,
FPST_AH_F16,
+ FPST_ZA,
+ FPST_ZA_F16,
FPST_STD,
FPST_STD_F16,
} ARMFPStatusFlavour;
-#define FPST_COUNT 8
+#define FPST_COUNT 10
typedef struct CPUArchState {
/* Regs for current mode. */
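
As a rough sketch of what the new ZA statuses mean for the softfloat state
(this is not code from the patch): they behave as if FPCR.DN were set, and
the exception flags they accumulate are simply never folded back into FPSR.
An initialisation consistent with the comment above, using the existing
setters from include/fpu/softfloat.h and an assumed helper name, could look
like this:

    /*
     * Sketch only: plausible setup for the new FPST_ZA status.
     * The helper name is hypothetical; the setters are existing
     * softfloat API.
     */
    static void arm_init_za_fp_status(CPUARMState *env)
    {
        float_status *fpst = &env->fp_status[FPST_ZA];

        set_default_nan_mode(true, fpst);    /* as if FPCR.DN == 1 */
        set_float_exception_flags(0, fpst);  /* flags never read back into FPSR */
    }
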
@@ -668,9 +679,6 @@ typedef struct CPUArchState {
uint32_t xregs[16];
- /* Scratch space for aa32 neon expansion. */
- uint32_t scratch[8];
-
/* There are a number of distinct float control structures. */
float_status fp_status[FPST_COUNT];
@@ -707,27 +715,36 @@ typedef struct CPUArchState {
uint64_t scxtnum_el[4];
- /*
- * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
- * as we do with vfp.zregs[]. This corresponds to the architectural ZA
- * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
- * When SVL is less than the architectural maximum, the accessible
- * storage is restricted, such that if the SVL is X bytes the guest can
- * see only the bottom X elements of zarray[], and only the least
- * significant X bytes of each element of the array. (In other words,
- * the observable part is always square.)
- *
- * The ZA storage can also be considered as a set of square tiles of
- * elements of different sizes. The mapping from tiles to the ZA array
- * is architecturally defined, such that for tiles of elements of esz
- * bytes, the Nth row (or "horizontal slice") of tile T is in
- * ZA[T + N * esz]. Note that this means that each tile is not contiguous
- * in the ZA storage, because its rows are striped through the ZA array.
- *
- * Because this is so large, keep this toward the end of the reset area,
- * to keep the offsets into the rest of the structure smaller.
- */
- ARMVectorReg zarray[ARM_MAX_VQ * 16];
+ struct {
+ /* SME2 ZT0 -- 512 bit array, with data ordered like ARMVectorReg. */
+ uint64_t zt0[512 / 64] QEMU_ALIGNED(16);
+
+ /*
+ * SME ZA storage -- 256 x 256 byte array, with bytes in host
+ * word order, as we do with vfp.zregs[]. This corresponds to
+ * the architectural ZA array, where ZA[N] is in the least
+ * significant bytes of env->za_state.za[N].
+ *
+ * When SVL is less than the architectural maximum, the accessible
+ * storage is restricted, such that if the SVL is X bytes the guest
+ * can see only the bottom X elements of za[], and only the least
+ * significant X bytes of each element of the array. (In other words,
+ * the observable part is always square.)
+ *
+ * The ZA storage can also be considered as a set of square tiles of
+ * elements of different sizes. The mapping from tiles to the ZA array
+ * is architecturally defined, such that for tiles of elements of esz
+ * bytes, the Nth row (or "horizontal slice") of tile T is in
+ * ZA[T + N * esz]. Note that this means that each tile is not
+ * contiguous in the ZA storage, because its rows are striped through
+ * the ZA array.
+ *
+ * Because this is so large, keep this toward the end of the
+ * reset area, to keep the offsets into the rest of the structure
+ * smaller.
+ */
+ ARMVectorReg za[ARM_MAX_VQ * 16];
+ } za_state;
struct CPUBreakpoint *cpu_breakpoint[16];
struct CPUWatchpoint *cpu_watchpoint[16];
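
The striping rule described in the new comment is compact but easy to
misread; as a hypothetical illustration (not part of the patch), a helper
returning the row that holds horizontal slice N of tile T would be:

    /*
     * Illustration of the mapping above: for a tile T of esz-byte
     * elements, horizontal slice N is row ZA[T + N * esz].
     * Hypothetical helper, not added by this patch.
     */
    static inline ARMVectorReg *za_tile_row(CPUARMState *env,
                                            int tile, int row, int esz)
    {
        return &env->za_state.za[tile + row * esz];
    }
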
@@ -783,12 +800,9 @@ typedef struct CPUArchState {
#else /* CONFIG_USER_ONLY */
/* For usermode syscall translation. */
bool eabi;
-#endif /* CONFIG_USER_ONLY */
-
-#ifdef TARGET_TAGGED_ADDRESSES
/* Linux syscall tagged address support */
bool tagged_addr_enable;
-#endif
+#endif /* CONFIG_USER_ONLY */
} CPUARMState;
static inline void set_feature(CPUARMState *env, int feature)
@@ -837,6 +851,53 @@ typedef struct {
uint32_t map, init, supported;
} ARMVQMap;
+/* REG is ID_XXX, i.e. the ID register name without its _EL1 suffix */
+#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \
+ regval = FIELD_DP64(regval, REG, FIELD, VALUE); \
+ i_->idregs[REG ## _EL1_IDX] = regval; \
+ })
+
+#define FIELD_DP32_IDREG(ISAR, REG, FIELD, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \
+ regval = FIELD_DP32(regval, REG, FIELD, VALUE); \
+ i_->idregs[REG ## _EL1_IDX] = regval; \
+ })
+
+#define FIELD_EX64_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_EX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define FIELD_EX32_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_EX32(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define FIELD_SEX64_IDREG(ISAR, REG, FIELD) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ FIELD_SEX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \
+ })
+
+#define SET_IDREG(ISAR, REG, VALUE) \
+ ({ \
+ ARMISARegisters *i_ = (ISAR); \
+ i_->idregs[REG ## _EL1_IDX] = VALUE; \
+ })
+
+#define GET_IDREG(ISAR, REG) \
+ ({ \
+ const ARMISARegisters *i_ = (ISAR); \
+ i_->idregs[REG ## _EL1_IDX]; \
+ })
+
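
Caller-side, these accessors replace direct reads of the old named fields
such as cpu->isar.id_aa64isar2. A sketch of typical usage (the predicate
name is an assumption; the LUT field definition is added further down in
this patch):

    /* Sketch: an isar_feature-style predicate over the new macros. */
    static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
    {
        return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT) != 0;
    }

    /* Write side, e.g. during cpu init:
     *   SET_IDREG(&cpu->isar, ID_AA64ISAR2, 0);
     *   FIELD_DP64_IDREG(&cpu->isar, ID_AA64ISAR2, LUT, 1);
     */
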
/**
* ARMCPU:
* @env: #CPUARMState
@@ -1005,44 +1066,14 @@ struct ArchCPU {
* field by reading the value from the KVM vCPU.
*/
struct ARMISARegisters {
- uint32_t id_isar0;
- uint32_t id_isar1;
- uint32_t id_isar2;
- uint32_t id_isar3;
- uint32_t id_isar4;
- uint32_t id_isar5;
- uint32_t id_isar6;
- uint32_t id_mmfr0;
- uint32_t id_mmfr1;
- uint32_t id_mmfr2;
- uint32_t id_mmfr3;
- uint32_t id_mmfr4;
- uint32_t id_mmfr5;
- uint32_t id_pfr0;
- uint32_t id_pfr1;
- uint32_t id_pfr2;
uint32_t mvfr0;
uint32_t mvfr1;
uint32_t mvfr2;
- uint32_t id_dfr0;
- uint32_t id_dfr1;
uint32_t dbgdidr;
uint32_t dbgdevid;
uint32_t dbgdevid1;
- uint64_t id_aa64isar0;
- uint64_t id_aa64isar1;
- uint64_t id_aa64isar2;
- uint64_t id_aa64pfr0;
- uint64_t id_aa64pfr1;
- uint64_t id_aa64mmfr0;
- uint64_t id_aa64mmfr1;
- uint64_t id_aa64mmfr2;
- uint64_t id_aa64mmfr3;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
- uint64_t id_aa64zfr0;
- uint64_t id_aa64smfr0;
uint64_t reset_pmcr_el0;
+ uint64_t idregs[NUM_ID_IDX];
} isar;
uint64_t midr;
uint32_t revidr;
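
The idregs[] array replaces the individual fields deleted above; the
indices come from the new target/arm/cpu-sysregs.h included at the top of
this patch. That header is not shown here, but the REG ## _EL1_IDX token
pasting in the accessor macros implies an enumeration along these lines
(names and order are assumptions):

    /* Assumed shape of the index enum from "target/arm/cpu-sysregs.h". */
    typedef enum {
        ID_AA64ISAR0_EL1_IDX,
        ID_AA64ISAR1_EL1_IDX,
        ID_AA64ISAR2_EL1_IDX,
        /* ... one index per ID register folded into idregs[] ... */
        CLIDR_EL1_IDX,
        NUM_ID_IDX
    } ARMIDRegisterIdx;
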
@@ -1051,10 +1082,6 @@ struct ArchCPU {
uint32_t reset_sctlr;
uint64_t pmceid0;
uint64_t pmceid1;
- uint32_t id_afr0;
- uint64_t id_aa64afr0;
- uint64_t id_aa64afr1;
- uint64_t clidr;
uint64_t mp_affinity; /* MP ID without feature bits */
/* The elements of this array are the CCSIDR values for each cache,
* in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
@@ -1105,6 +1132,7 @@ struct ArchCPU {
/* Used to set the maximum vector length the cpu will support. */
uint32_t sve_max_vq;
+ uint32_t sme_max_vq;
#ifdef CONFIG_USER_ONLY
/* Used to set the default vector length at process start. */
@@ -1141,10 +1169,6 @@ struct ARMCPUClass {
ResettablePhases parent_phases;
};
-struct AArch64CPUClass {
- ARMCPUClass parent_class;
-};
-
/* Callback functions for the generic timer's timers. */
void arm_gt_ptimer_cb(void *opaque);
void arm_gt_vtimer_cb(void *opaque);
@@ -1157,8 +1181,6 @@ void arm_gt_sel2vtimer_cb(void *opaque);
unsigned int gt_cntfrq_period_ns(ARMCPU *cpu);
void gt_rme_post_el_change(ARMCPU *cpu, void *opaque);
-void arm_cpu_post_init(Object *obj);
-
#define ARM_AFF0_SHIFT 0
#define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT)
#define ARM_AFF1_SHIFT 8
@@ -1488,6 +1510,7 @@ FIELD(SVCR, ZA, 1, 1)
/* Fields for SMCR_ELx. */
FIELD(SMCR, LEN, 0, 4)
+FIELD(SMCR, EZT0, 30, 1)
FIELD(SMCR, FA64, 31, 1)
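
SMCR_ELx.EZT0 (bit 30) is the SME2 enable for ZT0 accesses at a given
exception level. A hedged sketch of a trap-check predicate (hypothetical
helper; vfp.smcr_el[] is the existing per-EL SMCR storage):

    /* Sketch: is ZT0 enabled by SMCR at this EL? Not part of the patch. */
    static inline bool sme2_zt0_enabled_at_el(CPUARMState *env, int el)
    {
        return FIELD_EX64(env->vfp.smcr_el[el], SMCR, EZT0);
    }
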
/* Write a new value to v7m.exception, thus transitioning into or out
@@ -2189,6 +2212,7 @@ FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
FIELD(ID_AA64ISAR2, CSSC, 52, 4)
+FIELD(ID_AA64ISAR2, LUT, 56, 4)
FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
FIELD(ID_AA64PFR0, EL0, 0, 4)
@@ -2920,7 +2944,7 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu)
/* If all the CLIDR.Ctypem bits are 0 there are no caches, and
* CSSELR is RAZ/WI.
*/
- return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0;
+ return (GET_IDREG(&cpu->isar, CLIDR) & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0;
}
static inline bool arm_sctlr_b(CPUARMState *env)
@@ -3058,6 +3082,7 @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
+FIELD(TBFLAG_A64, ZT0EXC_EL, 39, 2)
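
Like the existing SMEEXC_EL flag, ZT0EXC_EL presumably caches the
exception level to which ZT0 accesses trap, so the translator can pick it
up with the EX_TBFLAG_A64 helper described just below. A one-line sketch
(the DisasContext field name is an assumption):

    /* In aarch64_tr_init_disas_context(); field name assumed. */
    dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL);
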
/*
* Helpers for using the above. Note that only the A64 accessors use
@@ -3119,9 +3144,6 @@ static inline bool bswap_code(bool sctlr_b)
#endif
}
-void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
- uint64_t *cs_base, uint32_t *flags);
-
enum {
QEMU_PSCI_CONDUIT_DISABLED = 0,
QEMU_PSCI_CONDUIT_SMC = 1,
@@ -3219,35 +3241,4 @@ extern const uint64_t pred_esz_masks[5];
#define LOG2_TAG_GRANULE 4
#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
-#ifdef CONFIG_USER_ONLY
-
-#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1))
-
-#ifdef TARGET_TAGGED_ADDRESSES
-/**
- * cpu_untagged_addr:
- * @cs: CPU context
- * @x: tagged address
- *
- * Remove any address tag from @x. This is explicitly related to the
- * linux syscall TIF_TAGGED_ADDR setting, not TBI in general.
- *
- * There should be a better place to put this, but we need this in
- * include/exec/cpu_ldst.h, and not some place linux-user specific.
- */
-static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
-{
- CPUARMState *env = cpu_env(cs);
- if (env->tagged_addr_enable) {
- /*
- * TBI is enabled for userspace but not kernelspace addresses.
- * Only clear the tag if bit 55 is clear.
- */
- x &= sextract64(x, 0, 56);
- }
- return x;
-}
-#endif /* TARGET_TAGGED_ADDRESSES */
-#endif /* CONFIG_USER_ONLY */
-
#endif