author     Richard Henderson <richard.henderson@linaro.org>  2024-07-11 12:00:00 -0700
committer  Richard Henderson <richard.henderson@linaro.org>  2024-07-11 12:00:00 -0700
commit     23901b2b721c0576007ab7580da8aa855d6042a9 (patch)
tree       37798d6a801417a512e40ec4034f2e4c179db16a
parent     39a032cea23e522268519d89bb738974bc43b6f6 (diff)
parent     7f49089158a4db644fcbadfa90cd3d30a4868735 (diff)
Merge tag 'pull-target-arm-20240711' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * Refactor FPCR/FPSR handling in preparation for FEAT_AFP
 * More decodetree conversions
 * target/arm: Use cpu_env in cpu_untagged_addr
 * target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt()
 * hw/char/pl011: Avoid division-by-zero in pl011_get_baudrate()
 * hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops
 * accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory
 * STM32L4x5: Handle USART interrupts correctly

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmaP24MZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3luAEACF4Uhrcrh7E7RwoDEeQAMQ
# IG3+LwUbhnBXIUl7DL0qQTjnmwbbTQH2Ukoq3biqAdSs22JwrT6O6MDQ7fA3X8DI
# 3Ew+72BzAAtQHVHJaFRw2f9UVQop8Poa9I7Di6frH4Gxk5AKQY/IwjrD6jYPqhM7
# 9KCksksO3w9DRmpFZ1y5I/dGumTe12btEwdazWxrsyZIBNDoUJSU8xpcMk+9oErF
# 23hcsSaXOGDeWwPuEk1q2mMYnRQQtMhVndxV50sF98MfJ3nnMKEttuFuW0znXMCr
# Xat8Y4QbigXGmuJNgjXccIzN1Hje+h5zzfUIfVNWBYNzqULvvi/vjwNfJaUiIjm5
# DxeOGUu8iZYQbgvJXvn9NwWbptxvhyWsCLpB46icElcN0jr1MU12wk2IH0CZa7KU
# h4kbu0p17dph5Lantd888b1Vu3pOFr4UiRC3qJB9ddBVLyGl/3Km1wb99x038mPo
# Mt8Y7Vjnr5OWd+mTNzXFRnYFYIRKu1lI85VuTjd5Uua0lDtFDo/sVnVF9uas84OC
# /PrQYGso0UE320li+jYHzE18rKPEi2u/3xTgHWAgh3ra7McWVjWDr2yIsAisKKNH
# 2F72gyZNy2n7FJhTYPQAJnozi68maP5f9tHHHXQdfsCE4+2h0fr/wljCeq1+5waq
# 4edm31uEbArfW/jLgPHHAA==
# =Xkmk
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 11 Jul 2024 06:17:55 AM PDT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]

* tag 'pull-target-arm-20240711' of https://git.linaro.org/people/pmaydell/qemu-arm: (24 commits)
  target/arm: Convert PMULL to decodetree
  target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodetree
  target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree
  target/arm: Convert SQDMULL, SQDMLAL, SQDMLSL to decodetree
  target/arm: Convert SADDL, SSUBL, SABDL, SABAL, and unsigned to decodetree
  target/arm: Convert SMULL, UMULL, SMLAL, UMLAL, SMLSL, UMLSL to decodetree
  hw/arm: In STM32L4x5 SOC, connect USART devices to EXTI
  hw/misc: In STM32L4x5 EXTI, handle direct interrupts
  hw/misc: In STM32L4x5 EXTI, consolidate 2 constants
  accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory
  target: Set TCGCPUOps::cpu_exec_halt to target's has_work implementation
  target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt()
  target/arm: Use cpu_env in cpu_untagged_addr
  hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops
  hw/char/pl011: Avoid division-by-zero in pl011_get_baudrate()
  target/arm: Allow FPCR bits that aren't in FPSCR
  target/arm: Rename FPSR_MASK and FPCR_MASK and define them symbolically
  target/arm: Rename FPCR_ QC, NZCV macros to FPSR_
  target/arm: Store FPSR and FPCR in separate CPU state fields
  target/arm: Implement store_cpu_field_low32() macro
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--  accel/tcg/cpu-exec.c               |   11
-rw-r--r--  hw/arm/stm32l4x5_soc.c             |   24
-rw-r--r--  hw/char/pl011.c                    |   13
-rw-r--r--  hw/misc/bcm2835_thermal.c          |    2
-rw-r--r--  hw/misc/stm32l4x5_exti.c           |   13
-rw-r--r--  include/hw/core/tcg-cpu-ops.h      |    9
-rw-r--r--  include/hw/misc/stm32l4x5_exti.h   |    4
-rw-r--r--  target/alpha/cpu.c                 |    1
-rw-r--r--  target/arm/cpu.c                   |    2
-rw-r--r--  target/arm/cpu.h                   |  113
-rw-r--r--  target/arm/internals.h             |    3
-rw-r--r--  target/arm/machine.c               |  135
-rw-r--r--  target/arm/tcg/a64.decode          |   77
-rw-r--r--  target/arm/tcg/cpu-v7m.c           |    1
-rw-r--r--  target/arm/tcg/mve_helper.c        |   12
-rw-r--r--  target/arm/tcg/translate-a32.h     |    7
-rw-r--r--  target/arm/tcg/translate-a64.c     | 1155
-rw-r--r--  target/arm/tcg/translate-m-nocp.c  |   22
-rw-r--r--  target/arm/tcg/translate-vfp.c     |    4
-rw-r--r--  target/arm/tcg/translate.h         |    3
-rw-r--r--  target/arm/vfp_helper.c            |  181
-rw-r--r--  target/avr/cpu.c                   |    1
-rw-r--r--  target/cris/cpu.c                  |    2
-rw-r--r--  target/hppa/cpu.c                  |    1
-rw-r--r--  target/loongarch/cpu.c             |    1
-rw-r--r--  target/m68k/cpu.c                  |    1
-rw-r--r--  target/microblaze/cpu.c            |    1
-rw-r--r--  target/mips/cpu.c                  |    1
-rw-r--r--  target/openrisc/cpu.c              |    1
-rw-r--r--  target/ppc/cpu_init.c              |    2
-rw-r--r--  target/riscv/cpu.c                 |    2
-rw-r--r--  target/riscv/internals.h           |    3
-rw-r--r--  target/riscv/tcg/tcg-cpu.c         |    2
-rw-r--r--  target/rx/cpu.c                    |    1
-rw-r--r--  target/s390x/cpu.c                 |    1
-rw-r--r--  target/sh4/cpu.c                   |    1
-rw-r--r--  target/sparc/cpu.c                 |    1
-rw-r--r--  target/tricore/cpu.c               |    1
-rw-r--r--  target/xtensa/cpu.c                |    1
39 files changed, 890 insertions, 926 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 6711b58..245fd63 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -682,13 +682,8 @@ static inline bool cpu_handle_halt(CPUState *cpu)
#ifndef CONFIG_USER_ONLY
if (cpu->halted) {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
- bool leave_halt;
+ bool leave_halt = tcg_ops->cpu_exec_halt(cpu);
- if (tcg_ops->cpu_exec_halt) {
- leave_halt = tcg_ops->cpu_exec_halt(cpu);
- } else {
- leave_halt = cpu_has_work(cpu);
- }
if (!leave_halt) {
return true;
}
@@ -1082,6 +1077,10 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
static bool tcg_target_initialized;
if (!tcg_target_initialized) {
+ /* Check mandatory TCGCPUOps handlers */
+#ifndef CONFIG_USER_ONLY
+ assert(cpu->cc->tcg_ops->cpu_exec_halt);
+#endif /* !CONFIG_USER_ONLY */
cpu->cc->tcg_ops->initialize();
tcg_target_initialized = true;
}
diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
index 38f7a2d..fac83d3 100644
--- a/hw/arm/stm32l4x5_soc.c
+++ b/hw/arm/stm32l4x5_soc.c
@@ -81,6 +81,10 @@ static const int exti_irq[NUM_EXTI_IRQ] = {
#define RCC_BASE_ADDRESS 0x40021000
#define RCC_IRQ 5
+#define EXTI_USART1_IRQ 26
+#define EXTI_UART4_IRQ 29
+#define EXTI_LPUART1_IRQ 31
+
static const int exti_or_gates_out[NUM_EXTI_OR_GATES] = {
23, 40, 63, 1,
};
@@ -129,10 +133,6 @@ static const hwaddr uart_addr[] = {
#define LPUART_BASE_ADDRESS 0x40008000
-static const int usart_irq[] = { 37, 38, 39 };
-static const int uart_irq[] = { 52, 53 };
-#define LPUART_IRQ 70
-
static void stm32l4x5_soc_initfn(Object *obj)
{
Stm32l4x5SocState *s = STM32L4X5_SOC(obj);
@@ -297,6 +297,7 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
}
}
+ /* Connect SYSCFG to EXTI */
for (unsigned i = 0; i < GPIO_NUM_PINS; i++) {
qdev_connect_gpio_out(DEVICE(&s->syscfg), i,
qdev_get_gpio_in(DEVICE(&s->exti), i));
@@ -322,15 +323,10 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
return;
}
sysbus_mmio_map(busdev, 0, usart_addr[i]);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+ EXTI_USART1_IRQ + i));
}
- /*
- * TODO: Connect the USARTs, UARTs and LPUART to the EXTI once the EXTI
- * can handle other gpio-in than the gpios. (e.g. Direct Lines for the
- * usarts)
- */
-
/* UART devices */
for (int i = 0; i < STM_NUM_UARTS; i++) {
g_autofree char *name = g_strdup_printf("uart%d-out", STM_NUM_USARTS + i + 1);
@@ -343,7 +339,8 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
return;
}
sysbus_mmio_map(busdev, 0, uart_addr[i]);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, uart_irq[i]));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+ EXTI_UART4_IRQ + i));
}
/* LPUART device*/
@@ -356,7 +353,8 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
return;
}
sysbus_mmio_map(busdev, 0, LPUART_BASE_ADDRESS);
- sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, LPUART_IRQ));
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+ EXTI_LPUART1_IRQ));
/* APB1 BUS */
create_unimplemented_device("TIM2", 0x40000000, 0x400);
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 8753b84..f8078aa 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -87,6 +87,12 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
#define CR_DTR (1 << 10)
#define CR_LBE (1 << 7)
+/* Integer Baud Rate Divider, UARTIBRD */
+#define IBRD_MASK 0xffff
+
+/* Fractional Baud Rate Divider, UARTFBRD */
+#define FBRD_MASK 0x3f
+
static const unsigned char pl011_id_arm[8] =
{ 0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
static const unsigned char pl011_id_luminary[8] =
@@ -374,11 +380,11 @@ static void pl011_write(void *opaque, hwaddr offset,
s->ilpr = value;
break;
case 9: /* UARTIBRD */
- s->ibrd = value;
+ s->ibrd = value & IBRD_MASK;
pl011_trace_baudrate_change(s);
break;
case 10: /* UARTFBRD */
- s->fbrd = value;
+ s->fbrd = value & FBRD_MASK;
pl011_trace_baudrate_change(s);
break;
case 11: /* UARTLCR_H */
@@ -531,6 +537,9 @@ static int pl011_post_load(void *opaque, int version_id)
s->read_pos = 0;
}
+ s->ibrd &= IBRD_MASK;
+ s->fbrd &= FBRD_MASK;
+
return 0;
}
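
The masking matters because the PL011 baud-rate divisor is formed as (IBRD << 6) + FBRD: an unmasked 32-bit IBRD value with bit 26 set would wrap to zero when shifted, giving a zero divisor. A minimal sketch of the guarded computation, assuming the usual PL011State fields and clock helpers; this is illustrative and not the verbatim upstream pl011_get_baudrate():

/* Illustrative sketch, not the verbatim upstream helper. */
static unsigned int pl011_baudrate_sketch(const PL011State *s)
{
    /* With ibrd limited to 16 bits and fbrd to 6 bits, the divisor
     * cannot wrap; it is zero only when ibrd itself is zero. */
    uint64_t clk = clock_get_hz(s->clk);
    uint32_t divisor = (s->ibrd << 6) + s->fbrd;

    if (divisor == 0) {
        return 0;   /* no valid baud rate programmed */
    }
    return clk / divisor;
}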
diff --git a/hw/misc/bcm2835_thermal.c b/hw/misc/bcm2835_thermal.c
index ee7816b..0c49c08 100644
--- a/hw/misc/bcm2835_thermal.c
+++ b/hw/misc/bcm2835_thermal.c
@@ -80,8 +80,10 @@ static void bcm2835_thermal_write(void *opaque, hwaddr addr,
static const MemoryRegionOps bcm2835_thermal_ops = {
.read = bcm2835_thermal_read,
.write = bcm2835_thermal_write,
+ .impl.min_access_size = 4,
.impl.max_access_size = 4,
.valid.min_access_size = 4,
+ .valid.max_access_size = 4,
.endianness = DEVICE_NATIVE_ENDIAN,
};
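
With both .impl and .valid fully specified, the memory core rejects guest accesses other than 32-bit ones and never has to split or widen an access before calling the callbacks. A generic sketch of the fully constrained form, using the standard MemoryRegionOps fields (the device and callbacks here are hypothetical):

/* Sketch: a register interface that accepts and implements only
 * 32-bit accesses. */
static const MemoryRegionOps example_reg_ops = {
    .read = example_read,               /* hypothetical callbacks */
    .write = example_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {                          /* what the guest may issue */
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .impl = {                           /* what the callbacks handle */
        .min_access_size = 4,
        .max_access_size = 4,
    },
};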
diff --git a/hw/misc/stm32l4x5_exti.c b/hw/misc/stm32l4x5_exti.c
index 6a2ec62..e281841 100644
--- a/hw/misc/stm32l4x5_exti.c
+++ b/hw/misc/stm32l4x5_exti.c
@@ -42,7 +42,6 @@
#define EXTI_SWIER2 0x30
#define EXTI_PR2 0x34
-#define EXTI_NUM_GPIO_EVENT_IN_LINES 16
#define EXTI_MAX_IRQ_PER_BANK 32
#define EXTI_IRQS_BANK0 32
#define EXTI_IRQS_BANK1 8
@@ -114,6 +113,13 @@ static void stm32l4x5_exti_set_irq(void *opaque, int irq, int level)
return;
}
+ /* In case of a direct line interrupt */
+ if (extract32(exti_romask[bank], irq, 1)) {
+ qemu_set_irq(s->irq[oirq], level);
+ return;
+ }
+
+ /* In case of a configurable interrupt */
if ((level && extract32(s->rtsr[bank], irq, 1)) ||
(!level && extract32(s->ftsr[bank], irq, 1))) {
@@ -238,7 +244,7 @@ static void stm32l4x5_exti_init(Object *obj)
{
Stm32l4x5ExtiState *s = STM32L4X5_EXTI(obj);
- for (size_t i = 0; i < EXTI_NUM_INTERRUPT_OUT_LINES; i++) {
+ for (size_t i = 0; i < EXTI_NUM_LINES; i++) {
sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq[i]);
}
@@ -246,8 +252,7 @@ static void stm32l4x5_exti_init(Object *obj)
TYPE_STM32L4X5_EXTI, 0x400);
sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
- qdev_init_gpio_in(DEVICE(obj), stm32l4x5_exti_set_irq,
- EXTI_NUM_GPIO_EVENT_IN_LINES);
+ qdev_init_gpio_in(DEVICE(obj), stm32l4x5_exti_set_irq, EXTI_NUM_LINES);
}
static const VMStateDescription vmstate_stm32l4x5_exti = {
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 099de33..34318cf 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -122,10 +122,13 @@ struct TCGCPUOps {
* to do when the CPU is in the halted state.
*
* Return true to indicate that the CPU should now leave halt, false
- * if it should remain in the halted state.
+ * if it should remain in the halted state. (This should generally
+ * be the same value that cpu_has_work() would return.)
*
- * If this method is not provided, the default is to do nothing, and
- * to leave halt if cpu_has_work() returns true.
+ * This method must be provided. If the target does not need to
+ * do anything special for halt, the same function used for its
+ * CPUClass::has_work method can be used here, as they have the
+ * same function signature.
*/
bool (*cpu_exec_halt)(CPUState *cpu);
/**
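
Since the hook is now mandatory for system emulation, a target with no special halt behaviour can simply point it at the same function it already uses for CPUClass::has_work, as the doc comment suggests; the alpha hunk below is a real instance of the pattern. A sketch for a hypothetical target:

/* Sketch for a hypothetical target "foo": reuse the has_work
 * predicate as the halt hook; both share the signature
 * bool (*)(CPUState *). */
static bool foo_cpu_has_work(CPUState *cs)
{
    return cs->interrupt_request & CPU_INTERRUPT_HARD;
}

static const TCGCPUOps foo_tcg_ops = {
    /* ... other handlers ... */
#ifndef CONFIG_USER_ONLY
    .cpu_exec_halt = foo_cpu_has_work,  /* now required for sysemu */
#endif
};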
diff --git a/include/hw/misc/stm32l4x5_exti.h b/include/hw/misc/stm32l4x5_exti.h
index 55f763f..62f7936 100644
--- a/include/hw/misc/stm32l4x5_exti.h
+++ b/include/hw/misc/stm32l4x5_exti.h
@@ -30,7 +30,7 @@
#define TYPE_STM32L4X5_EXTI "stm32l4x5-exti"
OBJECT_DECLARE_SIMPLE_TYPE(Stm32l4x5ExtiState, STM32L4X5_EXTI)
-#define EXTI_NUM_INTERRUPT_OUT_LINES 40
+#define EXTI_NUM_LINES 40
#define EXTI_NUM_REGISTER 2
struct Stm32l4x5ExtiState {
@@ -47,7 +47,7 @@ struct Stm32l4x5ExtiState {
/* used for edge detection */
uint32_t irq_levels[EXTI_NUM_REGISTER];
- qemu_irq irq[EXTI_NUM_INTERRUPT_OUT_LINES];
+ qemu_irq irq[EXTI_NUM_LINES];
};
#endif
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 0e2fbcb..9db1dff 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -219,6 +219,7 @@ static const TCGCPUOps alpha_tcg_ops = {
#else
.tlb_fill = alpha_cpu_tlb_fill,
.cpu_exec_interrupt = alpha_cpu_exec_interrupt,
+ .cpu_exec_halt = alpha_cpu_has_work,
.do_interrupt = alpha_cpu_do_interrupt,
.do_transaction_failed = alpha_cpu_do_transaction_failed,
.do_unaligned_access = alpha_cpu_do_unaligned_access,
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 14d4eca..19191c2 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1133,7 +1133,7 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs)
}
#ifdef CONFIG_TCG
-static bool arm_cpu_exec_halt(CPUState *cs)
+bool arm_cpu_exec_halt(CPUState *cs)
{
bool leave_halt = cpu_has_work(cs);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d8eb986..a12859f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -619,6 +619,13 @@ typedef struct CPUArchState {
int vec_len;
int vec_stride;
+ /*
+ * Floating point status and control registers. Some bits are
+ * stored separately in other fields or in the float_status below.
+ */
+ uint64_t fpsr;
+ uint64_t fpcr;
+
uint32_t xregs[16];
/* Scratch space for aa32 neon expansion. */
@@ -1680,61 +1687,99 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
uint32_t vfp_get_fpscr(CPUARMState *env);
void vfp_set_fpscr(CPUARMState *env, uint32_t val);
-/* FPCR, Floating Point Control Register
- * FPSR, Floating Poiht Status Register
+/*
+ * FPCR, Floating Point Control Register
+ * FPSR, Floating Point Status Register
*
- * For A64 the FPSCR is split into two logically distinct registers,
- * FPCR and FPSR. However since they still use non-overlapping bits
- * we store the underlying state in fpscr and just mask on read/write.
+ * For A64 floating point control and status bits are stored in
+ * two logically distinct registers, FPCR and FPSR. We store these
+ * in QEMU in vfp.fpcr and vfp.fpsr.
+ * For A32 there was only one register, FPSCR. The bits are arranged
+ * such that FPSCR bits map to FPCR or FPSR bits in the same bit positions,
+ * so we can use appropriate masking to handle FPSCR reads and writes.
+ * Note that the FPCR has some bits which are not visible in the
+ * AArch32 view (for FEAT_AFP). Writing the FPSCR leaves these unchanged.
*/
-#define FPSR_MASK 0xf800009f
-#define FPCR_MASK 0x07ff9f00
+/* FPCR bits */
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
#define FPCR_UFE (1 << 11) /* Underflow exception trap enable */
#define FPCR_IXE (1 << 12) /* Inexact exception trap enable */
#define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */
+#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
+#define FPCR_STRIDE_MASK (3 << 20) /* Stride */
#define FPCR_RMODE_MASK (3 << 22) /* Rounding mode */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
#define FPCR_AHP (1 << 26) /* Alternative half-precision */
-#define FPCR_QC (1 << 27) /* Cumulative saturation bit */
-#define FPCR_V (1 << 28) /* FP overflow flag */
-#define FPCR_C (1 << 29) /* FP carry flag */
-#define FPCR_Z (1 << 30) /* FP zero flag */
-#define FPCR_N (1 << 31) /* FP negative flag */
#define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */
#define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT)
#define FPCR_LTPSIZE_LENGTH 3
-#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V)
-#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC)
+/* Cumulative exception trap enable bits */
+#define FPCR_EEXC_MASK (FPCR_IOE | FPCR_DZE | FPCR_OFE | FPCR_UFE | FPCR_IXE | FPCR_IDE)
+
+/* FPSR bits */
+#define FPSR_IOC (1 << 0) /* Invalid Operation cumulative exception */
+#define FPSR_DZC (1 << 1) /* Divide by Zero cumulative exception */
+#define FPSR_OFC (1 << 2) /* Overflow cumulative exception */
+#define FPSR_UFC (1 << 3) /* Underflow cumulative exception */
+#define FPSR_IXC (1 << 4) /* Inexact cumulative exception */
+#define FPSR_IDC (1 << 7) /* Input Denormal cumulative exception */
+#define FPSR_QC (1 << 27) /* Cumulative saturation bit */
+#define FPSR_V (1 << 28) /* FP overflow flag */
+#define FPSR_C (1 << 29) /* FP carry flag */
+#define FPSR_Z (1 << 30) /* FP zero flag */
+#define FPSR_N (1 << 31) /* FP negative flag */
+
+/* Cumulative exception status bits */
+#define FPSR_CEXC_MASK (FPSR_IOC | FPSR_DZC | FPSR_OFC | FPSR_UFC | FPSR_IXC | FPSR_IDC)
+
+#define FPSR_NZCV_MASK (FPSR_N | FPSR_Z | FPSR_C | FPSR_V)
+#define FPSR_NZCVQC_MASK (FPSR_NZCV_MASK | FPSR_QC)
+
+/* A32 FPSCR bits which architecturally map to FPSR bits */
+#define FPSCR_FPSR_MASK (FPSR_NZCVQC_MASK | FPSR_CEXC_MASK)
+/* A32 FPSCR bits which architecturally map to FPCR bits */
+#define FPSCR_FPCR_MASK (FPCR_EEXC_MASK | FPCR_LEN_MASK | FPCR_FZ16 | \
+ FPCR_STRIDE_MASK | FPCR_RMODE_MASK | \
+ FPCR_FZ | FPCR_DN | FPCR_AHP)
+/* These masks don't overlap: each bit lives in only one place */
+QEMU_BUILD_BUG_ON(FPSCR_FPSR_MASK & FPSCR_FPCR_MASK);
-static inline uint32_t vfp_get_fpsr(CPUARMState *env)
-{
- return vfp_get_fpscr(env) & FPSR_MASK;
-}
+/**
+ * vfp_get_fpsr: read the AArch64 FPSR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPSR value
+ */
+uint32_t vfp_get_fpsr(CPUARMState *env);
-static inline void vfp_set_fpsr(CPUARMState *env, uint32_t val)
-{
- uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPSR_MASK) | (val & FPSR_MASK);
- vfp_set_fpscr(env, new_fpscr);
-}
+/**
+ * vfp_get_fpcr: read the AArch64 FPCR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPCR value
+ */
+uint32_t vfp_get_fpcr(CPUARMState *env);
-static inline uint32_t vfp_get_fpcr(CPUARMState *env)
-{
- return vfp_get_fpscr(env) & FPCR_MASK;
-}
+/**
+ * vfp_set_fpsr: write the AArch64 FPSR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpsr(CPUARMState *env, uint32_t value);
-static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val)
-{
- uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPCR_MASK) | (val & FPCR_MASK);
- vfp_set_fpscr(env, new_fpscr);
-}
+/**
+ * vfp_set_fpcr: write the AArch64 FPCR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpcr(CPUARMState *env, uint32_t value);
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
@@ -3309,8 +3354,8 @@ extern const uint64_t pred_esz_masks[5];
*/
static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
{
- ARMCPU *cpu = ARM_CPU(cs);
- if (cpu->env.tagged_addr_enable) {
+ CPUARMState *env = cpu_env(cs);
+ if (env->tagged_addr_enable) {
/*
* TBI is enabled for userspace but not kernelspace addresses.
* Only clear the tag if bit 55 is clear.
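
With FPSR and FPCR held separately, the legacy AArch32 FPSCR view can be composed from the two fields using FPSCR_FPSR_MASK and FPSCR_FPCR_MASK. The real accessors are in vfp_helper.c (changed by this series but not included in this excerpt); the sketch below only illustrates how the masks are meant to combine and should not be read as the upstream implementation:

/* Illustrative only: build and split the A32 FPSCR view. */
static uint32_t fpscr_read_sketch(CPUARMState *env)
{
    return (vfp_get_fpsr(env) & FPSCR_FPSR_MASK) |
           (vfp_get_fpcr(env) & FPSCR_FPCR_MASK);
}

static void fpscr_write_sketch(CPUARMState *env, uint32_t val)
{
    /* FPCR bits with no FPSCR counterpart (e.g. FEAT_AFP controls)
     * are preserved across an FPSCR write. */
    uint32_t fpcr = vfp_get_fpcr(env) & ~FPSCR_FPCR_MASK;

    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
    vfp_set_fpcr(env, fpcr | (val & FPSCR_FPCR_MASK));
}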
diff --git a/target/arm/internals.h b/target/arm/internals.h
index e1aa1a6..da22d04 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -368,6 +368,9 @@ void arm_restore_state_to_opc(CPUState *cs,
#ifdef CONFIG_TCG
void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
+
+/* Our implementation of TCGCPUOps::cpu_exec_halt */
+bool arm_cpu_exec_halt(CPUState *cs);
#endif /* CONFIG_TCG */
typedef enum ARMFPRounding {
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 0a722ca..a3c1e05 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -18,6 +18,35 @@ static bool vfp_needed(void *opaque)
: cpu_isar_feature(aa32_vfp_simd, cpu));
}
+static bool vfp_fpcr_fpsr_needed(void *opaque)
+{
+ /*
+ * If either the FPCR or the FPSR include set bits that are not
+ * visible in the AArch32 FPSCR view of floating point control/status
+ * then we must send the FPCR and FPSR as two separate fields in the
+ * cpu/vfp/fpcr_fpsr subsection, and we will send a 0 for the old
+ * FPSCR field in cpu/vfp.
+ *
+ * If all the set bits are representable in an AArch32 FPSCR then we
+ * send that value as the cpu/vfp FPSCR field, and don't send the
+ * cpu/vfp/fpcr_fpsr subsection.
+ *
+ * On incoming migration, if the cpu/vfp FPSCR field is non-zero we
+ * use it, and if the fpcr_fpsr subsection is present we use that.
+ * (The subsection will never be present with a non-zero FPSCR field,
+ * and if FPSCR is zero and the subsection is not present that means
+ * that FPSCR/FPSR/FPCR are zero.)
+ *
+ * This preserves migration compatibility with older QEMU versions,
+ * in both directions.
+ */
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ return (vfp_get_fpcr(env) & ~FPSCR_FPCR_MASK) ||
+ (vfp_get_fpsr(env) & ~FPSCR_FPSR_MASK);
+}
+
static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
const VMStateField *field)
{
@@ -25,7 +54,10 @@ static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
CPUARMState *env = &cpu->env;
uint32_t val = qemu_get_be32(f);
- vfp_set_fpscr(env, val);
+ if (val) {
+ /* 0 means we might have the data in the fpcr_fpsr subsection */
+ vfp_set_fpscr(env, val);
+ }
return 0;
}
@@ -34,8 +66,9 @@ static int put_fpscr(QEMUFile *f, void *opaque, size_t size,
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
+ uint32_t fpscr = vfp_fpcr_fpsr_needed(opaque) ? 0 : vfp_get_fpscr(env);
- qemu_put_be32(f, vfp_get_fpscr(env));
+ qemu_put_be32(f, fpscr);
return 0;
}
@@ -45,6 +78,86 @@ static const VMStateInfo vmstate_fpscr = {
.put = put_fpscr,
};
+static int get_fpcr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+ uint64_t val = qemu_get_be64(f);
+
+ vfp_set_fpcr(env, val);
+ return 0;
+}
+
+static int put_fpcr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ qemu_put_be64(f, vfp_get_fpcr(env));
+ return 0;
+}
+
+static const VMStateInfo vmstate_fpcr = {
+ .name = "fpcr",
+ .get = get_fpcr,
+ .put = put_fpcr,
+};
+
+static int get_fpsr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+ uint64_t val = qemu_get_be64(f);
+
+ vfp_set_fpsr(env, val);
+ return 0;
+}
+
+static int put_fpsr(QEMUFile *f, void *opaque, size_t size,
+ const VMStateField *field, JSONWriter *vmdesc)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ qemu_put_be64(f, vfp_get_fpsr(env));
+ return 0;
+}
+
+static const VMStateInfo vmstate_fpsr = {
+ .name = "fpsr",
+ .get = get_fpsr,
+ .put = put_fpsr,
+};
+
+static const VMStateDescription vmstate_vfp_fpcr_fpsr = {
+ .name = "cpu/vfp/fpcr_fpsr",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vfp_fpcr_fpsr_needed,
+ .fields = (const VMStateField[]) {
+ {
+ .name = "fpcr",
+ .version_id = 0,
+ .size = sizeof(uint64_t),
+ .info = &vmstate_fpcr,
+ .flags = VMS_SINGLE,
+ .offset = 0,
+ },
+ {
+ .name = "fpsr",
+ .version_id = 0,
+ .size = sizeof(uint64_t),
+ .info = &vmstate_fpsr,
+ .flags = VMS_SINGLE,
+ .offset = 0,
+ },
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_vfp = {
.name = "cpu/vfp",
.version_id = 3,
@@ -100,6 +213,10 @@ static const VMStateDescription vmstate_vfp = {
.offset = 0,
},
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_vfp_fpcr_fpsr,
+ NULL
}
};
@@ -785,6 +902,20 @@ static int cpu_pre_load(void *opaque)
CPUARMState *env = &cpu->env;
/*
+ * In an inbound migration where on the source FPSCR/FPSR/FPCR are 0,
+ * there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr()
+ * and vfp_set_fpsr() from get_fpcr() and get_fpsr(); also the get_fpscr()
+ * function will not call vfp_set_fpscr() because it will see a 0 in the
+ * inbound data. Ensure that in this case we have a correctly set up
+ * zero FPSCR/FPCR/FPSR.
+ *
+ * This is not strictly needed because FPSCR is zero out of reset, but
+ * it avoids the possibility of future confusing migration bugs if some
+ * future architecture change makes the reset value non-zero.
+ */
+ vfp_set_fpscr(env, 0);
+
+ /*
* Pre-initialize irq_line_state to a value that's never valid as
* real data, so cpu_post_load() can tell whether we've seen the
* irq-line-state subsection in the incoming migration state.
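
The subsection mechanism above is QEMU's usual way of keeping migration streams compatible: the new cpu/vfp/fpcr_fpsr data is sent only when .needed reports that the state cannot be represented in the old 32-bit FPSCR field. A small worked example of that decision, with invented values; FPCR.AH (bit 1, a FEAT_AFP control) is used here as an example of a bit outside FPSCR_FPCR_MASK and is an assumption, not part of this diff:

/* Worked example of the vfp_fpcr_fpsr_needed() decision (sketch). */
static void fpcr_fpsr_subsection_example(void)
{
    /* Case 1: all set bits fit the AArch32 FPSCR view, so only the
     * legacy FPSCR field is migrated and the subsection is omitted. */
    uint64_t fpcr = FPCR_DZE;                 /* 1 << 9  */
    uint64_t fpsr = FPSR_QC;                  /* 1 << 27 */
    bool needed = (fpcr & ~(uint64_t)FPSCR_FPCR_MASK) ||
                  (fpsr & ~(uint64_t)FPSCR_FPSR_MASK);
    assert(!needed);

    /* Case 2: a bit with no FPSCR counterpart (assumed: FPCR.AH,
     * bit 1, from FEAT_AFP) forces the subsection; put_fpscr() then
     * writes 0 into the legacy field. */
    fpcr |= 1ull << 1;
    needed = (fpcr & ~(uint64_t)FPSCR_FPCR_MASK) ||
             (fpsr & ~(uint64_t)FPSCR_FPSR_MASK);
    assert(needed);
}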
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 223eac3..2922de7 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -785,6 +785,14 @@ SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
SQRDMLAH_s 0111 1110 ..0 ..... 10000 1 ..... ..... @rrr_e
SQRDMLSH_s 0111 1110 ..0 ..... 10001 1 ..... ..... @rrr_e
+# Decode scalar x scalar as scalar x indexed, with index 0.
+SQDMULL_si 0101 1110 011 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMULL_si 0101 1110 101 rm:5 11010 0 rn:5 rd:5 &rrx_e idx=0 esz=2
+SQDMLAL_si 0101 1110 011 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMLAL_si 0101 1110 101 rm:5 10010 0 rn:5 rd:5 &rrx_e idx=0 esz=2
+SQDMLSL_si 0101 1110 011 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=1
+SQDMLSL_si 0101 1110 101 rm:5 10110 0 rn:5 rd:5 &rrx_e idx=0 esz=2
+
### Advanced SIMD scalar pairwise
FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h
@@ -962,6 +970,42 @@ FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e
FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
+SMULL_v 0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+UMULL_v 0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+SMLAL_v 0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+UMLAL_v 0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+SMLSL_v 0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+UMLSL_v 0.10 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+
+SADDL_v 0.00 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+UADDL_v 0.10 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+SSUBL_v 0.00 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+USUBL_v 0.10 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+SABAL_v 0.00 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+UABAL_v 0.10 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+SABDL_v 0.00 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+UABDL_v 0.10 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+
+SQDMULL_v 0.00 1110 011 ..... 11010 0 ..... ..... @qrrr_h
+SQDMULL_v 0.00 1110 101 ..... 11010 0 ..... ..... @qrrr_s
+SQDMLAL_v 0.00 1110 011 ..... 10010 0 ..... ..... @qrrr_h
+SQDMLAL_v 0.00 1110 101 ..... 10010 0 ..... ..... @qrrr_s
+SQDMLSL_v 0.00 1110 011 ..... 10110 0 ..... ..... @qrrr_h
+SQDMLSL_v 0.00 1110 101 ..... 10110 0 ..... ..... @qrrr_s
+
+SADDW 0.00 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+UADDW 0.10 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+SSUBW 0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+USUBW 0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+
+ADDHN 0.00 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+RADDHN 0.10 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+SUBHN 0.00 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+RSUBHN 0.10 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+
+PMULL_p8 0.00 1110 001 ..... 11100 0 ..... ..... @qrrr_b
+PMULL_p64 0.00 1110 111 ..... 11100 0 ..... ..... @qrrr_b
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
@@ -992,6 +1036,15 @@ SQRDMLAH_si 0111 1111 10 .. .... 1101 . 0 ..... ..... @rrx_s
SQRDMLSH_si 0111 1111 01 .. .... 1111 . 0 ..... ..... @rrx_h
SQRDMLSH_si 0111 1111 10 .. .... 1111 . 0 ..... ..... @rrx_s
+SQDMULL_si 0101 1111 01 .. .... 1011 . 0 ..... ..... @rrx_h
+SQDMULL_si 0101 1111 10 . ..... 1011 . 0 ..... ..... @rrx_s
+
+SQDMLAL_si 0101 1111 01 .. .... 0011 . 0 ..... ..... @rrx_h
+SQDMLAL_si 0101 1111 10 . ..... 0011 . 0 ..... ..... @rrx_s
+
+SQDMLSL_si 0101 1111 01 .. .... 0111 . 0 ..... ..... @rrx_h
+SQDMLSL_si 0101 1111 10 . ..... 0111 . 0 ..... ..... @rrx_s
+
### Advanced SIMD vector x indexed element
FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
@@ -1047,6 +1100,30 @@ FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0
FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1
FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1
+SMULL_vi 0.00 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
+SMULL_vi 0.00 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
+UMULL_vi 0.10 1111 01 .. .... 1010 . 0 ..... ..... @qrrx_h
+UMULL_vi 0.10 1111 10 . ..... 1010 . 0 ..... ..... @qrrx_s
+
+SMLAL_vi 0.00 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
+SMLAL_vi 0.00 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
+UMLAL_vi 0.10 1111 01 .. .... 0010 . 0 ..... ..... @qrrx_h
+UMLAL_vi 0.10 1111 10 . ..... 0010 . 0 ..... ..... @qrrx_s
+
+SMLSL_vi 0.00 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
+SMLSL_vi 0.00 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
+UMLSL_vi 0.10 1111 01 .. .... 0110 . 0 ..... ..... @qrrx_h
+UMLSL_vi 0.10 1111 10 . ..... 0110 . 0 ..... ..... @qrrx_s
+
+SQDMULL_vi 0.00 1111 01 .. .... 1011 . 0 ..... ..... @qrrx_h
+SQDMULL_vi 0.00 1111 10 . ..... 1011 . 0 ..... ..... @qrrx_s
+
+SQDMLAL_vi 0.00 1111 01 .. .... 0011 . 0 ..... ..... @qrrx_h
+SQDMLAL_vi 0.00 1111 10 . ..... 0011 . 0 ..... ..... @qrrx_s
+
+SQDMLSL_vi 0.00 1111 01 .. .... 0111 . 0 ..... ..... @qrrx_h
+SQDMLSL_vi 0.00 1111 10 . ..... 0111 . 0 ..... ..... @qrrx_s
+
# Floating-point conditional select
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
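
Each pattern line above is processed by scripts/decodetree.py into an argument-struct fill plus a call to a trans_<NAME>() function that the translator must provide (the TRANS() definitions in translate-a64.c below are those providers). Roughly, and glossing over how the generated decoder really switches on the fixed opcode bits, the effect for one pattern is:

/* Rough sketch of the decodetree output for
 *   SMULL_v  0.00 1110 ..1 ..... 11000 0 ..... .....  @qrrr_e
 * Struct and field names follow decodetree conventions; the real
 * generated code is mechanical and more involved than this. */
typedef struct {
    int rd, rn, rm, q, esz;
} arg_qrrr_e;

static bool decode_SMULL_v_sketch(DisasContext *s, uint32_t insn)
{
    arg_qrrr_e a = {
        .q   = extract32(insn, 30, 1),
        .esz = extract32(insn, 22, 2),
        .rm  = extract32(insn, 16, 5),
        .rn  = extract32(insn, 5, 5),
        .rd  = extract32(insn, 0, 5),
    };
    /* trans_SMULL_v() is supplied by the TRANS() macro below. */
    return trans_SMULL_v(s, &a);
}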
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index c059c68..5496f14 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -244,6 +244,7 @@ static const TCGCPUOps arm_v7m_tcg_ops = {
#else
.tlb_fill = arm_cpu_tlb_fill,
.cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
+ .cpu_exec_halt = arm_cpu_exec_halt,
.do_interrupt = arm_v7m_cpu_do_interrupt,
.do_transaction_failed = arm_cpu_do_transaction_failed,
.do_unaligned_access = arm_cpu_do_unaligned_access,
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 8b99736..03ebef5 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -1115,21 +1115,21 @@ static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m,
if (update_flags) {
/* Store C, clear NZV. */
- env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
- env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
+ env->vfp.fpsr &= ~FPSR_NZCV_MASK;
+ env->vfp.fpsr |= carry_in * FPSR_C;
}
mve_advance_vpt(env);
}
void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
- bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ bool carry_in = env->vfp.fpsr & FPSR_C;
do_vadc(env, vd, vn, vm, 0, carry_in, false);
}
void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
- bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ bool carry_in = env->vfp.fpsr & FPSR_C;
do_vadc(env, vd, vn, vm, -1, carry_in, false);
}
@@ -3343,7 +3343,7 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
uint32_t *m = vm;
uint16_t r;
uint16_t mask = mve_element_mask(env);
- bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ bool ieee = !(env->vfp.fpcr & FPCR_AHP);
unsigned e;
float_status *fpst;
float_status scratch_fpst;
@@ -3373,7 +3373,7 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
uint16_t *m = vm;
uint32_t r;
uint16_t mask = mve_element_mask(env);
- bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ bool ieee = !(env->vfp.fpcr & FPCR_AHP);
unsigned e;
float_status *fpst;
float_status scratch_fpst;
diff --git a/target/arm/tcg/translate-a32.h b/target/arm/tcg/translate-a32.h
index 19de6e0..0b1fa57 100644
--- a/target/arm/tcg/translate-a32.h
+++ b/target/arm/tcg/translate-a32.h
@@ -83,6 +83,13 @@ void store_cpu_offset(TCGv_i32 var, int offset, int size);
sizeof_field(CPUARMState, name)); \
})
+/* Store to the low half of a 64-bit field from a TCGv_i32 */
+#define store_cpu_field_low32(val, name) \
+ ({ \
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8); \
+ store_cpu_offset(val, offsetoflow32(CPUARMState, name), 4); \
+ })
+
#define store_cpu_field_constant(val, name) \
store_cpu_field(tcg_constant_i32(val), name)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 6c07aea..559a6cd 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5665,6 +5665,357 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
}
/*
+ * Widening vector x vector/indexed.
+ *
+ * These read from the top or bottom half of a 128-bit vector.
+ * After widening, optionally accumulate with a 128-bit vector.
+ * Implement these inline, as the number of elements are limited
+ * and the related SVE and SME operations on larger vectors use
+ * even/odd elements instead of top/bottom half.
+ *
+ * If idx >= 0, operand 2 is indexed, otherwise vector.
+ * If acc, operand 0 is loaded with rd.
+ */
+
+/* For low half, iterating up. */
+static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
+ int rd, int rn, int rm, int idx,
+ NeonGenTwo64OpFn *fn, bool acc)
+{
+ TCGv_i64 tcg_op0 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ MemOp esz = memop & MO_SIZE;
+ int half = 8 >> esz;
+ int top_swap, top_half;
+
+ /* There are no 64x64->128 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ if (idx >= 0) {
+ read_vec_element(s, tcg_op2, rm, idx, memop);
+ }
+
+ /*
+ * For top half inputs, iterate forward; backward for bottom half.
+ * This means the store to the destination will not occur until
+ * overlapping input inputs are consumed.
+ * Use top_swap to conditionally invert the forward iteration index.
+ */
+ top_swap = top ? 0 : half - 1;
+ top_half = top ? half : 0;
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
+ if (idx < 0) {
+ read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
+ }
+ if (acc) {
+ read_vec_element(s, tcg_op0, rd, elt, memop + 1);
+ }
+ fn(tcg_op0, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_op0, rd, elt, esz + 1);
+ }
+ clear_vec_high(s, 1, rd);
+ return true;
+}
+
+static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t, n, m);
+ tcg_gen_sub_i64(d, d, t);
+}
+
+TRANS(SMULL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_mul_i64, false)
+TRANS(UMULL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_mul_i64, false)
+TRANS(SMLAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_muladd_i64, true)
+TRANS(UMLAL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_muladd_i64, true)
+TRANS(SMLSL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_mulsub_i64, true)
+TRANS(UMLSL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_mulsub_i64, true)
+
+TRANS(SMULL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ tcg_gen_mul_i64, false)
+TRANS(UMULL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ tcg_gen_mul_i64, false)
+TRANS(SMLAL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_muladd_i64, true)
+TRANS(UMLAL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_muladd_i64, true)
+TRANS(SMLSL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_mulsub_i64, true)
+TRANS(UMLSL_vi, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
+ gen_mulsub_i64, true)
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t1, n, m);
+ tcg_gen_sub_i64(t2, m, n);
+ tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t1, n, m);
+ tcg_gen_sub_i64(t2, m, n);
+ tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, n, m);
+ tcg_gen_add_i64(d, d, t);
+}
+
+TRANS(SADDL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_add_i64, false)
+TRANS(UADDL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_add_i64, false)
+TRANS(SSUBL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_sub_i64, false)
+TRANS(USUBL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ tcg_gen_sub_i64, false)
+TRANS(SABDL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_sabd_i64, false)
+TRANS(UABDL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_uabd_i64, false)
+TRANS(SABAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ gen_saba_i64, true)
+TRANS(UABAL_v, do_3op_widening,
+ a->esz, a->q, a->rd, a->rn, a->rm, -1,
+ gen_uaba_i64, true)
+
+static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_mul_i64(d, n, m);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
+}
+
+static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_mul_i64(d, n, m);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
+}
+
+static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
+ tcg_gen_neg_i64(t, t);
+ gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
+}
+
+static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, n, m);
+ gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
+ tcg_gen_neg_i64(t, t);
+ gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
+}
+
+TRANS(SQDMULL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_v, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
+TRANS(SQDMULL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_vi, do_3op_widening,
+ a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
+static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
+ MemOp sign, bool sub)
+{
+ TCGv_i64 tcg_op0, tcg_op1;
+ MemOp esz = a->esz;
+ int half = 8 >> esz;
+ bool top = a->q;
+ int top_swap = top ? 0 : half - 1;
+ int top_half = top ? half : 0;
+
+ /* There are no 64x64->128 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ tcg_op0 = tcg_temp_new_i64();
+ tcg_op1 = tcg_temp_new_i64();
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
+ read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+ if (sub) {
+ tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+ } else {
+ tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+ }
+ write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
+ }
+ clear_vec_high(s, 1, a->rd);
+ return true;
+}
+
+TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
+TRANS(UADDW, do_addsub_wide, a, 0, false)
+TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
+TRANS(USUBW, do_addsub_wide, a, 0, true)
+
+static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
+ bool sub, bool round)
+{
+ TCGv_i64 tcg_op0, tcg_op1;
+ MemOp esz = a->esz;
+ int half = 8 >> esz;
+ bool top = a->q;
+ int ebits = 8 << esz;
+ uint64_t rbit = 1ull << (ebits - 1);
+ int top_swap, top_half;
+
+ /* There are no 128x128->64 bit operations. */
+ if (esz >= MO_64) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+ tcg_op0 = tcg_temp_new_i64();
+ tcg_op1 = tcg_temp_new_i64();
+
+ /*
+ * For top half inputs, iterate backward; forward for bottom half.
+ * This means the store to the destination will not occur until
+ * overlapping input inputs are consumed.
+ */
+ top_swap = top ? half - 1 : 0;
+ top_half = top ? half : 0;
+
+ for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
+ int elt = elt_fwd ^ top_swap;
+
+ read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
+ read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
+ if (sub) {
+ tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
+ } else {
+ tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
+ }
+ if (round) {
+ tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
+ }
+ tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
+ write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
+ }
+ clear_vec_high(s, top, a->rd);
+ return true;
+}
+
+TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
+TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
+TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
+TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
+
+static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
+{
+ if (fp_access_check(s)) {
+ /* The Q field specifies lo/hi half input for these insns. */
+ gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
+ }
+ return true;
+}
+
+TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
+TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
+
+/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -5815,6 +6166,38 @@ static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
+static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
+ NeonGenTwo64OpFn *fn, bool acc)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ unsigned vsz, dofs;
+
+ if (acc) {
+ read_vec_element(s, t0, a->rd, 0, a->esz + 1);
+ }
+ read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
+ read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
+ fn(t0, t1, t2);
+
+ /* Clear the whole register first, then store scalar. */
+ vsz = vec_full_reg_size(s);
+ dofs = vec_full_reg_offset(s, a->rd);
+ tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
+ write_vec_element(s, t0, a->rd, 0, a->esz + 1);
+ }
+ return true;
+}
+
+TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
+TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
+TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
+ a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
+
static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@@ -9647,102 +10030,6 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD scalar three different
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +-----+---+-----------+------+---+------+--------+-----+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
- * +-----+---+-----------+------+---+------+--------+-----+------+------+
- */
-static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
-{
- bool is_u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
-
- switch (opcode) {
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- case 0xd: /* SQDMULL, SQDMULL2 */
- if (size == 0 || size == 3) {
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (size == 2) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
- read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
-
- tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
-
- switch (opcode) {
- case 0xd: /* SQDMULL, SQDMULL2 */
- break;
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- tcg_gen_neg_i64(tcg_res, tcg_res);
- /* fall through */
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- read_vec_element(s, tcg_op1, rd, 0, MO_64);
- gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
- tcg_res, tcg_op1);
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
- } else {
- TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
- TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
-
- switch (opcode) {
- case 0xd: /* SQDMULL, SQDMULL2 */
- break;
- case 0xb: /* SQDMLSL, SQDMLSL2 */
- gen_helper_neon_negl_u32(tcg_res, tcg_res);
- /* fall through */
- case 0x9: /* SQDMLAL, SQDMLAL2 */
- {
- TCGv_i64 tcg_op3 = tcg_temp_new_i64();
- read_vec_element(s, tcg_op3, rd, 0, MO_32);
- gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
- tcg_res, tcg_op3);
- break;
- }
- default:
- g_assert_not_reached();
- }
-
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- write_fp_dreg(s, rd, tcg_res);
- }
-}
-
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
@@ -10592,416 +10879,6 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
}
}
-/* Generate code to do a "long" addition or subtraction, ie one done in
- * TCGv_i64 on vector lanes twice the width specified by size.
- */
-static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
- TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
-{
- static NeonGenTwo64OpFn * const fns[3][2] = {
- { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
- { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
- { tcg_gen_add_i64, tcg_gen_sub_i64 },
- };
- NeonGenTwo64OpFn *genfn;
- assert(size < 3);
-
- genfn = fns[size][is_sub];
- genfn(tcg_res, tcg_op1, tcg_op2);
-}
-
-static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
-{
- /* 3-reg-different widening insns: 64 x 64 -> 128 */
- TCGv_i64 tcg_res[2];
- int pass, accop;
-
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- /* Does this op do an adding accumulate, a subtracting accumulate,
- * or no accumulate at all?
- */
- switch (opcode) {
- case 5:
- case 8:
- case 9:
- accop = 1;
- break;
- case 10:
- case 11:
- accop = -1;
- break;
- default:
- accop = 0;
- break;
- }
-
- if (accop != 0) {
- read_vec_element(s, tcg_res[0], rd, 0, MO_64);
- read_vec_element(s, tcg_res[1], rd, 1, MO_64);
- }
-
- /* size == 2 means two 32x32->64 operations; this is worth special
- * casing because we can generally handle it inline.
- */
- if (size == 2) {
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_passres;
- MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
-
- int elt = pass + is_q * 2;
-
- read_vec_element(s, tcg_op1, rn, elt, memop);
- read_vec_element(s, tcg_op2, rm, elt, memop);
-
- if (accop == 0) {
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- switch (opcode) {
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- {
- TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
- TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
-
- tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
- tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
- tcg_passres,
- tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
- break;
- }
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
- tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
- break;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
-
- if (opcode == 9 || opcode == 11) {
- /* saturating accumulate ops */
- if (accop < 0) {
- tcg_gen_neg_i64(tcg_passres, tcg_passres);
- }
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
- tcg_res[pass], tcg_passres);
- } else if (accop > 0) {
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- } else if (accop < 0) {
- tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- }
- }
- } else {
- /* size 0 or 1, generally helper functions */
- for (pass = 0; pass < 2; pass++) {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i64 tcg_passres;
- int elt = pass + is_q * 2;
-
- read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
- read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
-
- if (accop == 0) {
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- switch (opcode) {
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- {
- TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
- static NeonGenWidenFn * const widenfns[2][2] = {
- { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
- { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
- };
- NeonGenWidenFn *widenfn = widenfns[size][is_u];
-
- widenfn(tcg_op2_64, tcg_op2);
- widenfn(tcg_passres, tcg_op1);
- gen_neon_addl(size, (opcode == 2), tcg_passres,
- tcg_passres, tcg_op2_64);
- break;
- }
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- if (size == 0) {
- if (is_u) {
- gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
- }
- } else {
- if (is_u) {
- gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
- }
- }
- break;
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
- if (size == 0) {
- if (is_u) {
- gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
- }
- } else {
- if (is_u) {
- gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
- } else {
- gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
- }
- }
- break;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- assert(size == 1);
- gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
-
- if (accop != 0) {
- if (opcode == 9 || opcode == 11) {
- /* saturating accumulate ops */
- if (accop < 0) {
- gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
- }
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- } else {
- gen_neon_addl(size, (accop < 0), tcg_res[pass],
- tcg_res[pass], tcg_passres);
- }
- }
- }
- }
-
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-}
-
-static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
-{
- TCGv_i64 tcg_res[2];
- int part = is_q ? 2 : 0;
- int pass;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
- static NeonGenWidenFn * const widenfns[3][2] = {
- { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
- { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
- { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
- };
- NeonGenWidenFn *widenfn = widenfns[size][is_u];
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
- widenfn(tcg_op2_wide, tcg_op2);
- tcg_res[pass] = tcg_temp_new_i64();
- gen_neon_addl(size, (opcode == 3),
- tcg_res[pass], tcg_op1, tcg_op2_wide);
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
-}
-
-static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
- tcg_gen_addi_i64(in, in, 1U << 31);
- tcg_gen_extrh_i64_i32(res, in);
-}
-
-static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
-{
- TCGv_i32 tcg_res[2];
- int part = is_q ? 2 : 0;
- int pass;
-
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_wideres = tcg_temp_new_i64();
- static NeonGenNarrowFn * const narrowfns[3][2] = {
- { gen_helper_neon_narrow_high_u8,
- gen_helper_neon_narrow_round_high_u8 },
- { gen_helper_neon_narrow_high_u16,
- gen_helper_neon_narrow_round_high_u16 },
- { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
- };
- NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
-
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
-
- tcg_res[pass] = tcg_temp_new_i32();
- gennarrow(tcg_res[pass], tcg_wideres);
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
- }
- clear_vec_high(s, is_q, rd);
-}
-
-/* AdvSIMD three different
- * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+--------+-----+------+------+
- */
-static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
-{
- /* Instructions in this group fall into three basic classes
- * (in each case with the operation working on each element in
- * the input vectors):
- * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
- * 128 bit input)
- * (2) wide 64 x 128 -> 128
- * (3) narrowing 128 x 128 -> 64
- * Here we do initial decode, catch unallocated cases and
- * dispatch to separate functions for each class.
- */
- int is_q = extract32(insn, 30, 1);
- int is_u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- switch (opcode) {
- case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
- case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
- /* 64 x 128 -> 128 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
- case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
- /* 128 x 128 -> 64 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- case 14: /* PMULL, PMULL2 */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- switch (size) {
- case 0: /* PMULL.P8 */
- if (!fp_access_check(s)) {
- return;
- }
- /* The Q field specifies lo/hi half input for this insn. */
- gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
- gen_helper_neon_pmull_h);
- break;
-
- case 3: /* PMULL.P64 */
- if (!dc_isar_feature(aa64_pmull, s)) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- /* The Q field specifies lo/hi half input for this insn. */
- gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
- gen_helper_gvec_pmull_q);
- break;
-
- default:
- unallocated_encoding(s);
- break;
- }
- return;
- case 9: /* SQDMLAL, SQDMLAL2 */
- case 11: /* SQDMLSL, SQDMLSL2 */
- case 13: /* SQDMULL, SQDMULL2 */
- if (is_u || size == 0) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
- case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
- case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
- case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
- /* 64 x 64 -> 128 */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
- break;
- default:
- /* opcode 15 not allocated */
- unallocated_encoding(s);
- break;
- }
-}
-
static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
int size, int rn, int rd)
{
@@ -11944,268 +11821,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD scalar x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * AdvSIMD vector x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_indexed(DisasContext *s, uint32_t insn)
-{
- /* This encoding has two kinds of instruction:
- * normal, where we perform elt x idxelt => elt for each
- * element in the vector
- * long, where we perform elt x idxelt and generate a result of
- * double the width of the input element
- * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
- */
- bool is_scalar = extract32(insn, 28, 1);
- bool is_q = extract32(insn, 30, 1);
- bool u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int l = extract32(insn, 21, 1);
- int m = extract32(insn, 20, 1);
- /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
- int rm = extract32(insn, 16, 4);
- int opcode = extract32(insn, 12, 4);
- int h = extract32(insn, 11, 1);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- int index;
-
- switch (16 * u + opcode) {
- case 0x02: /* SMLAL, SMLAL2 */
- case 0x12: /* UMLAL, UMLAL2 */
- case 0x06: /* SMLSL, SMLSL2 */
- case 0x16: /* UMLSL, UMLSL2 */
- case 0x0a: /* SMULL, SMULL2 */
- case 0x1a: /* UMULL, UMULL2 */
- if (is_scalar) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0x03: /* SQDMLAL, SQDMLAL2 */
- case 0x07: /* SQDMLSL, SQDMLSL2 */
- case 0x0b: /* SQDMULL, SQDMULL2 */
- break;
- default:
- case 0x00: /* FMLAL */
- case 0x01: /* FMLA */
- case 0x04: /* FMLSL */
- case 0x05: /* FMLS */
- case 0x08: /* MUL */
- case 0x09: /* FMUL */
- case 0x0c: /* SQDMULH */
- case 0x0d: /* SQRDMULH */
- case 0x0e: /* SDOT */
- case 0x0f: /* SUDOT / BFDOT / USDOT / BFMLAL */
- case 0x10: /* MLA */
- case 0x11: /* FCMLA #0 */
- case 0x13: /* FCMLA #90 */
- case 0x14: /* MLS */
- case 0x15: /* FCMLA #180 */
- case 0x17: /* FCMLA #270 */
- case 0x18: /* FMLAL2 */
- case 0x19: /* FMULX */
- case 0x1c: /* FMLSL2 */
- case 0x1d: /* SQRDMLAH */
- case 0x1e: /* UDOT */
- case 0x1f: /* SQRDMLSH */
- unallocated_encoding(s);
- return;
- }
-
- /* Given MemOp size, adjust register and indexing. */
- switch (size) {
- case MO_8:
- case MO_64:
- unallocated_encoding(s);
- return;
- case MO_16:
- index = h << 2 | l << 1 | m;
- break;
- case MO_32:
- index = h << 1 | l;
- rm |= m << 4;
- break;
- default:
- g_assert_not_reached();
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (size == 3) {
- g_assert_not_reached();
- } else {
- /* long ops: 16x16->32 or 32x32->64 */
- TCGv_i64 tcg_res[2];
- int pass;
- bool satop = extract32(opcode, 0, 1);
- MemOp memop = MO_32;
-
- if (satop || !u) {
- memop |= MO_SIGN;
- }
-
- if (size == 2) {
- TCGv_i64 tcg_idx = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_idx, rm, index, memop);
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_passres;
- int passelt;
-
- if (is_scalar) {
- passelt = 0;
- } else {
- passelt = pass + (is_q * 2);
- }
-
- read_vec_element(s, tcg_op, rn, passelt, memop);
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- if (opcode == 0xa || opcode == 0xb) {
- /* Non-accumulating ops */
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
-
- if (satop) {
- /* saturating, doubling */
- gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- }
-
- if (opcode == 0xa || opcode == 0xb) {
- continue;
- }
-
- /* Accumulating op: handle accumulate step */
- read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
-
- switch (opcode) {
- case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- break;
- case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- break;
- case 0x7: /* SQDMLSL, SQDMLSL2 */
- tcg_gen_neg_i64(tcg_passres, tcg_passres);
- /* fall through */
- case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- clear_vec_high(s, !is_scalar, rd);
- } else {
- TCGv_i32 tcg_idx = tcg_temp_new_i32();
-
- assert(size == 1);
- read_vec_element_i32(s, tcg_idx, rm, index, size);
-
- if (!is_scalar) {
- /* The simplest way to handle the 16x16 indexed ops is to
- * duplicate the index into both halves of the 32 bit tcg_idx
- * and then use the usual Neon helpers.
- */
- tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
- }
-
- for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i64 tcg_passres;
-
- if (is_scalar) {
- read_vec_element_i32(s, tcg_op, rn, pass, size);
- } else {
- read_vec_element_i32(s, tcg_op, rn,
- pass + (is_q * 2), MO_32);
- }
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- if (opcode == 0xa || opcode == 0xb) {
- /* Non-accumulating ops */
- tcg_passres = tcg_res[pass];
- } else {
- tcg_passres = tcg_temp_new_i64();
- }
-
- if (memop & MO_SIGN) {
- gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
- } else {
- gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
- }
- if (satop) {
- gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
- tcg_passres, tcg_passres);
- }
-
- if (opcode == 0xa || opcode == 0xb) {
- continue;
- }
-
- /* Accumulating op: handle accumulate step */
- read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
-
- switch (opcode) {
- case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- break;
- case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- break;
- case 0x7: /* SQDMLSL, SQDMLSL2 */
- gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
- /* fall through */
- case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
- tcg_res[pass],
- tcg_passres);
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- if (is_scalar) {
- tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
- }
- }
-
- if (is_scalar) {
- tcg_res[1] = tcg_constant_i64(0);
- }
-
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
- }
-}
-
/* C3.6 Data processing - SIMD, inc Crypto
*
* As the decode gets a little complex we are using a table based
@@ -12213,19 +11828,15 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
*/
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
- { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
- { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
{ 0x0e000000, 0xbf208c00, disas_simd_tb },
{ 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
{ 0x2e000000, 0xbf208400, disas_simd_ext },
- { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
- { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
{ 0x00000000, 0x00000000, NULL }
diff --git a/target/arm/tcg/translate-m-nocp.c b/target/arm/tcg/translate-m-nocp.c
index f564d06..b92773b 100644
--- a/target/arm/tcg/translate-m-nocp.c
+++ b/target/arm/tcg/translate-m-nocp.c
@@ -332,7 +332,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
if (dc_isar_feature(aa32_mve, s)) {
/* QC is only present for MVE; otherwise RES0 */
TCGv_i32 qc = tcg_temp_new_i32();
- tcg_gen_andi_i32(qc, tmp, FPCR_QC);
+ tcg_gen_andi_i32(qc, tmp, FPSR_QC);
/*
* The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
* here writing the same value into all elements is simplest.
@@ -340,11 +340,11 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
16, 16, qc);
}
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
- fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
+ fpscr = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(fpscr, fpscr, ~FPSR_NZCV_MASK);
tcg_gen_or_i32(fpscr, fpscr, tmp);
- store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
+ store_cpu_field_low32(fpscr, vfp.fpsr);
break;
}
case ARM_VFP_FPCXT_NS:
@@ -390,7 +390,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
tcg_gen_deposit_i32(control, control, sfpa,
R_V7M_CONTROL_SFPA_SHIFT, 1);
store_cpu_field(control, v7m.control[M_REG_S]);
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
gen_helper_vfp_set_fpscr(tcg_env, tmp);
s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
break;
@@ -457,7 +457,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
case ARM_VFP_FPSCR_NZCVQC:
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCVQC_MASK);
storefn(s, opaque, tmp, true);
break;
case QEMU_VFP_FPSCR_NZCV:
@@ -465,8 +465,8 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
* Read just NZCV; this is a special case to avoid the
* helper call for the "VMRS to CPSR.NZCV" insn.
*/
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ tmp = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
storefn(s, opaque, tmp, true);
break;
case ARM_VFP_FPCXT_S:
@@ -476,7 +476,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
tmp = tcg_temp_new_i32();
sfpa = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
- tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
@@ -529,7 +529,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
sfpa = tcg_temp_new_i32();
fpscr = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(fpscr, tcg_env);
- tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
+ tcg_gen_andi_i32(tmp, fpscr, ~FPSR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index 39ec971..cd5b848 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -833,8 +833,8 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
break;
case ARM_VFP_FPSCR:
if (a->rt == 15) {
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ tmp = load_cpu_field_low32(vfp.fpsr);
+ tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
} else {
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, tcg_env);
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index aba21f7..a8672c8 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -351,8 +351,7 @@ static inline TCGv_i32 get_ahp_flag(void)
{
TCGv_i32 ret = tcg_temp_new_i32();
- tcg_gen_ld_i32(ret, tcg_env,
- offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
+ tcg_gen_ld_i32(ret, tcg_env, offsetoflow32(CPUARMState, vfp.fpcr));
tcg_gen_extract_i32(ret, ret, 26, 1);
return ret;
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 50d7042..b3698da 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -85,7 +85,7 @@ static inline int vfp_exceptbits_to_host(int target_bits)
return host_bits;
}
-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
uint32_t i;
@@ -99,14 +99,28 @@ static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
return vfp_exceptbits_from_host(i);
}
-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+static void vfp_set_fpsr_to_host(CPUARMState *env, uint32_t val)
{
- int i;
- uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
+ /*
+ * The exception flags are ORed together when we read fpscr so we
+ * only need to preserve the current state in one of our
+ * float_status values.
+ */
+ int i = vfp_exceptbits_to_host(val);
+ set_float_exception_flags(i, &env->vfp.fp_status);
+ set_float_exception_flags(0, &env->vfp.fp_status_f16);
+ set_float_exception_flags(0, &env->vfp.standard_fp_status);
+ set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
+}
+
+static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ uint64_t changed = env->vfp.fpcr;
changed ^= val;
+ changed &= mask;
if (changed & (3 << 22)) {
- i = (val >> 22) & 3;
+ int i = (val >> 22) & 3;
switch (i) {
case FPROUNDING_TIEEVEN:
i = float_round_nearest_even;
@@ -141,52 +155,56 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
}
-
- /*
- * The exception flags are ORed together when we read fpscr so we
- * only need to preserve the current state in one of our
- * float_status values.
- */
- i = vfp_exceptbits_to_host(val);
- set_float_exception_flags(i, &env->vfp.fp_status);
- set_float_exception_flags(0, &env->vfp.fp_status_f16);
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
- set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
}
#else
-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
return 0;
}
-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+static void vfp_set_fpsr_to_host(CPUARMState *env, uint32_t val)
+{
+}
+
+static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
}
#endif
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+uint32_t vfp_get_fpcr(CPUARMState *env)
{
- uint32_t i, fpscr;
-
- fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
- | (env->vfp.vec_len << 16)
- | (env->vfp.vec_stride << 20);
+ uint32_t fpcr = env->vfp.fpcr
+ | (env->vfp.vec_len << 16)
+ | (env->vfp.vec_stride << 20);
/*
- * M-profile LTPSIZE overlaps A-profile Stride; whichever of the
- * two is not applicable to this CPU will always be zero.
+ * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
+ * of the two is not applicable to this CPU will always be zero.
*/
- fpscr |= env->v7m.ltpsize << 16;
+ fpcr |= env->v7m.ltpsize << 16;
- fpscr |= vfp_get_fpscr_from_host(env);
+ return fpcr;
+}
+
+uint32_t vfp_get_fpsr(CPUARMState *env)
+{
+ uint32_t fpsr = env->vfp.fpsr;
+ uint32_t i;
+
+ fpsr |= vfp_get_fpsr_from_host(env);
i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
- fpscr |= i ? FPCR_QC : 0;
+ fpsr |= i ? FPSR_QC : 0;
+ return fpsr;
+}
- return fpscr;
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+ return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
+ (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
}
uint32_t vfp_get_fpscr(CPUARMState *env)
@@ -194,56 +212,91 @@ uint32_t vfp_get_fpscr(CPUARMState *env)
return HELPER(vfp_get_fpscr)(env);
}
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+void vfp_set_fpsr(CPUARMState *env, uint32_t val)
{
ARMCPU *cpu = env_archcpu(env);
- /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
- if (!cpu_isar_feature(any_fp16, cpu)) {
- val &= ~FPCR_FZ16;
- }
-
- vfp_set_fpscr_to_host(env, val);
-
- if (!arm_feature(env, ARM_FEATURE_M)) {
- /*
- * Short-vector length and stride; on M-profile these bits
- * are used for different purposes.
- * We can't make this conditional be "if MVFR0.FPShVec != 0",
- * because in v7A no-short-vector-support cores still had to
- * allow Stride/Len to be written with the only effect that
- * some insns are required to UNDEF if the guest sets them.
- */
- env->vfp.vec_len = extract32(val, 16, 3);
- env->vfp.vec_stride = extract32(val, 20, 2);
- } else if (cpu_isar_feature(aa32_mve, cpu)) {
- env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
- FPCR_LTPSIZE_LENGTH);
- }
+ vfp_set_fpsr_to_host(env, val);
if (arm_feature(env, ARM_FEATURE_NEON) ||
cpu_isar_feature(aa32_mve, cpu)) {
/*
- * The bit we set within fpscr_q is arbitrary; the register as a
+ * The bit we set within vfp.qc[] is arbitrary; the array as a
* whole being zero/non-zero is what counts.
- * TODO: M-profile MVE also has a QC bit.
*/
- env->vfp.qc[0] = val & FPCR_QC;
+ env->vfp.qc[0] = val & FPSR_QC;
env->vfp.qc[1] = 0;
env->vfp.qc[2] = 0;
env->vfp.qc[3] = 0;
}
/*
+ * The only FPSR bits we keep in vfp.fpsr are NZCV:
+ * the exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
+ * fp_status, and QC is in vfp.qc[]. Store the NZCV bits there,
+ * and zero any of the other FPSR bits.
+ */
+ val &= FPSR_NZCV_MASK;
+ env->vfp.fpsr = val;
+}
+
+static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ /*
+ * We only set FPCR bits defined by mask, and leave the others alone.
+ * We assume the mask is sensible (e.g. doesn't try to set only
+ * part of a field)
+ */
+ ARMCPU *cpu = env_archcpu(env);
+
+ /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
+ if (!cpu_isar_feature(any_fp16, cpu)) {
+ val &= ~FPCR_FZ16;
+ }
+
+ vfp_set_fpcr_to_host(env, val, mask);
+
+ if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * Short-vector length and stride; on M-profile these bits
+ * are used for different purposes.
+ * We can't make this conditional be "if MVFR0.FPShVec != 0",
+ * because in v7A no-short-vector-support cores still had to
+ * allow Stride/Len to be written with the only effect that
+ * some insns are required to UNDEF if the guest sets them.
+ */
+ env->vfp.vec_len = extract32(val, 16, 3);
+ env->vfp.vec_stride = extract32(val, 20, 2);
+ } else if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+ FPCR_LTPSIZE_LENGTH);
+ }
+ }
+
+ /*
* We don't implement trapped exception handling, so the
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
*
- * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
- * fp_status; QC, Len and Stride are stored separately earlier.
- * Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
- * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
+ * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode
+ * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
+ * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
+ * bits.
*/
- env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
+ val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16;
+ env->vfp.fpcr &= ~mask;
+ env->vfp.fpcr |= val;
+}
+
+void vfp_set_fpcr(CPUARMState *env, uint32_t val)
+{
+ vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+ vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
+ vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
}
void vfp_set_fpscr(CPUARMState *env, uint32_t val)
@@ -315,8 +368,7 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
default:
g_assert_not_reached();
}
- env->vfp.xregs[ARM_VFP_FPSCR] =
- deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
+ env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */
}
/* XXX: check quiet/signaling case */
@@ -1119,8 +1171,7 @@ uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
uint32_t z = (pair >> 32) == 0;
/* Store Z, clear NCV, in FPSCR.NZCV. */
- env->vfp.xregs[ARM_VFP_FPSCR]
- = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
+ env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);
return result;
}
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index f53e119..3132842 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -210,6 +210,7 @@ static const TCGCPUOps avr_tcg_ops = {
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
.restore_state_to_opc = avr_restore_state_to_opc,
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
+ .cpu_exec_halt = avr_cpu_has_work,
.tlb_fill = avr_cpu_tlb_fill,
.do_interrupt = avr_cpu_do_interrupt,
};
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index 535ec39..ff31ca7 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -186,6 +186,7 @@ static const TCGCPUOps crisv10_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = cris_cpu_tlb_fill,
.cpu_exec_interrupt = cris_cpu_exec_interrupt,
+ .cpu_exec_halt = cris_cpu_has_work,
.do_interrupt = crisv10_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
};
@@ -197,6 +198,7 @@ static const TCGCPUOps crisv32_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = cris_cpu_tlb_fill,
.cpu_exec_interrupt = cris_cpu_exec_interrupt,
+ .cpu_exec_halt = cris_cpu_has_work,
.do_interrupt = cris_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
};
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index f050787..7cf2e2f 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -228,6 +228,7 @@ static const TCGCPUOps hppa_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = hppa_cpu_tlb_fill,
.cpu_exec_interrupt = hppa_cpu_exec_interrupt,
+ .cpu_exec_halt = hppa_cpu_has_work,
.do_interrupt = hppa_cpu_do_interrupt,
.do_unaligned_access = hppa_cpu_do_unaligned_access,
.do_transaction_failed = hppa_cpu_do_transaction_failed,
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 270f711..69f9ad7 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -736,6 +736,7 @@ static const TCGCPUOps loongarch_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = loongarch_cpu_tlb_fill,
.cpu_exec_interrupt = loongarch_cpu_exec_interrupt,
+ .cpu_exec_halt = loongarch_cpu_has_work,
.do_interrupt = loongarch_cpu_do_interrupt,
.do_transaction_failed = loongarch_cpu_do_transaction_failed,
#endif
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index efd6bbd..1d49f4c 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -536,6 +536,7 @@ static const TCGCPUOps m68k_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = m68k_cpu_tlb_fill,
.cpu_exec_interrupt = m68k_cpu_exec_interrupt,
+ .cpu_exec_halt = m68k_cpu_has_work,
.do_interrupt = m68k_cpu_do_interrupt,
.do_transaction_failed = m68k_cpu_transaction_failed,
#endif /* !CONFIG_USER_ONLY */
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 41ad47d..135947e 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -413,6 +413,7 @@ static const TCGCPUOps mb_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = mb_cpu_tlb_fill,
.cpu_exec_interrupt = mb_cpu_exec_interrupt,
+ .cpu_exec_halt = mb_cpu_has_work,
.do_interrupt = mb_cpu_do_interrupt,
.do_transaction_failed = mb_cpu_transaction_failed,
.do_unaligned_access = mb_cpu_do_unaligned_access,
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index bbe01d0..89655b1 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -555,6 +555,7 @@ static const TCGCPUOps mips_tcg_ops = {
#if !defined(CONFIG_USER_ONLY)
.tlb_fill = mips_cpu_tlb_fill,
.cpu_exec_interrupt = mips_cpu_exec_interrupt,
+ .cpu_exec_halt = mips_cpu_has_work,
.do_interrupt = mips_cpu_do_interrupt,
.do_transaction_failed = mips_cpu_do_transaction_failed,
.do_unaligned_access = mips_cpu_do_unaligned_access,
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index fdaaa09..6ec54ad 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -233,6 +233,7 @@ static const TCGCPUOps openrisc_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = openrisc_cpu_tlb_fill,
.cpu_exec_interrupt = openrisc_cpu_exec_interrupt,
+ .cpu_exec_halt = openrisc_cpu_has_work,
.do_interrupt = openrisc_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
};
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 01e358a..cdada79 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -1,3 +1,4 @@
+
/*
* PowerPC CPU initialization for qemu.
*
@@ -7481,6 +7482,7 @@ static const TCGCPUOps ppc_tcg_ops = {
#else
.tlb_fill = ppc_cpu_tlb_fill,
.cpu_exec_interrupt = ppc_cpu_exec_interrupt,
+ .cpu_exec_halt = ppc_cpu_has_work,
.do_interrupt = ppc_cpu_do_interrupt,
.cpu_exec_enter = ppc_cpu_exec_enter,
.cpu_exec_exit = ppc_cpu_exec_exit,
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a2640cf..c53b0d5 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -903,7 +903,7 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
return env->pc;
}
-static bool riscv_cpu_has_work(CPUState *cs)
+bool riscv_cpu_has_work(CPUState *cs)
{
#ifndef CONFIG_USER_ONLY
RISCVCPU *cpu = RISCV_CPU(cs);
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 8239ae8..0ac17bc 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -136,4 +136,7 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
}
}
+/* Our implementation of CPUClass::has_work */
+bool riscv_cpu_has_work(CPUState *cs);
+
#endif
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index ae25686..ecf366d 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -21,6 +21,7 @@
#include "exec/exec-all.h"
#include "tcg-cpu.h"
#include "cpu.h"
+#include "internals.h"
#include "pmu.h"
#include "time_helper.h"
#include "qapi/error.h"
@@ -138,6 +139,7 @@ static const TCGCPUOps riscv_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = riscv_cpu_tlb_fill,
.cpu_exec_interrupt = riscv_cpu_exec_interrupt,
+ .cpu_exec_halt = riscv_cpu_has_work,
.do_interrupt = riscv_cpu_do_interrupt,
.do_transaction_failed = riscv_cpu_do_transaction_failed,
.do_unaligned_access = riscv_cpu_do_unaligned_access,
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 8a584f0..36d2a6f 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -192,6 +192,7 @@ static const TCGCPUOps rx_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.cpu_exec_interrupt = rx_cpu_exec_interrupt,
+ .cpu_exec_halt = rx_cpu_has_work,
.do_interrupt = rx_cpu_do_interrupt,
#endif /* !CONFIG_USER_ONLY */
};
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 2bbeaca..0fbfcd3 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -370,6 +370,7 @@ static const TCGCPUOps s390_tcg_ops = {
#else
.tlb_fill = s390_cpu_tlb_fill,
.cpu_exec_interrupt = s390_cpu_exec_interrupt,
+ .cpu_exec_halt = s390_cpu_has_work,
.do_interrupt = s390_cpu_do_interrupt,
.debug_excp_handler = s390x_cpu_debug_excp_handler,
.do_unaligned_access = s390x_cpu_do_unaligned_access,
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 618aa71..8f07261 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -254,6 +254,7 @@ static const TCGCPUOps superh_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = superh_cpu_tlb_fill,
.cpu_exec_interrupt = superh_cpu_exec_interrupt,
+ .cpu_exec_halt = superh_cpu_has_work,
.do_interrupt = superh_cpu_do_interrupt,
.do_unaligned_access = superh_cpu_do_unaligned_access,
.io_recompile_replay_branch = superh_io_recompile_replay_branch,
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 9bacfb6..54cb269 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -926,6 +926,7 @@ static const TCGCPUOps sparc_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = sparc_cpu_tlb_fill,
.cpu_exec_interrupt = sparc_cpu_exec_interrupt,
+ .cpu_exec_halt = sparc_cpu_has_work,
.do_interrupt = sparc_cpu_do_interrupt,
.do_transaction_failed = sparc_cpu_do_transaction_failed,
.do_unaligned_access = sparc_cpu_do_unaligned_access,
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index bdefb84..4d9c036 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -169,6 +169,7 @@ static const TCGCPUOps tricore_tcg_ops = {
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
.restore_state_to_opc = tricore_restore_state_to_opc,
.tlb_fill = tricore_cpu_tlb_fill,
+ .cpu_exec_halt = tricore_cpu_has_work,
};
static void tricore_cpu_class_init(ObjectClass *c, void *data)
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index de907cf..a08c7a0 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -234,6 +234,7 @@ static const TCGCPUOps xtensa_tcg_ops = {
#ifndef CONFIG_USER_ONLY
.tlb_fill = xtensa_cpu_tlb_fill,
.cpu_exec_interrupt = xtensa_cpu_exec_interrupt,
+ .cpu_exec_halt = xtensa_cpu_has_work,
.do_interrupt = xtensa_cpu_do_interrupt,
.do_transaction_failed = xtensa_cpu_do_transaction_failed,
.do_unaligned_access = xtensa_cpu_do_unaligned_access,