aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-08-28 15:14:40 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-08-28 15:14:40 +0100
commitea1bb830cb021cca2e361091cf728aaabc8c0654 (patch)
tree3694098593f669dbd6b351688c4dce6e87335843
parent3e39dac0354c39b4b647940e42360c6b1f3edc02 (diff)
parented78849d9711805bda37ee026018d6ee7a606d0e (diff)
downloadqemu-ea1bb830cb021cca2e361091cf728aaabc8c0654.zip
qemu-ea1bb830cb021cca2e361091cf728aaabc8c0654.tar.gz
qemu-ea1bb830cb021cca2e361091cf728aaabc8c0654.tar.bz2
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200828' into staging
target-arm queue: * target/arm: Cleanup and refactoring preparatory to SVE2 * armsse: Define ARMSSEClass correctly * hw/misc/unimp: Improve information provided in log messages * hw/qdev-clock: Avoid calling qdev_connect_clock_in after DeviceRealize * hw/arm/xilinx_zynq: Call qdev_connect_clock_in() before DeviceRealize * hw/net/allwinner-sun8i-emac: Use AddressSpace for DMA transfers * hw/sd/allwinner-sdhost: Use AddressSpace for DMA transfers * target/arm: Fill in the WnR syndrome bit in mte_check_fail * target/arm: Clarify HCR_EL2 ARMCPRegInfo type * hw/arm/musicpal: Use AddressSpace for DMA transfers * hw/clock: Minor cleanups * hw/arm/sbsa-ref: fix typo breaking PCIe IRQs # gpg: Signature made Fri 28 Aug 2020 10:23:02 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20200828: (35 commits) target/arm: Convert sq{, r}dmulh to gvec for aa64 advsimd target/arm: Convert integer multiply-add (indexed) to gvec for aa64 advsimd target/arm: Convert integer multiply (indexed) to gvec for aa64 advsimd target/arm: Generalize inl_qrdmlah_* helper functions target/arm: Tidy SVE tszimm shift formats target/arm: Split out gen_gvec_ool_zz target/arm: Split out gen_gvec_ool_zzz target/arm: Split out gen_gvec_ool_zzp target/arm: Merge helper_sve_clr_* and helper_sve_movz_* target/arm: Split out gen_gvec_ool_zzzp target/arm: Use tcg_gen_gvec_bitsel for trans_SEL_pppp target/arm: Clean up 4-operand predicate expansion target/arm: Merge do_vector2_p into do_mov_p target/arm: Rearrange {sve,fp}_check_access assert target/arm: Split out gen_gvec_fn_zzz, do_zzz_fn target/arm: Split out gen_gvec_fn_zz qemu/int128: Add int128_lshift armsse: Define ARMSSEClass correctly hw/misc/unimp: Display the offset with width of the region size hw/misc/unimp: Display the value with width of the access size ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/arm/allwinner-a10.c2
-rw-r--r--hw/arm/allwinner-h3.c4
-rw-r--r--hw/arm/armsse.c1
-rw-r--r--hw/arm/musicpal.c45
-rw-r--r--hw/arm/sbsa-ref.c2
-rw-r--r--hw/arm/xilinx_zynq.c24
-rw-r--r--hw/core/clock.c7
-rw-r--r--hw/core/qdev-clock.c6
-rw-r--r--hw/misc/unimp.c14
-rw-r--r--hw/net/allwinner-sun8i-emac.c46
-rw-r--r--hw/sd/allwinner-sdhost.c37
-rw-r--r--include/hw/arm/armsse.h2
-rw-r--r--include/hw/char/cadence_uart.h17
-rw-r--r--include/hw/clock.h30
-rw-r--r--include/hw/misc/unimp.h1
-rw-r--r--include/hw/net/allwinner-sun8i-emac.h6
-rw-r--r--include/hw/qdev-clock.h8
-rw-r--r--include/hw/sd/allwinner-sdhost.h6
-rw-r--r--include/qemu/int128.h16
-rw-r--r--target/arm/helper-sve.h5
-rw-r--r--target/arm/helper.c1
-rw-r--r--target/arm/helper.h28
-rw-r--r--target/arm/mte_helper.c19
-rw-r--r--target/arm/sve.decode35
-rw-r--r--target/arm/sve_helper.c70
-rw-r--r--target/arm/translate-a64.c110
-rw-r--r--target/arm/translate-sve.c393
-rw-r--r--target/arm/translate.h1
-rw-r--r--target/arm/vec_helper.c182
29 files changed, 626 insertions, 492 deletions
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index e258463..d404f31 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -155,6 +155,8 @@ static void aw_a10_realize(DeviceState *dev, Error **errp)
}
/* SD/MMC */
+ object_property_set_link(OBJECT(&s->mmc0), "dma-memory",
+ OBJECT(get_system_memory()), &error_fatal);
sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, AW_A10_MMC0_BASE);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0, qdev_get_gpio_in(dev, 32));
diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
index 341abe6..88259a9 100644
--- a/hw/arm/allwinner-h3.c
+++ b/hw/arm/allwinner-h3.c
@@ -349,6 +349,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp)
sysbus_mmio_map(SYS_BUS_DEVICE(&s->sid), 0, s->memmap[AW_H3_DEV_SID]);
/* SD/MMC */
+ object_property_set_link(OBJECT(&s->mmc0), "dma-memory",
+ OBJECT(get_system_memory()), &error_fatal);
sysbus_realize(SYS_BUS_DEVICE(&s->mmc0), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc0), 0, s->memmap[AW_H3_DEV_MMC0]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc0), 0,
@@ -363,6 +365,8 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp)
qemu_check_nic_model(&nd_table[0], TYPE_AW_SUN8I_EMAC);
qdev_set_nic_properties(DEVICE(&s->emac), &nd_table[0]);
}
+ object_property_set_link(OBJECT(&s->emac), "dma-memory",
+ OBJECT(get_system_memory()), &error_fatal);
sysbus_realize(SYS_BUS_DEVICE(&s->emac), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->emac), 0, s->memmap[AW_H3_DEV_EMAC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->emac), 0,
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
index 6264eab..a93da37 100644
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@@ -1160,6 +1160,7 @@ static const TypeInfo armsse_info = {
.name = TYPE_ARM_SSE,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ARMSSE),
+ .class_size = sizeof(ARMSSEClass),
.instance_init = armsse_init,
.abstract = true,
.interfaces = (InterfaceInfo[]) {
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index c3b9780..f2f4fc0 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -30,6 +30,7 @@
#include "hw/audio/wm8750.h"
#include "sysemu/block-backend.h"
#include "sysemu/runstate.h"
+#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#include "ui/pixel_ops.h"
#include "qemu/cutils.h"
@@ -163,6 +164,8 @@ typedef struct mv88w8618_eth_state {
MemoryRegion iomem;
qemu_irq irq;
+ MemoryRegion *dma_mr;
+ AddressSpace dma_as;
uint32_t smir;
uint32_t icr;
uint32_t imr;
@@ -176,19 +179,21 @@ typedef struct mv88w8618_eth_state {
NICConf conf;
} mv88w8618_eth_state;
-static void eth_rx_desc_put(uint32_t addr, mv88w8618_rx_desc *desc)
+static void eth_rx_desc_put(AddressSpace *dma_as, uint32_t addr,
+ mv88w8618_rx_desc *desc)
{
cpu_to_le32s(&desc->cmdstat);
cpu_to_le16s(&desc->bytes);
cpu_to_le16s(&desc->buffer_size);
cpu_to_le32s(&desc->buffer);
cpu_to_le32s(&desc->next);
- cpu_physical_memory_write(addr, desc, sizeof(*desc));
+ dma_memory_write(dma_as, addr, desc, sizeof(*desc));
}
-static void eth_rx_desc_get(uint32_t addr, mv88w8618_rx_desc *desc)
+static void eth_rx_desc_get(AddressSpace *dma_as, uint32_t addr,
+ mv88w8618_rx_desc *desc)
{
- cpu_physical_memory_read(addr, desc, sizeof(*desc));
+ dma_memory_read(dma_as, addr, desc, sizeof(*desc));
le32_to_cpus(&desc->cmdstat);
le16_to_cpus(&desc->bytes);
le16_to_cpus(&desc->buffer_size);
@@ -209,9 +214,9 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size)
continue;
}
do {
- eth_rx_desc_get(desc_addr, &desc);
+ eth_rx_desc_get(&s->dma_as, desc_addr, &desc);
if ((desc.cmdstat & MP_ETH_RX_OWN) && desc.buffer_size >= size) {
- cpu_physical_memory_write(desc.buffer + s->vlan_header,
+ dma_memory_write(&s->dma_as, desc.buffer + s->vlan_header,
buf, size);
desc.bytes = size + s->vlan_header;
desc.cmdstat &= ~MP_ETH_RX_OWN;
@@ -221,7 +226,7 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size)
if (s->icr & s->imr) {
qemu_irq_raise(s->irq);
}
- eth_rx_desc_put(desc_addr, &desc);
+ eth_rx_desc_put(&s->dma_as, desc_addr, &desc);
return size;
}
desc_addr = desc.next;
@@ -230,19 +235,21 @@ static ssize_t eth_receive(NetClientState *nc, const uint8_t *buf, size_t size)
return size;
}
-static void eth_tx_desc_put(uint32_t addr, mv88w8618_tx_desc *desc)
+static void eth_tx_desc_put(AddressSpace *dma_as, uint32_t addr,
+ mv88w8618_tx_desc *desc)
{
cpu_to_le32s(&desc->cmdstat);
cpu_to_le16s(&desc->res);
cpu_to_le16s(&desc->bytes);
cpu_to_le32s(&desc->buffer);
cpu_to_le32s(&desc->next);
- cpu_physical_memory_write(addr, desc, sizeof(*desc));
+ dma_memory_write(dma_as, addr, desc, sizeof(*desc));
}
-static void eth_tx_desc_get(uint32_t addr, mv88w8618_tx_desc *desc)
+static void eth_tx_desc_get(AddressSpace *dma_as, uint32_t addr,
+ mv88w8618_tx_desc *desc)
{
- cpu_physical_memory_read(addr, desc, sizeof(*desc));
+ dma_memory_read(dma_as, addr, desc, sizeof(*desc));
le32_to_cpus(&desc->cmdstat);
le16_to_cpus(&desc->res);
le16_to_cpus(&desc->bytes);
@@ -259,17 +266,17 @@ static void eth_send(mv88w8618_eth_state *s, int queue_index)
int len;
do {
- eth_tx_desc_get(desc_addr, &desc);
+ eth_tx_desc_get(&s->dma_as, desc_addr, &desc);
next_desc = desc.next;
if (desc.cmdstat & MP_ETH_TX_OWN) {
len = desc.bytes;
if (len < 2048) {
- cpu_physical_memory_read(desc.buffer, buf, len);
+ dma_memory_read(&s->dma_as, desc.buffer, buf, len);
qemu_send_packet(qemu_get_queue(s->nic), buf, len);
}
desc.cmdstat &= ~MP_ETH_TX_OWN;
s->icr |= 1 << (MP_ETH_IRQ_TXLO_BIT - queue_index);
- eth_tx_desc_put(desc_addr, &desc);
+ eth_tx_desc_put(&s->dma_as, desc_addr, &desc);
}
desc_addr = next_desc;
} while (desc_addr != s->tx_queue[queue_index]);
@@ -405,6 +412,12 @@ static void mv88w8618_eth_realize(DeviceState *dev, Error **errp)
{
mv88w8618_eth_state *s = MV88W8618_ETH(dev);
+ if (!s->dma_mr) {
+ error_setg(errp, TYPE_MV88W8618_ETH " 'dma-memory' link not set");
+ return;
+ }
+
+ address_space_init(&s->dma_as, s->dma_mr, "emac-dma");
s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf,
object_get_typename(OBJECT(dev)), dev->id, s);
}
@@ -428,6 +441,8 @@ static const VMStateDescription mv88w8618_eth_vmsd = {
static Property mv88w8618_eth_properties[] = {
DEFINE_NIC_PROPERTIES(mv88w8618_eth_state, conf),
+ DEFINE_PROP_LINK("dma-memory", mv88w8618_eth_state, dma_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1653,6 +1668,8 @@ static void musicpal_init(MachineState *machine)
qemu_check_nic_model(&nd_table[0], "mv88w8618");
dev = qdev_new(TYPE_MV88W8618_ETH);
qdev_set_nic_properties(dev, &nd_table[0]);
+ object_property_set_link(OBJECT(dev), "dma-memory",
+ OBJECT(get_system_memory()), &error_fatal);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, MP_ETH_BASE);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[MP_ETH_IRQ]);
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index f030a41..2a7d9a6 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -554,7 +554,7 @@ static void create_pcie(SBSAMachineState *sms)
for (i = 0; i < GPEX_NUM_IRQS; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
- qdev_get_gpio_in(sms->gic, irq + 1));
+ qdev_get_gpio_in(sms->gic, irq + i));
gpex_set_irq_num(GPEX_HOST(dev), i, irq + i);
}
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 32aa732..969ef07 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -222,18 +222,18 @@ static void zynq_init(MachineState *machine)
1, 0x0066, 0x0022, 0x0000, 0x0000, 0x0555, 0x2aa,
0);
- /* Create slcr, keep a pointer to connect clocks */
- slcr = qdev_new("xilinx,zynq_slcr");
- sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal);
- sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000);
-
/* Create the main clock source, and feed slcr with it */
zynq_machine->ps_clk = CLOCK(object_new(TYPE_CLOCK));
object_property_add_child(OBJECT(zynq_machine), "ps_clk",
OBJECT(zynq_machine->ps_clk));
object_unref(OBJECT(zynq_machine->ps_clk));
clock_set_hz(zynq_machine->ps_clk, PS_CLK_FREQUENCY);
+
+ /* Create slcr, keep a pointer to connect clocks */
+ slcr = qdev_new("xilinx,zynq_slcr");
qdev_connect_clock_in(slcr, "ps_clk", zynq_machine->ps_clk);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(slcr), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(slcr), 0, 0xF8000000);
dev = qdev_new(TYPE_A9MPCORE_PRIV);
qdev_prop_set_uint32(dev, "num-cpu", 1);
@@ -254,12 +254,22 @@ static void zynq_init(MachineState *machine)
sysbus_create_simple(TYPE_CHIPIDEA, 0xE0002000, pic[53 - IRQ_OFFSET]);
sysbus_create_simple(TYPE_CHIPIDEA, 0xE0003000, pic[76 - IRQ_OFFSET]);
- dev = cadence_uart_create(0xE0000000, pic[59 - IRQ_OFFSET], serial_hd(0));
+ dev = qdev_new(TYPE_CADENCE_UART);
+ busdev = SYS_BUS_DEVICE(dev);
+ qdev_prop_set_chr(dev, "chardev", serial_hd(0));
qdev_connect_clock_in(dev, "refclk",
qdev_get_clock_out(slcr, "uart0_ref_clk"));
- dev = cadence_uart_create(0xE0001000, pic[82 - IRQ_OFFSET], serial_hd(1));
+ sysbus_realize_and_unref(busdev, &error_fatal);
+ sysbus_mmio_map(busdev, 0, 0xE0000000);
+ sysbus_connect_irq(busdev, 0, pic[59 - IRQ_OFFSET]);
+ dev = qdev_new(TYPE_CADENCE_UART);
+ busdev = SYS_BUS_DEVICE(dev);
+ qdev_prop_set_chr(dev, "chardev", serial_hd(1));
qdev_connect_clock_in(dev, "refclk",
qdev_get_clock_out(slcr, "uart1_ref_clk"));
+ sysbus_realize_and_unref(busdev, &error_fatal);
+ sysbus_mmio_map(busdev, 0, 0xE0001000);
+ sysbus_connect_irq(busdev, 0, pic[82 - IRQ_OFFSET]);
sysbus_create_varargs("cadence_ttc", 0xF8001000,
pic[42-IRQ_OFFSET], pic[43-IRQ_OFFSET], pic[44-IRQ_OFFSET], NULL);
diff --git a/hw/core/clock.c b/hw/core/clock.c
index 3c0daf7..7066282 100644
--- a/hw/core/clock.c
+++ b/hw/core/clock.c
@@ -34,11 +34,16 @@ void clock_clear_callback(Clock *clk)
clock_set_callback(clk, NULL, NULL);
}
-void clock_set(Clock *clk, uint64_t period)
+bool clock_set(Clock *clk, uint64_t period)
{
+ if (clk->period == period) {
+ return false;
+ }
trace_clock_set(CLOCK_PATH(clk), CLOCK_PERIOD_TO_NS(clk->period),
CLOCK_PERIOD_TO_NS(period));
clk->period = period;
+
+ return true;
}
static void clock_propagate_period(Clock *clk, bool call_callbacks)
diff --git a/hw/core/qdev-clock.c b/hw/core/qdev-clock.c
index 5cc1e82..47ecb5b 100644
--- a/hw/core/qdev-clock.c
+++ b/hw/core/qdev-clock.c
@@ -183,3 +183,9 @@ Clock *qdev_alias_clock(DeviceState *dev, const char *name,
return ncl->clock;
}
+
+void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source)
+{
+ assert(!dev->realized);
+ clock_set_source(qdev_get_clock_in(dev, name), source);
+}
diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c
index bc4084d..6cfc572 100644
--- a/hw/misc/unimp.c
+++ b/hw/misc/unimp.c
@@ -22,9 +22,9 @@ static uint64_t unimp_read(void *opaque, hwaddr offset, unsigned size)
{
UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque);
- qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read "
- "(size %d, offset 0x%" HWADDR_PRIx ")\n",
- s->name, size, offset);
+ qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read "
+ "(size %d, offset 0x%0*" HWADDR_PRIx ")\n",
+ s->name, size, s->offset_fmt_width, offset);
return 0;
}
@@ -34,9 +34,9 @@ static void unimp_write(void *opaque, hwaddr offset,
UnimplementedDeviceState *s = UNIMPLEMENTED_DEVICE(opaque);
qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write "
- "(size %d, value 0x%" PRIx64
- ", offset 0x%" HWADDR_PRIx ")\n",
- s->name, size, value, offset);
+ "(size %d, offset 0x%0*" HWADDR_PRIx
+ ", value 0x%0*" PRIx64 ")\n",
+ s->name, size, s->offset_fmt_width, offset, size << 1, value);
}
static const MemoryRegionOps unimp_ops = {
@@ -63,6 +63,8 @@ static void unimp_realize(DeviceState *dev, Error **errp)
return;
}
+ s->offset_fmt_width = DIV_ROUND_UP(64 - clz64(s->size - 1), 4);
+
memory_region_init_io(&s->iomem, OBJECT(s), &unimp_ops, s,
s->name, s->size);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c
index 28637ff..38d3285 100644
--- a/hw/net/allwinner-sun8i-emac.c
+++ b/hw/net/allwinner-sun8i-emac.c
@@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qapi/error.h"
#include "hw/sysbus.h"
#include "migration/vmstate.h"
#include "net/net.h"
@@ -29,6 +30,7 @@
#include "net/checksum.h"
#include "qemu/module.h"
#include "exec/cpu-common.h"
+#include "sysemu/dma.h"
#include "hw/net/allwinner-sun8i-emac.h"
/* EMAC register offsets */
@@ -337,12 +339,13 @@ static void allwinner_sun8i_emac_update_irq(AwSun8iEmacState *s)
qemu_set_irq(s->irq, (s->int_sta & s->int_en) != 0);
}
-static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc,
+static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s,
+ FrameDescriptor *desc,
size_t min_size)
{
uint32_t paddr = desc->next;
- cpu_physical_memory_read(paddr, desc, sizeof(*desc));
+ dma_memory_read(&s->dma_as, paddr, desc, sizeof(*desc));
if ((desc->status & DESC_STATUS_CTL) &&
(desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) {
@@ -352,7 +355,8 @@ static uint32_t allwinner_sun8i_emac_next_desc(FrameDescriptor *desc,
}
}
-static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc,
+static uint32_t allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s,
+ FrameDescriptor *desc,
uint32_t start_addr,
size_t min_size)
{
@@ -360,7 +364,7 @@ static uint32_t allwinner_sun8i_emac_get_desc(FrameDescriptor *desc,
/* Note that the list is a cycle. Last entry points back to the head. */
while (desc_addr != 0) {
- cpu_physical_memory_read(desc_addr, desc, sizeof(*desc));
+ dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc));
if ((desc->status & DESC_STATUS_CTL) &&
(desc->status2 & DESC_STATUS2_BUF_SIZE_MASK) >= min_size) {
@@ -379,20 +383,21 @@ static uint32_t allwinner_sun8i_emac_rx_desc(AwSun8iEmacState *s,
FrameDescriptor *desc,
size_t min_size)
{
- return allwinner_sun8i_emac_get_desc(desc, s->rx_desc_curr, min_size);
+ return allwinner_sun8i_emac_get_desc(s, desc, s->rx_desc_curr, min_size);
}
static uint32_t allwinner_sun8i_emac_tx_desc(AwSun8iEmacState *s,
FrameDescriptor *desc,
size_t min_size)
{
- return allwinner_sun8i_emac_get_desc(desc, s->tx_desc_head, min_size);
+ return allwinner_sun8i_emac_get_desc(s, desc, s->tx_desc_head, min_size);
}
-static void allwinner_sun8i_emac_flush_desc(FrameDescriptor *desc,
+static void allwinner_sun8i_emac_flush_desc(AwSun8iEmacState *s,
+ FrameDescriptor *desc,
uint32_t phys_addr)
{
- cpu_physical_memory_write(phys_addr, desc, sizeof(*desc));
+ dma_memory_write(&s->dma_as, phys_addr, desc, sizeof(*desc));
}
static bool allwinner_sun8i_emac_can_receive(NetClientState *nc)
@@ -450,8 +455,8 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc,
<< RX_DESC_STATUS_FRM_LEN_SHIFT;
}
- cpu_physical_memory_write(desc.addr, buf, desc_bytes);
- allwinner_sun8i_emac_flush_desc(&desc, s->rx_desc_curr);
+ dma_memory_write(&s->dma_as, desc.addr, buf, desc_bytes);
+ allwinner_sun8i_emac_flush_desc(s, &desc, s->rx_desc_curr);
trace_allwinner_sun8i_emac_receive(s->rx_desc_curr, desc.addr,
desc_bytes);
@@ -465,7 +470,7 @@ static ssize_t allwinner_sun8i_emac_receive(NetClientState *nc,
bytes_left -= desc_bytes;
/* Move to the next descriptor */
- s->rx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 64);
+ s->rx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 64);
if (!s->rx_desc_curr) {
/* Not enough buffer space available */
s->int_sta |= INT_STA_RX_BUF_UA;
@@ -501,10 +506,10 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s)
desc.status |= TX_DESC_STATUS_LENGTH_ERR;
break;
}
- cpu_physical_memory_read(desc.addr, packet_buf + packet_bytes, bytes);
+ dma_memory_read(&s->dma_as, desc.addr, packet_buf + packet_bytes, bytes);
packet_bytes += bytes;
desc.status &= ~DESC_STATUS_CTL;
- allwinner_sun8i_emac_flush_desc(&desc, s->tx_desc_curr);
+ allwinner_sun8i_emac_flush_desc(s, &desc, s->tx_desc_curr);
/* After the last descriptor, send the packet */
if (desc.status2 & TX_DESC_STATUS2_LAST_DESC) {
@@ -519,7 +524,7 @@ static void allwinner_sun8i_emac_transmit(AwSun8iEmacState *s)
packet_bytes = 0;
transmitted++;
}
- s->tx_desc_curr = allwinner_sun8i_emac_next_desc(&desc, 0);
+ s->tx_desc_curr = allwinner_sun8i_emac_next_desc(s, &desc, 0);
}
/* Raise transmit completed interrupt */
@@ -623,7 +628,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset,
break;
case REG_TX_CUR_BUF: /* Transmit Current Buffer */
if (s->tx_desc_curr != 0) {
- cpu_physical_memory_read(s->tx_desc_curr, &desc, sizeof(desc));
+ dma_memory_read(&s->dma_as, s->tx_desc_curr, &desc, sizeof(desc));
value = desc.addr;
} else {
value = 0;
@@ -636,7 +641,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset,
break;
case REG_RX_CUR_BUF: /* Receive Current Buffer */
if (s->rx_desc_curr != 0) {
- cpu_physical_memory_read(s->rx_desc_curr, &desc, sizeof(desc));
+ dma_memory_read(&s->dma_as, s->rx_desc_curr, &desc, sizeof(desc));
value = desc.addr;
} else {
value = 0;
@@ -790,6 +795,13 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp)
{
AwSun8iEmacState *s = AW_SUN8I_EMAC(dev);
+ if (!s->dma_mr) {
+ error_setg(errp, TYPE_AW_SUN8I_EMAC " 'dma-memory' link not set");
+ return;
+ }
+
+ address_space_init(&s->dma_as, s->dma_mr, "emac-dma");
+
qemu_macaddr_default_if_unset(&s->conf.macaddr);
s->nic = qemu_new_nic(&net_allwinner_sun8i_emac_info, &s->conf,
object_get_typename(OBJECT(dev)), dev->id, s);
@@ -799,6 +811,8 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp)
static Property allwinner_sun8i_emac_properties[] = {
DEFINE_NIC_PROPERTIES(AwSun8iEmacState, conf),
DEFINE_PROP_UINT8("phy-addr", AwSun8iEmacState, mii_phy_addr, 0),
+ DEFINE_PROP_LINK("dma-memory", AwSun8iEmacState, dma_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index f9eb92c..e82afb7 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -21,7 +21,10 @@
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/units.h"
+#include "qapi/error.h"
#include "sysemu/blockdev.h"
+#include "sysemu/dma.h"
+#include "hw/qdev-properties.h"
#include "hw/irq.h"
#include "hw/sd/allwinner-sdhost.h"
#include "migration/vmstate.h"
@@ -306,7 +309,7 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s,
uint8_t buf[1024];
/* Read descriptor */
- cpu_physical_memory_read(desc_addr, desc, sizeof(*desc));
+ dma_memory_read(&s->dma_as, desc_addr, desc, sizeof(*desc));
if (desc->size == 0) {
desc->size = klass->max_desc_size;
} else if (desc->size > klass->max_desc_size) {
@@ -331,22 +334,24 @@ static uint32_t allwinner_sdhost_process_desc(AwSdHostState *s,
/* Write to SD bus */
if (is_write) {
- cpu_physical_memory_read((desc->addr & DESC_SIZE_MASK) + num_done,
- buf, buf_bytes);
+ dma_memory_read(&s->dma_as,
+ (desc->addr & DESC_SIZE_MASK) + num_done,
+ buf, buf_bytes);
sdbus_write_data(&s->sdbus, buf, buf_bytes);
/* Read from SD bus */
} else {
sdbus_read_data(&s->sdbus, buf, buf_bytes);
- cpu_physical_memory_write((desc->addr & DESC_SIZE_MASK) + num_done,
- buf, buf_bytes);
+ dma_memory_write(&s->dma_as,
+ (desc->addr & DESC_SIZE_MASK) + num_done,
+ buf, buf_bytes);
}
num_done += buf_bytes;
}
/* Clear hold flag and flush descriptor */
desc->status &= ~DESC_STATUS_HOLD;
- cpu_physical_memory_write(desc_addr, desc, sizeof(*desc));
+ dma_memory_write(&s->dma_as, desc_addr, desc, sizeof(*desc));
return num_done;
}
@@ -721,6 +726,12 @@ static const VMStateDescription vmstate_allwinner_sdhost = {
}
};
+static Property allwinner_sdhost_properties[] = {
+ DEFINE_PROP_LINK("dma-memory", AwSdHostState, dma_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void allwinner_sdhost_init(Object *obj)
{
AwSdHostState *s = AW_SDHOST(obj);
@@ -734,6 +745,18 @@ static void allwinner_sdhost_init(Object *obj)
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq);
}
+static void allwinner_sdhost_realize(DeviceState *dev, Error **errp)
+{
+ AwSdHostState *s = AW_SDHOST(dev);
+
+ if (!s->dma_mr) {
+ error_setg(errp, TYPE_AW_SDHOST " 'dma-memory' link not set");
+ return;
+ }
+
+ address_space_init(&s->dma_as, s->dma_mr, "sdhost-dma");
+}
+
static void allwinner_sdhost_reset(DeviceState *dev)
{
AwSdHostState *s = AW_SDHOST(dev);
@@ -792,6 +815,8 @@ static void allwinner_sdhost_class_init(ObjectClass *klass, void *data)
dc->reset = allwinner_sdhost_reset;
dc->vmsd = &vmstate_allwinner_sdhost;
+ dc->realize = allwinner_sdhost_realize;
+ device_class_set_props(dc, allwinner_sdhost_properties);
}
static void allwinner_sdhost_sun4i_class_init(ObjectClass *klass, void *data)
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
index 5298162..347b977 100644
--- a/include/hw/arm/armsse.h
+++ b/include/hw/arm/armsse.h
@@ -220,7 +220,7 @@ typedef struct ARMSSE {
typedef struct ARMSSEInfo ARMSSEInfo;
typedef struct ARMSSEClass {
- DeviceClass parent_class;
+ SysBusDeviceClass parent_class;
const ARMSSEInfo *info;
} ARMSSEClass;
diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h
index ed7b58d..dabc49e 100644
--- a/include/hw/char/cadence_uart.h
+++ b/include/hw/char/cadence_uart.h
@@ -53,21 +53,4 @@ typedef struct {
Clock *refclk;
} CadenceUARTState;
-static inline DeviceState *cadence_uart_create(hwaddr addr,
- qemu_irq irq,
- Chardev *chr)
-{
- DeviceState *dev;
- SysBusDevice *s;
-
- dev = qdev_new(TYPE_CADENCE_UART);
- s = SYS_BUS_DEVICE(dev);
- qdev_prop_set_chr(dev, "chardev", chr);
- sysbus_realize_and_unref(s, &error_fatal);
- sysbus_mmio_map(s, 0, addr);
- sysbus_connect_irq(s, 0, irq);
-
- return dev;
-}
-
#endif
diff --git a/include/hw/clock.h b/include/hw/clock.h
index f822a94..9ecd78b 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -127,17 +127,19 @@ void clock_set_source(Clock *clk, Clock *src);
* @value: the clock's value, 0 means unclocked
*
* Set the local cached period value of @clk to @value.
+ *
+ * @return: true if the clock is changed.
*/
-void clock_set(Clock *clk, uint64_t value);
+bool clock_set(Clock *clk, uint64_t value);
-static inline void clock_set_hz(Clock *clk, unsigned hz)
+static inline bool clock_set_hz(Clock *clk, unsigned hz)
{
- clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz));
+ return clock_set(clk, CLOCK_PERIOD_FROM_HZ(hz));
}
-static inline void clock_set_ns(Clock *clk, unsigned ns)
+static inline bool clock_set_ns(Clock *clk, unsigned ns)
{
- clock_set(clk, CLOCK_PERIOD_FROM_NS(ns));
+ return clock_set(clk, CLOCK_PERIOD_FROM_NS(ns));
}
/**
@@ -163,8 +165,9 @@ void clock_propagate(Clock *clk);
*/
static inline void clock_update(Clock *clk, uint64_t value)
{
- clock_set(clk, value);
- clock_propagate(clk);
+ if (clock_set(clk, value)) {
+ clock_propagate(clk);
+ }
}
static inline void clock_update_hz(Clock *clk, unsigned hz)
@@ -209,17 +212,4 @@ static inline bool clock_is_enabled(const Clock *clk)
return clock_get(clk) != 0;
}
-static inline void clock_init(Clock *clk, uint64_t value)
-{
- clock_set(clk, value);
-}
-static inline void clock_init_hz(Clock *clk, uint64_t value)
-{
- clock_set_hz(clk, value);
-}
-static inline void clock_init_ns(Clock *clk, uint64_t value)
-{
- clock_set_ns(clk, value);
-}
-
#endif /* QEMU_HW_CLOCK_H */
diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h
index 4c1d13c..c63968a 100644
--- a/include/hw/misc/unimp.h
+++ b/include/hw/misc/unimp.h
@@ -20,6 +20,7 @@
typedef struct {
SysBusDevice parent_obj;
MemoryRegion iomem;
+ unsigned offset_fmt_width;
char *name;
uint64_t size;
} UnimplementedDeviceState;
diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h
index eda034e..dd1d7b9 100644
--- a/include/hw/net/allwinner-sun8i-emac.h
+++ b/include/hw/net/allwinner-sun8i-emac.h
@@ -49,6 +49,12 @@ typedef struct AwSun8iEmacState {
/** Interrupt output signal to notify CPU */
qemu_irq irq;
+ /** Memory region where DMA transfers are done */
+ MemoryRegion *dma_mr;
+
+ /** Address space used internally for DMA transfers */
+ AddressSpace dma_as;
+
/** Generic Network Interface Controller (NIC) for networking API */
NICState *nic;
diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h
index a340f65..64ca4d2 100644
--- a/include/hw/qdev-clock.h
+++ b/include/hw/qdev-clock.h
@@ -70,12 +70,10 @@ Clock *qdev_get_clock_out(DeviceState *dev, const char *name);
*
* Set the source clock of input clock @name of device @dev to @source.
* @source period update will be propagated to @name clock.
+ *
+ * Must be called before @dev is realized.
*/
-static inline void qdev_connect_clock_in(DeviceState *dev, const char *name,
- Clock *source)
-{
- clock_set_source(qdev_get_clock_in(dev, name), source);
-}
+void qdev_connect_clock_in(DeviceState *dev, const char *name, Clock *source);
/**
* qdev_alias_clock:
diff --git a/include/hw/sd/allwinner-sdhost.h b/include/hw/sd/allwinner-sdhost.h
index d94606a..839732e 100644
--- a/include/hw/sd/allwinner-sdhost.h
+++ b/include/hw/sd/allwinner-sdhost.h
@@ -71,6 +71,12 @@ typedef struct AwSdHostState {
/** Interrupt output signal to notify CPU */
qemu_irq irq;
+ /** Memory region where DMA transfers are done */
+ MemoryRegion *dma_mr;
+
+ /** Address space used internally for DMA transfers */
+ AddressSpace dma_as;
+
/** Number of bytes left in current DMA transfer */
uint32_t transfer_cnt;
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 5c9890d..76ea405 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -63,6 +63,11 @@ static inline Int128 int128_rshift(Int128 a, int n)
return a >> n;
}
+static inline Int128 int128_lshift(Int128 a, int n)
+{
+ return a << n;
+}
+
static inline Int128 int128_add(Int128 a, Int128 b)
{
return a + b;
@@ -217,6 +222,17 @@ static inline Int128 int128_rshift(Int128 a, int n)
}
}
+static inline Int128 int128_lshift(Int128 a, int n)
+{
+ uint64_t l = a.lo << (n & 63);
+ if (n >= 64) {
+ return int128_make128(0, l);
+ } else if (n > 0) {
+ return int128_make128(l, (a.hi << n) | (a.lo >> (64 - n)));
+ }
+ return a;
+}
+
static inline Int128 int128_add(Int128 a, Int128 b)
{
uint64_t lo = a.lo + b.lo;
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 63c4a08..4411c47 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6b4f0eb..44d6666 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5105,7 +5105,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
.access = PL2_RW,
.readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
{ .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
- .type = ARM_CP_NO_RAW,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW,
.type = ARM_CP_CONST, .resetvalue = 0 },
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 759639a..3ca73a1 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -758,6 +758,34 @@ DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 1047520..891306f 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -514,11 +514,12 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
}
/* Record a tag check failure. */
-static void mte_check_fail(CPUARMState *env, int mmu_idx,
+static void mte_check_fail(CPUARMState *env, uint32_t desc,
uint64_t dirty_ptr, uintptr_t ra)
{
+ int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
- int el, reg_el, tcf, select;
+ int el, reg_el, tcf, select, is_write, syn;
uint64_t sctlr;
reg_el = regime_el(env, arm_mmu_idx);
@@ -546,9 +547,10 @@ static void mte_check_fail(CPUARMState *env, int mmu_idx,
*/
cpu_restore_state(env_cpu(env), ra, true);
env->exception.vaddress = dirty_ptr;
- raise_exception(env, EXCP_DATA_ABORT,
- syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11),
- exception_target_el(env));
+
+ is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+ syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+ raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
/* noreturn, but fall through to the assert anyway */
case 0:
@@ -639,8 +641,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc,
}
if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) {
- int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
- mte_check_fail(env, mmu_idx, ptr, ra);
+ mte_check_fail(env, desc, ptr, ra);
}
return useronly_clean_ptr(ptr);
@@ -810,7 +811,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
fail_ofs = tag_first + n * TAG_GRANULE - ptr;
fail_ofs = ROUND_UP(fail_ofs, esize);
- mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
+ mte_check_fail(env, desc, ptr + fail_ofs, ra);
}
done:
@@ -922,7 +923,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
fail:
/* Locate the first nibble that differs. */
i = ctz64(mem_tag ^ ptr_tag) >> 4;
- mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra);
+ mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);
done:
return useronly_clean_ptr(ptr);
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 4f580a2..6425396 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -150,13 +150,17 @@
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
# Two register operand, one immediate operand, with predicate,
-# element size encoded as TSZHL. User must fill in imm.
-@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \
- &rpri_esz rn=%reg_movprfx esz=%tszimm_esz
+# element size encoded as TSZHL.
+@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl
+@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr
# Similarly without predicate.
-@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \
- &rri_esz esz=%tszimm16_esz
+@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl
+@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr
# Two register operand, one immediate operand, with 4-bit predicate.
# User must fill in imm.
@@ -289,14 +293,10 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
### SVE Shift by Immediate - Predicated Group
# SVE bitwise shift by immediate (predicated)
-ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shl
-ASRD 00000100 .. 000 100 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
+ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl
+ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
# SVE bitwise shift by vector (predicated)
ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
@@ -400,12 +400,9 @@ RDVL 00000100 101 11111 01010 imm:s6 rd:5
### SVE Bitwise Shift - Unpredicated Group
# SVE bitwise shift by immediate (unpredicated)
-ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shl
+ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr
+LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr
+LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl
# SVE bitwise shift by wide elements (unpredicated)
# Note esz != 3
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 382fa82..4758d46 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -956,85 +956,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
return flags;
}
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result. In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
*/
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- if (pg[H1(i)] & 1) {
- d[i] = 0;
- }
- }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements. */
void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
}
}
void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+ d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
}
}
@@ -1043,8 +1001,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn;
uint8_t *pg = vg;
+ uint8_t inv = simd_data(desc);
+
for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+ d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
}
}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0fc5e12..4ba6918 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -678,6 +678,20 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
tcg_temp_free_ptr(fpst);
}
+/* Expand a 3-operand + qc + operation using an out-of-line helper. */
+static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), qc_ptr,
+ is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@@ -1156,18 +1170,18 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
* unallocated-encoding checks (otherwise the syndrome information
* for the resulting exception will be incorrect).
*/
-static inline bool fp_access_check(DisasContext *s)
+static bool fp_access_check(DisasContext *s)
{
- assert(!s->fp_access_checked);
- s->fp_access_checked = true;
+ if (s->fp_excp_el) {
+ assert(!s->fp_access_checked);
+ s->fp_access_checked = true;
- if (!s->fp_excp_el) {
- return true;
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ return false;
}
-
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return false;
+ s->fp_access_checked = true;
+ return true;
}
/* Check that SVE access is enabled. If it is, return true.
@@ -1176,10 +1190,14 @@ static inline bool fp_access_check(DisasContext *s)
bool sve_access_check(DisasContext *s)
{
if (s->sve_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
- s->sve_excp_el);
+ assert(!s->sve_access_checked);
+ s->sve_access_checked = true;
+
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_sve_access_trap(), s->sve_excp_el);
return false;
}
+ s->sve_access_checked = true;
return fp_access_check(s);
}
@@ -11730,6 +11748,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
}
return;
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static gen_helper_gvec_3_ptr * const fns[2][2] = {
+ { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
+ { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
+ };
+ gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
+ }
+ return;
case 0x11:
if (!u) { /* CMTST */
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
@@ -11841,16 +11868,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0x16: /* SQDMULH, SQRDMULH */
- {
- static NeonGenTwoOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
- { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
- };
- assert(size == 1 || size == 2);
- genenvfn = fns[size - 1][u];
- break;
- }
default:
g_assert_not_reached();
}
@@ -13484,6 +13501,56 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
data, gen_helper_gvec_fmlal_idx_a64);
}
return;
+
+ case 0x08: /* MUL */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_3 * const fns[3] = {
+ gen_helper_gvec_mul_idx_h,
+ gen_helper_gvec_mul_idx_s,
+ gen_helper_gvec_mul_idx_d,
+ };
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x10: /* MLA */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mla_idx_h,
+ gen_helper_gvec_mla_idx_s,
+ gen_helper_gvec_mla_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x14: /* MLS */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mls_idx_h,
+ gen_helper_gvec_mls_idx_s,
+ gen_helper_gvec_mls_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
}
if (size == 3) {
@@ -14529,6 +14596,7 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
s->base.pc_next += 4;
s->fp_access_checked = false;
+ s->sve_access_checked = false;
if (dc_isar_feature(aa64_bti, s)) {
if (s->base.num_insns == 1) {
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d97cb37..15ad6c7 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -142,35 +142,76 @@ static int pred_gvec_reg_size(DisasContext *s)
return size_for_gvec(pred_full_reg_size(s));
}
+/* Invoke an out-of-line helper on 2 Zregs. */
+static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
+ int rd, int rn, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs. */
+static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int rm, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
+static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
+static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int pg, int data)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+}
+
/* Invoke a vector expander on two Zregs. */
-static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
+static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
+ int esz, int rd, int rn)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), vsz, vsz);
}
/* Invoke a vector expander on three Zregs. */
-static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), vsz, vsz);
- }
- return true;
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vsz, vsz);
}
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
- return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
+ }
+ return true;
}
/* Initialize a Zreg with replications of a 64-bit immediate. */
@@ -180,52 +221,27 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
-/* Invoke a vector expander on two Pregs. */
-static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn), psz, psz);
- }
- return true;
-}
-
/* Invoke a vector expander on three Pregs. */
-static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
+static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int rd, int rn, int rm)
{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm), psz, psz);
- }
- return true;
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(MO_64, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm), psz, psz);
}
-/* Invoke a vector operation on four Pregs. */
-static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
- int rd, int rn, int rm, int rg)
+/* Invoke a vector move on two Pregs. */
+static bool do_mov_p(DisasContext *s, int rd, int rn)
{
if (sve_access_check(s)) {
unsigned psz = pred_gvec_reg_size(s);
- tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm),
- pred_full_reg_offset(s, rg),
- psz, psz, gvec_op);
+ tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn), psz, psz);
}
return true;
}
-/* Invoke a vector move on two Pregs. */
-static bool do_mov_p(DisasContext *s, int rd, int rn)
-{
- return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
-}
-
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
@@ -273,24 +289,32 @@ const uint64_t pred_esz_masks[4] = {
*** SVE Logical - Unpredicated Group
*/
+static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
+{
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
+ }
+ return true;
+}
+
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_and);
}
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_or);
}
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
/*
@@ -299,32 +323,32 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_add);
}
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
- return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
+ return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
/*
@@ -333,16 +357,11 @@ static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
- unsigned vsz = vec_full_reg_size(s);
if (fn == NULL) {
return false;
}
if (sve_access_check(s)) {
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -356,12 +375,7 @@ static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+ gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
#define DO_ZPZZ(NAME, name) \
@@ -433,11 +447,7 @@ static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
}
return true;
}
@@ -608,48 +618,29 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
*** SVE Shift by Immediate - Predicated Group
*/
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
*/
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_clr_b, gen_helper_sve_clr_h,
- gen_helper_sve_clr_s, gen_helper_sve_clr_d,
- };
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
- }
- return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements. */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+ int esz, bool invert)
{
static gen_helper_gvec_3 * const fns[4] = {
gen_helper_sve_movz_b, gen_helper_sve_movz_h,
gen_helper_sve_movz_s, gen_helper_sve_movz_d,
};
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
+
+ if (sve_access_check(s)) {
+ gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
+ }
+ return true;
}
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}
return true;
}
@@ -682,7 +673,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -700,7 +691,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -718,7 +709,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. For arithmetic
right shift for division, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else {
return do_zpzi_ool(s, a, fns[a->esz]);
}
@@ -799,11 +790,7 @@ static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -977,11 +964,7 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->imm, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
return true;
}
@@ -1022,10 +1005,7 @@ static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -1042,11 +1022,7 @@ static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
return false;
}
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -1068,6 +1044,11 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
int mofs = pred_full_reg_offset(s, a->rm);
int gofs = pred_full_reg_offset(s, a->pg);
+ if (!a->s) {
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ return true;
+ }
+
if (psz == 8) {
/* Do the operation and the flags generation in temps. */
TCGv_i64 pd = tcg_temp_new_i64();
@@ -1127,19 +1108,24 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_and_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->rn == a->rm) {
- if (a->pg == a->rn) {
- return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
+
+ if (!a->s) {
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ if (a->rn == a->rm) {
+ if (a->pg == a->rn) {
+ do_mov_p(s, a->rd, a->rn);
+ } else {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
+ }
+ return true;
+ } else if (a->pg == a->rn || a->pg == a->rm) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
+ return true;
}
- } else if (a->pg == a->rn || a->pg == a->rm) {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1163,13 +1149,14 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_bic_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn) {
- return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+
+ if (!a->s && a->pg == a->rn) {
+ if (sve_access_check(s)) {
+ gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
+ }
+ return true;
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1193,41 +1180,22 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_eor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pn, pn, pg);
- tcg_gen_andc_i64(pm, pm, pg);
- tcg_gen_or_i64(pd, pn, pm);
-}
-
-static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pn, pn, pg);
- tcg_gen_andc_vec(vece, pm, pm, pg);
- tcg_gen_or_vec(vece, pd, pn, pm);
+ return do_pppp_flags(s, a, &op);
}
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
- static const GVecGen4 op = {
- .fni8 = gen_sel_pg_i64,
- .fniv = gen_sel_pg_vec,
- .fno = gen_helper_sve_sel_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
if (a->s) {
return false;
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ pred_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->rm), psz, psz);
+ }
+ return true;
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1251,13 +1219,11 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orr_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn && a->rn == a->rm) {
+
+ if (!a->s && a->pg == a->rn && a->rn == a->rm) {
return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
}
+ return do_pppp_flags(s, a, &op);
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1281,11 +1247,7 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_orn_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1309,11 +1271,7 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nor_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
@@ -1337,11 +1295,7 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
.fno = gen_helper_sve_nand_pppp,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
+ return do_pppp_flags(s, a, &op);
}
/*
@@ -2103,10 +2057,7 @@ static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
}
return true;
}
@@ -2119,11 +2070,7 @@ static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
+ gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -2296,11 +2243,7 @@ static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
gen_helper_gvec_3 *fn)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, data, fn);
+ gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
return true;
}
@@ -2745,12 +2688,8 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->esz, gen_helper_sve_splice);
+ gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
+ a->rd, a->rn, a->rm, a->pg, 0);
}
return true;
}
@@ -3429,11 +3368,7 @@ static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
}
return true;
}
@@ -3446,11 +3381,7 @@ static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
};
if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->index, fns[a->u][a->sz]);
+ gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
}
return true;
}
@@ -5093,8 +5024,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Zero the inactive elements. */
gen_set_label(over);
- do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
- return true;
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5877,8 +5807,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
- if (sve_access_check(s)) {
- do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
- }
- return true;
+ return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6d6d4c0..423b0e0 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -64,6 +64,7 @@ typedef struct DisasContext {
* that it is set at the point where we actually touch the FP regs.
*/
bool fp_access_checked;
+ bool sve_access_checked;
/* ARMv8 single-step state (this is distinct from the QEMU gdbstub
* single-step support).
*/
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7d76412..a6c53d2 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -37,19 +37,24 @@
#endif
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
+static int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3,
+ bool neg, bool round, uint32_t *sat)
{
- /* Simplify:
+ /*
+ * Simplify:
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
* = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
*/
int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) + ret + (1 << 14);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int32_t)src3 << 15) + (round << 14);
ret >>= 15;
+
if (ret != (int16_t)ret) {
*sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
+ ret = (ret < 0 ? INT16_MIN : INT16_MAX);
}
return ret;
}
@@ -58,8 +63,9 @@ uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ false, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -73,35 +79,18 @@ void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
- int16_t src3, uint32_t *sat)
-{
- /* Similarly, using subtraction:
- * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
- * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
- */
- int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) - ret + (1 << 14);
- ret >>= 15;
- if (ret != (int16_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? -0x8000 : 0x7fff);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
- uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ true, true, sat);
return deposit32(e1, 16, 16, e2);
}
@@ -115,19 +104,47 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
+static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
+ bool neg, bool round, uint32_t *sat)
{
/* Simplify similarly to int_qrdmlah_s16 above. */
int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) + ret + (1 << 30);
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int64_t)src3 << 31) + (round << 30);
ret >>= 31;
+
if (ret != (int32_t)ret) {
*sat = 1;
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
@@ -139,7 +156,7 @@ uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlah_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, false, true, sat);
}
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
@@ -152,31 +169,16 @@ void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
-/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
- int32_t src3, uint32_t *sat)
-{
- /* Simplify similarly to int_qrdmlsh_s16 above. */
- int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) - ret + (1 << 30);
- ret >>= 31;
- if (ret != (int32_t)ret) {
- *sat = 1;
- ret = (ret < 0 ? INT32_MIN : INT32_MAX);
- }
- return ret;
-}
-
uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
int32_t src2, int32_t src3)
{
uint32_t *sat = &env->vfp.qc[0];
- return inl_qrdmlsh_s32(src1, src2, src3, sat);
+ return do_sqrdmlah_s(src1, src2, src3, true, true, sat);
}
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
@@ -189,7 +191,31 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -733,6 +759,52 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
*/
#define DO_MUL_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2)
+DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4)
+DO_MUL_IDX(gvec_mul_idx_d, uint64_t, )
+
+#undef DO_MUL_IDX
+
+#define DO_MLA_IDX(NAME, TYPE, OP, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = a[i + j] OP n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2)
+DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4)
+DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, )
+
+DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2)
+DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4)
+DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
+
+#undef DO_MLA_IDX
+
+#define DO_FMUL_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
@@ -747,11 +819,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
-DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
-DO_MUL_IDX(gvec_fmul_idx_d, float64, )
+DO_FMUL_IDX(gvec_fmul_idx_h, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, float64, )
-#undef DO_MUL_IDX
+#undef DO_FMUL_IDX
#define DO_FMLA_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \