author     Peter Maydell <peter.maydell@linaro.org>    2018-05-18 18:25:29 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2018-05-18 18:25:29 +0100
commit     5bcf917ee37a5efbef99f091a96db54a5276becb (patch)
tree       46b15d3b22e7121f4db061104c335ba2de6533be
parent     d32e41a1188e929cc0fb16829ce3736046951e39 (diff)
parent     b94f8f60bd841c5b737185cd38263e26822f77ab (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180518' into staging
target-arm queue:
 * Initial part of SVE implementation (currently disabled)
 * smmuv3: fix some minor Coverity issues
 * add model of Xilinx ZynqMP generic DMA controller
 * expose (most) Arm coprocessor/system registers to gdb via
   QEMU's gdbstub, for reads only

# gpg: Signature made Fri 18 May 2018 18:18:27 BST
# gpg:                using RSA key 3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20180518: (32 commits)
  target/arm: Implement SVE Permute - Extract Group
  target/arm: Implement SVE Integer Wide Immediate - Predicated Group
  target/arm: Implement SVE Bitwise Immediate Group
  target/arm: Implement SVE Element Count Group
  target/arm: Implement SVE floating-point trig select coefficient
  target/arm: Implement SVE floating-point exponential accelerator
  target/arm: Implement SVE Compute Vector Address Group
  target/arm: Implement SVE Bitwise Shift - Unpredicated Group
  target/arm: Implement SVE Stack Allocation Group
  target/arm: Implement SVE Index Generation Group
  target/arm: Implement SVE Integer Arithmetic - Unpredicated Group
  target/arm: Implement SVE Integer Multiply-Add Group
  target/arm: Implement SVE Integer Arithmetic - Unary Predicated Group
  target/arm: Implement SVE bitwise shift by wide elements (predicated)
  target/arm: Implement SVE bitwise shift by vector (predicated)
  target/arm: Implement SVE bitwise shift by immediate (predicated)
  target/arm: Implement SVE Integer Reduction Group
  target/arm: Implement SVE Integer Binary Arithmetic - Predicated Group
  target/arm: Implement SVE Predicate Misc Group
  target/arm: Implement SVE Predicate Logical Operations Group
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  .gitignore                    |    1
-rw-r--r--  gdbstub.c                     |   10
-rw-r--r--  hw/arm/smmu-common.c          |    4
-rw-r--r--  hw/arm/smmuv3.c               |    2
-rw-r--r--  hw/arm/xlnx-zynqmp.c          |   53
-rw-r--r--  hw/dma/Makefile.objs          |    1
-rw-r--r--  hw/dma/xlnx-zdma.c            |  832
-rw-r--r--  include/hw/arm/xlnx-zynqmp.h  |    5
-rw-r--r--  include/hw/dma/xlnx-zdma.h    |   84
-rw-r--r--  include/qom/cpu.h             |    5
-rw-r--r--  target/arm/Makefile.objs      |   10
-rw-r--r--  target/arm/cpu.c              |    1
-rw-r--r--  target/arm/cpu.h              |   37
-rw-r--r--  target/arm/gdbstub.c          |   76
-rw-r--r--  target/arm/helper-sve.h       |  427
-rw-r--r--  target/arm/helper.c           |   57
-rw-r--r--  target/arm/helper.h           |    1
-rw-r--r--  target/arm/sve.decode         |  419
-rw-r--r--  target/arm/sve_helper.c       | 1562
-rw-r--r--  target/arm/translate-a64.c    |  119
-rw-r--r--  target/arm/translate-a64.h    |  118
-rw-r--r--  target/arm/translate-sve.c    | 2070

22 files changed, 5778 insertions(+), 116 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4055e12..81e1f2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -206,3 +206,4 @@ trace-dtrace-root.h
trace-dtrace-root.dtrace
trace-ust-all.h
trace-ust-all.c
+/target/arm/decode-sve.inc.c
diff --git a/gdbstub.c b/gdbstub.c
index 3c38073..9682e16 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -675,6 +675,16 @@ static const char *get_feature_xml(const char *p, const char **newp,
}
return target_xml;
}
+ if (cc->gdb_get_dynamic_xml) {
+ CPUState *cpu = first_cpu;
+ char *xmlname = g_strndup(p, len);
+ const char *xml = cc->gdb_get_dynamic_xml(cpu, xmlname);
+
+ g_free(xmlname);
+ if (xml) {
+ return xml;
+ }
+ }
for (i = 0; ; i++) {
name = xml_builtin[i][0];
if (!name || (strncmp(name, p, len) == 0 && strlen(name) == len))
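
[The new block extends gdb's target-description lookup: after checking the
target's own core XML, the stub now asks the CPU class for a runtime-generated
file before falling back to the built-in XML table. This path is exercised when
gdb fetches a description over the remote protocol, roughly like this
(illustrative exchange; the offset/length values are hypothetical):

    -> qXfer:features:read:system-registers.xml:0,ffb
    <- m<?xml version="1.0"?><!DOCTYPE target SYSTEM "gdb-target.dtd">...
]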
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 01c7be8..3c5f724 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -83,9 +83,9 @@ static inline hwaddr get_table_pte_address(uint64_t pte, int granule_sz)
static inline hwaddr get_block_pte_address(uint64_t pte, int level,
int granule_sz, uint64_t *bsz)
{
- int n = (granule_sz - 3) * (4 - level) + 3;
+ int n = level_shift(level, granule_sz);

- *bsz = 1 << n;
+ *bsz = 1ULL << n;
return PTE_ADDRESS(pte, n);
}
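
[Both hunks are overflow fixes: level_shift() centralizes the shift-amount
computation, and the 1ULL makes the block-size shift 64-bit. That matters
because n can exceed 31 for large blocks, and shifting a 32-bit int that far
is undefined behaviour. A standalone sketch of the arithmetic, using example
granule/level values rather than anything from the patch:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        /* 64K granule (granule_sz = 16), level-1 block:
         * n = (16 - 3) * (4 - 1) + 3 = 42, i.e. a 4TB block. */
        int n = (16 - 3) * (4 - 1) + 3;
        uint64_t bsz = 1ULL << n;   /* "1 << n" would shift a 32-bit int by 42 */
        printf("block size = 0x%" PRIx64 "\n", bsz);
        return 0;
    }
]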
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index b3026de..42dc521 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -143,7 +143,7 @@ static MemTxResult smmuv3_write_eventq(SMMUv3State *s, Evt *evt)
void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
{
- Evt evt;
+ Evt evt = {};
MemTxResult r;
if (!smmuv3_eventq_enabled(s)) {
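
[The "= {}" initializer (a GNU C extension QEMU relies on) zero-fills the whole
event record, so any fields a given event type does not set are written to the
queue as zeroes rather than as leftover stack contents, which is presumably
what Coverity flagged. A two-line illustration:

    Evt a;        /* members indeterminate: queueing it leaks stack data */
    Evt b = {};   /* every member zero-initialized before fields are set */
]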
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 505253e..2045b9d 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -90,6 +90,24 @@ static const int spi_intr[XLNX_ZYNQMP_NUM_SPIS] = {
19, 20,
};
+static const uint64_t gdma_ch_addr[XLNX_ZYNQMP_NUM_GDMA_CH] = {
+ 0xFD500000, 0xFD510000, 0xFD520000, 0xFD530000,
+ 0xFD540000, 0xFD550000, 0xFD560000, 0xFD570000
+};
+
+static const int gdma_ch_intr[XLNX_ZYNQMP_NUM_GDMA_CH] = {
+ 124, 125, 126, 127, 128, 129, 130, 131
+};
+
+static const uint64_t adma_ch_addr[XLNX_ZYNQMP_NUM_ADMA_CH] = {
+ 0xFFA80000, 0xFFA90000, 0xFFAA0000, 0xFFAB0000,
+ 0xFFAC0000, 0xFFAD0000, 0xFFAE0000, 0xFFAF0000
+};
+
+static const int adma_ch_intr[XLNX_ZYNQMP_NUM_ADMA_CH] = {
+ 77, 78, 79, 80, 81, 82, 83, 84
+};
+
typedef struct XlnxZynqMPGICRegion {
int region_index;
uint32_t address;
@@ -197,6 +215,16 @@ static void xlnx_zynqmp_init(Object *obj)
object_initialize(&s->rtc, sizeof(s->rtc), TYPE_XLNX_ZYNQMP_RTC);
qdev_set_parent_bus(DEVICE(&s->rtc), sysbus_get_default());
+
+ for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
+ object_initialize(&s->gdma[i], sizeof(s->gdma[i]), TYPE_XLNX_ZDMA);
+ qdev_set_parent_bus(DEVICE(&s->gdma[i]), sysbus_get_default());
+ }
+
+ for (i = 0; i < XLNX_ZYNQMP_NUM_ADMA_CH; i++) {
+ object_initialize(&s->adma[i], sizeof(s->adma[i]), TYPE_XLNX_ZDMA);
+ qdev_set_parent_bus(DEVICE(&s->adma[i]), sysbus_get_default());
+ }
}
static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -492,6 +520,31 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
}
sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]);
+
+ for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
+ object_property_set_uint(OBJECT(&s->gdma[i]), 128, "bus-width", &err);
+ object_property_set_bool(OBJECT(&s->gdma[i]), true, "realized", &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->gdma[i]), 0, gdma_ch_addr[i]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->gdma[i]), 0,
+ gic_spi[gdma_ch_intr[i]]);
+ }
+
+ for (i = 0; i < XLNX_ZYNQMP_NUM_ADMA_CH; i++) {
+ object_property_set_bool(OBJECT(&s->adma[i]), true, "realized", &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->adma[i]), 0, adma_ch_addr[i]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->adma[i]), 0,
+ gic_spi[adma_ch_intr[i]]);
+ }
}
static Property xlnx_zynqmp_props[] = {
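
[Note the asymmetry: the GDMA channels are configured with a 128-bit bus width,
while the ADMA channels keep the 64-bit default from the device's property
list. Each channel also gets its own 64KB register window. As a hypothetical
example of what that buys a guest, enabling GDMA channel 0 is a single store to
its CTRL2 register (base address and offset taken from the tables above and the
register model below):

    #define GDMA0_BASE    0xFD500000u
    #define ZDMA_CH_CTRL2 0x200u    /* EN is bit 0 */

    /* Bare-metal sketch: start channel 0 of the general-purpose DMA. */
    *(volatile uint32_t *)(GDMA0_BASE + ZDMA_CH_CTRL2) = 1;
]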
diff --git a/hw/dma/Makefile.objs b/hw/dma/Makefile.objs
index c2afecb..79affec 100644
--- a/hw/dma/Makefile.objs
+++ b/hw/dma/Makefile.objs
@@ -10,6 +10,7 @@ common-obj-$(CONFIG_ETRAXFS) += etraxfs_dma.o
common-obj-$(CONFIG_STP2000) += sparc32_dma.o
obj-$(CONFIG_XLNX_ZYNQMP) += xlnx_dpdma.o
obj-$(CONFIG_XLNX_ZYNQMP_ARM) += xlnx_dpdma.o
+common-obj-$(CONFIG_XLNX_ZYNQMP_ARM) += xlnx-zdma.o
obj-$(CONFIG_OMAP) += omap_dma.o soc_dma.o
obj-$(CONFIG_PXA2XX) += pxa2xx_dma.o
diff --git a/hw/dma/xlnx-zdma.c b/hw/dma/xlnx-zdma.c
new file mode 100644
index 0000000..14d86c2
--- /dev/null
+++ b/hw/dma/xlnx-zdma.c
@@ -0,0 +1,832 @@
+/*
+ * QEMU model of the ZynqMP generic DMA
+ *
+ * Copyright (c) 2014 Xilinx Inc.
+ * Copyright (c) 2018 FEIMTECH AB
+ *
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>,
+ * Francisco Iglesias <francisco.iglesias@feimtech.se>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/dma/xlnx-zdma.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+
+#ifndef XLNX_ZDMA_ERR_DEBUG
+#define XLNX_ZDMA_ERR_DEBUG 0
+#endif
+
+REG32(ZDMA_ERR_CTRL, 0x0)
+ FIELD(ZDMA_ERR_CTRL, APB_ERR_RES, 0, 1)
+REG32(ZDMA_CH_ISR, 0x100)
+ FIELD(ZDMA_CH_ISR, DMA_PAUSE, 11, 1)
+ FIELD(ZDMA_CH_ISR, DMA_DONE, 10, 1)
+ FIELD(ZDMA_CH_ISR, AXI_WR_DATA, 9, 1)
+ FIELD(ZDMA_CH_ISR, AXI_RD_DATA, 8, 1)
+ FIELD(ZDMA_CH_ISR, AXI_RD_DST_DSCR, 7, 1)
+ FIELD(ZDMA_CH_ISR, AXI_RD_SRC_DSCR, 6, 1)
+ FIELD(ZDMA_CH_ISR, IRQ_DST_ACCT_ERR, 5, 1)
+ FIELD(ZDMA_CH_ISR, IRQ_SRC_ACCT_ERR, 4, 1)
+ FIELD(ZDMA_CH_ISR, BYTE_CNT_OVRFL, 3, 1)
+ FIELD(ZDMA_CH_ISR, DST_DSCR_DONE, 2, 1)
+ FIELD(ZDMA_CH_ISR, SRC_DSCR_DONE, 1, 1)
+ FIELD(ZDMA_CH_ISR, INV_APB, 0, 1)
+REG32(ZDMA_CH_IMR, 0x104)
+ FIELD(ZDMA_CH_IMR, DMA_PAUSE, 11, 1)
+ FIELD(ZDMA_CH_IMR, DMA_DONE, 10, 1)
+ FIELD(ZDMA_CH_IMR, AXI_WR_DATA, 9, 1)
+ FIELD(ZDMA_CH_IMR, AXI_RD_DATA, 8, 1)
+ FIELD(ZDMA_CH_IMR, AXI_RD_DST_DSCR, 7, 1)
+ FIELD(ZDMA_CH_IMR, AXI_RD_SRC_DSCR, 6, 1)
+ FIELD(ZDMA_CH_IMR, IRQ_DST_ACCT_ERR, 5, 1)
+ FIELD(ZDMA_CH_IMR, IRQ_SRC_ACCT_ERR, 4, 1)
+ FIELD(ZDMA_CH_IMR, BYTE_CNT_OVRFL, 3, 1)
+ FIELD(ZDMA_CH_IMR, DST_DSCR_DONE, 2, 1)
+ FIELD(ZDMA_CH_IMR, SRC_DSCR_DONE, 1, 1)
+ FIELD(ZDMA_CH_IMR, INV_APB, 0, 1)
+REG32(ZDMA_CH_IEN, 0x108)
+ FIELD(ZDMA_CH_IEN, DMA_PAUSE, 11, 1)
+ FIELD(ZDMA_CH_IEN, DMA_DONE, 10, 1)
+ FIELD(ZDMA_CH_IEN, AXI_WR_DATA, 9, 1)
+ FIELD(ZDMA_CH_IEN, AXI_RD_DATA, 8, 1)
+ FIELD(ZDMA_CH_IEN, AXI_RD_DST_DSCR, 7, 1)
+ FIELD(ZDMA_CH_IEN, AXI_RD_SRC_DSCR, 6, 1)
+ FIELD(ZDMA_CH_IEN, IRQ_DST_ACCT_ERR, 5, 1)
+ FIELD(ZDMA_CH_IEN, IRQ_SRC_ACCT_ERR, 4, 1)
+ FIELD(ZDMA_CH_IEN, BYTE_CNT_OVRFL, 3, 1)
+ FIELD(ZDMA_CH_IEN, DST_DSCR_DONE, 2, 1)
+ FIELD(ZDMA_CH_IEN, SRC_DSCR_DONE, 1, 1)
+ FIELD(ZDMA_CH_IEN, INV_APB, 0, 1)
+REG32(ZDMA_CH_IDS, 0x10c)
+ FIELD(ZDMA_CH_IDS, DMA_PAUSE, 11, 1)
+ FIELD(ZDMA_CH_IDS, DMA_DONE, 10, 1)
+ FIELD(ZDMA_CH_IDS, AXI_WR_DATA, 9, 1)
+ FIELD(ZDMA_CH_IDS, AXI_RD_DATA, 8, 1)
+ FIELD(ZDMA_CH_IDS, AXI_RD_DST_DSCR, 7, 1)
+ FIELD(ZDMA_CH_IDS, AXI_RD_SRC_DSCR, 6, 1)
+ FIELD(ZDMA_CH_IDS, IRQ_DST_ACCT_ERR, 5, 1)
+ FIELD(ZDMA_CH_IDS, IRQ_SRC_ACCT_ERR, 4, 1)
+ FIELD(ZDMA_CH_IDS, BYTE_CNT_OVRFL, 3, 1)
+ FIELD(ZDMA_CH_IDS, DST_DSCR_DONE, 2, 1)
+ FIELD(ZDMA_CH_IDS, SRC_DSCR_DONE, 1, 1)
+ FIELD(ZDMA_CH_IDS, INV_APB, 0, 1)
+REG32(ZDMA_CH_CTRL0, 0x110)
+ FIELD(ZDMA_CH_CTRL0, OVR_FETCH, 7, 1)
+ FIELD(ZDMA_CH_CTRL0, POINT_TYPE, 6, 1)
+ FIELD(ZDMA_CH_CTRL0, MODE, 4, 2)
+ FIELD(ZDMA_CH_CTRL0, RATE_CTRL, 3, 1)
+ FIELD(ZDMA_CH_CTRL0, CONT_ADDR, 2, 1)
+ FIELD(ZDMA_CH_CTRL0, CONT, 1, 1)
+REG32(ZDMA_CH_CTRL1, 0x114)
+ FIELD(ZDMA_CH_CTRL1, DST_ISSUE, 5, 5)
+ FIELD(ZDMA_CH_CTRL1, SRC_ISSUE, 0, 5)
+REG32(ZDMA_CH_FCI, 0x118)
+ FIELD(ZDMA_CH_FCI, PROG_CELL_CNT, 2, 2)
+ FIELD(ZDMA_CH_FCI, SIDE, 1, 1)
+ FIELD(ZDMA_CH_FCI, EN, 0, 1)
+REG32(ZDMA_CH_STATUS, 0x11c)
+ FIELD(ZDMA_CH_STATUS, STATE, 0, 2)
+REG32(ZDMA_CH_DATA_ATTR, 0x120)
+ FIELD(ZDMA_CH_DATA_ATTR, ARBURST, 26, 2)
+ FIELD(ZDMA_CH_DATA_ATTR, ARCACHE, 22, 4)
+ FIELD(ZDMA_CH_DATA_ATTR, ARQOS, 18, 4)
+ FIELD(ZDMA_CH_DATA_ATTR, ARLEN, 14, 4)
+ FIELD(ZDMA_CH_DATA_ATTR, AWBURST, 12, 2)
+ FIELD(ZDMA_CH_DATA_ATTR, AWCACHE, 8, 4)
+ FIELD(ZDMA_CH_DATA_ATTR, AWQOS, 4, 4)
+ FIELD(ZDMA_CH_DATA_ATTR, AWLEN, 0, 4)
+REG32(ZDMA_CH_DSCR_ATTR, 0x124)
+ FIELD(ZDMA_CH_DSCR_ATTR, AXCOHRNT, 8, 1)
+ FIELD(ZDMA_CH_DSCR_ATTR, AXCACHE, 4, 4)
+ FIELD(ZDMA_CH_DSCR_ATTR, AXQOS, 0, 4)
+REG32(ZDMA_CH_SRC_DSCR_WORD0, 0x128)
+REG32(ZDMA_CH_SRC_DSCR_WORD1, 0x12c)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD1, MSB, 0, 17)
+REG32(ZDMA_CH_SRC_DSCR_WORD2, 0x130)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD2, SIZE, 0, 30)
+REG32(ZDMA_CH_SRC_DSCR_WORD3, 0x134)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD3, CMD, 3, 2)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD3, INTR, 2, 1)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD3, TYPE, 1, 1)
+ FIELD(ZDMA_CH_SRC_DSCR_WORD3, COHRNT, 0, 1)
+REG32(ZDMA_CH_DST_DSCR_WORD0, 0x138)
+REG32(ZDMA_CH_DST_DSCR_WORD1, 0x13c)
+ FIELD(ZDMA_CH_DST_DSCR_WORD1, MSB, 0, 17)
+REG32(ZDMA_CH_DST_DSCR_WORD2, 0x140)
+ FIELD(ZDMA_CH_DST_DSCR_WORD2, SIZE, 0, 30)
+REG32(ZDMA_CH_DST_DSCR_WORD3, 0x144)
+ FIELD(ZDMA_CH_DST_DSCR_WORD3, INTR, 2, 1)
+ FIELD(ZDMA_CH_DST_DSCR_WORD3, TYPE, 1, 1)
+ FIELD(ZDMA_CH_DST_DSCR_WORD3, COHRNT, 0, 1)
+REG32(ZDMA_CH_WR_ONLY_WORD0, 0x148)
+REG32(ZDMA_CH_WR_ONLY_WORD1, 0x14c)
+REG32(ZDMA_CH_WR_ONLY_WORD2, 0x150)
+REG32(ZDMA_CH_WR_ONLY_WORD3, 0x154)
+REG32(ZDMA_CH_SRC_START_LSB, 0x158)
+REG32(ZDMA_CH_SRC_START_MSB, 0x15c)
+ FIELD(ZDMA_CH_SRC_START_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_DST_START_LSB, 0x160)
+REG32(ZDMA_CH_DST_START_MSB, 0x164)
+ FIELD(ZDMA_CH_DST_START_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_RATE_CTRL, 0x18c)
+ FIELD(ZDMA_CH_RATE_CTRL, CNT, 0, 12)
+REG32(ZDMA_CH_SRC_CUR_PYLD_LSB, 0x168)
+REG32(ZDMA_CH_SRC_CUR_PYLD_MSB, 0x16c)
+ FIELD(ZDMA_CH_SRC_CUR_PYLD_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_DST_CUR_PYLD_LSB, 0x170)
+REG32(ZDMA_CH_DST_CUR_PYLD_MSB, 0x174)
+ FIELD(ZDMA_CH_DST_CUR_PYLD_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_SRC_CUR_DSCR_LSB, 0x178)
+REG32(ZDMA_CH_SRC_CUR_DSCR_MSB, 0x17c)
+ FIELD(ZDMA_CH_SRC_CUR_DSCR_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_DST_CUR_DSCR_LSB, 0x180)
+REG32(ZDMA_CH_DST_CUR_DSCR_MSB, 0x184)
+ FIELD(ZDMA_CH_DST_CUR_DSCR_MSB, ADDR, 0, 17)
+REG32(ZDMA_CH_TOTAL_BYTE, 0x188)
+REG32(ZDMA_CH_RATE_CNTL, 0x18c)
+ FIELD(ZDMA_CH_RATE_CNTL, CNT, 0, 12)
+REG32(ZDMA_CH_IRQ_SRC_ACCT, 0x190)
+ FIELD(ZDMA_CH_IRQ_SRC_ACCT, CNT, 0, 8)
+REG32(ZDMA_CH_IRQ_DST_ACCT, 0x194)
+ FIELD(ZDMA_CH_IRQ_DST_ACCT, CNT, 0, 8)
+REG32(ZDMA_CH_DBG0, 0x198)
+ FIELD(ZDMA_CH_DBG0, CMN_BUF_FREE, 0, 9)
+REG32(ZDMA_CH_DBG1, 0x19c)
+ FIELD(ZDMA_CH_DBG1, CMN_BUF_OCC, 0, 9)
+REG32(ZDMA_CH_CTRL2, 0x200)
+ FIELD(ZDMA_CH_CTRL2, EN, 0, 1)
+
+enum {
+ PT_REG = 0,
+ PT_MEM = 1,
+};
+
+enum {
+ CMD_HALT = 1,
+ CMD_STOP = 2,
+};
+
+enum {
+ RW_MODE_RW = 0,
+ RW_MODE_WO = 1,
+ RW_MODE_RO = 2,
+};
+
+enum {
+ DTYPE_LINEAR = 0,
+ DTYPE_LINKED = 1,
+};
+
+enum {
+ AXI_BURST_FIXED = 0,
+ AXI_BURST_INCR = 1,
+};
+
+static void zdma_ch_imr_update_irq(XlnxZDMA *s)
+{
+ bool pending;
+
+ pending = s->regs[R_ZDMA_CH_ISR] & ~s->regs[R_ZDMA_CH_IMR];
+
+ qemu_set_irq(s->irq_zdma_ch_imr, pending);
+}
+
+static void zdma_ch_isr_postw(RegisterInfo *reg, uint64_t val64)
+{
+ XlnxZDMA *s = XLNX_ZDMA(reg->opaque);
+ zdma_ch_imr_update_irq(s);
+}
+
+static uint64_t zdma_ch_ien_prew(RegisterInfo *reg, uint64_t val64)
+{
+ XlnxZDMA *s = XLNX_ZDMA(reg->opaque);
+ uint32_t val = val64;
+
+ s->regs[R_ZDMA_CH_IMR] &= ~val;
+ zdma_ch_imr_update_irq(s);
+ return 0;
+}
+
+static uint64_t zdma_ch_ids_prew(RegisterInfo *reg, uint64_t val64)
+{
+ XlnxZDMA *s = XLNX_ZDMA(reg->opaque);
+ uint32_t val = val64;
+
+ s->regs[R_ZDMA_CH_IMR] |= val;
+ zdma_ch_imr_update_irq(s);
+ return 0;
+}
+
+static void zdma_set_state(XlnxZDMA *s, XlnxZDMAState state)
+{
+ s->state = state;
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_STATUS, STATE, state);
+
+ /* Signal error if we have an error condition. */
+ if (s->error) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_STATUS, STATE, 3);
+ }
+}
+
+static void zdma_src_done(XlnxZDMA *s)
+{
+ unsigned int cnt;
+ cnt = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_IRQ_SRC_ACCT, CNT);
+ cnt++;
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_IRQ_SRC_ACCT, CNT, cnt);
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, SRC_DSCR_DONE, true);
+
+ /* Did we overflow? */
+ if (cnt != ARRAY_FIELD_EX32(s->regs, ZDMA_CH_IRQ_SRC_ACCT, CNT)) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, IRQ_SRC_ACCT_ERR, true);
+ }
+ zdma_ch_imr_update_irq(s);
+}
+
+static void zdma_dst_done(XlnxZDMA *s)
+{
+ unsigned int cnt;
+ cnt = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_IRQ_DST_ACCT, CNT);
+ cnt++;
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_IRQ_DST_ACCT, CNT, cnt);
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, DST_DSCR_DONE, true);
+
+ /* Did we overflow? */
+ if (cnt != ARRAY_FIELD_EX32(s->regs, ZDMA_CH_IRQ_DST_ACCT, CNT)) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, IRQ_DST_ACCT_ERR, true);
+ }
+ zdma_ch_imr_update_irq(s);
+}
+
+static uint64_t zdma_get_regaddr64(XlnxZDMA *s, unsigned int basereg)
+{
+ uint64_t addr;
+
+ addr = s->regs[basereg + 1];
+ addr <<= 32;
+ addr |= s->regs[basereg];
+
+ return addr;
+}
+
+static void zdma_put_regaddr64(XlnxZDMA *s, unsigned int basereg, uint64_t addr)
+{
+ s->regs[basereg] = addr;
+ s->regs[basereg + 1] = addr >> 32;
+}
+
+static bool zdma_load_descriptor(XlnxZDMA *s, uint64_t addr, void *buf)
+{
+ /* ZDMA descriptors must be aligned to their own size. */
+ if (addr % sizeof(XlnxZDMADescr)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "zdma: unaligned descriptor at %" PRIx64,
+ addr);
+ memset(buf, 0xdeadbeef, sizeof(XlnxZDMADescr));
+ s->error = true;
+ return false;
+ }
+
+ address_space_rw(s->dma_as, addr, s->attr,
+ buf, sizeof(XlnxZDMADescr), false);
+ return true;
+}
+
+static void zdma_load_src_descriptor(XlnxZDMA *s)
+{
+ uint64_t src_addr;
+ unsigned int ptype = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, POINT_TYPE);
+
+ if (ptype == PT_REG) {
+ memcpy(&s->dsc_src, &s->regs[R_ZDMA_CH_SRC_DSCR_WORD0],
+ sizeof(s->dsc_src));
+ return;
+ }
+
+ src_addr = zdma_get_regaddr64(s, R_ZDMA_CH_SRC_CUR_DSCR_LSB);
+
+ if (!zdma_load_descriptor(s, src_addr, &s->dsc_src)) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, AXI_RD_SRC_DSCR, true);
+ }
+}
+
+static void zdma_load_dst_descriptor(XlnxZDMA *s)
+{
+ uint64_t dst_addr;
+ unsigned int ptype = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, POINT_TYPE);
+
+ if (ptype == PT_REG) {
+ memcpy(&s->dsc_dst, &s->regs[R_ZDMA_CH_DST_DSCR_WORD0],
+ sizeof(s->dsc_dst));
+ return;
+ }
+
+ dst_addr = zdma_get_regaddr64(s, R_ZDMA_CH_DST_CUR_DSCR_LSB);
+
+ if (!zdma_load_descriptor(s, dst_addr, &s->dsc_dst)) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, AXI_RD_DST_DSCR, true);
+ }
+}
+
+static uint64_t zdma_update_descr_addr(XlnxZDMA *s, bool type,
+ unsigned int basereg)
+{
+ uint64_t addr, next;
+
+ if (type == DTYPE_LINEAR) {
+ next = zdma_get_regaddr64(s, basereg);
+ next += sizeof(s->dsc_dst);
+ zdma_put_regaddr64(s, basereg, next);
+ } else {
+ addr = zdma_get_regaddr64(s, basereg);
+ addr += sizeof(s->dsc_dst);
+ address_space_rw(s->dma_as, addr, s->attr, (void *) &next, 8, false);
+ zdma_put_regaddr64(s, basereg, next);
+ }
+ return next;
+}
+
+static void zdma_write_dst(XlnxZDMA *s, uint8_t *buf, uint32_t len)
+{
+ uint32_t dst_size, dlen;
+ bool dst_intr, dst_type;
+ unsigned int ptype = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, POINT_TYPE);
+ unsigned int rw_mode = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, MODE);
+ unsigned int burst_type = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_DATA_ATTR,
+ AWBURST);
+
+ /* FIXED burst types are only supported in simple dma mode. */
+ if (ptype != PT_REG) {
+ burst_type = AXI_BURST_INCR;
+ }
+
+ while (len) {
+ dst_size = FIELD_EX32(s->dsc_dst.words[2], ZDMA_CH_DST_DSCR_WORD2,
+ SIZE);
+ dst_type = FIELD_EX32(s->dsc_dst.words[3], ZDMA_CH_DST_DSCR_WORD3,
+ TYPE);
+ if (dst_size == 0 && ptype == PT_MEM) {
+ uint64_t next;
+ next = zdma_update_descr_addr(s, dst_type,
+ R_ZDMA_CH_DST_CUR_DSCR_LSB);
+ zdma_load_descriptor(s, next, &s->dsc_dst);
+ dst_size = FIELD_EX32(s->dsc_dst.words[2], ZDMA_CH_DST_DSCR_WORD2,
+ SIZE);
+ dst_type = FIELD_EX32(s->dsc_dst.words[3], ZDMA_CH_DST_DSCR_WORD3,
+ TYPE);
+ }
+
+ /* Match what hardware does by ignoring the dst_size and only using
+ * the src size for Simple register mode. */
+ if (ptype == PT_REG && rw_mode != RW_MODE_WO) {
+ dst_size = len;
+ }
+
+ dst_intr = FIELD_EX32(s->dsc_dst.words[3], ZDMA_CH_DST_DSCR_WORD3,
+ INTR);
+
+ dlen = len > dst_size ? dst_size : len;
+ if (burst_type == AXI_BURST_FIXED) {
+ if (dlen > (s->cfg.bus_width / 8)) {
+ dlen = s->cfg.bus_width / 8;
+ }
+ }
+
+ address_space_rw(s->dma_as, s->dsc_dst.addr, s->attr, buf, dlen,
+ true);
+ if (burst_type == AXI_BURST_INCR) {
+ s->dsc_dst.addr += dlen;
+ }
+ dst_size -= dlen;
+ buf += dlen;
+ len -= dlen;
+
+ if (dst_size == 0 && dst_intr) {
+ zdma_dst_done(s);
+ }
+
+ /* Write back to buffered descriptor. */
+ s->dsc_dst.words[2] = FIELD_DP32(s->dsc_dst.words[2],
+ ZDMA_CH_DST_DSCR_WORD2,
+ SIZE,
+ dst_size);
+ }
+}
+
+static void zdma_process_descr(XlnxZDMA *s)
+{
+ uint64_t src_addr;
+ uint32_t src_size, len;
+ unsigned int src_cmd;
+ bool src_intr, src_type;
+ unsigned int ptype = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, POINT_TYPE);
+ unsigned int rw_mode = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, MODE);
+ unsigned int burst_type = ARRAY_FIELD_EX32(s->regs, ZDMA_CH_DATA_ATTR,
+ ARBURST);
+
+ src_addr = s->dsc_src.addr;
+ src_size = FIELD_EX32(s->dsc_src.words[2], ZDMA_CH_SRC_DSCR_WORD2, SIZE);
+ src_cmd = FIELD_EX32(s->dsc_src.words[3], ZDMA_CH_SRC_DSCR_WORD3, CMD);
+ src_type = FIELD_EX32(s->dsc_src.words[3], ZDMA_CH_SRC_DSCR_WORD3, TYPE);
+ src_intr = FIELD_EX32(s->dsc_src.words[3], ZDMA_CH_SRC_DSCR_WORD3, INTR);
+
+ /* FIXED burst types and non-rw modes are only supported in
+ * simple dma mode.
+ */
+ if (ptype != PT_REG) {
+ if (rw_mode != RW_MODE_RW) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "zDMA: rw-mode=%d but not simple DMA mode.\n",
+ rw_mode);
+ }
+ if (burst_type != AXI_BURST_INCR) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "zDMA: burst_type=%d but not simple DMA mode.\n",
+ burst_type);
+ }
+ burst_type = AXI_BURST_INCR;
+ rw_mode = RW_MODE_RW;
+ }
+
+ if (rw_mode == RW_MODE_WO) {
+ /* In Simple DMA Write-Only, we need to push DST size bytes
+ * regardless of what SRC size is set to. */
+ src_size = FIELD_EX32(s->dsc_dst.words[2], ZDMA_CH_DST_DSCR_WORD2,
+ SIZE);
+ memcpy(s->buf, &s->regs[R_ZDMA_CH_WR_ONLY_WORD0], s->cfg.bus_width / 8);
+ }
+
+ while (src_size) {
+ len = src_size > ARRAY_SIZE(s->buf) ? ARRAY_SIZE(s->buf) : src_size;
+ if (burst_type == AXI_BURST_FIXED) {
+ if (len > (s->cfg.bus_width / 8)) {
+ len = s->cfg.bus_width / 8;
+ }
+ }
+
+ if (rw_mode == RW_MODE_WO) {
+ if (len > s->cfg.bus_width / 8) {
+ len = s->cfg.bus_width / 8;
+ }
+ } else {
+ address_space_rw(s->dma_as, src_addr, s->attr, s->buf, len,
+ false);
+ if (burst_type == AXI_BURST_INCR) {
+ src_addr += len;
+ }
+ }
+
+ if (rw_mode != RW_MODE_RO) {
+ zdma_write_dst(s, s->buf, len);
+ }
+
+ s->regs[R_ZDMA_CH_TOTAL_BYTE] += len;
+ src_size -= len;
+ }
+
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, DMA_DONE, true);
+
+ if (src_intr) {
+ zdma_src_done(s);
+ }
+
+ /* Load next descriptor. */
+ if (ptype == PT_REG || src_cmd == CMD_STOP) {
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_CTRL2, EN, 0);
+ zdma_set_state(s, DISABLED);
+ return;
+ }
+
+ if (src_cmd == CMD_HALT) {
+ zdma_set_state(s, PAUSED);
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, DMA_PAUSE, 1);
+ zdma_ch_imr_update_irq(s);
+ return;
+ }
+
+ zdma_update_descr_addr(s, src_type, R_ZDMA_CH_SRC_CUR_DSCR_LSB);
+}
+
+static void zdma_run(XlnxZDMA *s)
+{
+ while (s->state == ENABLED && !s->error) {
+ zdma_load_src_descriptor(s);
+
+ if (s->error) {
+ zdma_set_state(s, DISABLED);
+ } else {
+ zdma_process_descr(s);
+ }
+ }
+
+ zdma_ch_imr_update_irq(s);
+}
+
+static void zdma_update_descr_addr_from_start(XlnxZDMA *s)
+{
+ uint64_t src_addr, dst_addr;
+
+ src_addr = zdma_get_regaddr64(s, R_ZDMA_CH_SRC_START_LSB);
+ zdma_put_regaddr64(s, R_ZDMA_CH_SRC_CUR_DSCR_LSB, src_addr);
+ dst_addr = zdma_get_regaddr64(s, R_ZDMA_CH_DST_START_LSB);
+ zdma_put_regaddr64(s, R_ZDMA_CH_DST_CUR_DSCR_LSB, dst_addr);
+ zdma_load_dst_descriptor(s);
+}
+
+static void zdma_ch_ctrlx_postw(RegisterInfo *reg, uint64_t val64)
+{
+ XlnxZDMA *s = XLNX_ZDMA(reg->opaque);
+
+ if (ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL2, EN)) {
+ s->error = false;
+
+ if (s->state == PAUSED &&
+ ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, CONT)) {
+ if (ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, CONT_ADDR) == 1) {
+ zdma_update_descr_addr_from_start(s);
+ } else {
+ bool src_type = FIELD_EX32(s->dsc_src.words[3],
+ ZDMA_CH_SRC_DSCR_WORD3, TYPE);
+ zdma_update_descr_addr(s, src_type,
+ R_ZDMA_CH_SRC_CUR_DSCR_LSB);
+ }
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_CTRL0, CONT, false);
+ zdma_set_state(s, ENABLED);
+ } else if (s->state == DISABLED) {
+ zdma_update_descr_addr_from_start(s);
+ zdma_set_state(s, ENABLED);
+ }
+ } else {
+ /* Leave Paused state? */
+ if (s->state == PAUSED &&
+ ARRAY_FIELD_EX32(s->regs, ZDMA_CH_CTRL0, CONT)) {
+ zdma_set_state(s, DISABLED);
+ }
+ }
+
+ zdma_run(s);
+}
+
+static RegisterAccessInfo zdma_regs_info[] = {
+ { .name = "ZDMA_ERR_CTRL", .addr = A_ZDMA_ERR_CTRL,
+ .rsvd = 0xfffffffe,
+ },{ .name = "ZDMA_CH_ISR", .addr = A_ZDMA_CH_ISR,
+ .rsvd = 0xfffff000,
+ .w1c = 0xfff,
+ .post_write = zdma_ch_isr_postw,
+ },{ .name = "ZDMA_CH_IMR", .addr = A_ZDMA_CH_IMR,
+ .reset = 0xfff,
+ .rsvd = 0xfffff000,
+ .ro = 0xfff,
+ },{ .name = "ZDMA_CH_IEN", .addr = A_ZDMA_CH_IEN,
+ .rsvd = 0xfffff000,
+ .pre_write = zdma_ch_ien_prew,
+ },{ .name = "ZDMA_CH_IDS", .addr = A_ZDMA_CH_IDS,
+ .rsvd = 0xfffff000,
+ .pre_write = zdma_ch_ids_prew,
+ },{ .name = "ZDMA_CH_CTRL0", .addr = A_ZDMA_CH_CTRL0,
+ .reset = 0x80,
+ .rsvd = 0xffffff01,
+ .post_write = zdma_ch_ctrlx_postw,
+ },{ .name = "ZDMA_CH_CTRL1", .addr = A_ZDMA_CH_CTRL1,
+ .reset = 0x3ff,
+ .rsvd = 0xfffffc00,
+ },{ .name = "ZDMA_CH_FCI", .addr = A_ZDMA_CH_FCI,
+ .rsvd = 0xffffffc0,
+ },{ .name = "ZDMA_CH_STATUS", .addr = A_ZDMA_CH_STATUS,
+ .rsvd = 0xfffffffc,
+ .ro = 0x3,
+ },{ .name = "ZDMA_CH_DATA_ATTR", .addr = A_ZDMA_CH_DATA_ATTR,
+ .reset = 0x483d20f,
+ .rsvd = 0xf0000000,
+ },{ .name = "ZDMA_CH_DSCR_ATTR", .addr = A_ZDMA_CH_DSCR_ATTR,
+ .rsvd = 0xfffffe00,
+ },{ .name = "ZDMA_CH_SRC_DSCR_WORD0", .addr = A_ZDMA_CH_SRC_DSCR_WORD0,
+ },{ .name = "ZDMA_CH_SRC_DSCR_WORD1", .addr = A_ZDMA_CH_SRC_DSCR_WORD1,
+ .rsvd = 0xfffe0000,
+ },{ .name = "ZDMA_CH_SRC_DSCR_WORD2", .addr = A_ZDMA_CH_SRC_DSCR_WORD2,
+ .rsvd = 0xc0000000,
+ },{ .name = "ZDMA_CH_SRC_DSCR_WORD3", .addr = A_ZDMA_CH_SRC_DSCR_WORD3,
+ .rsvd = 0xffffffe0,
+ },{ .name = "ZDMA_CH_DST_DSCR_WORD0", .addr = A_ZDMA_CH_DST_DSCR_WORD0,
+ },{ .name = "ZDMA_CH_DST_DSCR_WORD1", .addr = A_ZDMA_CH_DST_DSCR_WORD1,
+ .rsvd = 0xfffe0000,
+ },{ .name = "ZDMA_CH_DST_DSCR_WORD2", .addr = A_ZDMA_CH_DST_DSCR_WORD2,
+ .rsvd = 0xc0000000,
+ },{ .name = "ZDMA_CH_DST_DSCR_WORD3", .addr = A_ZDMA_CH_DST_DSCR_WORD3,
+ .rsvd = 0xfffffffa,
+ },{ .name = "ZDMA_CH_WR_ONLY_WORD0", .addr = A_ZDMA_CH_WR_ONLY_WORD0,
+ },{ .name = "ZDMA_CH_WR_ONLY_WORD1", .addr = A_ZDMA_CH_WR_ONLY_WORD1,
+ },{ .name = "ZDMA_CH_WR_ONLY_WORD2", .addr = A_ZDMA_CH_WR_ONLY_WORD2,
+ },{ .name = "ZDMA_CH_WR_ONLY_WORD3", .addr = A_ZDMA_CH_WR_ONLY_WORD3,
+ },{ .name = "ZDMA_CH_SRC_START_LSB", .addr = A_ZDMA_CH_SRC_START_LSB,
+ },{ .name = "ZDMA_CH_SRC_START_MSB", .addr = A_ZDMA_CH_SRC_START_MSB,
+ .rsvd = 0xfffe0000,
+ },{ .name = "ZDMA_CH_DST_START_LSB", .addr = A_ZDMA_CH_DST_START_LSB,
+ },{ .name = "ZDMA_CH_DST_START_MSB", .addr = A_ZDMA_CH_DST_START_MSB,
+ .rsvd = 0xfffe0000,
+ },{ .name = "ZDMA_CH_SRC_CUR_PYLD_LSB", .addr = A_ZDMA_CH_SRC_CUR_PYLD_LSB,
+ .ro = 0xffffffff,
+ },{ .name = "ZDMA_CH_SRC_CUR_PYLD_MSB", .addr = A_ZDMA_CH_SRC_CUR_PYLD_MSB,
+ .rsvd = 0xfffe0000,
+ .ro = 0x1ffff,
+ },{ .name = "ZDMA_CH_DST_CUR_PYLD_LSB", .addr = A_ZDMA_CH_DST_CUR_PYLD_LSB,
+ .ro = 0xffffffff,
+ },{ .name = "ZDMA_CH_DST_CUR_PYLD_MSB", .addr = A_ZDMA_CH_DST_CUR_PYLD_MSB,
+ .rsvd = 0xfffe0000,
+ .ro = 0x1ffff,
+ },{ .name = "ZDMA_CH_SRC_CUR_DSCR_LSB", .addr = A_ZDMA_CH_SRC_CUR_DSCR_LSB,
+ .ro = 0xffffffff,
+ },{ .name = "ZDMA_CH_SRC_CUR_DSCR_MSB", .addr = A_ZDMA_CH_SRC_CUR_DSCR_MSB,
+ .rsvd = 0xfffe0000,
+ .ro = 0x1ffff,
+ },{ .name = "ZDMA_CH_DST_CUR_DSCR_LSB", .addr = A_ZDMA_CH_DST_CUR_DSCR_LSB,
+ .ro = 0xffffffff,
+ },{ .name = "ZDMA_CH_DST_CUR_DSCR_MSB", .addr = A_ZDMA_CH_DST_CUR_DSCR_MSB,
+ .rsvd = 0xfffe0000,
+ .ro = 0x1ffff,
+ },{ .name = "ZDMA_CH_TOTAL_BYTE", .addr = A_ZDMA_CH_TOTAL_BYTE,
+ .w1c = 0xffffffff,
+ },{ .name = "ZDMA_CH_RATE_CNTL", .addr = A_ZDMA_CH_RATE_CNTL,
+ .rsvd = 0xfffff000,
+ },{ .name = "ZDMA_CH_IRQ_SRC_ACCT", .addr = A_ZDMA_CH_IRQ_SRC_ACCT,
+ .rsvd = 0xffffff00,
+ .ro = 0xff,
+ .cor = 0xff,
+ },{ .name = "ZDMA_CH_IRQ_DST_ACCT", .addr = A_ZDMA_CH_IRQ_DST_ACCT,
+ .rsvd = 0xffffff00,
+ .ro = 0xff,
+ .cor = 0xff,
+ },{ .name = "ZDMA_CH_DBG0", .addr = A_ZDMA_CH_DBG0,
+ .rsvd = 0xfffffe00,
+ .ro = 0x1ff,
+ },{ .name = "ZDMA_CH_DBG1", .addr = A_ZDMA_CH_DBG1,
+ .rsvd = 0xfffffe00,
+ .ro = 0x1ff,
+ },{ .name = "ZDMA_CH_CTRL2", .addr = A_ZDMA_CH_CTRL2,
+ .rsvd = 0xfffffffe,
+ .post_write = zdma_ch_ctrlx_postw,
+ }
+};
+
+static void zdma_reset(DeviceState *dev)
+{
+ XlnxZDMA *s = XLNX_ZDMA(dev);
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) {
+ register_reset(&s->regs_info[i]);
+ }
+
+ zdma_ch_imr_update_irq(s);
+}
+
+static uint64_t zdma_read(void *opaque, hwaddr addr, unsigned size)
+{
+ XlnxZDMA *s = XLNX_ZDMA(opaque);
+ RegisterInfo *r = &s->regs_info[addr / 4];
+
+ if (!r->data) {
+ qemu_log("%s: Decode error: read from %" HWADDR_PRIx "\n",
+ object_get_canonical_path(OBJECT(s)),
+ addr);
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, INV_APB, true);
+ zdma_ch_imr_update_irq(s);
+ return 0;
+ }
+ return register_read(r, ~0, NULL, false);
+}
+
+static void zdma_write(void *opaque, hwaddr addr, uint64_t value,
+ unsigned size)
+{
+ XlnxZDMA *s = XLNX_ZDMA(opaque);
+ RegisterInfo *r = &s->regs_info[addr / 4];
+
+ if (!r->data) {
+ qemu_log("%s: Decode error: write to %" HWADDR_PRIx "=%" PRIx64 "\n",
+ object_get_canonical_path(OBJECT(s)),
+ addr, value);
+ ARRAY_FIELD_DP32(s->regs, ZDMA_CH_ISR, INV_APB, true);
+ zdma_ch_imr_update_irq(s);
+ return;
+ }
+ register_write(r, value, ~0, NULL, false);
+}
+
+static const MemoryRegionOps zdma_ops = {
+ .read = zdma_read,
+ .write = zdma_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static void zdma_realize(DeviceState *dev, Error **errp)
+{
+ XlnxZDMA *s = XLNX_ZDMA(dev);
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(zdma_regs_info); ++i) {
+ RegisterInfo *r = &s->regs_info[zdma_regs_info[i].addr / 4];
+
+ *r = (RegisterInfo) {
+ .data = (uint8_t *)&s->regs[
+ zdma_regs_info[i].addr / 4],
+ .data_size = sizeof(uint32_t),
+ .access = &zdma_regs_info[i],
+ .opaque = s,
+ };
+ }
+
+ if (s->dma_mr) {
+ s->dma_as = g_malloc0(sizeof(AddressSpace));
+ address_space_init(s->dma_as, s->dma_mr, NULL);
+ } else {
+ s->dma_as = &address_space_memory;
+ }
+ s->attr = MEMTXATTRS_UNSPECIFIED;
+}
+
+static void zdma_init(Object *obj)
+{
+ XlnxZDMA *s = XLNX_ZDMA(obj);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+ memory_region_init_io(&s->iomem, obj, &zdma_ops, s,
+ TYPE_XLNX_ZDMA, ZDMA_R_MAX * 4);
+ sysbus_init_mmio(sbd, &s->iomem);
+ sysbus_init_irq(sbd, &s->irq_zdma_ch_imr);
+
+ object_property_add_link(obj, "dma", TYPE_MEMORY_REGION,
+ (Object **)&s->dma_mr,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ &error_abort);
+}
+
+static const VMStateDescription vmstate_zdma = {
+ .name = TYPE_XLNX_ZDMA,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(regs, XlnxZDMA, ZDMA_R_MAX),
+ VMSTATE_UINT32(state, XlnxZDMA),
+ VMSTATE_UINT32_ARRAY(dsc_src.words, XlnxZDMA, 4),
+ VMSTATE_UINT32_ARRAY(dsc_dst.words, XlnxZDMA, 4),
+ VMSTATE_END_OF_LIST(),
+ }
+};
+
+static Property zdma_props[] = {
+ DEFINE_PROP_UINT32("bus-width", XlnxZDMA, cfg.bus_width, 64),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void zdma_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->reset = zdma_reset;
+ dc->realize = zdma_realize;
+ dc->props = zdma_props;
+ dc->vmsd = &vmstate_zdma;
+}
+
+static const TypeInfo zdma_info = {
+ .name = TYPE_XLNX_ZDMA,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(XlnxZDMA),
+ .class_init = zdma_class_init,
+ .instance_init = zdma_init,
+};
+
+static void zdma_register_types(void)
+{
+ type_register_static(&zdma_info);
+}
+
+type_init(zdma_register_types)
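
[The interrupt plumbing above follows the usual ISR/IMR/IEN/IDS split: ISR
latches events (write-one-to-clear), IMR masks them, and IEN/IDS are write-only
views that clear or set mask bits. The IRQ line level is simply ISR & ~IMR. A
standalone sketch of that state machine; the 0xfff reset value and the
DMA_DONE bit position are taken from the register definitions above:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t isr = 0, imr = 0xfff;     /* reset: all sources masked */

        imr &= ~(1u << 10);                /* guest writes DMA_DONE to IEN */
        isr |= 1u << 10;                   /* model completes a transfer */

        /* What zdma_ch_imr_update_irq() computes: */
        printf("irq line = %d\n", (isr & ~imr) != 0);   /* -> 1 */
        return 0;
    }
]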
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 3b613e3..82b6ec2 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -27,6 +27,7 @@
#include "hw/sd/sdhci.h"
#include "hw/ssi/xilinx_spips.h"
#include "hw/dma/xlnx_dpdma.h"
+#include "hw/dma/xlnx-zdma.h"
#include "hw/display/xlnx_dp.h"
#include "hw/intc/xlnx-zynqmp-ipi.h"
#include "hw/timer/xlnx-zynqmp-rtc.h"
@@ -41,6 +42,8 @@
#define XLNX_ZYNQMP_NUM_UARTS 2
#define XLNX_ZYNQMP_NUM_SDHCI 2
#define XLNX_ZYNQMP_NUM_SPIS 2
+#define XLNX_ZYNQMP_NUM_GDMA_CH 8
+#define XLNX_ZYNQMP_NUM_ADMA_CH 8
#define XLNX_ZYNQMP_NUM_QSPI_BUS 2
#define XLNX_ZYNQMP_NUM_QSPI_BUS_CS 2
@@ -94,6 +97,8 @@ typedef struct XlnxZynqMPState {
XlnxDPDMAState dpdma;
XlnxZynqMPIPI ipi;
XlnxZynqMPRTC rtc;
+ XlnxZDMA gdma[XLNX_ZYNQMP_NUM_GDMA_CH];
+ XlnxZDMA adma[XLNX_ZYNQMP_NUM_ADMA_CH];
char *boot_cpu;
ARMCPU *boot_cpu_ptr;
diff --git a/include/hw/dma/xlnx-zdma.h b/include/hw/dma/xlnx-zdma.h
new file mode 100644
index 0000000..0b240b4
--- /dev/null
+++ b/include/hw/dma/xlnx-zdma.h
@@ -0,0 +1,84 @@
+/*
+ * QEMU model of the ZynqMP generic DMA
+ *
+ * Copyright (c) 2014 Xilinx Inc.
+ * Copyright (c) 2018 FEIMTECH AB
+ *
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>,
+ * Francisco Iglesias <francisco.iglesias@feimtech.se>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_ZDMA_H
+#define XLNX_ZDMA_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "sysemu/dma.h"
+
+#define ZDMA_R_MAX (0x204 / 4)
+
+typedef enum {
+ DISABLED = 0,
+ ENABLED = 1,
+ PAUSED = 2,
+} XlnxZDMAState;
+
+typedef union {
+ struct {
+ uint64_t addr;
+ uint32_t size;
+ uint32_t attr;
+ };
+ uint32_t words[4];
+} XlnxZDMADescr;
+
+typedef struct XlnxZDMA {
+ SysBusDevice parent_obj;
+ MemoryRegion iomem;
+ MemTxAttrs attr;
+ MemoryRegion *dma_mr;
+ AddressSpace *dma_as;
+ qemu_irq irq_zdma_ch_imr;
+
+ struct {
+ uint32_t bus_width;
+ } cfg;
+
+ XlnxZDMAState state;
+ bool error;
+
+ XlnxZDMADescr dsc_src;
+ XlnxZDMADescr dsc_dst;
+
+ uint32_t regs[ZDMA_R_MAX];
+ RegisterInfo regs_info[ZDMA_R_MAX];
+
+ /* We don't model the common bufs. Must be at least 16 bytes
+ to model write only mode. */
+ uint8_t buf[2048];
+} XlnxZDMA;
+
+#define TYPE_XLNX_ZDMA "xlnx.zdma"
+
+#define XLNX_ZDMA(obj) \
+ OBJECT_CHECK(XlnxZDMA, (obj), TYPE_XLNX_ZDMA)
+
+#endif /* XLNX_ZDMA_H */
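
[The descriptor union gives the model two views of the same 16 bytes: typed
fields for the DMA engine and raw 32-bit words for register-style access
(zdma_write_dst() re-packs SIZE through words[2], for instance). A quick
standalone check of that layout, which holds on little-endian hosts:

    #include <assert.h>
    #include <stdint.h>

    typedef union {
        struct {
            uint64_t addr;
            uint32_t size;
            uint32_t attr;
        };
        uint32_t words[4];
    } XlnxZDMADescr;

    int main(void)
    {
        _Static_assert(sizeof(XlnxZDMADescr) == 16, "descriptor is 4 words");
        XlnxZDMADescr d = { .addr = 0x11223344, .size = 0x30 };
        assert(d.words[0] == 0x11223344 && d.words[2] == 0x30);
        return 0;
    }
]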
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 14e45c4..9d3afc6 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -132,6 +132,9 @@ struct TranslationBlock;
* before the insn which triggers a watchpoint rather than after it.
* @gdb_arch_name: Optional callback that returns the architecture name known
* to GDB. The caller must free the returned string with g_free.
+ * @gdb_get_dynamic_xml: Callback to return dynamically generated XML for the
+ * gdb stub. Returns a pointer to the XML contents for the specified XML file
+ * or NULL if the CPU doesn't have a dynamically generated content for it.
* @cpu_exec_enter: Callback for cpu_exec preparation.
* @cpu_exec_exit: Callback for cpu_exec cleanup.
* @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec.
@@ -198,7 +201,7 @@ typedef struct CPUClass {
const struct VMStateDescription *vmsd;
const char *gdb_core_xml_file;
gchar * (*gdb_arch_name)(CPUState *cpu);
-
+ const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname);
void (*cpu_exec_enter)(CPUState *cpu);
void (*cpu_exec_exit)(CPUState *cpu);
bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
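
[One contract worth noting: unlike gdb_arch_name(), whose result the caller
must g_free(), gdb_get_dynamic_xml() returns a const pointer that stays owned
by the CPU, which is why get_feature_xml() above can return it directly. A
minimal, hypothetical implementation shape (MyCPU and dyn_xml_desc are
placeholders, not QEMU names):

    static const char *mycpu_gdb_get_dynamic_xml(CPUState *cs,
                                                 const char *xmlname)
    {
        MyCPU *cpu = MY_CPU(cs);              /* hypothetical CPU type */

        if (strcmp(xmlname, "system-registers.xml") == 0) {
            return cpu->dyn_xml_desc;         /* owned by the CPU */
        }
        return NULL;                          /* not one of ours */
    }
]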
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index 1297bea..11c7baf 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -10,3 +10,13 @@ obj-y += gdbstub.o
obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
obj-y += crypto_helper.o
obj-$(CONFIG_SOFTMMU) += arm-powerctl.o
+
+DECODETREE = $(SRC_PATH)/scripts/decodetree.py
+
+target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
+ $(call quiet-command,\
+ $(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
+ "GEN", $(TARGET_DIR)$@)
+
+target/arm/translate-sve.o: target/arm/decode-sve.inc.c
+obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
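
[The new rule runs scripts/decodetree.py over sve.decode at build time; the
generated decode-sve.inc.c is #included by translate-sve.c (hence the explicit
object dependency) and exposes an entry point named by --decode that dispatches
to one trans_*() callback per declared pattern. Roughly, as a sketch of the
decodetree convention rather than the generated code itself:

    /* sve.decode pattern lines compile into a decoder calling handlers like: */
    static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
    {
        /* emit TCG ops for the vector AND */
        return true;
    }

    /* translate-a64.c then hands SVE encodings to the generated entry point: */
    if (!disas_sve(s, insn)) {
        unallocated_encoding(s);
    }
]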
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 7939c6b..5d60893 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1908,6 +1908,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
cc->gdb_num_core_regs = 26;
cc->gdb_core_xml_file = "arm-core.xml";
cc->gdb_arch_name = arm_gdb_arch_name;
+ cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
cc->gdb_stop_before_watchpoint = true;
cc->debug_excp_handler = arm_debug_excp_handler;
cc->debug_check_watchpoint = arm_debug_check_watchpoint;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 3b086be..8488273 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -133,6 +133,19 @@ enum {
s<2n+1> maps to the most significant half of d<n>
*/
+/**
+ * DynamicGDBXMLInfo:
+ * @desc: Contains the XML descriptions.
+ * @num_cpregs: Number of the Coprocessor registers seen by GDB.
+ * @cpregs_keys: Array that contains the corresponding Key of
+ * a given cpreg with the same order of the cpreg in the XML description.
+ */
+typedef struct DynamicGDBXMLInfo {
+ char *desc;
+ int num_cpregs;
+ uint32_t *cpregs_keys;
+} DynamicGDBXMLInfo;
+
/* CPU state for each instance of a generic timer (in cp15 c14) */
typedef struct ARMGenericTimer {
uint64_t cval; /* Timer CompareValue register */
@@ -527,7 +540,10 @@ typedef struct CPUARMState {
#ifdef TARGET_AARCH64
/* Store FFR as pregs[16] to make it easier to treat as any other. */
+#define FFR_PRED_NUM 16
ARMPredicateReg pregs[17];
+ /* Scratch space for aa64 sve predicate temporary. */
+ ARMPredicateReg preg_tmp;
#endif
uint32_t xregs[16];
@@ -535,7 +551,7 @@ typedef struct CPUARMState {
int vec_len;
int vec_stride;
- /* scratch space when Tn are not sufficient. */
+ /* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];
/* There are a number of distinct float control structures:
@@ -687,6 +703,8 @@ struct ARMCPU {
uint64_t *cpreg_vmstate_values;
int32_t cpreg_vmstate_array_len;
+ DynamicGDBXMLInfo dyn_xml;
+
/* Timers used by the generic (architected) timer */
QEMUTimer *gt_timer[NUM_GTIMERS];
/* GPIO outputs for generic timer */
@@ -868,6 +886,17 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
int arm_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+/* Dynamically generates for gdb stub an XML description of the sysregs from
+ * the cp_regs hashtable. Returns the registered sysregs number.
+ */
+int arm_gen_dynamic_xml(CPUState *cpu);
+
+/* Returns the dynamically generated XML for the gdb stub.
+ * Returns a pointer to the XML contents for the specified XML file or NULL
+ * if the XML name doesn't match the predefined one.
+ */
+const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname);
+
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque);
int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
@@ -1821,10 +1850,11 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
#define ARM_CP_FPU 0x1000
#define ARM_CP_SVE 0x2000
+#define ARM_CP_NO_GDB 0x4000
/* Used only as a terminator for ARMCPRegInfo lists */
#define ARM_CP_SENTINEL 0xffff
/* Mask of only the flag bits in a type field */
-#define ARM_CP_FLAG_MASK 0x30ff
+#define ARM_CP_FLAG_MASK 0x70ff
/* Valid values for ARMCPRegInfo state field, indicating which of
* the AArch32 and AArch64 execution states this register is visible in.
@@ -2946,4 +2976,7 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
return &env->vfp.zregs[regno].d[0];
}
+/* Shared between translate-sve.c and sve_helper.c. */
+extern const uint64_t pred_esz_masks[4];
+
#endif
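
[The mask bump is just the new flag being admitted: ARM_CP_FLAG_MASK 0x70ff =
ARM_CP_NO_GDB (0x4000) | ARM_CP_SVE (0x2000) | ARM_CP_FPU (0x1000) | the
pre-existing low 0xff flag bits, so ARM_CP_NO_GDB survives type-field masking
instead of being silently stripped.]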
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 04c1208..e80cfb4 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -22,6 +22,11 @@
#include "cpu.h"
#include "exec/gdbstub.h"
+typedef struct RegisterSysregXmlParam {
+ CPUState *cs;
+ GString *s;
+} RegisterSysregXmlParam;
+
/* Old gdb always expect FPA registers. Newer (xml-aware) gdb only expect
whatever the target description contains. Due to a historical mishap
the FPA registers appear in between core integer regs and the CPSR.
@@ -101,3 +106,74 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
/* Unknown register. */
return 0;
}
+
+static void arm_gen_one_xml_reg_tag(GString *s, DynamicGDBXMLInfo *dyn_xml,
+ ARMCPRegInfo *ri, uint32_t ri_key,
+ int bitsize)
+{
+ g_string_append_printf(s, "<reg name=\"%s\"", ri->name);
+ g_string_append_printf(s, " bitsize=\"%d\"", bitsize);
+ g_string_append_printf(s, " group=\"cp_regs\"/>");
+ dyn_xml->num_cpregs++;
+ dyn_xml->cpregs_keys[dyn_xml->num_cpregs - 1] = ri_key;
+}
+
+static void arm_register_sysreg_for_xml(gpointer key, gpointer value,
+ gpointer p)
+{
+ uint32_t ri_key = *(uint32_t *)key;
+ ARMCPRegInfo *ri = value;
+ RegisterSysregXmlParam *param = (RegisterSysregXmlParam *)p;
+ GString *s = param->s;
+ ARMCPU *cpu = ARM_CPU(param->cs);
+ CPUARMState *env = &cpu->env;
+ DynamicGDBXMLInfo *dyn_xml = &cpu->dyn_xml;
+
+ if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_NO_GDB))) {
+ if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+ if (ri->state == ARM_CP_STATE_AA64) {
+ arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 64);
+ }
+ } else {
+ if (ri->state == ARM_CP_STATE_AA32) {
+ if (!arm_feature(env, ARM_FEATURE_EL3) &&
+ (ri->secure & ARM_CP_SECSTATE_S)) {
+ return;
+ }
+ if (ri->type & ARM_CP_64BIT) {
+ arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 64);
+ } else {
+ arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 32);
+ }
+ }
+ }
+ }
+}
+
+int arm_gen_dynamic_xml(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ GString *s = g_string_new(NULL);
+ RegisterSysregXmlParam param = {cs, s};
+
+ cpu->dyn_xml.num_cpregs = 0;
+ cpu->dyn_xml.cpregs_keys = g_malloc(sizeof(uint32_t *) *
+ g_hash_table_size(cpu->cp_regs));
+ g_string_printf(s, "<?xml version=\"1.0\"?>");
+ g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">");
+ g_string_append_printf(s, "<feature name=\"org.qemu.gdb.arm.sys.regs\">");
+ g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_xml, &param);
+ g_string_append_printf(s, "</feature>");
+ cpu->dyn_xml.desc = g_string_free(s, false);
+ return cpu->dyn_xml.num_cpregs;
+}
+
+const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+
+ if (strcmp(xmlname, "system-registers.xml") == 0) {
+ return cpu->dyn_xml.desc;
+ }
+ return NULL;
+}
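
[Concatenated, arm_gen_dynamic_xml() emits one single-line document of this
shape (the register entry shown is illustrative):

    <?xml version="1.0"?><!DOCTYPE target SYSTEM "gdb-target.dtd"><feature name="org.qemu.gdb.arm.sys.regs"><reg name="CNTFRQ_EL0" bitsize="64" group="cp_regs"/>...</feature>

gdb fetches it as the "system-registers.xml" annex, and cpregs_keys records,
in emission order, which cp_regs hash key each <reg> corresponds to, so
register reads can be routed back to the right sysreg.]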
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
new file mode 100644
index 0000000..94f4356
--- /dev/null
+++ b/target/arm/helper-sve.h
@@ -0,0 +1,427 @@
+/*
+ * AArch64 SVE specific helper definitions
+ *
+ * Copyright (c) 2018 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+DEF_HELPER_FLAGS_2(sve_predtest1, TCG_CALL_NO_WG, i32, i64, i64)
+DEF_HELPER_FLAGS_3(sve_predtest, TCG_CALL_NO_WG, i32, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_pfirst, TCG_CALL_NO_WG, i32, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_pnext, TCG_CALL_NO_WG, i32, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_and_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_and_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_and_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_and_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_eor_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_eor_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_eor_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_eor_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_orr_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_orr_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_orr_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_orr_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_bic_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bic_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bic_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bic_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_add_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_add_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_add_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_add_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_sub_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sub_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sub_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sub_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_smax_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smax_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smax_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smax_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_umax_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umax_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umax_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umax_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_smin_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smin_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smin_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smin_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_umin_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umin_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umin_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umin_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_sabd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sabd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sabd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sabd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_uabd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_uabd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_uabd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_uabd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_mul_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_mul_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_mul_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_mul_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_smulh_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smulh_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smulh_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_smulh_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_umulh_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umulh_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umulh_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_umulh_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_sdiv_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sdiv_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_udiv_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_udiv_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_asr_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_asr_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_asr_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_asr_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_lsr_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsr_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsr_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsr_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_lsl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_asr_zpzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_asr_zpzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_lsr_zpzw_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsr_zpzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsr_zpzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_lsl_zpzw_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsl_zpzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_lsl_zpzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_orv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_orv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_orv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_orv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_eorv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_eorv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_eorv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_eorv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_andv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_andv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_andv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_andv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_saddv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_saddv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_saddv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_uaddv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uaddv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uaddv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uaddv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_smaxv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_smaxv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_smaxv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_smaxv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_umaxv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_umaxv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_umaxv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_umaxv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_sminv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_sminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_uminv_b, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_asr_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asr_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asr_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asr_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_lsr_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsr_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsr_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsr_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_lsl_zpzi_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsl_zpzi_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsl_zpzi_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsl_zpzi_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_asrd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asrd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asrd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asrd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cls_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cls_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_clz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_clz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_clz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_clz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cnt_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnt_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnt_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnt_zpz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_cnot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnot_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_cnot_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_not_zpz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_sxtb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_sxtb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_sxtb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_uxtb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uxtb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uxtb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_sxth_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_sxth_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_uxth_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uxth_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_sxtw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_uxtw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_abs_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_abs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_abs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_abs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_neg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_neg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_neg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_neg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve_mla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mla_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mla_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mla_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve_mls_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mls_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mls_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve_mls_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_index_b, TCG_CALL_NO_RWG, void, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_4(sve_index_h, TCG_CALL_NO_RWG, void, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_4(sve_index_s, TCG_CALL_NO_RWG, void, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_4(sve_index_d, TCG_CALL_NO_RWG, void, ptr, i64, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_asr_zzw_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asr_zzw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_asr_zzw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_lsr_zzw_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsr_zzw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsr_zzw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_lsl_zzw_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsl_zzw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_lsl_zzw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_adr_p32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_adr_p64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_adr_s32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_adr_u32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve_fexpa_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_fexpa_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve_fexpa_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_ftssel_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ftssel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve_ftssel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve_sqaddi_b, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32)
+DEF_HELPER_FLAGS_4(sve_sqaddi_h, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32)
+DEF_HELPER_FLAGS_4(sve_sqaddi_s, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32)
+DEF_HELPER_FLAGS_4(sve_sqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32)
+
+DEF_HELPER_FLAGS_4(sve_uqaddi_b, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32)
+DEF_HELPER_FLAGS_4(sve_uqaddi_h, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32)
+DEF_HELPER_FLAGS_4(sve_uqaddi_s, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32)
+DEF_HELPER_FLAGS_4(sve_uqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_uqsubi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_5(sve_cpy_m_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_5(sve_cpy_m_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_cpy_z_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(sve_ext, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_orr_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_orn_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_nor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_nand_pppp, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index db8bbe5..c0f7399 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -215,6 +215,29 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
}
}
+static int arm_gdb_get_sysreg(CPUARMState *env, uint8_t *buf, int reg)
+{
+ ARMCPU *cpu = arm_env_get_cpu(env);
+ const ARMCPRegInfo *ri;
+ uint32_t key;
+
+ key = cpu->dyn_xml.cpregs_keys[reg];
+ ri = get_arm_cp_reginfo(cpu->cp_regs, key);
+ if (ri) {
+ if (cpreg_field_is_64bit(ri)) {
+ return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
+ } else {
+ return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri));
+ }
+ }
+ return 0;
+}
+
+static int arm_gdb_set_sysreg(CPUARMState *env, uint8_t *buf, int reg)
+{
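+ /* System registers are exposed to gdb for reads only;
+ * writes are not supported. */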
+ return 0;
+}
+
static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
{
/* Return true if the regdef would cause an assertion if you called
@@ -690,12 +713,12 @@ static const ARMCPRegInfo cp_reginfo[] = {
* the secure register to be properly reset and migrated. There is also no
* v8 EL1 version of the register so the non-secure instance stands alone.
*/
- { .name = "FCSEIDR(NS)",
+ { .name = "FCSEIDR",
.cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0,
.access = PL1_RW, .secure = ARM_CP_SECSTATE_NS,
.fieldoffset = offsetof(CPUARMState, cp15.fcseidr_ns),
.resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
- { .name = "FCSEIDR(S)",
+ { .name = "FCSEIDR_S",
.cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0,
.access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
.fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s),
@@ -711,7 +734,7 @@ static const ARMCPRegInfo cp_reginfo[] = {
.access = PL1_RW, .secure = ARM_CP_SECSTATE_NS,
.fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]),
.resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
- { .name = "CONTEXTIDR(S)", .state = ARM_CP_STATE_AA32,
+ { .name = "CONTEXTIDR_S", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
.access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
.fieldoffset = offsetof(CPUARMState, cp15.contextidr_s),
@@ -1981,7 +2004,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
cp15.c14_timer[GTIMER_PHYS].ctl),
.writefn = gt_phys_ctl_write, .raw_writefn = raw_write,
},
- { .name = "CNTP_CTL(S)",
+ { .name = "CNTP_CTL_S",
.cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
.secure = ARM_CP_SECSTATE_S,
.type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
@@ -2020,7 +2043,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.accessfn = gt_ptimer_access,
.readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write,
},
- { .name = "CNTP_TVAL(S)",
+ { .name = "CNTP_TVAL_S",
.cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
.secure = ARM_CP_SECSTATE_S,
.type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
@@ -2074,7 +2097,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.accessfn = gt_ptimer_access,
.writefn = gt_phys_cval_write, .raw_writefn = raw_write,
},
- { .name = "CNTP_CVAL(S)", .cp = 15, .crm = 14, .opc1 = 2,
+ { .name = "CNTP_CVAL_S", .cp = 15, .crm = 14, .opc1 = 2,
.secure = ARM_CP_SECSTATE_S,
.access = PL1_RW | PL0_R,
.type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
@@ -5488,6 +5511,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
19, "arm-vfp.xml", 0);
}
+ gdb_register_coprocessor(cs, arm_gdb_get_sysreg, arm_gdb_set_sysreg,
+ arm_gen_dynamic_xml(cs),
+ "system-registers.xml", 0);
}
/* Sort alphabetically by type name, except for "any". */
@@ -5577,7 +5603,8 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
void *opaque, int state, int secstate,
- int crm, int opc1, int opc2)
+ int crm, int opc1, int opc2,
+ const char *name)
{
/* Private utility function for define_one_arm_cp_reg_with_opaque():
* add a single reginfo struct to the hash table.
@@ -5587,6 +5614,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0;
int ns = (secstate & ARM_CP_SECSTATE_NS) ? 1 : 0;
+ r2->name = g_strdup(name);
/* Reset the secure state to the specific incoming state. This is
* necessary as the register may have been defined with both states.
*/
@@ -5678,7 +5706,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
if (((r->crm == CP_ANY) && crm != 0) ||
((r->opc1 == CP_ANY) && opc1 != 0) ||
((r->opc2 == CP_ANY) && opc2 != 0)) {
- r2->type |= ARM_CP_ALIAS;
+ r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB;
}
/* Check that raw accesses are either forbidden or handled. Note that
@@ -5818,19 +5846,24 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
/* Under AArch32 CP registers can be common
* (same for secure and non-secure world) or banked.
*/
+ char *name;
+
switch (r->secure) {
case ARM_CP_SECSTATE_S:
case ARM_CP_SECSTATE_NS:
add_cpreg_to_hashtable(cpu, r, opaque, state,
- r->secure, crm, opc1, opc2);
+ r->secure, crm, opc1, opc2,
+ r->name);
break;
default:
+ name = g_strdup_printf("%s_S", r->name);
add_cpreg_to_hashtable(cpu, r, opaque, state,
ARM_CP_SECSTATE_S,
- crm, opc1, opc2);
+ crm, opc1, opc2, name);
+ g_free(name);
add_cpreg_to_hashtable(cpu, r, opaque, state,
ARM_CP_SECSTATE_NS,
- crm, opc1, opc2);
+ crm, opc1, opc2, r->name);
break;
}
} else {
@@ -5838,7 +5871,7 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
* of AArch32 */
add_cpreg_to_hashtable(cpu, r, opaque, state,
ARM_CP_SECSTATE_NS,
- crm, opc1, opc2);
+ crm, opc1, opc2, r->name);
}
}
}
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 047f3bc..0c6a144 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -603,4 +603,5 @@ DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG,
#ifdef TARGET_AARCH64
#include "helper-a64.h"
+#include "helper-sve.h"
#endif
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
new file mode 100644
index 0000000..4761d19
--- /dev/null
+++ b/target/arm/sve.decode
@@ -0,0 +1,419 @@
+# AArch64 SVE instruction descriptions
+#
+# Copyright (c) 2017 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+###########################################################################
+# Named fields. These are primarily for disjoint fields.
+
+%imm4_16_p1 16:4 !function=plus1
+%imm6_22_5 22:1 5:5
+%imm8_16_10 16:5 10:3
+%imm9_16_10 16:s6 10:3
+
+# A combination of tsz:imm3 -- extract esize.
+%tszimm_esz 22:2 5:5 !function=tszimm_esz
+# A combination of tsz:imm3 -- extract (2 * esize) - (tsz:imm3)
+%tszimm_shr 22:2 5:5 !function=tszimm_shr
+# A combination of tsz:imm3 -- extract (tsz:imm3) - esize
+%tszimm_shl 22:2 5:5 !function=tszimm_shl
+
+# Similarly for the tszh/tszl pair at 22/16 for zzi
+%tszimm16_esz 22:2 16:5 !function=tszimm_esz
+%tszimm16_shr 22:2 16:5 !function=tszimm_shr
+%tszimm16_shl 22:2 16:5 !function=tszimm_shl
+
+# Signed 8-bit immediate, optionally shifted left by 8.
+%sh8_i8s 5:9 !function=expand_imm_sh8s
+
+# Either a copy of rd (at bit 0), or a different source
+# as propagated via the MOVPRFX instruction.
+%reg_movprfx 0:5
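+
+# The !function= extractors named above (plus1, tszimm_esz, tszimm_shr,
+# tszimm_shl, expand_imm_sh8s) are C helpers; in this series they are
+# defined alongside the generated decoder in translate-sve.c.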
+
+###########################################################################
+# Named attribute sets. These give nice(er) names to the argument
+# sets that are shared between the decode helpers and the individual
+# instruction patterns.
+
+&rr_esz rd rn esz
+&rri rd rn imm
+&rr_dbm rd rn dbm
+&rrri rd rn rm imm
+&rri_esz rd rn imm esz
+&rrr_esz rd rn rm esz
+&rpr_esz rd pg rn esz
+&rprr_s rd pg rn rm s
+&rprr_esz rd pg rn rm esz
+&rprrr_esz rd pg rn rm ra esz
+&rpri_esz rd pg rn imm esz
+&ptrue rd esz pat s
+&incdec_cnt rd pat esz imm d u
+&incdec2_cnt rd rn pat esz imm d u
+
+###########################################################################
+# Named instruction formats. These are generally used to
+# reduce the amount of duplication between instruction patterns.
+
+# Two operand with unused vector element size
+@pd_pn_e0 ........ ........ ....... rn:4 . rd:4 &rr_esz esz=0
+
+# Two operand
+@pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz
+@rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz
+
+# Three operand with unused vector element size
+@rd_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 &rrr_esz esz=0
+
+# Three predicate operand, with governing predicate, flag setting
+@pd_pg_pn_pm_s ........ . s:1 .. rm:4 .. pg:4 . rn:4 . rd:4 &rprr_s
+
+# Three operand, vector element size
+@rd_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 &rrr_esz
+
+# Three operand with "memory" size, aka immediate left shift
+@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
+
+# Two register operand, with governing predicate, vector element size
+@rdn_pg_rm ........ esz:2 ... ... ... pg:3 rm:5 rd:5 \
+ &rprr_esz rn=%reg_movprfx
+@rdm_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 \
+ &rprr_esz rm=%reg_movprfx
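+
+# As an example of how these formats apply: ORR_zpzz below uses
+# @rdn_pg_rm, so esz comes from bits 23:22, pg from bits 12:10, rm
+# from bits 9:5 and rd from bits 4:0, with rn aliased to rd via
+# %reg_movprfx.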
+
+# Three register operand, with governing predicate, vector element size
+@rda_pg_rn_rm ........ esz:2 . rm:5 ... pg:3 rn:5 rd:5 \
+ &rprrr_esz ra=%reg_movprfx
+@rdn_pg_ra_rm ........ esz:2 . rm:5 ... pg:3 ra:5 rd:5 \
+ &rprrr_esz rn=%reg_movprfx
+
+# One register operand, with governing predicate, vector element size
+@rd_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 &rpr_esz
+
+# Two register operands with a 6-bit signed immediate.
+@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
+
+# Two register operand, one immediate operand, with predicate,
+# element size encoded as TSZHL. User must fill in imm.
+@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz
+
+# Similarly without predicate.
+@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz
+
+# Two register operand, one immediate operand, with 4-bit predicate.
+# User must fill in imm.
+@rdn_pg4 ........ esz:2 .. pg:4 ... ........ rd:5 \
+ &rpri_esz rn=%reg_movprfx
+
+# Two register operand, one encoded bitmask.
+@rdn_dbm ........ .. .... dbm:13 rd:5 \
+ &rr_dbm rn=%reg_movprfx
+
+# Basic Load/Store with 9-bit immediate offset
+@pd_rn_i9 ........ ........ ...... rn:5 . rd:4 \
+ &rri imm=%imm9_16_10
+@rd_rn_i9 ........ ........ ...... rn:5 rd:5 \
+ &rri imm=%imm9_16_10
+
+# One register, pattern, and uint4+1.
+# User must fill in U and D.
+@incdec_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \
+ &incdec_cnt imm=%imm4_16_p1
+@incdec2_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \
+ &incdec2_cnt imm=%imm4_16_p1 rn=%reg_movprfx
+
+###########################################################################
+# Instruction patterns. Grouped according to the SVE encoding index
+# (encodingindex.xhtml in the Arm A64 ISA XML release).
+
+### SVE Integer Arithmetic - Binary Predicated Group
+
+# SVE bitwise logical vector operations (predicated)
+ORR_zpzz 00000100 .. 011 000 000 ... ..... ..... @rdn_pg_rm
+EOR_zpzz 00000100 .. 011 001 000 ... ..... ..... @rdn_pg_rm
+AND_zpzz 00000100 .. 011 010 000 ... ..... ..... @rdn_pg_rm
+BIC_zpzz 00000100 .. 011 011 000 ... ..... ..... @rdn_pg_rm
+
+# SVE integer add/subtract vectors (predicated)
+ADD_zpzz 00000100 .. 000 000 000 ... ..... ..... @rdn_pg_rm
+SUB_zpzz 00000100 .. 000 001 000 ... ..... ..... @rdn_pg_rm
+SUB_zpzz 00000100 .. 000 011 000 ... ..... ..... @rdm_pg_rn # SUBR
+
+# SVE integer min/max/difference (predicated)
+SMAX_zpzz 00000100 .. 001 000 000 ... ..... ..... @rdn_pg_rm
+UMAX_zpzz 00000100 .. 001 001 000 ... ..... ..... @rdn_pg_rm
+SMIN_zpzz 00000100 .. 001 010 000 ... ..... ..... @rdn_pg_rm
+UMIN_zpzz 00000100 .. 001 011 000 ... ..... ..... @rdn_pg_rm
+SABD_zpzz 00000100 .. 001 100 000 ... ..... ..... @rdn_pg_rm
+UABD_zpzz 00000100 .. 001 101 000 ... ..... ..... @rdn_pg_rm
+
+# SVE integer multiply/divide (predicated)
+MUL_zpzz 00000100 .. 010 000 000 ... ..... ..... @rdn_pg_rm
+SMULH_zpzz 00000100 .. 010 010 000 ... ..... ..... @rdn_pg_rm
+UMULH_zpzz 00000100 .. 010 011 000 ... ..... ..... @rdn_pg_rm
+# Note that divide requires size >= 2; below 2 is unallocated.
+SDIV_zpzz 00000100 .. 010 100 000 ... ..... ..... @rdn_pg_rm
+UDIV_zpzz 00000100 .. 010 101 000 ... ..... ..... @rdn_pg_rm
+SDIV_zpzz 00000100 .. 010 110 000 ... ..... ..... @rdm_pg_rn # SDIVR
+UDIV_zpzz 00000100 .. 010 111 000 ... ..... ..... @rdm_pg_rn # UDIVR
+
+### SVE Integer Reduction Group
+
+# SVE bitwise logical reduction (predicated)
+ORV 00000100 .. 011 000 001 ... ..... ..... @rd_pg_rn
+EORV 00000100 .. 011 001 001 ... ..... ..... @rd_pg_rn
+ANDV 00000100 .. 011 010 001 ... ..... ..... @rd_pg_rn
+
+# SVE integer add reduction (predicated)
+# Note that saddv requires size != 3.
+UADDV 00000100 .. 000 001 001 ... ..... ..... @rd_pg_rn
+SADDV 00000100 .. 000 000 001 ... ..... ..... @rd_pg_rn
+
+# SVE integer min/max reduction (predicated)
+SMAXV 00000100 .. 001 000 001 ... ..... ..... @rd_pg_rn
+UMAXV 00000100 .. 001 001 001 ... ..... ..... @rd_pg_rn
+SMINV 00000100 .. 001 010 001 ... ..... ..... @rd_pg_rn
+UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
+
+### SVE Shift by Immediate - Predicated Group
+
+# SVE bitwise shift by immediate (predicated)
+ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \
+ @rdn_pg_tszimm imm=%tszimm_shr
+LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \
+ @rdn_pg_tszimm imm=%tszimm_shr
+LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \
+ @rdn_pg_tszimm imm=%tszimm_shl
+ASRD 00000100 .. 000 100 100 ... .. ... ..... \
+ @rdn_pg_tszimm imm=%tszimm_shr
+
+# SVE bitwise shift by vector (predicated)
+ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
+LSR_zpzz 00000100 .. 010 001 100 ... ..... ..... @rdn_pg_rm
+LSL_zpzz 00000100 .. 010 011 100 ... ..... ..... @rdn_pg_rm
+ASR_zpzz 00000100 .. 010 100 100 ... ..... ..... @rdm_pg_rn # ASRR
+LSR_zpzz 00000100 .. 010 101 100 ... ..... ..... @rdm_pg_rn # LSRR
+LSL_zpzz 00000100 .. 010 111 100 ... ..... ..... @rdm_pg_rn # LSLR
+
+# SVE bitwise shift by wide elements (predicated)
+# Note these require size != 3.
+ASR_zpzw 00000100 .. 011 000 100 ... ..... ..... @rdn_pg_rm
+LSR_zpzw 00000100 .. 011 001 100 ... ..... ..... @rdn_pg_rm
+LSL_zpzw 00000100 .. 011 011 100 ... ..... ..... @rdn_pg_rm
+
+### SVE Integer Arithmetic - Unary Predicated Group
+
+# SVE unary bit operations (predicated)
+# Note esz != 0 for FABS and FNEG.
+CLS 00000100 .. 011 000 101 ... ..... ..... @rd_pg_rn
+CLZ 00000100 .. 011 001 101 ... ..... ..... @rd_pg_rn
+CNT_zpz 00000100 .. 011 010 101 ... ..... ..... @rd_pg_rn
+CNOT 00000100 .. 011 011 101 ... ..... ..... @rd_pg_rn
+NOT_zpz 00000100 .. 011 110 101 ... ..... ..... @rd_pg_rn
+FABS 00000100 .. 011 100 101 ... ..... ..... @rd_pg_rn
+FNEG 00000100 .. 011 101 101 ... ..... ..... @rd_pg_rn
+
+# SVE integer unary operations (predicated)
+# Note esz > original size for extensions.
+ABS 00000100 .. 010 110 101 ... ..... ..... @rd_pg_rn
+NEG 00000100 .. 010 111 101 ... ..... ..... @rd_pg_rn
+SXTB 00000100 .. 010 000 101 ... ..... ..... @rd_pg_rn
+UXTB 00000100 .. 010 001 101 ... ..... ..... @rd_pg_rn
+SXTH 00000100 .. 010 010 101 ... ..... ..... @rd_pg_rn
+UXTH 00000100 .. 010 011 101 ... ..... ..... @rd_pg_rn
+SXTW 00000100 .. 010 100 101 ... ..... ..... @rd_pg_rn
+UXTW 00000100 .. 010 101 101 ... ..... ..... @rd_pg_rn
+
+### SVE Integer Multiply-Add Group
+
+# SVE integer multiply-add writing addend (predicated)
+MLA 00000100 .. 0 ..... 010 ... ..... ..... @rda_pg_rn_rm
+MLS 00000100 .. 0 ..... 011 ... ..... ..... @rda_pg_rn_rm
+
+# SVE integer multiply-add writing multiplicand (predicated)
+MLA 00000100 .. 0 ..... 110 ... ..... ..... @rdn_pg_ra_rm # MAD
+MLS 00000100 .. 0 ..... 111 ... ..... ..... @rdn_pg_ra_rm # MSB
+
+### SVE Integer Arithmetic - Unpredicated Group
+
+# SVE integer add/subtract vectors (unpredicated)
+ADD_zzz 00000100 .. 1 ..... 000 000 ..... ..... @rd_rn_rm
+SUB_zzz 00000100 .. 1 ..... 000 001 ..... ..... @rd_rn_rm
+SQADD_zzz 00000100 .. 1 ..... 000 100 ..... ..... @rd_rn_rm
+UQADD_zzz 00000100 .. 1 ..... 000 101 ..... ..... @rd_rn_rm
+SQSUB_zzz 00000100 .. 1 ..... 000 110 ..... ..... @rd_rn_rm
+UQSUB_zzz 00000100 .. 1 ..... 000 111 ..... ..... @rd_rn_rm
+
+### SVE Logical - Unpredicated Group
+
+# SVE bitwise logical operations (unpredicated)
+AND_zzz 00000100 00 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+ORR_zzz 00000100 01 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+EOR_zzz 00000100 10 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+BIC_zzz 00000100 11 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+
+### SVE Index Generation Group
+
+# SVE index generation (immediate start, immediate increment)
+INDEX_ii 00000100 esz:2 1 imm2:s5 010000 imm1:s5 rd:5
+
+# SVE index generation (immediate start, register increment)
+INDEX_ir 00000100 esz:2 1 rm:5 010010 imm:s5 rd:5
+
+# SVE index generation (register start, immediate increment)
+INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
+
+# SVE index generation (register start, register increment)
+INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
+
+### SVE Stack Allocation Group
+
+# SVE stack frame adjustment
+ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
+ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
+
+# SVE stack frame size
+RDVL 00000100 101 11111 01010 imm:s6 rd:5
+
+### SVE Bitwise Shift - Unpredicated Group
+
+# SVE bitwise shift by immediate (unpredicated)
+ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \
+ @rd_rn_tszimm imm=%tszimm16_shr
+LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \
+ @rd_rn_tszimm imm=%tszimm16_shr
+LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \
+ @rd_rn_tszimm imm=%tszimm16_shl
+
+# SVE bitwise shift by wide elements (unpredicated)
+# Note esz != 3
+ASR_zzw 00000100 .. 1 ..... 1000 00 ..... ..... @rd_rn_rm
+LSR_zzw 00000100 .. 1 ..... 1000 01 ..... ..... @rd_rn_rm
+LSL_zzw 00000100 .. 1 ..... 1000 11 ..... ..... @rd_rn_rm
+
+### SVE Compute Vector Address Group
+
+# SVE vector address generation
+ADR_s32 00000100 00 1 ..... 1010 .. ..... ..... @rd_rn_msz_rm
+ADR_u32 00000100 01 1 ..... 1010 .. ..... ..... @rd_rn_msz_rm
+ADR_p32 00000100 10 1 ..... 1010 .. ..... ..... @rd_rn_msz_rm
+ADR_p64 00000100 11 1 ..... 1010 .. ..... ..... @rd_rn_msz_rm
+
+### SVE Integer Misc - Unpredicated Group
+
+# SVE floating-point exponential accelerator
+# Note esz != 0
+FEXPA 00000100 .. 1 00000 101110 ..... ..... @rd_rn
+
+# SVE floating-point trig select coefficient
+# Note esz != 0
+FTSSEL 00000100 .. 1 ..... 101100 ..... ..... @rd_rn_rm
+
+### SVE Element Count Group
+
+# SVE element count
+CNT_r 00000100 .. 10 .... 1110 0 0 ..... ..... @incdec_cnt d=0 u=1
+
+# SVE inc/dec register by element count
+INCDEC_r 00000100 .. 11 .... 1110 0 d:1 ..... ..... @incdec_cnt u=1
+
+# SVE saturating inc/dec register by element count
+SINCDEC_r_32 00000100 .. 10 .... 1111 d:1 u:1 ..... ..... @incdec_cnt
+SINCDEC_r_64 00000100 .. 11 .... 1111 d:1 u:1 ..... ..... @incdec_cnt
+
+# SVE inc/dec vector by element count
+# Note this requires esz != 0.
+INCDEC_v 00000100 .. 1 1 .... 1100 0 d:1 ..... ..... @incdec2_cnt u=1
+
+# SVE saturating inc/dec vector by element count
+# Note these require esz != 0.
+SINCDEC_v 00000100 .. 1 0 .... 1100 d:1 u:1 ..... ..... @incdec2_cnt
+
+### SVE Bitwise Immediate Group
+
+# SVE bitwise logical with immediate (unpredicated)
+ORR_zzi 00000101 00 0000 ............. ..... @rdn_dbm
+EOR_zzi 00000101 01 0000 ............. ..... @rdn_dbm
+AND_zzi 00000101 10 0000 ............. ..... @rdn_dbm
+
+# SVE broadcast bitmask immediate
+DUPM 00000101 11 0000 dbm:13 rd:5
+
+### SVE Integer Wide Immediate - Predicated Group
+
+# SVE copy floating-point immediate (predicated)
+FCPY 00000101 .. 01 .... 110 imm:8 ..... @rdn_pg4
+
+# SVE copy integer immediate (predicated)
+CPY_m_i 00000101 .. 01 .... 01 . ........ ..... @rdn_pg4 imm=%sh8_i8s
+CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s
+
+### SVE Permute - Extract Group
+
+# SVE extract vector (immediate offset)
+EXT 00000101 001 ..... 000 ... rm:5 rd:5 \
+ &rrri rn=%reg_movprfx imm=%imm8_16_10
+
+### SVE Predicate Logical Operations Group
+
+# SVE predicate logical operations
+AND_pppp 00100101 0. 00 .... 01 .... 0 .... 0 .... @pd_pg_pn_pm_s
+BIC_pppp 00100101 0. 00 .... 01 .... 0 .... 1 .... @pd_pg_pn_pm_s
+EOR_pppp 00100101 0. 00 .... 01 .... 1 .... 0 .... @pd_pg_pn_pm_s
+SEL_pppp 00100101 0. 00 .... 01 .... 1 .... 1 .... @pd_pg_pn_pm_s
+ORR_pppp 00100101 1. 00 .... 01 .... 0 .... 0 .... @pd_pg_pn_pm_s
+ORN_pppp 00100101 1. 00 .... 01 .... 0 .... 1 .... @pd_pg_pn_pm_s
+NOR_pppp 00100101 1. 00 .... 01 .... 1 .... 0 .... @pd_pg_pn_pm_s
+NAND_pppp 00100101 1. 00 .... 01 .... 1 .... 1 .... @pd_pg_pn_pm_s
+
+### SVE Predicate Misc Group
+
+# SVE predicate test
+PTEST 00100101 01 010000 11 pg:4 0 rn:4 0 0000
+
+# SVE predicate initialize
+PTRUE 00100101 esz:2 01100 s:1 111000 pat:5 0 rd:4
+
+# SVE initialize FFR
+SETFFR 00100101 0010 1100 1001 0000 0000 0000
+
+# SVE zero predicate register
+PFALSE 00100101 0001 1000 1110 0100 0000 rd:4
+
+# SVE predicate read from FFR (predicated)
+RDFFR_p 00100101 0 s:1 0110001111000 pg:4 0 rd:4
+
+# SVE predicate read from FFR (unpredicated)
+RDFFR 00100101 0001 1001 1111 0000 0000 rd:4
+
+# SVE FFR write from predicate (WRFFR)
+WRFFR 00100101 0010 1000 1001 000 rn:4 00000
+
+# SVE predicate first active
+PFIRST 00100101 01 011 000 11000 00 .... 0 .... @pd_pn_e0
+
+# SVE predicate next active
+PNEXT 00100101 .. 011 001 11000 10 .... 0 .... @pd_pn
+
+### SVE Memory - 32-bit Gather and Unsized Contiguous Group
+
+# SVE load predicate register
+LDR_pri 10000101 10 ...... 000 ... ..... 0 .... @pd_rn_i9
+
+# SVE load vector register
+LDR_zri 10000101 10 ...... 010 ... ..... ..... @rd_rn_i9
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
new file mode 100644
index 0000000..b825e44
--- /dev/null
+++ b/target/arm/sve_helper.c
@@ -0,0 +1,1562 @@
+/*
+ * ARM SVE Operations
+ *
+ * Copyright (c) 2018 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "fpu/softfloat.h"
+
+
+/* Note that vector data is stored in host-endian 64-bit chunks,
+ so addressing units smaller than that need a host-endian fixup. */
+#ifdef HOST_WORDS_BIGENDIAN
+#define H1(x) ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x) ((x) ^ 3)
+#define H4(x) ((x) ^ 1)
+#else
+#define H1(x) (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x) (x)
+#define H4(x) (x)
+#endif
+
+/* Return a value for NZCV as per the ARM PredTest pseudofunction.
+ *
+ * The return value has bit 31 set if N is set, bit 1 set if Z is clear,
+ * and bit 0 set if C is set. Compare the definitions of these variables
+ * within CPUARMState.
+ */
+
+/* For no G bits set, NZCV = C. */
+#define PREDTEST_INIT 1
+
+/* This is an iterative function, called for each Pd and Pg word
+ * moving forward.
+ */
+static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags)
+{
+ if (likely(g)) {
+ /* Compute N from first D & G.
+ Use bit 2 to signal first G bit seen. */
+ if (!(flags & 4)) {
+ flags |= ((d & (g & -g)) != 0) << 31;
+ flags |= 4;
+ }
+
+ /* Accumulate Z from each D & G. */
+ flags |= ((d & g) != 0) << 1;
+
+ /* Compute C from last !(D & G). Replace previous. */
+ flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0);
+ }
+ return flags;
+}
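+
+/* As a worked example, iter_predtest_fwd(0x5, 0x7, PREDTEST_INIT)
+ * returns 0x80000006: bit 31 (N) set because the first active element
+ * of D is set, bit 1 set because some active element is set (Z clear),
+ * bit 0 (C) clear because the last active element is also set, and
+ * bit 2 the internal first-G-seen marker.
+ */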
+
+/* The same for a single word predicate. */
+uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g)
+{
+ return iter_predtest_fwd(d, g, PREDTEST_INIT);
+}
+
+/* The same for a multi-word predicate. */
+uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
+{
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg;
+ uintptr_t i = 0;
+
+ do {
+ flags = iter_predtest_fwd(d[i], g[i], flags);
+ } while (++i < words);
+
+ return flags;
+}
+
+/* Expand active predicate bits to bytes, for byte elements.
+ * for (i = 0; i < 256; ++i) {
+ * unsigned long m = 0;
+ * for (j = 0; j < 8; j++) {
+ * if ((i >> j) & 1) {
+ * m |= 0xfful << (j << 3);
+ * }
+ * }
+ * printf("0x%016lx,\n", m);
+ * }
+ */
+static inline uint64_t expand_pred_b(uint8_t byte)
+{
+ static const uint64_t word[256] = {
+ 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00,
+ 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff,
+ 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000,
+ 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff,
+ 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00,
+ 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff,
+ 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000,
+ 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff,
+ 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00,
+ 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff,
+ 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000,
+ 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff,
+ 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00,
+ 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff,
+ 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000,
+ 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff,
+ 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00,
+ 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff,
+ 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000,
+ 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff,
+ 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00,
+ 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff,
+ 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000,
+ 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff,
+ 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00,
+ 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff,
+ 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000,
+ 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff,
+ 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
+ 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff,
+ 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000,
+ 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff,
+ 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00,
+ 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff,
+ 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000,
+ 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff,
+ 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00,
+ 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff,
+ 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000,
+ 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff,
+ 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00,
+ 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff,
+ 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000,
+ 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff,
+ 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00,
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff,
+ 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000,
+ 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff,
+ 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00,
+ 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff,
+ 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000,
+ 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff,
+ 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00,
+ 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff,
+ 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000,
+ 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff,
+ 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00,
+ 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff,
+ 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000,
+ 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff,
+ 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00,
+ 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff,
+ 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000,
+ 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff,
+ 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00,
+ 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff,
+ 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000,
+ 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff,
+ 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00,
+ 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff,
+ 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000,
+ 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff,
+ 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00,
+ 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff,
+ 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000,
+ 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff,
+ 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00,
+ 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff,
+ 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000,
+ 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff,
+ 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00,
+ 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff,
+ 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000,
+ 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff,
+ 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00,
+ 0xffffffffffffffff,
+ };
+ return word[byte];
+}
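+
+/* For example, expand_pred_b(0x05) == 0x0000000000ff00ff: predicate
+ * bits 0 and 2 select bytes 0 and 2 of the 64-bit mask.
+ */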
+
+/* Similarly for half-word elements.
+ * for (i = 0; i < 256; ++i) {
+ * unsigned long m = 0;
+ * if (i & 0xaa) {
+ * continue;
+ * }
+ * for (j = 0; j < 8; j += 2) {
+ * if ((i >> j) & 1) {
+ * m |= 0xfffful << (j << 3);
+ * }
+ * }
+ * printf("[0x%x] = 0x%016lx,\n", i, m);
+ * }
+ */
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+ static const uint64_t word[] = {
+ [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
+ [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
+ [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
+ [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
+ [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
+ [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
+ [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
+ [0x55] = 0xffffffffffffffff,
+ };
+ return word[byte & 0x55];
+}
+
+/* Similarly for single word elements. */
+static inline uint64_t expand_pred_s(uint8_t byte)
+{
+ static const uint64_t word[] = {
+ [0x01] = 0x00000000ffffffffull,
+ [0x10] = 0xffffffff00000000ull,
+ [0x11] = 0xffffffffffffffffull,
+ };
+ return word[byte & 0x11];
+}
+
+#define LOGICAL_PPPP(NAME, FUNC) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ uintptr_t opr_sz = simd_oprsz(desc); \
+ uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \
+ uintptr_t i; \
+ for (i = 0; i < opr_sz / 8; ++i) { \
+ d[i] = FUNC(n[i], m[i], g[i]); \
+ } \
+}
+
+#define DO_AND(N, M, G) (((N) & (M)) & (G))
+#define DO_BIC(N, M, G) (((N) & ~(M)) & (G))
+#define DO_EOR(N, M, G) (((N) ^ (M)) & (G))
+#define DO_ORR(N, M, G) (((N) | (M)) & (G))
+#define DO_ORN(N, M, G) (((N) | ~(M)) & (G))
+#define DO_NOR(N, M, G) (~((N) | (M)) & (G))
+#define DO_NAND(N, M, G) (~((N) & (M)) & (G))
+#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G)))
+
+LOGICAL_PPPP(sve_and_pppp, DO_AND)
+LOGICAL_PPPP(sve_bic_pppp, DO_BIC)
+LOGICAL_PPPP(sve_eor_pppp, DO_EOR)
+LOGICAL_PPPP(sve_sel_pppp, DO_SEL)
+LOGICAL_PPPP(sve_orr_pppp, DO_ORR)
+LOGICAL_PPPP(sve_orn_pppp, DO_ORN)
+LOGICAL_PPPP(sve_nor_pppp, DO_NOR)
+LOGICAL_PPPP(sve_nand_pppp, DO_NAND)
+
+#undef DO_AND
+#undef DO_BIC
+#undef DO_EOR
+#undef DO_ORR
+#undef DO_ORN
+#undef DO_NOR
+#undef DO_NAND
+#undef DO_SEL
+#undef LOGICAL_PPPP
+
+/* Fully general three-operand expander, controlled by a predicate.
+ * This is complicated by the host-endian storage of the register file.
+ */
+/* ??? I don't expect the compiler could ever vectorize this itself.
+ * With some tables we can convert bit masks to byte masks, and with
+ * extra care wrt byte/word ordering we could use gcc generic vectors
+ * and do 16 bytes at a time.
+ */
+#define DO_ZPZZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
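+
+/* The predicate is consumed 16 bits at a time above, one bit per
+ * vector byte; each element is governed by the lowest bit of its
+ * predicate field, hence the pg >>= sizeof(TYPE) step.
+ */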
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i], mm = m[i]; \
+ d[i] = OP(nn, mm); \
+ } \
+ } \
+}
+
+#define DO_AND(N, M) (N & M)
+#define DO_EOR(N, M) (N ^ M)
+#define DO_ORR(N, M) (N | M)
+#define DO_BIC(N, M) (N & ~M)
+#define DO_ADD(N, M) (N + M)
+#define DO_SUB(N, M) (N - M)
+#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
+#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
+#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
+#define DO_MUL(N, M) (N * M)
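+/* As architecturally required, division by zero yields zero. */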
+#define DO_DIV(N, M) (M ? N / M : 0)
+
+DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND)
+DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND)
+DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND)
+DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND)
+
+DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR)
+DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR)
+DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR)
+DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR)
+
+DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR)
+DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR)
+DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR)
+DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR)
+
+DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC)
+DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC)
+DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC)
+DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC)
+
+DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD)
+DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD)
+DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD)
+DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD)
+
+DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB)
+DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB)
+DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB)
+DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB)
+
+DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX)
+DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX)
+DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX)
+DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX)
+
+DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX)
+DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX)
+DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX)
+DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX)
+
+DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN)
+DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN)
+DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN)
+DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN)
+
+DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN)
+DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN)
+DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN)
+DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN)
+
+DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD)
+DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD)
+DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD)
+DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD)
+
+DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD)
+DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD)
+DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD)
+DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD)
+
+/* Because the computation type is at least twice as large as required,
+ these work for both signed and unsigned source types. */
+static inline uint8_t do_mulh_b(int32_t n, int32_t m)
+{
+ return (n * m) >> 8;
+}
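+
+/* For example, with int8_t inputs (SMULH) -1 * -1 == 1, so do_mulh_b
+ * returns 0x00; with uint8_t inputs (UMULH) 0xff * 0xff == 0xfe01, so
+ * it returns 0xfe. The 32-bit intermediate keeps both cases exact.
+ */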
+
+static inline uint16_t do_mulh_h(int32_t n, int32_t m)
+{
+ return (n * m) >> 16;
+}
+
+static inline uint32_t do_mulh_s(int64_t n, int64_t m)
+{
+ return (n * m) >> 32;
+}
+
+static inline uint64_t do_smulh_d(uint64_t n, uint64_t m)
+{
+ uint64_t lo, hi;
+ muls64(&lo, &hi, n, m);
+ return hi;
+}
+
+static inline uint64_t do_umulh_d(uint64_t n, uint64_t m)
+{
+ uint64_t lo, hi;
+ mulu64(&lo, &hi, n, m);
+ return hi;
+}
+
+DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL)
+DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL)
+DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL)
+DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL)
+
+DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b)
+DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h)
+DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s)
+DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d)
+
+DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b)
+DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h)
+DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s)
+DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d)
+
+DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_DIV)
+DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_DIV)
+
+DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_DIV)
+DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_DIV)
+
+/* Note that all bits of the shift count are significant
+ and are not taken modulo the element size. */
+#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1))
+#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0)
+#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0)
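+
+/* For example, DO_LSR on a uint64_t element with M == 64 yields 0, as
+ * the architecture requires, whereas a bare host shift by 64 or more
+ * would be undefined behaviour in C.
+ */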
+
+DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1, DO_LSL)
+
+DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1_2, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_2, DO_LSL)
+
+DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1_4, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_4, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL)
+
+DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR)
+DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR)
+DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL)
+
+#undef DO_ZPZZ
+#undef DO_ZPZZ_D
+
+/* Three-operand expander, controlled by a predicate, in which the
+ * third operand is "wide". That is, for D = N op M, the same 64-bit
+ * value of M is used with all of the narrower values of N.
+ */
+#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint8_t pg = *(uint8_t *)(vg + H1(i >> 3)); \
+ TYPEW mm = *(TYPEW *)(vm + i); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 7); \
+ } \
+}
+
+DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL)
+
+DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
+
+DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
+
+#undef DO_ZPZW
+
+/* Fully general two-operand expander, controlled by a predicate.
+ */
+#define DO_ZPZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i]; \
+ d[i] = OP(nn); \
+ } \
+ } \
+}
+
+#define DO_CLS_B(N) (clrsb32(N) - 24)
+#define DO_CLS_H(N) (clrsb32(N) - 16)
+
+DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B)
+DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H)
+DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32)
+DO_ZPZ_D(sve_cls_d, int64_t, clrsb64)
+
+#define DO_CLZ_B(N) (clz32(N) - 24)
+#define DO_CLZ_H(N) (clz32(N) - 16)
+
+DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B)
+DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H)
+DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32)
+DO_ZPZ_D(sve_clz_d, uint64_t, clz64)
+
+DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8)
+DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16)
+DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32)
+DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64)
+
+#define DO_CNOT(N) (N == 0)
+
+DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT)
+DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT)
+DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT)
+DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT)
+
+#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1))
+
+DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
+DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
+DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
+
+#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
+
+DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
+DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
+DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
+
+#define DO_NOT(N) (~N)
+
+DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
+DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT)
+DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT)
+DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT)
+
+#define DO_SXTB(N) ((int8_t)N)
+#define DO_SXTH(N) ((int16_t)N)
+#define DO_SXTS(N) ((int32_t)N)
+#define DO_UXTB(N) ((uint8_t)N)
+#define DO_UXTH(N) ((uint16_t)N)
+#define DO_UXTS(N) ((uint32_t)N)
+
+DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB)
+DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB)
+DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH)
+DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB)
+DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH)
+DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS)
+
+DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB)
+DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB)
+DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH)
+DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB)
+DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH)
+DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS)
+
+#define DO_ABS(N) (N < 0 ? -N : N)
+
+DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS)
+DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS)
+DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS)
+DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS)
+
+#define DO_NEG(N) (-N)
+
+DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG)
+DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG)
+DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG)
+DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG)
+
+/* Three-operand expander, unpredicated, in which the third operand is "wide".
+ */
+#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ TYPEW mm = *(TYPEW *)(vm + i); \
+ do { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ i += sizeof(TYPE); \
+ } while (i & 7); \
+ } \
+}
+
+DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR)
+DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR)
+DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL)
+
+DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR)
+DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
+DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
+
+DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR)
+DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
+DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
+
+#undef DO_ZZW
+
+#undef DO_CLS_B
+#undef DO_CLS_H
+#undef DO_CLZ_B
+#undef DO_CLZ_H
+#undef DO_CNOT
+#undef DO_FABS
+#undef DO_FNEG
+#undef DO_ABS
+#undef DO_NEG
+#undef DO_ZPZ
+#undef DO_ZPZ_D
+
+/* Two-operand reduction expander, controlled by a predicate.
+ * The difference between TYPERED and TYPERET has to do with
+ * sign-extension. E.g. for SMAX, TYPERED must be signed,
+ * but TYPERET must be unsigned so that e.g. a 32-bit value
+ * is not sign-extended to the ABI uint64_t return type.
+ */
+/* ??? If we were to vectorize this by hand the reduction ordering
+ * would change. For integer operands, this is perfectly fine.
+ */
+#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \
+uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPERED ret = INIT; \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEELT nn = *(TYPEELT *)(vn + H(i)); \
+ ret = OP(ret, nn); \
+ } \
+ i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \
+ } while (i & 15); \
+ } \
+ return (TYPERET)ret; \
+}
+
+#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \
+uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPEE *n = vn; \
+ uint8_t *pg = vg; \
+ TYPER ret = INIT; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPEE nn = n[i]; \
+ ret = OP(ret, nn); \
+ } \
+ } \
+ return ret; \
+}
+
+DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR)
+DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR)
+DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR)
+DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR)
+
+DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR)
+DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR)
+DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR)
+DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR)
+
+DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND)
+DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND)
+DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND)
+DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND)
+
+DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
+DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
+DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
+
+DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
+DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
+DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
+DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD)
+
+DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX)
+DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX)
+DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX)
+DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX)
+
+DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX)
+DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX)
+DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX)
+DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX)
+
+DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN)
+DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN)
+DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN)
+DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN)
+
+DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN)
+DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN)
+DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN)
+DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
+
+#undef DO_VPZ
+#undef DO_VPZ_D
+
+#undef DO_AND
+#undef DO_ORR
+#undef DO_EOR
+#undef DO_BIC
+#undef DO_ADD
+#undef DO_SUB
+#undef DO_MAX
+#undef DO_MIN
+#undef DO_ABD
+#undef DO_MUL
+#undef DO_DIV
+#undef DO_ASR
+#undef DO_LSR
+#undef DO_LSL
+
+/* Similar to the ARM LastActiveElement pseudocode function, except the
+   result is multiplied by the element size. This includes the not-found
+   indication; e.g. not found for esz=3 is -8. */
+static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz)
+{
+ uint64_t mask = pred_esz_masks[esz];
+ intptr_t i = words;
+
+ do {
+ uint64_t this_g = g[--i] & mask;
+ if (this_g) {
+ return i * 64 + (63 - clz64(this_g));
+ }
+ } while (i > 0);
+ return (intptr_t)-1 << esz;
+}
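+
+/* Worked example: with esz = 2 (32-bit elements) and words = 1, a guard
+ * g[0] = 0x10 has its highest active bit at position 4, so the result is
+ * 0 * 64 + (63 - clz64(0x10)) = 4, i.e. element index 1 scaled by the
+ * 4-byte element size.  An all-false guard yields (intptr_t)-1 << 2 = -4,
+ * the scaled not-found indication.
+ */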
+
+uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t words)
+{
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg;
+ intptr_t i = 0;
+
+ do {
+ uint64_t this_d = d[i];
+ uint64_t this_g = g[i];
+
+ if (this_g) {
+ if (!(flags & 4)) {
+ /* Set in D the first bit of G. */
+ this_d |= this_g & -this_g;
+ d[i] = this_d;
+ }
+ flags = iter_predtest_fwd(this_d, this_g, flags);
+ }
+ } while (++i < words);
+
+ return flags;
+}
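+
+/* Note that this_g & -this_g isolates the lowest set bit: e.g. for
+ * this_g = 0b0110, -this_g = ...11111010 and the AND yields 0b0010, so
+ * PFIRST sets exactly the first active element of D.
+ */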
+
+uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
+{
+ intptr_t words = extract32(pred_desc, 0, SIMD_OPRSZ_BITS);
+ intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg, esz_mask;
+ intptr_t i, next;
+
+ next = last_active_element(vd, words, esz) + (1 << esz);
+ esz_mask = pred_esz_masks[esz];
+
+ /* Similar to the pseudocode for pnext, but scaled by ESZ
+ so that we find the correct bit. */
+ if (next < words * 64) {
+ uint64_t mask = -1;
+
+ if (next & 63) {
+ mask = ~((1ull << (next & 63)) - 1);
+ next &= -64;
+ }
+ do {
+ uint64_t this_g = g[next / 64] & esz_mask & mask;
+ if (this_g != 0) {
+ next = (next & -64) + ctz64(this_g);
+ break;
+ }
+ next += 64;
+ mask = -1;
+ } while (next < words * 64);
+ }
+
+ i = 0;
+ do {
+ uint64_t this_d = 0;
+ if (i == next / 64) {
+ this_d = 1ull << (next & 63);
+ }
+ d[i] = this_d;
+ flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags);
+ } while (++i < words);
+
+ return flags;
+}
+
+/* Store zero into every active element of Zd. We will use this for two-
+ * and three-operand predicated instructions for which logic dictates a
+ * zero result. In particular, logical shift by element size, which is
+ * otherwise undefined on the host.
+ *
+ * For element sizes smaller than uint64_t, we use tables to expand
+ * the N bits of the controlling predicate to a byte mask, and clear
+ * those bytes.
+ */
+void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] &= ~expand_pred_b(pg[H1(i)]);
+ }
+}
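+
+/* For example, expand_pred_b(0x05) expands predicate bits 0 and 2 into
+ * the byte mask 0x0000000000ff00ff, so the complemented AND above clears
+ * exactly bytes 0 and 2 of each 64-bit word.
+ */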
+
+void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] &= ~expand_pred_h(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] &= ~expand_pred_s(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+ for (i = 0; i < opr_sz; i += 1) {
+ if (pg[H1(i)] & 1) {
+ d[i] = 0;
+ }
+ }
+}
+
+/* Three-operand expander, immediate operand, controlled by a predicate.
+ */
+#define DO_ZPZI(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE imm = simd_data(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, imm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZI_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn; \
+ TYPE imm = simd_data(desc); \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i]; \
+ d[i] = OP(nn, imm); \
+ } \
+ } \
+}
+
+#define DO_SHR(N, M) (N >> M)
+#define DO_SHL(N, M) (N << M)
+
+/* Arithmetic shift right for division. This rounds negative numbers
+ toward zero as per signed division. Therefore before shifting,
+ when N is negative, add 2**M-1. */
+#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M)
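+
+/* E.g. N = -7, M = 2: a plain arithmetic shift gives -7 >> 2 = -2,
+ * rounding toward minus infinity, whereas DO_ASRD computes
+ * (-7 + 3) >> 2 = -1, matching the truncating -7 / 4 of signed division.
+ */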
+
+DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR)
+DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR)
+DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR)
+DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR)
+
+DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR)
+DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR)
+DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR)
+DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR)
+
+DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL)
+DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL)
+DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL)
+DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL)
+
+DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD)
+DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD)
+DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD)
+DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD)
+
+#undef DO_SHR
+#undef DO_SHL
+#undef DO_ASRD
+#undef DO_ZPZI
+#undef DO_ZPZI_D
+
+/* Fully general four-operand expander, controlled by a predicate.
+ */
+#define DO_ZPZZZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
+ void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ TYPE aa = *(TYPE *)(va + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(aa, nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
+ void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *a = va, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE aa = a[i], nn = n[i], mm = m[i]; \
+ d[i] = OP(aa, nn, mm); \
+ } \
+ } \
+}
+
+#define DO_MLA(A, N, M) (A + N * M)
+#define DO_MLS(A, N, M) (A - N * M)
+
+DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA)
+DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS)
+
+DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA)
+DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS)
+
+DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA)
+DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS)
+
+DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA)
+DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS)
+
+#undef DO_MLA
+#undef DO_MLS
+#undef DO_ZPZZZ
+#undef DO_ZPZZZ_D
+
+void HELPER(sve_index_b)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H1(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_h)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H2(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_s)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H4(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_d)(void *vd, uint64_t start,
+ uint64_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = start + i * incr;
+ }
+}
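+
+/* E.g. sve_index_h with start = 1 and incr = 2 fills a 16-byte vector
+ * with the halfword sequence 1, 3, 5, 7, 9, 11, 13, 15.
+ */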
+
+void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t sh = simd_data(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + (m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + (m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh);
+ }
+}
+
+void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc)
+{
+    /* These constants are copied directly from the ARM pseudocode. */
+ static const uint16_t coeff[] = {
+ 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8,
+ 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189,
+ 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295,
+ 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint16_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 5);
+ uint16_t exp = extract32(nn, 5, 5);
+ d[i] = coeff[idx] | (exp << 10);
+ }
+}
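+
+/* E.g. nn = 0x0023 decodes as idx = 3, exp = 1, producing
+ * coeff[3] | (1 << 10) = 0x0045 | 0x0400 = 0x0445: the table supplies the
+ * float16 fraction bits and exp lands in the exponent field.
+ */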
+
+void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc)
+{
+    /* These constants are copied directly from the ARM pseudocode. */
+ static const uint32_t coeff[] = {
+ 0x000000, 0x0164d2, 0x02cd87, 0x043a29,
+ 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5,
+ 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc,
+ 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d,
+ 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda,
+ 0x1ef532, 0x20b051, 0x227043, 0x243516,
+ 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
+ 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4,
+ 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b,
+ 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd,
+ 0x45672a, 0x478d75, 0x49b9be, 0x4bec15,
+ 0x4e248c, 0x506334, 0x52a81e, 0x54f35b,
+ 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5,
+ 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
+ 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177,
+ 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint32_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 6);
+ uint32_t exp = extract32(nn, 6, 8);
+ d[i] = coeff[idx] | (exp << 23);
+ }
+}
+
+void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
+{
+    /* These constants are copied directly from the ARM pseudocode. */
+ static const uint64_t coeff[] = {
+ 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull,
+ 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull,
+ 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull,
+ 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull,
+ 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull,
+ 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull,
+ 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull,
+ 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull,
+ 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull,
+ 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull,
+ 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull,
+ 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull,
+ 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull,
+ 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull,
+ 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull,
+ 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull,
+ 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull,
+ 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull,
+ 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull,
+ 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull,
+ 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull,
+ 0xFA7C1819E90D8ull,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint64_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 6);
+ uint64_t exp = extract32(nn, 6, 11);
+ d[i] = coeff[idx] | (exp << 52);
+ }
+}
+
+void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint16_t nn = n[i];
+ uint16_t mm = m[i];
+ if (mm & 1) {
+ nn = float16_one;
+ }
+ d[i] = nn ^ (mm & 2) << 14;
+ }
+}
+
+void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint32_t nn = n[i];
+ uint32_t mm = m[i];
+ if (mm & 1) {
+ nn = float32_one;
+ }
+ d[i] = nn ^ (mm & 2) << 30;
+ }
+}
+
+void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t mm = m[i];
+ if (mm & 1) {
+ nn = float64_one;
+ }
+ d[i] = nn ^ (mm & 2) << 62;
+ }
+}
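+
+/* E.g. in the double-precision case, mm = 3 selects float64_one and
+ * flips the sign bit: 0x3ff0000000000000 ^ (2ull << 62)
+ * = 0xbff0000000000000, i.e. -1.0.
+ */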
+
+/*
+ * Signed saturating addition with scalar operand.
+ */
+
+void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int8_t)) {
+ int r = *(int8_t *)(a + i) + b;
+ if (r > INT8_MAX) {
+ r = INT8_MAX;
+ } else if (r < INT8_MIN) {
+ r = INT8_MIN;
+ }
+ *(int8_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ int r = *(int16_t *)(a + i) + b;
+ if (r > INT16_MAX) {
+ r = INT16_MAX;
+ } else if (r < INT16_MIN) {
+ r = INT16_MIN;
+ }
+ *(int16_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ int64_t r = *(int32_t *)(a + i) + b;
+ if (r > INT32_MAX) {
+ r = INT32_MAX;
+ } else if (r < INT32_MIN) {
+ r = INT32_MIN;
+ }
+ *(int32_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ int64_t ai = *(int64_t *)(a + i);
+ int64_t r = ai + b;
+ if (((r ^ ai) & ~(ai ^ b)) < 0) {
+ /* Signed overflow. */
+ r = (r < 0 ? INT64_MAX : INT64_MIN);
+ }
+ *(int64_t *)(d + i) = r;
+ }
+}
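+
+/* The test ((r ^ ai) & ~(ai ^ b)) < 0 above detects signed overflow:
+ * the sign of r differs from ai while ai and b agree in sign.  E.g.
+ * ai = INT64_MAX, b = 1 wraps to r = INT64_MIN; r ^ ai has the sign bit
+ * set and ai ^ b does not, so r saturates to INT64_MAX.
+ */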
+
+/*
+ * Unsigned saturating addition with scalar operand.
+ */
+
+void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ int r = *(uint8_t *)(a + i) + b;
+ if (r > UINT8_MAX) {
+ r = UINT8_MAX;
+ } else if (r < 0) {
+ r = 0;
+ }
+ *(uint8_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ int r = *(uint16_t *)(a + i) + b;
+ if (r > UINT16_MAX) {
+ r = UINT16_MAX;
+ } else if (r < 0) {
+ r = 0;
+ }
+ *(uint16_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ int64_t r = *(uint32_t *)(a + i) + b;
+ if (r > UINT32_MAX) {
+ r = UINT32_MAX;
+ } else if (r < 0) {
+ r = 0;
+ }
+ *(uint32_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint64_t r = *(uint64_t *)(a + i) + b;
+ if (r < b) {
+ r = UINT64_MAX;
+ }
+ *(uint64_t *)(d + i) = r;
+ }
+}
+
+void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint64_t ai = *(uint64_t *)(a + i);
+ *(uint64_t *)(d + i) = (ai < b ? 0 : ai - b);
+ }
+}
+
+/* Two-operand predicated copy immediate with merge. All valid immediates
+ * can fit within 17 signed bits in the simd_data field.
+ */
+void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_8, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_b(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
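+
+/* The bit-select (mm & pp) | (nn & ~pp) merges per element: e.g. with
+ * pg[0] = 0x01, pp = expand_pred_b(0x01) = 0x00000000000000ff, so the
+ * low byte of d[0] comes from the immediate and the rest from n[0].
+ */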
+
+void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_16, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_h(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
+
+void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_32, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_s(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
+
+void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ d[i] = (pg[H1(i)] & 1 ? mm : nn);
+ }
+}
+
+void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_8, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_b(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_16, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_h(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_32, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_s(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = (pg[H1(i)] & 1 ? val : 0);
+ }
+}
+
+/* Big-endian hosts need to frob the byte indices. If the copy
+ * happens to be 8-byte aligned, then no frobbing is necessary.
+ */
+static void swap_memmove(void *vd, void *vs, size_t n)
+{
+ uintptr_t d = (uintptr_t)vd;
+ uintptr_t s = (uintptr_t)vs;
+ uintptr_t o = (d | s | n) & 7;
+ size_t i;
+
+#ifndef HOST_WORDS_BIGENDIAN
+ o = 0;
+#endif
+ switch (o) {
+ case 0:
+ memmove(vd, vs, n);
+ break;
+
+ case 4:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i += 4) {
+ *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 4;
+ *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+ }
+ }
+ break;
+
+ case 2:
+ case 6:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i += 2) {
+ *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 2;
+ *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+ }
+ }
+ break;
+
+ default:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i++) {
+ *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 1;
+ *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+ }
+ }
+ break;
+ }
+}
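+
+/* E.g. on a big-endian host with o == 4, each 32-bit access address is
+ * frobbed with H1_4 -- an XOR with 4 on such hosts -- so that it touches
+ * the correct half of the 64-bit host words holding the vector.
+ */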
+
+void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t opr_sz = simd_oprsz(desc);
+ size_t n_ofs = simd_data(desc);
+ size_t n_siz = opr_sz - n_ofs;
+
+ if (vd != vm) {
+ swap_memmove(vd, vn + n_ofs, n_siz);
+ swap_memmove(vd + n_siz, vm, n_ofs);
+ } else if (vd != vn) {
+ swap_memmove(vd + n_siz, vd, n_ofs);
+ swap_memmove(vd, vn + n_ofs, n_siz);
+ } else {
+ /* vd == vn == vm. Need temp space. */
+ ARMVectorReg tmp;
+ swap_memmove(&tmp, vm, n_ofs);
+ swap_memmove(vd, vd + n_ofs, n_siz);
+ memcpy(vd + n_siz, &tmp, n_ofs);
+ }
+}
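+
+/* E.g. for a 16-byte vector with n_ofs = 3, EXT produces
+ * Zd = { Zn[3..15] : Zm[0..2] }: bytes 3..15 of Zn followed by the
+ * first three bytes of Zm.
+ */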
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d8284678..b32332c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -36,13 +36,13 @@
#include "exec/log.h"
#include "trace-tcg.h"
+#include "translate-a64.h"
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
-static TCGv_i64 cpu_reg(DisasContext *s, int reg);
static const char *regnames[] = {
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -86,13 +86,6 @@ typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
-/* Note that the gvec expanders operate on offsets + sizes. */
-typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
-typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
- uint32_t, uint32_t);
-typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
-
/* initialize TCG globals. */
void a64_translate_init(void)
{
@@ -405,22 +398,13 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
}
}
-static void unallocated_encoding(DisasContext *s)
+void unallocated_encoding(DisasContext *s)
{
/* Unallocated and reserved encodings are uncategorized */
gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
default_exception_el(s));
}
-#define unsupported_encoding(s, insn) \
- do { \
- qemu_log_mask(LOG_UNIMP, \
- "%s:%d: unsupported instruction encoding 0x%08x " \
- "at pc=%016" PRIx64 "\n", \
- __FILE__, __LINE__, insn, s->pc - 4); \
- unallocated_encoding(s); \
- } while (0)
-
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
@@ -438,13 +422,13 @@ static void free_tmp_a64(DisasContext *s)
init_tmp_a64_array(s);
}
-static TCGv_i64 new_tmp_a64(DisasContext *s)
+TCGv_i64 new_tmp_a64(DisasContext *s)
{
assert(s->tmp_a64_count < TMP_A64_MAX);
return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}
-static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
+TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
TCGv_i64 t = new_tmp_a64(s);
tcg_gen_movi_i64(t, 0);
@@ -466,7 +450,7 @@ static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
* to cpu_X[31] and ZR accesses to a temporary which can be discarded.
* This is the point of the _sp forms.
*/
-static TCGv_i64 cpu_reg(DisasContext *s, int reg)
+TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
if (reg == 31) {
return new_tmp_a64_zero(s);
@@ -476,7 +460,7 @@ static TCGv_i64 cpu_reg(DisasContext *s, int reg)
}
/* register access for when 31 == SP */
-static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
+TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
return cpu_X[reg];
}
@@ -485,7 +469,7 @@ static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
* representing the register contents. This TCGv is an auto-freed
* temporary so it need not be explicitly freed, and may be modified.
*/
-static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
+TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
TCGv_i64 v = new_tmp_a64(s);
if (reg != 31) {
@@ -500,7 +484,7 @@ static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
return v;
}
-static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
+TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
TCGv_i64 v = new_tmp_a64(s);
if (sf) {
@@ -511,72 +495,6 @@ static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
return v;
}
-/* We should have at some point before trying to access an FP register
- * done the necessary access check, so assert that
- * (a) we did the check and
- * (b) we didn't then just plough ahead anyway if it failed.
- * Print the instruction pattern in the abort message so we can figure
- * out what we need to fix if a user encounters this problem in the wild.
- */
-static inline void assert_fp_access_checked(DisasContext *s)
-{
-#ifdef CONFIG_DEBUG_TCG
- if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
- fprintf(stderr, "target-arm: FP access check missing for "
- "instruction 0x%08x\n", s->insn);
- abort();
- }
-#endif
-}
-
-/* Return the offset into CPUARMState of an element of specified
- * size, 'element' places in from the least significant end of
- * the FP/vector register Qn.
- */
-static inline int vec_reg_offset(DisasContext *s, int regno,
- int element, TCGMemOp size)
-{
- int offs = 0;
-#ifdef HOST_WORDS_BIGENDIAN
- /* This is complicated slightly because vfp.zregs[n].d[0] is
- * still the low half and vfp.zregs[n].d[1] the high half
- * of the 128 bit vector, even on big endian systems.
- * Calculate the offset assuming a fully bigendian 128 bits,
- * then XOR to account for the order of the two 64 bit halves.
- */
- offs += (16 - ((element + 1) * (1 << size)));
- offs ^= 8;
-#else
- offs += element * (1 << size);
-#endif
- offs += offsetof(CPUARMState, vfp.zregs[regno]);
- assert_fp_access_checked(s);
- return offs;
-}
-
-/* Return the offset info CPUARMState of the "whole" vector register Qn. */
-static inline int vec_full_reg_offset(DisasContext *s, int regno)
-{
- assert_fp_access_checked(s);
- return offsetof(CPUARMState, vfp.zregs[regno]);
-}
-
-/* Return a newly allocated pointer to the vector register. */
-static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
-{
- TCGv_ptr ret = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
- return ret;
-}
-
-/* Return the byte size of the "whole" vector register, VL / 8. */
-static inline int vec_full_reg_size(DisasContext *s)
-{
- /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
- In the meantime this is just the AdvSIMD length of 128. */
- return 128 / 8;
-}
-
/* Return the offset into CPUARMState of a slice (from
* the least significant end) of FP register Qn (ie
* Dn, Sn, Hn or Bn).
@@ -641,7 +559,7 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
}
}
-static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
+void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
unsigned ofs = fp_reg_offset(s, reg, MO_64);
@@ -658,7 +576,7 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
tcg_temp_free_i64(tmp);
}
-static TCGv_ptr get_fpstatus_ptr(bool is_f16)
+TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
int offset;
@@ -1246,14 +1164,14 @@ static inline bool fp_access_check(DisasContext *s)
/* Check that SVE access is enabled. If it is, return true.
* If not, emit code to generate an appropriate exception and return false.
*/
-static inline bool sve_access_check(DisasContext *s)
+bool sve_access_check(DisasContext *s)
{
if (s->sve_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
s->sve_excp_el);
return false;
}
- return true;
+ return fp_access_check(s);
}
/*
@@ -3419,8 +3337,8 @@ static inline uint64_t bitmask64(unsigned int length)
* value (ie should cause a guest UNDEF exception), and true if they are
* valid, in which case the decoded bit pattern is written to result.
*/
-static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
- unsigned int imms, unsigned int immr)
+bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
+ unsigned int imms, unsigned int immr)
{
uint64_t mask;
unsigned e, levels, s, r;
@@ -5650,7 +5568,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
* the range 01....1xx to 10....0xx, and the most significant 4 bits of
* the mantissa; see VFPExpandImm() in the v8 ARM ARM.
*/
-static uint64_t vfp_expand_imm(int size, uint8_t imm8)
+uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
uint64_t imm;
@@ -13758,9 +13676,14 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
s->fp_access_checked = false;
switch (extract32(insn, 25, 4)) {
- case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
+ case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
unallocated_encoding(s);
break;
+ case 0x2:
+ if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
+ unallocated_encoding(s);
+ }
+ break;
case 0x8: case 0x9: /* Data processing - immediate */
disas_data_proc_imm(s, insn);
break;
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
new file mode 100644
index 0000000..dd9c09f
--- /dev/null
+++ b/target/arm/translate-a64.h
@@ -0,0 +1,118 @@
+/*
+ * AArch64 translation, common definitions.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_TRANSLATE_A64_H
+#define TARGET_ARM_TRANSLATE_A64_H
+
+void unallocated_encoding(DisasContext *s);
+
+#define unsupported_encoding(s, insn) \
+ do { \
+ qemu_log_mask(LOG_UNIMP, \
+ "%s:%d: unsupported instruction encoding 0x%08x " \
+ "at pc=%016" PRIx64 "\n", \
+ __FILE__, __LINE__, insn, s->pc - 4); \
+ unallocated_encoding(s); \
+ } while (0)
+
+TCGv_i64 new_tmp_a64(DisasContext *s);
+TCGv_i64 new_tmp_a64_zero(DisasContext *s);
+TCGv_i64 cpu_reg(DisasContext *s, int reg);
+TCGv_i64 cpu_reg_sp(DisasContext *s, int reg);
+TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf);
+TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf);
+void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
+TCGv_ptr get_fpstatus_ptr(bool);
+bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
+ unsigned int imms, unsigned int immr);
+uint64_t vfp_expand_imm(int size, uint8_t imm8);
+bool sve_access_check(DisasContext *s);
+
+/* We should have done the necessary access check at some point before
+ * trying to access an FP register, so assert that
+ * (a) we did the check and
+ * (b) we didn't then just plough ahead anyway if it failed.
+ * Print the instruction pattern in the abort message so we can figure
+ * out what we need to fix if a user encounters this problem in the wild.
+ */
+static inline void assert_fp_access_checked(DisasContext *s)
+{
+#ifdef CONFIG_DEBUG_TCG
+ if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
+ fprintf(stderr, "target-arm: FP access check missing for "
+ "instruction 0x%08x\n", s->insn);
+ abort();
+ }
+#endif
+}
+
+/* Return the offset into CPUARMState of an element of specified
+ * size, 'element' places in from the least significant end of
+ * the FP/vector register Qn.
+ */
+static inline int vec_reg_offset(DisasContext *s, int regno,
+ int element, TCGMemOp size)
+{
+ int offs = 0;
+#ifdef HOST_WORDS_BIGENDIAN
+ /* This is complicated slightly because vfp.zregs[n].d[0] is
+ * still the low half and vfp.zregs[n].d[1] the high half
+ * of the 128 bit vector, even on big endian systems.
+ * Calculate the offset assuming a fully bigendian 128 bits,
+ * then XOR to account for the order of the two 64 bit halves.
+ */
+ offs += (16 - ((element + 1) * (1 << size)));
+ offs ^= 8;
+#else
+ offs += element * (1 << size);
+#endif
+ offs += offsetof(CPUARMState, vfp.zregs[regno]);
+ assert_fp_access_checked(s);
+ return offs;
+}
+
+/* Return the offset into CPUARMState of the "whole" vector register Qn. */
+static inline int vec_full_reg_offset(DisasContext *s, int regno)
+{
+ assert_fp_access_checked(s);
+ return offsetof(CPUARMState, vfp.zregs[regno]);
+}
+
+/* Return a newly allocated pointer to the vector register. */
+static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
+{
+ TCGv_ptr ret = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
+ return ret;
+}
+
+/* Return the byte size of the "whole" vector register, VL / 8. */
+static inline int vec_full_reg_size(DisasContext *s)
+{
+ return s->sve_len;
+}
+
+bool disas_sve(DisasContext *, uint32_t);
+
+/* Note that the gvec expanders operate on offsets + sizes. */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
+ uint32_t, uint32_t);
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+
+#endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
new file mode 100644
index 0000000..c48d4b5
--- /dev/null
+++ b/target/arm/translate-sve.c
@@ -0,0 +1,2070 @@
+/*
+ * AArch64 SVE translation
+ *
+ * Copyright (c) 2018 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "tcg-op.h"
+#include "tcg-op-gvec.h"
+#include "tcg-gvec-desc.h"
+#include "qemu/log.h"
+#include "arm_ldst.h"
+#include "translate.h"
+#include "internals.h"
+#include "exec/helper-proto.h"
+#include "exec/helper-gen.h"
+#include "exec/log.h"
+#include "trace-tcg.h"
+#include "translate-a64.h"
+
+/*
+ * Helpers for extracting complex instruction fields.
+ */
+
+/* See e.g. ASR (immediate, predicated).
+ * Returns -1 for unallocated encoding; diagnose later.
+ */
+static int tszimm_esz(int x)
+{
+ x >>= 3; /* discard imm3 */
+ return 31 - clz32(x);
+}
+
+static int tszimm_shr(int x)
+{
+ return (16 << tszimm_esz(x)) - x;
+}
+
+/* See e.g. LSL (immediate, predicated). */
+static int tszimm_shl(int x)
+{
+ return x - (8 << tszimm_esz(x));
+}
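+
+/* E.g. for byte elements: tszimm_esz(0x9) = 0 (tsz = 1), so a right
+ * shift decodes as tszimm_shr(0x9) = 16 - 9 = 7 and a left shift as
+ * tszimm_shl(0x9) = 9 - 8 = 1; tszimm_esz(0) returns -1, the
+ * unallocated-encoding marker tested by the trans_* functions below.
+ */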
+
+static inline int plus1(int x)
+{
+ return x + 1;
+}
+
+/* The SH bit is in bit 8. Extract the low 8 and shift. */
+static inline int expand_imm_sh8s(int x)
+{
+ return (int8_t)x << (x & 0x100 ? 8 : 0);
+}
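+
+/* E.g. x = 0x125 yields (int8_t)0x25 << 8 = 0x2500, while x = 0x0ff
+ * yields (int8_t)0xff = -1 with no shift.
+ */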
+
+/*
+ * Include the generated decoder.
+ */
+
+#include "decode-sve.inc.c"
+
+/*
+ * Implement all of the translator functions referenced by the decoder.
+ */
+
+/* Return the offset into CPUARMState of the predicate vector register Pn.
+ * Note for this purpose, FFR is P16.
+ */
+static inline int pred_full_reg_offset(DisasContext *s, int regno)
+{
+ return offsetof(CPUARMState, vfp.pregs[regno]);
+}
+
+/* Return the byte size of the whole predicate register, VL / 64. */
+static inline int pred_full_reg_size(DisasContext *s)
+{
+ return s->sve_len >> 3;
+}
+
+/* Round up the size of a register to a size allowed by
+ * the tcg vector infrastructure. Any operation which uses this
+ * size may assume that the bits above pred_full_reg_size are zero,
+ * and must leave them the same way.
+ *
+ * Note that this is not needed for the vector registers as they
+ * are always properly sized for tcg vectors.
+ */
+static int size_for_gvec(int size)
+{
+ if (size <= 8) {
+ return 8;
+ } else {
+ return QEMU_ALIGN_UP(size, 16);
+ }
+}
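+
+/* E.g. the 2-byte predicate of a 128-bit vector is padded to 8 bytes,
+ * and an 18-byte predicate to QEMU_ALIGN_UP(18, 16) = 32 bytes.
+ */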
+
+static int pred_gvec_reg_size(DisasContext *s)
+{
+ return size_for_gvec(pred_full_reg_size(s));
+}
+
+/* Invoke a vector expander on two Zregs. */
+static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
+ int esz, int rd, int rn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), vsz, vsz);
+ }
+ return true;
+}
+
+/* Invoke a vector expander on three Zregs. */
+static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vsz, vsz);
+ }
+ return true;
+}
+
+/* Invoke a vector move on two Zregs. */
+static bool do_mov_z(DisasContext *s, int rd, int rn)
+{
+ return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
+}
+
+/* Initialize a Zreg with replications of a 64-bit immediate. */
+static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
+}
+
+/* Invoke a vector expander on two Pregs. */
+static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
+ int esz, int rd, int rn)
+{
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(esz, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn), psz, psz);
+ }
+ return true;
+}
+
+/* Invoke a vector expander on three Pregs. */
+static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
+{
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(esz, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm), psz, psz);
+ }
+ return true;
+}
+
+/* Invoke a vector operation on four Pregs. */
+static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
+ int rd, int rn, int rm, int rg)
+{
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, rg),
+ psz, psz, gvec_op);
+ }
+ return true;
+}
+
+/* Invoke a vector move on two Pregs. */
+static bool do_mov_p(DisasContext *s, int rd, int rn)
+{
+ return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
+}
+
+/* Set the cpu flags as per a return from an SVE helper. */
+static void do_pred_flags(TCGv_i32 t)
+{
+ tcg_gen_mov_i32(cpu_NF, t);
+ tcg_gen_andi_i32(cpu_ZF, t, 2);
+ tcg_gen_andi_i32(cpu_CF, t, 1);
+ tcg_gen_movi_i32(cpu_VF, 0);
+}
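+
+/* Under QEMU's flag conventions (bit 31 of NF is N, Z is set iff ZF is
+ * zero, CF holds C directly), the helper result t packs N in bit 31,
+ * !Z in bit 1 and C in bit 0, with V always clear.
+ */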
+
+/* Subroutines computing the ARM PredTest pseudofunction. */
+static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_helper_sve_predtest1(t, d, g);
+ do_pred_flags(t);
+ tcg_temp_free_i32(t);
+}
+
+static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
+{
+ TCGv_ptr dptr = tcg_temp_new_ptr();
+ TCGv_ptr gptr = tcg_temp_new_ptr();
+ TCGv_i32 t;
+
+ tcg_gen_addi_ptr(dptr, cpu_env, dofs);
+ tcg_gen_addi_ptr(gptr, cpu_env, gofs);
+ t = tcg_const_i32(words);
+
+ gen_helper_sve_predtest(t, dptr, gptr, t);
+ tcg_temp_free_ptr(dptr);
+ tcg_temp_free_ptr(gptr);
+
+ do_pred_flags(t);
+ tcg_temp_free_i32(t);
+}
+
+/* For each element size, the bits within a predicate word that are active. */
+const uint64_t pred_esz_masks[4] = {
+ 0xffffffffffffffffull, 0x5555555555555555ull,
+ 0x1111111111111111ull, 0x0101010101010101ull
+};
+
+/*
+ *** SVE Logical - Unpredicated Group
+ */
+
+static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
+}
+
+static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ if (a->rn == a->rm) { /* MOV */
+ return do_mov_z(s, a->rd, a->rn);
+ } else {
+ return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
+ }
+}
+
+static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
+}
+
+static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
+}
+
+/*
+ *** SVE Integer Arithmetic - Unpredicated Group
+ */
+
+static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
+}
+
+static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
+}
+
+static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
+}
+
+static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
+}
+
+static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
+}
+
+static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
+}
+
+/*
+ *** SVE Integer Arithmetic - Binary Predicated Group
+ */
+
+static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ pred_full_reg_offset(s, a->pg),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_ZPZZ(NAME, name) \
+static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_4 * const fns[4] = { \
+ gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
+ gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
+ }; \
+ return do_zpzz_ool(s, a, fns[a->esz]); \
+}
+
+DO_ZPZZ(AND, and)
+DO_ZPZZ(EOR, eor)
+DO_ZPZZ(ORR, orr)
+DO_ZPZZ(BIC, bic)
+
+DO_ZPZZ(ADD, add)
+DO_ZPZZ(SUB, sub)
+
+DO_ZPZZ(SMAX, smax)
+DO_ZPZZ(UMAX, umax)
+DO_ZPZZ(SMIN, smin)
+DO_ZPZZ(UMIN, umin)
+DO_ZPZZ(SABD, sabd)
+DO_ZPZZ(UABD, uabd)
+
+DO_ZPZZ(MUL, mul)
+DO_ZPZZ(SMULH, smulh)
+DO_ZPZZ(UMULH, umulh)
+
+DO_ZPZZ(ASR, asr)
+DO_ZPZZ(LSR, lsr)
+DO_ZPZZ(LSL, lsl)
+
+static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_4 * const fns[4] = {
+ NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
+ };
+ return do_zpzz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_4 * const fns[4] = {
+ NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
+ };
+ return do_zpzz_ool(s, a, fns[a->esz]);
+}
+
+#undef DO_ZPZZ
+
+/*
+ *** SVE Integer Arithmetic - Unary Predicated Group
+ */
+
+static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->pg),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_ZPZ(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
+{ \
+ static gen_helper_gvec_3 * const fns[4] = { \
+ gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ return do_zpz_ool(s, a, fns[a->esz]); \
+}
+
+DO_ZPZ(CLS, cls)
+DO_ZPZ(CLZ, clz)
+DO_ZPZ(CNT_zpz, cnt_zpz)
+DO_ZPZ(CNOT, cnot)
+DO_ZPZ(NOT_zpz, not_zpz)
+DO_ZPZ(ABS, abs)
+DO_ZPZ(NEG, neg)
+
+static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL,
+ gen_helper_sve_fabs_h,
+ gen_helper_sve_fabs_s,
+ gen_helper_sve_fabs_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL,
+ gen_helper_sve_fneg_h,
+ gen_helper_sve_fneg_s,
+ gen_helper_sve_fneg_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL,
+ gen_helper_sve_sxtb_h,
+ gen_helper_sve_sxtb_s,
+ gen_helper_sve_sxtb_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL,
+ gen_helper_sve_uxtb_h,
+ gen_helper_sve_uxtb_s,
+ gen_helper_sve_uxtb_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL, NULL,
+ gen_helper_sve_sxth_s,
+ gen_helper_sve_sxth_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL, NULL,
+ gen_helper_sve_uxth_s,
+ gen_helper_sve_uxth_d
+ };
+ return do_zpz_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
+}
+
+static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
+}
+
+#undef DO_ZPZ
+
+/*
+ *** SVE Integer Reduction Group
+ */
+
+typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
+static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
+ gen_helper_gvec_reduc *fn)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr t_zn, t_pg;
+ TCGv_i32 desc;
+ TCGv_i64 temp;
+
+ if (fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+ temp = tcg_temp_new_i64();
+ t_zn = tcg_temp_new_ptr();
+ t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
+ fn(temp, t_zn, t_pg, desc);
+ tcg_temp_free_ptr(t_zn);
+ tcg_temp_free_ptr(t_pg);
+ tcg_temp_free_i32(desc);
+
+ write_fp_dreg(s, a->rd, temp);
+ tcg_temp_free_i64(temp);
+ return true;
+}
+
+#define DO_VPZ(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
+{ \
+ static gen_helper_gvec_reduc * const fns[4] = { \
+ gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ return do_vpz_ool(s, a, fns[a->esz]); \
+}
+
+DO_VPZ(ORV, orv)
+DO_VPZ(ANDV, andv)
+DO_VPZ(EORV, eorv)
+
+DO_VPZ(UADDV, uaddv)
+DO_VPZ(SMAXV, smaxv)
+DO_VPZ(UMAXV, umaxv)
+DO_VPZ(SMINV, sminv)
+DO_VPZ(UMINV, uminv)
+
+static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_reduc * const fns[4] = {
+ gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
+ gen_helper_sve_saddv_s, NULL
+ };
+ return do_vpz_ool(s, a, fns[a->esz]);
+}
+
+#undef DO_VPZ
+
+/*
+ *** SVE Shift by Immediate - Predicated Group
+ */
+
+/* Store zero into every active element of Zd. We will use this for two-
+ * and three-operand predicated instructions for which logic dictates a
+ * zero result.
+ */
+static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
+{
+ static gen_helper_gvec_2 * const fns[4] = {
+ gen_helper_sve_clr_b, gen_helper_sve_clr_h,
+ gen_helper_sve_clr_s, gen_helper_sve_clr_d,
+ };
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, 0, fns[esz]);
+ }
+ return true;
+}
+
+static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
+ gen_helper_gvec_3 *fn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->pg),
+ vsz, vsz, a->imm, fn);
+ }
+ return true;
+}
+
+static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
+ gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
+ };
+ if (a->esz < 0) {
+ /* Invalid tsz encoding -- see tszimm_esz. */
+ return false;
+ }
+ /* Shift by element size is architecturally valid. For
+ arithmetic right-shift, it's the same as by one less. */
+ a->imm = MIN(a->imm, (8 << a->esz) - 1);
+ return do_zpzi_ool(s, a, fns[a->esz]);
+}
+
+static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
+ gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
+ };
+ if (a->esz < 0) {
+ return false;
+ }
+ /* Shift by element size is architecturally valid.
+ For logical shifts, it is a zeroing operation. */
+ if (a->imm >= (8 << a->esz)) {
+ return do_clr_zp(s, a->rd, a->pg, a->esz);
+ } else {
+ return do_zpzi_ool(s, a, fns[a->esz]);
+ }
+}
+
+static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
+ gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
+ };
+ if (a->esz < 0) {
+ return false;
+ }
+ /* Shift by element size is architecturally valid.
+ For logical shifts, it is a zeroing operation. */
+ if (a->imm >= (8 << a->esz)) {
+ return do_clr_zp(s, a->rd, a->pg, a->esz);
+ } else {
+ return do_zpzi_ool(s, a, fns[a->esz]);
+ }
+}
+
+static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
+ gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
+ };
+ if (a->esz < 0) {
+ return false;
+ }
+ /* Shift by element size is architecturally valid. For arithmetic
+ right shift for division, it is a zeroing operation. */
+ if (a->imm >= (8 << a->esz)) {
+ return do_clr_zp(s, a->rd, a->pg, a->esz);
+ } else {
+ return do_zpzi_ool(s, a, fns[a->esz]);
+ }
+}
+
+/*
+ *** SVE Bitwise Shift - Predicated Group
+ */
+
+#define DO_ZPZW(NAME, name) \
+static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_4 * const fns[3] = { \
+ gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
+ gen_helper_sve_##name##_zpzw_s, \
+ }; \
+ if (a->esz < 0 || a->esz >= 3) { \
+ return false; \
+ } \
+ return do_zpzz_ool(s, a, fns[a->esz]); \
+}
+
+DO_ZPZW(ASR, asr)
+DO_ZPZW(LSR, lsr)
+DO_ZPZW(LSL, lsl)
+
+#undef DO_ZPZW
+
+/*
+ *** SVE Bitwise Shift - Unpredicated Group
+ */
+
+static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
+ void (*gvec_fn)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ if (a->esz < 0) {
+ /* Invalid tsz encoding -- see tszimm_esz. */
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ /* Shift by element size is architecturally valid. For
+ arithmetic right-shift, it's the same as by one less.
+ Otherwise it is a zeroing operation. */
+ if (a->imm >= 8 << a->esz) {
+ if (asr) {
+ a->imm = (8 << a->esz) - 1;
+ } else {
+ do_dupi_z(s, a->rd, 0);
+ return true;
+ }
+ }
+ gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
+ }
+ return true;
+}
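+
+/* E.g. for byte elements, an unpredicated ASR by 8 is performed as a
+ * shift by 7 (the result is the same for arithmetic shifts), while LSR
+ * or LSL by 8 zeroes the destination via do_dupi_z.
+ */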
+
+static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+ return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
+}
+
+static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+ return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
+}
+
+static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
+{
+ return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
+}
+
+static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_ZZW(NAME, name) \
+static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
+ uint32_t insn) \
+{ \
+ static gen_helper_gvec_3 * const fns[4] = { \
+ gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
+ gen_helper_sve_##name##_zzw_s, NULL \
+ }; \
+ return do_zzw_ool(s, a, fns[a->esz]); \
+}
+
+DO_ZZW(ASR, asr)
+DO_ZZW(LSR, lsr)
+DO_ZZW(LSL, lsl)
+
+#undef DO_ZZW
+
+/*
+ *** SVE Integer Multiply-Add Group
+ */
+
+static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
+ gen_helper_gvec_5 *fn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->ra),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ pred_full_reg_offset(s, a->pg),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_ZPZZZ(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
+{ \
+ static gen_helper_gvec_5 * const fns[4] = { \
+ gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ return do_zpzzz_ool(s, a, fns[a->esz]); \
+}
+
+DO_ZPZZZ(MLA, mla)
+DO_ZPZZZ(MLS, mls)
+
+#undef DO_ZPZZZ
+
+/*
+ *** SVE Index Generation Group
+ */
+
+static void do_index(DisasContext *s, int esz, int rd,
+ TCGv_i64 start, TCGv_i64 incr)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+ TCGv_ptr t_zd = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
+ if (esz == 3) {
+ gen_helper_sve_index_d(t_zd, start, incr, desc);
+ } else {
+ typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
+ static index_fn * const fns[3] = {
+ gen_helper_sve_index_b,
+ gen_helper_sve_index_h,
+ gen_helper_sve_index_s,
+ };
+ TCGv_i32 s32 = tcg_temp_new_i32();
+ TCGv_i32 i32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(s32, start);
+ tcg_gen_extrl_i64_i32(i32, incr);
+ fns[esz](t_zd, s32, i32, desc);
+
+ tcg_temp_free_i32(s32);
+ tcg_temp_free_i32(i32);
+ }
+ tcg_temp_free_ptr(t_zd);
+ tcg_temp_free_i32(desc);
+}
+
+static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 start = tcg_const_i64(a->imm1);
+ TCGv_i64 incr = tcg_const_i64(a->imm2);
+ do_index(s, a->esz, a->rd, start, incr);
+ tcg_temp_free_i64(start);
+ tcg_temp_free_i64(incr);
+ }
+ return true;
+}
+
+static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 start = tcg_const_i64(a->imm);
+ TCGv_i64 incr = cpu_reg(s, a->rm);
+ do_index(s, a->esz, a->rd, start, incr);
+ tcg_temp_free_i64(start);
+ }
+ return true;
+}
+
+static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 start = cpu_reg(s, a->rn);
+ TCGv_i64 incr = tcg_const_i64(a->imm);
+ do_index(s, a->esz, a->rd, start, incr);
+ tcg_temp_free_i64(incr);
+ }
+ return true;
+}
+
+static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 start = cpu_reg(s, a->rn);
+ TCGv_i64 incr = cpu_reg(s, a->rm);
+ do_index(s, a->esz, a->rd, start, incr);
+ }
+ return true;
+}
+
+/*
+ *** SVE Stack Allocation Group
+ */
+
+static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
+    }
+    return true;
+}
+
+static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
+    }
+    return true;
+}
+
+static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
+{
+    if (sve_access_check(s)) {
+        TCGv_i64 reg = cpu_reg(s, a->rd);
+        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
+    }
+    return true;
+}
+
+/*
+ *** SVE Compute Vector Address Group
+ */
+
+static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, a->imm, fn);
+ }
+ return true;
+}
+
+static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
+{
+ return do_adr(s, a, gen_helper_sve_adr_p32);
+}
+
+static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
+{
+ return do_adr(s, a, gen_helper_sve_adr_p64);
+}
+
+static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
+{
+ return do_adr(s, a, gen_helper_sve_adr_s32);
+}
+
+static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
+{
+ return do_adr(s, a, gen_helper_sve_adr_u32);
+}
+
+/*
+ *** SVE Integer Misc - Unpredicated Group
+ */
+
+static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_2 * const fns[4] = {
+ NULL,
+ gen_helper_sve_fexpa_h,
+ gen_helper_sve_fexpa_s,
+ gen_helper_sve_fexpa_d,
+ };
+ if (a->esz == 0) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vsz, vsz, 0, fns[a->esz]);
+ }
+ return true;
+}
+
+static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ NULL,
+ gen_helper_sve_ftssel_h,
+ gen_helper_sve_ftssel_s,
+ gen_helper_sve_ftssel_d,
+ };
+ if (a->esz == 0) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, 0, fns[a->esz]);
+ }
+ return true;
+}
+
+/*
+ *** SVE Predicate Logical Operations Group
+ */
+
+static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
+ const GVecGen4 *gvec_op)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned psz = pred_gvec_reg_size(s);
+ int dofs = pred_full_reg_offset(s, a->rd);
+ int nofs = pred_full_reg_offset(s, a->rn);
+ int mofs = pred_full_reg_offset(s, a->rm);
+ int gofs = pred_full_reg_offset(s, a->pg);
+
+ if (psz == 8) {
+ /* Do the operation and the flags generation in temps. */
+ TCGv_i64 pd = tcg_temp_new_i64();
+ TCGv_i64 pn = tcg_temp_new_i64();
+ TCGv_i64 pm = tcg_temp_new_i64();
+ TCGv_i64 pg = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(pn, cpu_env, nofs);
+ tcg_gen_ld_i64(pm, cpu_env, mofs);
+ tcg_gen_ld_i64(pg, cpu_env, gofs);
+
+ gvec_op->fni8(pd, pn, pm, pg);
+ tcg_gen_st_i64(pd, cpu_env, dofs);
+
+ do_predtest1(pd, pg);
+
+ tcg_temp_free_i64(pd);
+ tcg_temp_free_i64(pn);
+ tcg_temp_free_i64(pm);
+ tcg_temp_free_i64(pg);
+ } else {
+        /* The operation and the flags generation are large. The
+         * computation of the flags depends on the original contents
+         * of the guarding predicate. If the destination overwrites
+         * the guarding predicate, then the easiest way to get this
+         * right is to save a copy.
+ */
+ int tofs = gofs;
+ if (a->rd == a->pg) {
+ tofs = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
+ }
+
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ do_predtest(s, dofs, tofs, psz / 8);
+ }
+ return true;
+}
+
+static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_and_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_and_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_and_pg_i64,
+ .fniv = gen_and_pg_vec,
+ .fno = gen_helper_sve_and_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else if (a->rn == a->rm) {
+ if (a->pg == a->rn) {
+ return do_mov_p(s, a->rd, a->rn);
+ } else {
+ return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
+ }
+ } else if (a->pg == a->rn || a->pg == a->rm) {
+ return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_andc_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_andc_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bic_pg_i64,
+ .fniv = gen_bic_pg_vec,
+ .fno = gen_helper_sve_bic_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else if (a->pg == a->rn) {
+ return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_xor_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_xor_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_eor_pg_i64,
+ .fniv = gen_eor_pg_vec,
+ .fno = gen_helper_sve_eor_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_and_i64(pn, pn, pg);
+ tcg_gen_andc_i64(pm, pm, pg);
+ tcg_gen_or_i64(pd, pn, pm);
+}
+
+static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_and_vec(vece, pn, pn, pg);
+ tcg_gen_andc_vec(vece, pm, pm, pg);
+ tcg_gen_or_vec(vece, pd, pn, pm);
+}
+
+static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_sel_pg_i64,
+ .fniv = gen_sel_pg_vec,
+ .fno = gen_helper_sve_sel_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return false;
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_or_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_or_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_orr_pg_i64,
+ .fniv = gen_orr_pg_vec,
+ .fno = gen_helper_sve_orr_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else if (a->pg == a->rn && a->rn == a->rm) {
+ return do_mov_p(s, a->rd, a->rn);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_orc_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_orc_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_orn_pg_i64,
+ .fniv = gen_orn_pg_vec,
+ .fno = gen_helper_sve_orn_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_or_i64(pd, pn, pm);
+ tcg_gen_andc_i64(pd, pg, pd);
+}
+
+static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_or_vec(vece, pd, pn, pm);
+ tcg_gen_andc_vec(vece, pd, pg, pd);
+}
+
+static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nor_pg_i64,
+ .fniv = gen_nor_pg_vec,
+ .fno = gen_helper_sve_nor_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_and_i64(pd, pn, pm);
+ tcg_gen_andc_i64(pd, pg, pd);
+}
+
+static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_and_vec(vece, pd, pn, pm);
+ tcg_gen_andc_vec(vece, pd, pg, pd);
+}
+
+static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nand_pg_i64,
+ .fniv = gen_nand_pg_vec,
+ .fno = gen_helper_sve_nand_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (a->s) {
+ return do_pppp_flags(s, a, &op);
+ } else {
+ return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
+ }
+}
+
+/*
+ *** SVE Predicate Misc Group
+ */
+
+static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ int nofs = pred_full_reg_offset(s, a->rn);
+ int gofs = pred_full_reg_offset(s, a->pg);
+ int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
+
+ if (words == 1) {
+ TCGv_i64 pn = tcg_temp_new_i64();
+ TCGv_i64 pg = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(pn, cpu_env, nofs);
+ tcg_gen_ld_i64(pg, cpu_env, gofs);
+ do_predtest1(pn, pg);
+
+ tcg_temp_free_i64(pn);
+ tcg_temp_free_i64(pg);
+ } else {
+ do_predtest(s, nofs, gofs, words);
+ }
+ }
+ return true;
+}
+
+/* See the ARM pseudocode DecodePredCount. */
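+/* For example, with a 256-bit vector and esz = MO_8 there are 32
+ * elements: POW2 yields 32, VL7 yields 7, VL64 yields 0 (the bound
+ * exceeds the vector length), MUL3 yields 30 and ALL yields 32.
+ */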
+static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
+{
+ unsigned elements = fullsz >> esz;
+ unsigned bound;
+
+ switch (pattern) {
+ case 0x0: /* POW2 */
+ return pow2floor(elements);
+ case 0x1: /* VL1 */
+ case 0x2: /* VL2 */
+ case 0x3: /* VL3 */
+ case 0x4: /* VL4 */
+ case 0x5: /* VL5 */
+ case 0x6: /* VL6 */
+ case 0x7: /* VL7 */
+ case 0x8: /* VL8 */
+ bound = pattern;
+ break;
+ case 0x9: /* VL16 */
+ case 0xa: /* VL32 */
+ case 0xb: /* VL64 */
+ case 0xc: /* VL128 */
+ case 0xd: /* VL256 */
+ bound = 16 << (pattern - 9);
+ break;
+ case 0x1d: /* MUL4 */
+ return elements - elements % 4;
+ case 0x1e: /* MUL3 */
+ return elements - elements % 3;
+ case 0x1f: /* ALL */
+ return elements;
+ default: /* #uimm5 */
+ return 0;
+ }
+ return elements >= bound ? bound : 0;
+}
+
+/* This handles all of the predicate initialization instructions:
+ * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
+ * so that decode_pred_count returns 0. For SETFFR, we will have
+ * set RD == 16 == FFR.
+ */
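+/* For example, PTRUE p0.h, vl3 on a 256-bit vector: esz = 1, so
+ * numelem = 3 and setsz = 6 bits. word = pred_esz_masks[1] =
+ * 0x5555555555555555, and lastword keeps only its low six bits,
+ * 0b010101, i.e. one predicate bit per 16-bit element for three
+ * elements.
+ */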
+static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned ofs = pred_full_reg_offset(s, rd);
+ unsigned numelem, setsz, i;
+ uint64_t word, lastword;
+ TCGv_i64 t;
+
+ numelem = decode_pred_count(fullsz, pat, esz);
+
+    /* Determine what we must store into each bit, and how many bits
+     * we must set.
+     */
+ if (numelem == 0) {
+ lastword = word = 0;
+ setsz = fullsz;
+ } else {
+ setsz = numelem << esz;
+ lastword = word = pred_esz_masks[esz];
+ if (setsz % 64) {
+ lastword &= ~(-1ull << (setsz % 64));
+ }
+ }
+
+ t = tcg_temp_new_i64();
+ if (fullsz <= 64) {
+ tcg_gen_movi_i64(t, lastword);
+ tcg_gen_st_i64(t, cpu_env, ofs);
+ goto done;
+ }
+
+ if (word == lastword) {
+ unsigned maxsz = size_for_gvec(fullsz / 8);
+ unsigned oprsz = size_for_gvec(setsz / 8);
+
+ if (oprsz * 8 == setsz) {
+ tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
+ goto done;
+ }
+ if (oprsz * 8 == setsz + 8) {
+ tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
+ tcg_gen_movi_i64(t, 0);
+ tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
+ goto done;
+ }
+ }
+
+ setsz /= 8;
+ fullsz /= 8;
+
+ tcg_gen_movi_i64(t, word);
+ for (i = 0; i < setsz; i += 8) {
+ tcg_gen_st_i64(t, cpu_env, ofs + i);
+ }
+ if (lastword != word) {
+ tcg_gen_movi_i64(t, lastword);
+ tcg_gen_st_i64(t, cpu_env, ofs + i);
+ i += 8;
+ }
+ if (i < fullsz) {
+ tcg_gen_movi_i64(t, 0);
+ for (; i < fullsz; i += 8) {
+ tcg_gen_st_i64(t, cpu_env, ofs + i);
+ }
+ }
+
+ done:
+ tcg_temp_free_i64(t);
+
+ /* PTRUES */
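+    /* The result is tested against itself as the governing mask, so
+     * the flags are compile-time constants: any nonzero result has
+     * both a first and a last active element (N set, C clear), while
+     * a zero result sets Z and C.
+     */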
+ if (setflag) {
+ tcg_gen_movi_i32(cpu_NF, -(word != 0));
+ tcg_gen_movi_i32(cpu_CF, word == 0);
+ tcg_gen_movi_i32(cpu_VF, 0);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ }
+ return true;
+}
+
+static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
+{
+ return do_predset(s, a->esz, a->rd, a->pat, a->s);
+}
+
+static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
+{
+ /* Note pat == 31 is #all, to set all elements. */
+ return do_predset(s, 0, FFR_PRED_NUM, 31, false);
+}
+
+static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
+{
+ /* Note pat == 32 is #unimp, to set no elements. */
+ return do_predset(s, 0, a->rd, 32, false);
+}
+
+static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
+{
+    /* The path through do_pppp_flags is complicated enough that we
+     * prefer not to duplicate it. Frob the arguments into the form
+     * of a predicated AND.
+ */
+ arg_rprr_s alt_a = {
+ .rd = a->rd, .pg = a->pg, .s = a->s,
+ .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
+ };
+ return trans_AND_pppp(s, &alt_a, insn);
+}
+
+static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
+{
+ return do_mov_p(s, a->rd, FFR_PRED_NUM);
+}
+
+static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
+{
+ return do_mov_p(s, FFR_PRED_NUM, a->rn);
+}
+
+static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
+ void (*gen_fn)(TCGv_i32, TCGv_ptr,
+ TCGv_ptr, TCGv_i32))
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ TCGv_ptr t_pd = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+ TCGv_i32 t;
+ unsigned desc;
+
+ desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
+ desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+
+ tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
+ t = tcg_const_i32(desc);
+
+ gen_fn(t, t_pd, t_pg, t);
+ tcg_temp_free_ptr(t_pd);
+ tcg_temp_free_ptr(t_pg);
+
+ do_pred_flags(t);
+ tcg_temp_free_i32(t);
+ return true;
+}
+
+static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+ return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
+}
+
+static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
+{
+ return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
+}
+
+/*
+ *** SVE Element Count Group
+ */
+
+/* Perform an inline saturating addition of a 32-bit value within
+ * a 64-bit register. The second operand is known to be positive,
+ * which halves the comparisons we must perform to bound the result.
+ */
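+/* Concretely: with VAL >= 0, an unsigned increment can only cross
+ * UINT32_MAX and an unsigned decrement only 0; likewise a signed
+ * increment can only cross INT32_MAX and a signed decrement only
+ * INT32_MIN, so each case needs a single comparison and bound.
+ */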
+static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
+{
+ int64_t ibound;
+ TCGv_i64 bound;
+ TCGCond cond;
+
+ /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
+ if (u) {
+ tcg_gen_ext32u_i64(reg, reg);
+ } else {
+ tcg_gen_ext32s_i64(reg, reg);
+ }
+ if (d) {
+ tcg_gen_sub_i64(reg, reg, val);
+ ibound = (u ? 0 : INT32_MIN);
+ cond = TCG_COND_LT;
+ } else {
+ tcg_gen_add_i64(reg, reg, val);
+ ibound = (u ? UINT32_MAX : INT32_MAX);
+ cond = TCG_COND_GT;
+ }
+ bound = tcg_const_i64(ibound);
+ tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
+ tcg_temp_free_i64(bound);
+}
+
+/* Similarly with 64-bit values. */
+static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2;
+
+ if (u) {
+ if (d) {
+ tcg_gen_sub_i64(t0, reg, val);
+ tcg_gen_movi_i64(t1, 0);
+ tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
+ } else {
+ tcg_gen_add_i64(t0, reg, val);
+ tcg_gen_movi_i64(t1, -1);
+ tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
+ }
+ } else {
+ if (d) {
+ /* Detect signed overflow for subtraction. */
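+            /* Overflow occurred iff the operands had opposite signs
+             * and the result's sign differs from that of REG: the
+             * sign bit of (reg ^ val) & (reg ^ (reg - val)), left in
+             * t0 below.
+             */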
+ tcg_gen_xor_i64(t0, reg, val);
+ tcg_gen_sub_i64(t1, reg, val);
+            tcg_gen_xor_i64(reg, reg, t1);
+ tcg_gen_and_i64(t0, t0, reg);
+
+ /* Bound the result. */
+ tcg_gen_movi_i64(reg, INT64_MIN);
+ t2 = tcg_const_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
+ } else {
+ /* Detect signed overflow for addition. */
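+            /* Overflow occurred iff the operands had the same sign
+             * and the sum's sign differs from it: the sign bit of
+             * ((reg + val) ^ val) & ~(reg ^ val), left in t0 below.
+             */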
+ tcg_gen_xor_i64(t0, reg, val);
+ tcg_gen_add_i64(reg, reg, val);
+ tcg_gen_xor_i64(t1, reg, val);
+ tcg_gen_andc_i64(t0, t1, t0);
+
+ /* Bound the result. */
+ tcg_gen_movi_i64(t1, INT64_MAX);
+ t2 = tcg_const_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
+ }
+ tcg_temp_free_i64(t2);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+/* Similarly with a vector and a scalar operand. */
+static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
+ TCGv_i64 val, bool u, bool d)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr dptr, nptr;
+ TCGv_i32 t32, desc;
+ TCGv_i64 t64;
+
+ dptr = tcg_temp_new_ptr();
+ nptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
+ tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
+ desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+
+ switch (esz) {
+ case MO_8:
+ t32 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t32, val);
+ if (d) {
+ tcg_gen_neg_i32(t32, t32);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
+ } else {
+ gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
+ }
+ tcg_temp_free_i32(t32);
+ break;
+
+ case MO_16:
+ t32 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t32, val);
+ if (d) {
+ tcg_gen_neg_i32(t32, t32);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
+ } else {
+ gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
+ }
+ tcg_temp_free_i32(t32);
+ break;
+
+ case MO_32:
+ t64 = tcg_temp_new_i64();
+ if (d) {
+ tcg_gen_neg_i64(t64, val);
+ } else {
+ tcg_gen_mov_i64(t64, val);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
+ } else {
+ gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
+ }
+ tcg_temp_free_i64(t64);
+ break;
+
+ case MO_64:
+ if (u) {
+ if (d) {
+ gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
+ } else {
+ gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
+ }
+ } else if (d) {
+ t64 = tcg_temp_new_i64();
+ tcg_gen_neg_i64(t64, val);
+ gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
+ tcg_temp_free_i64(t64);
+ } else {
+ gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_temp_free_ptr(dptr);
+ tcg_temp_free_ptr(nptr);
+ tcg_temp_free_i32(desc);
+}
+
+static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
+ }
+ return true;
+}
+
+static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm * (a->d ? -1 : 1);
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ tcg_gen_addi_i64(reg, reg, inc);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
+ uint32_t insn)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
+ if (inc == 0) {
+ if (a->u) {
+ tcg_gen_ext32u_i64(reg, reg);
+ } else {
+ tcg_gen_ext32s_i64(reg, reg);
+ }
+ } else {
+ TCGv_i64 t = tcg_const_i64(inc);
+ do_sat_addsub_32(reg, t, a->u, a->d);
+ tcg_temp_free_i64(t);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
+ uint32_t insn)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ if (inc != 0) {
+ TCGv_i64 t = tcg_const_i64(inc);
+ do_sat_addsub_64(reg, t, a->u, a->d);
+ tcg_temp_free_i64(t);
+ }
+ return true;
+}
+
+static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
+{
+ if (a->esz == 0) {
+ return false;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+
+ if (inc != 0) {
+ if (sve_access_check(s)) {
+ TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
+ tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ t, fullsz, fullsz);
+ tcg_temp_free_i64(t);
+ }
+ } else {
+ do_mov_z(s, a->rd, a->rn);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
+ uint32_t insn)
+{
+ if (a->esz == 0) {
+ return false;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+
+ if (inc != 0) {
+ if (sve_access_check(s)) {
+ TCGv_i64 t = tcg_const_i64(inc);
+ do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
+ tcg_temp_free_i64(t);
+ }
+ } else {
+ do_mov_z(s, a->rd, a->rn);
+ }
+ return true;
+}
+
+/*
+ *** SVE Bitwise Immediate Group
+ */
+
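+/* These instructions reuse the AArch64 logical-immediate encoding:
+ * a->dbm packs the N, immr and imms fields, which decode to a
+ * repeating bit mask; encodings that produce no valid mask are
+ * rejected here.
+ */
+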
+static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
+{
+ uint64_t imm;
+ if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
+ extract32(a->dbm, 0, 6),
+ extract32(a->dbm, 6, 6))) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
+ }
+ return true;
+}
+
+static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
+{
+ return do_zz_dbm(s, a, tcg_gen_gvec_andi);
+}
+
+static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
+{
+ return do_zz_dbm(s, a, tcg_gen_gvec_ori);
+}
+
+static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
+{
+ return do_zz_dbm(s, a, tcg_gen_gvec_xori);
+}
+
+static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
+{
+ uint64_t imm;
+ if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
+ extract32(a->dbm, 0, 6),
+ extract32(a->dbm, 6, 6))) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_dupi_z(s, a->rd, imm);
+ }
+ return true;
+}
+
+/*
+ *** SVE Integer Wide Immediate - Predicated Group
+ */
+
+/* Implement all merging copies. This is used for CPY (immediate),
+ * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
+ */
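+/* "Merging" means that inactive elements are taken unchanged from Zn
+ * (for these copies, the destination itself) while active elements
+ * receive VAL; the zeroing form is handled separately in
+ * trans_CPY_z_i.
+ */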
+static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
+ TCGv_i64 val)
+{
+ typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+ static gen_cpy * const fns[4] = {
+ gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
+ gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+ TCGv_ptr t_zd = tcg_temp_new_ptr();
+ TCGv_ptr t_zn = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
+ tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
+ tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
+
+ fns[esz](t_zd, t_zn, t_pg, val, desc);
+
+ tcg_temp_free_ptr(t_zd);
+ tcg_temp_free_ptr(t_zn);
+ tcg_temp_free_ptr(t_pg);
+ tcg_temp_free_i32(desc);
+}
+
+static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
+{
+ if (a->esz == 0) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ /* Decode the VFP immediate. */
+ uint64_t imm = vfp_expand_imm(a->esz, a->imm);
+ TCGv_i64 t_imm = tcg_const_i64(imm);
+ do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
+ tcg_temp_free_i64(t_imm);
+ }
+ return true;
+}
+
+static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
+{
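+    /* Bit 13 of the insn is the sh field: an immediate shifted left
+     * by 8 cannot be encoded for byte elements.
+     */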
+ if (a->esz == 0 && extract32(insn, 13, 1)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 t_imm = tcg_const_i64(a->imm);
+ do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
+ tcg_temp_free_i64(t_imm);
+ }
+ return true;
+}
+
+static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
+{
+ static gen_helper_gvec_2i * const fns[4] = {
+ gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
+ gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
+ };
+
+ if (a->esz == 0 && extract32(insn, 13, 1)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i64 t_imm = tcg_const_i64(a->imm);
+ tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ t_imm, vsz, vsz, 0, fns[a->esz]);
+ tcg_temp_free_i64(t_imm);
+ }
+ return true;
+}
+
+/*
+ *** SVE Permute Extract Group
+ */
+
+static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
+ unsigned n_siz = vsz - n_ofs;
+ unsigned d = vec_full_reg_offset(s, a->rd);
+ unsigned n = vec_full_reg_offset(s, a->rn);
+ unsigned m = vec_full_reg_offset(s, a->rm);
+
+ /* Use host vector move insns if we have appropriate sizes
+ * and no unfortunate overlap.
+ */
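+    /* E.g. with vsz == 32 and imm == 16, bytes 16..31 of Zn become
+     * bytes 0..15 of the result and bytes 0..15 of Zm become bytes
+     * 16..31. The (d != n || n_siz <= n_ofs) test ensures the first
+     * move cannot clobber Zn bytes that it has yet to read.
+     */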
+ if (m != d
+ && n_ofs == size_for_gvec(n_ofs)
+ && n_siz == size_for_gvec(n_siz)
+ && (d != n || n_siz <= n_ofs)) {
+ tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
+ if (n_ofs != 0) {
+ tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
+ }
+ } else {
+ tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
+ }
+ return true;
+}
+
+/*
+ *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
+ */
+
+/* Subroutine loading a vector register at VOFS of LEN bytes.
+ * The load should begin at the address Rn + IMM.
+ */
+static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
+ int rn, int imm)
+{
+ uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
+ uint32_t len_remain = len % 8;
+ uint32_t nparts = len / 8 + ctpop8(len_remain);
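+    /* len is a multiple of 2 (predicates) or 16 (vectors), so
+     * len_remain is 0, 2, 4 or 6; ctpop8 counts one extra part for
+     * each power-of-two piece of the remainder (6 bytes -> a 4-byte
+     * and a 2-byte load).
+     */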
+ int midx = get_mem_index(s);
+ TCGv_i64 addr, t0, t1;
+
+ addr = tcg_temp_new_i64();
+ t0 = tcg_temp_new_i64();
+
+ /* Note that unpredicated load/store of vector/predicate registers
+ * are defined as a stream of bytes, which equates to little-endian
+     * operations on larger quantities. There is no nice way to force
+     * a little-endian load out of line for aarch64_be-linux-user.
+ *
+ * Attempt to keep code expansion to a minimum by limiting the
+ * amount of unrolling done.
+ */
+ if (nparts <= 4) {
+ int i;
+
+ for (i = 0; i < len_align; i += 8) {
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
+ tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+ tcg_gen_st_i64(t0, cpu_env, vofs + i);
+ }
+ } else {
+ TCGLabel *loop = gen_new_label();
+ TCGv_ptr tp, i = tcg_const_local_ptr(0);
+
+ gen_set_label(loop);
+
+ /* Minimize the number of local temps that must be re-read from
+ * the stack each iteration. Instead, re-compute values other
+ * than the loop counter.
+ */
+ tp = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(tp, i, imm);
+ tcg_gen_extu_ptr_i64(addr, tp);
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
+
+ tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+
+ tcg_gen_add_ptr(tp, cpu_env, i);
+ tcg_gen_addi_ptr(i, i, 8);
+ tcg_gen_st_i64(t0, tp, vofs);
+ tcg_temp_free_ptr(tp);
+
+ tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ tcg_temp_free_ptr(i);
+ }
+
+ /* Predicate register loads can be any multiple of 2.
+ * Note that we still store the entire 64-bit unit into cpu_env.
+ */
+ if (len_remain) {
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
+
+ switch (len_remain) {
+ case 2:
+ case 4:
+ case 8:
+ tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
+ break;
+
+ case 6:
+ t1 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
+ tcg_gen_addi_i64(addr, addr, 4);
+ tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
+ tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
+ tcg_temp_free_i64(t1);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
+ }
+ tcg_temp_free_i64(addr);
+ tcg_temp_free_i64(t0);
+}
+
+static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ int size = vec_full_reg_size(s);
+ int off = vec_full_reg_offset(s, a->rd);
+ do_ldr(s, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}
+
+static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
+{
+ if (sve_access_check(s)) {
+ int size = pred_full_reg_size(s);
+ int off = pred_full_reg_offset(s, a->rd);
+ do_ldr(s, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}