diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-06-16 13:36:31 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-06-16 13:36:31 +0100 |
commit | cb8278cd997f4776b5a38fce7859bbe3b2d8d139 (patch) | |
tree | cf41c46b445f6ca9bfeb4c078a14d2ffbd98b159 | |
parent | 6675a653d2e57ab09c32c0ea7b44a1d6c40a7f58 (diff) | |
parent | 64b397417a26509bcdff44ab94356a35c7901c79 (diff) | |
download | qemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.zip qemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.tar.gz qemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.tar.bz2 |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200616' into staging
* hw: arm: Set vendor property for IMX SDHCI emulations
* sd: sdhci: Implement basic vendor specific register support
* hw/net/imx_fec: Convert debug fprintf() to trace events
* target/arm/cpu: adjust virtual time for all KVM arm cpus
* Implement configurable descriptor size in ftgmac100
* hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
* target/arm: More Neon decodetree conversion work
# gpg: Signature made Tue 16 Jun 2020 10:56:10 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20200616: (23 commits)
hw: arm: Set vendor property for IMX SDHCI emulations
sd: sdhci: Implement basic vendor specific register support
hw/net/imx_fec: Convert debug fprintf() to trace events
target/arm/cpu: adjust virtual time for all KVM arm cpus
Implement configurable descriptor size in ftgmac100
hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers
target/arm: Convert Neon VDUP (scalar) to decodetree
target/arm: Convert Neon VTBL, VTBX to decodetree
target/arm: Convert Neon VEXT to decodetree
target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree
target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree
target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree
target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree
target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree
target/arm: Add missing TCG temp free in do_2shift_env_64()
target/arm: Add 'static' and 'const' annotations to VSHLL function arrays
target/arm: Convert Neon 3-reg-diff polynomial VMULL
target/arm: Convert Neon 3-reg-diff saturating doubling multiplies
target/arm: Convert Neon 3-reg-diff long multiplies
target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
# Conflicts:
# hw/arm/fsl-imx25.c
# hw/arm/fsl-imx6.c
# hw/arm/fsl-imx6ul.c
# hw/arm/fsl-imx7.c
-rw-r--r-- | hw/arm/fsl-imx25.c | 6 | ||||
-rw-r--r-- | hw/arm/fsl-imx6.c | 6 | ||||
-rw-r--r-- | hw/arm/fsl-imx6ul.c | 2 | ||||
-rw-r--r-- | hw/arm/fsl-imx7.c | 2 | ||||
-rw-r--r-- | hw/misc/imx6ul_ccm.c | 76 | ||||
-rw-r--r-- | hw/net/ftgmac100.c | 26 | ||||
-rw-r--r-- | hw/net/imx_fec.c | 106 | ||||
-rw-r--r-- | hw/net/trace-events | 18 | ||||
-rw-r--r-- | hw/sd/sdhci-internal.h | 5 | ||||
-rw-r--r-- | hw/sd/sdhci.c | 18 | ||||
-rw-r--r-- | include/hw/sd/sdhci.h | 5 | ||||
-rw-r--r-- | target/arm/cpu.c | 6 | ||||
-rw-r--r-- | target/arm/cpu64.c | 1 | ||||
-rw-r--r-- | target/arm/kvm.c | 21 | ||||
-rw-r--r-- | target/arm/neon-dp.decode | 130 | ||||
-rw-r--r-- | target/arm/translate-neon.inc.c | 1148 | ||||
-rw-r--r-- | target/arm/translate.c | 684 | ||||
-rw-r--r-- | target/arm/translate.h | 1 |
18 files changed, 1495 insertions, 766 deletions
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index fb516bd..f32f9bc 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -263,6 +263,12 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp) &err); object_property_set_uint(OBJECT(&s->esdhc[i]), IMX25_ESDHC_CAPABILITIES, "capareg", &err); + object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX, + "vendor", &err); + if (err) { + error_propagate(errp, err); + return; + } sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err); if (err) { error_propagate(errp, err); diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index 1759348..d4bc4fa 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -339,6 +339,12 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) &err); object_property_set_uint(OBJECT(&s->esdhc[i]), IMX6_ESDHC_CAPABILITIES, "capareg", &err); + object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX, + "vendor", &err); + if (err) { + error_propagate(errp, err); + return; + } sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err); if (err) { error_propagate(errp, err); diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c index f8c5640..6446034 100644 --- a/hw/arm/fsl-imx6ul.c +++ b/hw/arm/fsl-imx6ul.c @@ -479,6 +479,8 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) FSL_IMX6UL_USDHC2_IRQ, }; + object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX, + "vendor", &error_abort); sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort); sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0, diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c index ca8b5cc..b49d895 100644 --- a/hw/arm/fsl-imx7.c +++ b/hw/arm/fsl-imx7.c @@ -393,6 +393,8 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp) FSL_IMX7_USDHC3_IRQ, }; + object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX, + "vendor", &error_abort); sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort); sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0, diff --git a/hw/misc/imx6ul_ccm.c b/hw/misc/imx6ul_ccm.c index a2fc1d0..5e0661d 100644 --- a/hw/misc/imx6ul_ccm.c +++ b/hw/misc/imx6ul_ccm.c @@ -19,6 +19,62 @@ #include "trace.h" +static const uint32_t ccm_mask[CCM_MAX] = { + [CCM_CCR] = 0xf01fef80, + [CCM_CCDR] = 0xfffeffff, + [CCM_CSR] = 0xffffffff, + [CCM_CCSR] = 0xfffffef2, + [CCM_CACRR] = 0xfffffff8, + [CCM_CBCDR] = 0xc1f8e000, + [CCM_CBCMR] = 0xfc03cfff, + [CCM_CSCMR1] = 0x80700000, + [CCM_CSCMR2] = 0xe01ff003, + [CCM_CSCDR1] = 0xfe00c780, + [CCM_CS1CDR] = 0xfe00fe00, + [CCM_CS2CDR] = 0xf8007000, + [CCM_CDCDR] = 0xf00fffff, + [CCM_CHSCCDR] = 0xfffc01ff, + [CCM_CSCDR2] = 0xfe0001ff, + [CCM_CSCDR3] = 0xffffc1ff, + [CCM_CDHIPR] = 0xffffffff, + [CCM_CTOR] = 0x00000000, + [CCM_CLPCR] = 0xf39ff01c, + [CCM_CISR] = 0xfb85ffbe, + [CCM_CIMR] = 0xfb85ffbf, + [CCM_CCOSR] = 0xfe00fe00, + [CCM_CGPR] = 0xfffc3fea, + [CCM_CCGR0] = 0x00000000, + [CCM_CCGR1] = 0x00000000, + [CCM_CCGR2] = 0x00000000, + [CCM_CCGR3] = 0x00000000, + [CCM_CCGR4] = 0x00000000, + [CCM_CCGR5] = 0x00000000, + [CCM_CCGR6] = 0x00000000, + [CCM_CMEOR] = 0xafffff1f, +}; + +static const uint32_t analog_mask[CCM_ANALOG_MAX] = { + [CCM_ANALOG_PLL_ARM] = 0xfff60f80, + [CCM_ANALOG_PLL_USB1] = 0xfffe0fbc, + [CCM_ANALOG_PLL_USB2] = 0xfffe0fbc, + [CCM_ANALOG_PLL_SYS] = 0xfffa0ffe, + [CCM_ANALOG_PLL_SYS_SS] = 0x00000000, + [CCM_ANALOG_PLL_SYS_NUM] = 0xc0000000, + [CCM_ANALOG_PLL_SYS_DENOM] = 0xc0000000, + [CCM_ANALOG_PLL_AUDIO] = 0xffe20f80, + [CCM_ANALOG_PLL_AUDIO_NUM] = 0xc0000000, + [CCM_ANALOG_PLL_AUDIO_DENOM] = 0xc0000000, + [CCM_ANALOG_PLL_VIDEO] = 0xffe20f80, + [CCM_ANALOG_PLL_VIDEO_NUM] = 0xc0000000, + [CCM_ANALOG_PLL_VIDEO_DENOM] = 0xc0000000, + [CCM_ANALOG_PLL_ENET] = 0xffc20ff0, + [CCM_ANALOG_PFD_480] = 0x40404040, + [CCM_ANALOG_PFD_528] = 0x40404040, + [PMU_MISC0] = 0x01fe8306, + [PMU_MISC1] = 0x07fcede0, + [PMU_MISC2] = 0x005f5f5f, +}; + static const char *imx6ul_ccm_reg_name(uint32_t reg) { static char unknown[20]; @@ -596,11 +652,8 @@ static void imx6ul_ccm_write(void *opaque, hwaddr offset, uint64_t value, trace_ccm_write_reg(imx6ul_ccm_reg_name(index), (uint32_t)value); - /* - * We will do a better implementation later. In particular some bits - * cannot be written to. - */ - s->ccm[index] = (uint32_t)value; + s->ccm[index] = (s->ccm[index] & ccm_mask[index]) | + ((uint32_t)value & ~ccm_mask[index]); } static uint64_t imx6ul_analog_read(void *opaque, hwaddr offset, unsigned size) @@ -737,7 +790,7 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value, * the REG_NAME register. So we change the value of the * REG_NAME register, setting bits passed in the value. */ - s->analog[index - 1] |= value; + s->analog[index - 1] |= (value & ~analog_mask[index - 1]); break; case CCM_ANALOG_PLL_ARM_CLR: case CCM_ANALOG_PLL_USB1_CLR: @@ -762,7 +815,7 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value, * the REG_NAME register. So we change the value of the * REG_NAME register, unsetting bits passed in the value. */ - s->analog[index - 2] &= ~value; + s->analog[index - 2] &= ~(value & ~analog_mask[index - 2]); break; case CCM_ANALOG_PLL_ARM_TOG: case CCM_ANALOG_PLL_USB1_TOG: @@ -787,14 +840,11 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value, * the REG_NAME register. So we change the value of the * REG_NAME register, toggling bits passed in the value. */ - s->analog[index - 3] ^= value; + s->analog[index - 3] ^= (value & ~analog_mask[index - 3]); break; default: - /* - * We will do a better implementation later. In particular some bits - * cannot be written to. - */ - s->analog[index] = value; + s->analog[index] = (s->analog[index] & analog_mask[index]) | + (value & ~analog_mask[index]); break; } } diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c index 25ebee7..043ba61 100644 --- a/hw/net/ftgmac100.c +++ b/hw/net/ftgmac100.c @@ -80,6 +80,16 @@ #define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12) /* + * DMA burst length and arbitration control register + */ +#define FTGMAC100_DBLAC_RXBURST_SIZE(x) (((x) >> 8) & 0x3) +#define FTGMAC100_DBLAC_TXBURST_SIZE(x) (((x) >> 10) & 0x3) +#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8) +#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8) +#define FTGMAC100_DBLAC_IFG_CNT(x) (((x) >> 20) & 0x7) +#define FTGMAC100_DBLAC_IFG_INC (1 << 23) + +/* * PHY control register */ #define FTGMAC100_PHYCR_MIIRD (1 << 26) @@ -553,7 +563,7 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring, if (bd.des0 & s->txdes0_edotr) { addr = tx_ring; } else { - addr += sizeof(FTGMAC100Desc); + addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac); } } @@ -800,6 +810,18 @@ static void ftgmac100_write(void *opaque, hwaddr addr, s->phydata = value & 0xffff; break; case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */ + if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: transmit descriptor too small : %d bytes\n", + __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac)); + break; + } + if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: receive descriptor too small : %d bytes\n", + __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac)); + break; + } s->dblac = value; break; case FTGMAC100_REVR: /* Feature Register */ @@ -982,7 +1004,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf, if (bd.des0 & s->rxdes0_edorr) { addr = s->rx_ring; } else { - addr += sizeof(FTGMAC100Desc); + addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac); } } s->rx_descriptor = addr; diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 7adcc9d..eefedc2 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -31,34 +31,11 @@ #include "qemu/module.h" #include "net/checksum.h" #include "net/eth.h" +#include "trace.h" /* For crc32 */ #include <zlib.h> -#ifndef DEBUG_IMX_FEC -#define DEBUG_IMX_FEC 0 -#endif - -#define FEC_PRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX_FEC) { \ - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \ - __func__, ##args); \ - } \ - } while (0) - -#ifndef DEBUG_IMX_PHY -#define DEBUG_IMX_PHY 0 -#endif - -#define PHY_PRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX_PHY) { \ - fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \ - __func__, ##args); \ - } \ - } while (0) - #define IMX_MAX_DESC 1024 static const char *imx_default_reg_name(IMXFECState *s, uint32_t index) @@ -262,43 +239,45 @@ static void imx_eth_update(IMXFECState *s); * For now we don't handle any GPIO/interrupt line, so the OS will * have to poll for the PHY status. */ -static void phy_update_irq(IMXFECState *s) +static void imx_phy_update_irq(IMXFECState *s) { imx_eth_update(s); } -static void phy_update_link(IMXFECState *s) +static void imx_phy_update_link(IMXFECState *s) { /* Autonegotiation status mirrors link status. */ if (qemu_get_queue(s->nic)->link_down) { - PHY_PRINTF("link is down\n"); + trace_imx_phy_update_link("down"); s->phy_status &= ~0x0024; s->phy_int |= PHY_INT_DOWN; } else { - PHY_PRINTF("link is up\n"); + trace_imx_phy_update_link("up"); s->phy_status |= 0x0024; s->phy_int |= PHY_INT_ENERGYON; s->phy_int |= PHY_INT_AUTONEG_COMPLETE; } - phy_update_irq(s); + imx_phy_update_irq(s); } static void imx_eth_set_link(NetClientState *nc) { - phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); + imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); } -static void phy_reset(IMXFECState *s) +static void imx_phy_reset(IMXFECState *s) { + trace_imx_phy_reset(); + s->phy_status = 0x7809; s->phy_control = 0x3000; s->phy_advertise = 0x01e1; s->phy_int_mask = 0; s->phy_int = 0; - phy_update_link(s); + imx_phy_update_link(s); } -static uint32_t do_phy_read(IMXFECState *s, int reg) +static uint32_t imx_phy_read(IMXFECState *s, int reg) { uint32_t val; @@ -332,7 +311,7 @@ static uint32_t do_phy_read(IMXFECState *s, int reg) case 29: /* Interrupt source. */ val = s->phy_int; s->phy_int = 0; - phy_update_irq(s); + imx_phy_update_irq(s); break; case 30: /* Interrupt mask */ val = s->phy_int_mask; @@ -352,14 +331,14 @@ static uint32_t do_phy_read(IMXFECState *s, int reg) break; } - PHY_PRINTF("read 0x%04x @ %d\n", val, reg); + trace_imx_phy_read(val, reg); return val; } -static void do_phy_write(IMXFECState *s, int reg, uint32_t val) +static void imx_phy_write(IMXFECState *s, int reg, uint32_t val) { - PHY_PRINTF("write 0x%04x @ %d\n", val, reg); + trace_imx_phy_write(val, reg); if (reg > 31) { /* we only advertise one phy */ @@ -369,7 +348,7 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val) switch (reg) { case 0: /* Basic Control */ if (val & 0x8000) { - phy_reset(s); + imx_phy_reset(s); } else { s->phy_control = val & 0x7980; /* Complete autonegotiation immediately. */ @@ -383,7 +362,7 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val) break; case 30: /* Interrupt mask */ s->phy_int_mask = val & 0xff; - phy_update_irq(s); + imx_phy_update_irq(s); break; case 17: case 18: @@ -402,6 +381,8 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val) static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr) { dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); + + trace_imx_fec_read_bd(addr, bd->flags, bd->length, bd->data); } static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) @@ -412,6 +393,9 @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr) { dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); + + trace_imx_enet_read_bd(addr, bd->flags, bd->length, bd->data, + bd->option, bd->status); } static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr) @@ -471,11 +455,11 @@ static void imx_fec_do_tx(IMXFECState *s) int len; imx_fec_read_bd(&bd, addr); - FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n", - addr, bd.flags, bd.length, bd.data); if ((bd.flags & ENET_BD_R) == 0) { + /* Run out of descriptors to transmit. */ - FEC_PRINTF("tx_bd ran out of descriptors to transmit\n"); + trace_imx_eth_tx_bd_busy(); + break; } len = bd.length; @@ -552,11 +536,11 @@ static void imx_enet_do_tx(IMXFECState *s, uint32_t index) int len; imx_enet_read_bd(&bd, addr); - FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x " - "status %04x\n", addr, bd.flags, bd.length, bd.data, - bd.option, bd.status); if ((bd.flags & ENET_BD_R) == 0) { /* Run out of descriptors to transmit. */ + + trace_imx_eth_tx_bd_busy(); + break; } len = bd.length; @@ -633,7 +617,7 @@ static void imx_eth_enable_rx(IMXFECState *s, bool flush) s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0; if (!s->regs[ENET_RDAR]) { - FEC_PRINTF("RX buffer full\n"); + trace_imx_eth_rx_bd_full(); } else if (flush) { qemu_flush_queued_packets(qemu_get_queue(s->nic)); } @@ -676,7 +660,7 @@ static void imx_eth_reset(DeviceState *d) memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor)); /* We also reset the PHY */ - phy_reset(s); + imx_phy_reset(s); } static uint32_t imx_default_read(IMXFECState *s, uint32_t index) @@ -774,8 +758,7 @@ static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size) break; } - FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), - value); + trace_imx_eth_read(index, imx_eth_reg_name(s, index), value); return value; } @@ -884,8 +867,7 @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, const bool single_tx_ring = !imx_eth_is_multi_tx_ring(s); uint32_t index = offset >> 2; - FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), - (uint32_t)value); + trace_imx_eth_write(index, imx_eth_reg_name(s, index), value); switch (index) { case ENET_EIR: @@ -940,12 +922,12 @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, if (extract32(value, 29, 1)) { /* This is a read operation */ s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16, - do_phy_read(s, + imx_phy_read(s, extract32(value, 18, 10))); } else { /* This a write operation */ - do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); + imx_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ s->regs[ENET_EIR] |= ENET_INT_MII; @@ -1053,8 +1035,6 @@ static bool imx_eth_can_receive(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); - FEC_PRINTF("\n"); - return !!s->regs[ENET_RDAR]; } @@ -1071,7 +1051,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, unsigned int buf_len; size_t size = len; - FEC_PRINTF("len %d\n", (int)size); + trace_imx_fec_receive(size); if (!s->regs[ENET_RDAR]) { qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", @@ -1113,7 +1093,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, bd.length = buf_len; size -= buf_len; - FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); + trace_imx_fec_receive_len(addr, bd.length); /* The last 4 bytes are the CRC. */ if (size < 4) { @@ -1131,7 +1111,9 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, if (size == 0) { /* Last buffer in frame. */ bd.flags |= flags | ENET_BD_L; - FEC_PRINTF("rx frame flags %04x\n", bd.flags); + + trace_imx_fec_receive_last(bd.flags); + s->regs[ENET_EIR] |= ENET_INT_RXF; } else { s->regs[ENET_EIR] |= ENET_INT_RXB; @@ -1164,7 +1146,7 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, size_t size = len; bool shift16 = s->regs[ENET_RACC] & ENET_RACC_SHIFT16; - FEC_PRINTF("len %d\n", (int)size); + trace_imx_enet_receive(size); if (!s->regs[ENET_RDAR]) { qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", @@ -1210,7 +1192,7 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, bd.length = buf_len; size -= buf_len; - FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); + trace_imx_enet_receive_len(addr, bd.length); /* The last 4 bytes are the CRC. */ if (size < 4) { @@ -1246,7 +1228,9 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, if (size == 0) { /* Last buffer in frame. */ bd.flags |= flags | ENET_BD_L; - FEC_PRINTF("rx frame flags %04x\n", bd.flags); + + trace_imx_enet_receive_last(bd.flags); + /* Indicate that we've updated the last buffer descriptor. */ bd.last_buffer = ENET_BD_BDU; if (bd.option & ENET_BD_RX_INT) { diff --git a/hw/net/trace-events b/hw/net/trace-events index e18f883..26700da 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -408,3 +408,21 @@ i82596_receive_packet(size_t sz) "len=%zu" i82596_new_mac(const char *id_with_mac) "New MAC for: %s" i82596_set_multicast(uint16_t count) "Added %d multicast entries" i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION" + +# imx_fec.c +imx_phy_read(uint32_t val, int reg) "0x%04"PRIx32" <= reg[%d]" +imx_phy_write(uint32_t val, int reg) "0x%04"PRIx32" => reg[%d]" +imx_phy_update_link(const char *s) "%s" +imx_phy_reset(void) "" +imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x" +imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x" +imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit" +imx_eth_rx_bd_full(void) "RX buffer is full" +imx_eth_read(int reg, const char *reg_name, uint32_t value) "reg[%d:%s] => 0x%08"PRIx32 +imx_eth_write(int reg, const char *reg_name, uint64_t value) "reg[%d:%s] <= 0x%08"PRIx64 +imx_fec_receive(size_t size) "len %zu" +imx_fec_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d" +imx_fec_receive_last(int last) "rx frame flags 0x%04x" +imx_enet_receive(size_t size) "len %zu" +imx_enet_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d" +imx_enet_receive_last(int last) "rx frame flags 0x%04x" diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h index e7c8a52..e8c753d 100644 --- a/hw/sd/sdhci-internal.h +++ b/hw/sd/sdhci-internal.h @@ -75,6 +75,7 @@ #define SDHC_CMD_INHIBIT 0x00000001 #define SDHC_DATA_INHIBIT 0x00000002 #define SDHC_DAT_LINE_ACTIVE 0x00000004 +#define SDHC_IMX_CLOCK_GATE_OFF 0x00000080 #define SDHC_DOING_WRITE 0x00000100 #define SDHC_DOING_READ 0x00000200 #define SDHC_SPACE_AVAILABLE 0x00000400 @@ -289,7 +290,10 @@ extern const VMStateDescription sdhci_vmstate; #define ESDHC_MIX_CTRL 0x48 + #define ESDHC_VENDOR_SPEC 0xc0 +#define ESDHC_IMX_FRC_SDCLK_ON (1 << 8) + #define ESDHC_DLL_CTRL 0x60 #define ESDHC_TUNING_CTRL 0xcc @@ -326,6 +330,7 @@ extern const VMStateDescription sdhci_vmstate; #define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \ DEFINE_PROP_UINT8("sd-spec-version", _state, sd_spec_version, 2), \ DEFINE_PROP_UINT8("uhs", _state, uhs_mode, UHS_NOT_SUPPORTED), \ + DEFINE_PROP_UINT8("vendor", _state, vendor, SDHCI_VENDOR_NONE), \ \ /* Capabilities registers provide information on supported * features of this specific host controller implementation */ \ diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 1b75d7b..eb2be65 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1569,11 +1569,13 @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size) } break; + case ESDHC_VENDOR_SPEC: + ret = s->vendor_spec; + break; case ESDHC_DLL_CTRL: case ESDHC_TUNE_CTRL_STATUS: case ESDHC_UNDOCUMENTED_REG27: case ESDHC_TUNING_CTRL: - case ESDHC_VENDOR_SPEC: case ESDHC_MIX_CTRL: case ESDHC_WTMK_LVL: ret = 0; @@ -1596,7 +1598,21 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) case ESDHC_UNDOCUMENTED_REG27: case ESDHC_TUNING_CTRL: case ESDHC_WTMK_LVL: + break; + case ESDHC_VENDOR_SPEC: + s->vendor_spec = value; + switch (s->vendor) { + case SDHCI_VENDOR_IMX: + if (value & ESDHC_IMX_FRC_SDCLK_ON) { + s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF; + } else { + s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF; + } + break; + default: + break; + } break; case SDHC_HOSTCTL: diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h index c6868c9..5d9275f 100644 --- a/include/hw/sd/sdhci.h +++ b/include/hw/sd/sdhci.h @@ -74,6 +74,7 @@ typedef struct SDHCIState { uint16_t acmd12errsts; /* Auto CMD12 error status register */ uint16_t hostctl2; /* Host Control 2 */ uint64_t admasysaddr; /* ADMA System Address Register */ + uint16_t vendor_spec; /* Vendor specific register */ /* Read-only registers */ uint64_t capareg; /* Capabilities Register */ @@ -96,8 +97,12 @@ typedef struct SDHCIState { uint32_t quirks; uint8_t sd_spec_version; uint8_t uhs_mode; + uint8_t vendor; /* For vendor specific functionality */ } SDHCIState; +#define SDHCI_VENDOR_NONE 0 +#define SDHCI_VENDOR_IMX 1 + /* * Controller does not provide transfer-complete interrupt when not * busy. diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 32bec15..5b7a36b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1245,6 +1245,10 @@ void arm_cpu_post_init(Object *obj) if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property); } + + if (kvm_enabled()) { + kvm_arm_add_vcpu_properties(obj); + } } static void arm_cpu_finalizefn(Object *obj) @@ -2029,7 +2033,6 @@ static void arm_max_initfn(Object *obj) if (kvm_enabled()) { kvm_arm_set_cpu_features_from_host(cpu); - kvm_arm_add_vcpu_properties(obj); } else { cortex_a15_initfn(obj); @@ -2183,7 +2186,6 @@ static void arm_host_initfn(Object *obj) if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { aarch64_add_sve_properties(obj); } - kvm_arm_add_vcpu_properties(obj); arm_cpu_post_init(obj); } diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index cbc5c38..778cecc 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -592,7 +592,6 @@ static void aarch64_max_initfn(Object *obj) if (kvm_enabled()) { kvm_arm_set_cpu_features_from_host(cpu); - kvm_arm_add_vcpu_properties(obj); } else { uint64_t t; uint32_t u; diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 4bdbe6d..eef3bbd 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -194,17 +194,18 @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) /* KVM VCPU properties should be prefixed with "kvm-". */ void kvm_arm_add_vcpu_properties(Object *obj) { - if (!kvm_enabled()) { - return; - } + ARMCPU *cpu = ARM_CPU(obj); + CPUARMState *env = &cpu->env; - ARM_CPU(obj)->kvm_adjvtime = true; - object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, - kvm_no_adjvtime_set); - object_property_set_description(obj, "kvm-no-adjvtime", - "Set on to disable the adjustment of " - "the virtual counter. VM stopped time " - "will be counted."); + if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { + cpu->kvm_adjvtime = true; + object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, + kvm_no_adjvtime_set); + object_property_set_description(obj, "kvm-no-adjvtime", + "Set on to disable the adjustment of " + "the virtual counter. VM stopped time " + "will be counted."); + } } bool kvm_arm_pmu_supported(CPUState *cpu) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index bd1b0e1..6d890b2 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -397,3 +397,133 @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt # So we have a single decode line and check the cmode/op in the # trans function. Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm + +###################################################################### +# Within the "two registers, or three registers of different lengths" +# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode +# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar; +# or they are a size field for the three-reg-different-lengths and +# two-reg-and-scalar insn groups (where size cannot be 0b11). This +# is slightly awkward for decodetree: we handle it with this +# non-exclusive group which contains within it two exclusive groups: +# one for the size=0b11 patterns, and one for the size-not-0b11 +# patterns. This allows us to check that none of the insns within +# each subgroup accidentally overlap each other. Note that all the +# trans functions for the size-not-0b11 patterns must check and +# return false for size==3. +###################################################################### +{ + [ + ################################################################## + # Miscellaneous size=0b11 insns + ################################################################## + VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + + VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + + VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \ + vm=%vm_dp vd=%vd_dp size=0 + VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \ + vm=%vm_dp vd=%vd_dp size=1 + VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \ + vm=%vm_dp vd=%vd_dp size=2 + ] + + # Subgroup for size != 0b11 + [ + ################################################################## + # 3-reg-different-length grouping: + # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4 + ################################################################## + + &3diff vm vn vd size + + @3diff .... ... . . . size:2 .... .... .... . . . . .... \ + &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp + + VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff + VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff + + VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff + VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff + + VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff + VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff + + VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff + VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff + + VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff + VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff + + VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff + VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff + + VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff + VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff + + VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff + VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff + + VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff + VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff + + VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff + + VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff + VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff + + VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff + + VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff + VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff + + VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff + + VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff + + ################################################################## + # 2-regs-plus-scalar grouping: + # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4 + ################################################################## + &2scalar vm vn vd size q + + @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \ + &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp + # For the 'long' ops the Q bit is part of insn decode + @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \ + &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0 + + VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar + VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar + + VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0 + VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0 + + VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0 + + VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar + VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar + + VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0 + VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0 + + VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0 + + VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar + VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar + + VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0 + VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0 + + VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0 + + VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar + VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar + + VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar + VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar + ] +} diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 664d361..a5aa56b 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1329,6 +1329,7 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, neon_load_reg64(tmp, a->vm + pass); fn(tmp, cpu_env, tmp, constimm); neon_store_reg64(tmp, a->vd + pass); + tcg_temp_free_i64(tmp); } tcg_temp_free_i64(constimm); return true; @@ -1624,6 +1625,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, tmp = tcg_temp_new_i64(); widenfn(tmp, rm0); + tcg_temp_free_i32(rm0); if (a->shift != 0) { tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); @@ -1631,6 +1633,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, neon_store_reg64(tmp, a->vd); widenfn(tmp, rm1); + tcg_temp_free_i32(rm1); if (a->shift != 0) { tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); @@ -1642,7 +1645,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) { - NeonGenWidenFn *widenfn[] = { + static NeonGenWidenFn * const widenfn[] = { gen_helper_neon_widen_s8, gen_helper_neon_widen_s16, tcg_gen_ext_i32_i64, @@ -1652,7 +1655,7 @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) { - NeonGenWidenFn *widenfn[] = { + static NeonGenWidenFn * const widenfn[] = { gen_helper_neon_widen_u8, gen_helper_neon_widen_u16, tcg_gen_extu_i32_i64, @@ -1826,3 +1829,1144 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) } return do_1reg_imm(s, a, fn); } + +static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + bool src1_wide) +{ + /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ + TCGv_i64 rn0_64, rn1_64, rm_64; + TCGv_i32 rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!widenfn || !opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn0_64 = tcg_temp_new_i64(); + rn1_64 = tcg_temp_new_i64(); + rm_64 = tcg_temp_new_i64(); + + if (src1_wide) { + neon_load_reg64(rn0_64, a->vn); + } else { + TCGv_i32 tmp = neon_load_reg(a->vn, 0); + widenfn(rn0_64, tmp); + tcg_temp_free_i32(tmp); + } + rm = neon_load_reg(a->vm, 0); + + widenfn(rm_64, rm); + tcg_temp_free_i32(rm); + opfn(rn0_64, rn0_64, rm_64); + + /* + * Load second pass inputs before storing the first pass result, to + * avoid incorrect results if a narrow input overlaps with the result. + */ + if (src1_wide) { + neon_load_reg64(rn1_64, a->vn + 1); + } else { + TCGv_i32 tmp = neon_load_reg(a->vn, 1); + widenfn(rn1_64, tmp); + tcg_temp_free_i32(tmp); + } + rm = neon_load_reg(a->vm, 1); + + neon_store_reg64(rn0_64, a->vd); + + widenfn(rm_64, rm); + tcg_temp_free_i32(rm); + opfn(rn1_64, rn1_64, rm_64); + neon_store_reg64(rn1_64, a->vd + 1); + + tcg_temp_free_i64(rn0_64); + tcg_temp_free_i64(rn1_64); + tcg_temp_free_i64(rm_64); + + return true; +} + +#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenWidenFn * const widenfn[] = { \ + gen_helper_neon_widen_##S##8, \ + gen_helper_neon_widen_##S##16, \ + tcg_gen_##EXT##_i32_i64, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + return do_prewiden_3d(s, a, widenfn[a->size], \ + addfn[a->size], SRC1WIDE); \ + } + +DO_PREWIDEN(VADDL_S, s, ext, add, false) +DO_PREWIDEN(VADDL_U, u, extu, add, false) +DO_PREWIDEN(VSUBL_S, s, ext, sub, false) +DO_PREWIDEN(VSUBL_U, u, extu, sub, false) +DO_PREWIDEN(VADDW_S, s, ext, add, true) +DO_PREWIDEN(VADDW_U, u, extu, add, true) +DO_PREWIDEN(VSUBW_S, s, ext, sub, true) +DO_PREWIDEN(VSUBW_U, u, extu, sub, true) + +static bool do_narrow_3d(DisasContext *s, arg_3diff *a, + NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) +{ + /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ + TCGv_i64 rn_64, rm_64; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn || !narrowfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vn | a->vm) & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn_64 = tcg_temp_new_i64(); + rm_64 = tcg_temp_new_i64(); + rd0 = tcg_temp_new_i32(); + rd1 = tcg_temp_new_i32(); + + neon_load_reg64(rn_64, a->vn); + neon_load_reg64(rm_64, a->vm); + + opfn(rn_64, rn_64, rm_64); + + narrowfn(rd0, rn_64); + + neon_load_reg64(rn_64, a->vn + 1); + neon_load_reg64(rm_64, a->vm + 1); + + opfn(rn_64, rn_64, rm_64); + + narrowfn(rd1, rn_64); + + neon_store_reg(a->vd, 0, rd0); + neon_store_reg(a->vd, 1, rd1); + + tcg_temp_free_i64(rn_64); + tcg_temp_free_i64(rm_64); + + return true; +} + +#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + static NeonGenNarrowFn * const narrowfn[] = { \ + gen_helper_neon_##NARROWTYPE##_high_u8, \ + gen_helper_neon_##NARROWTYPE##_high_u16, \ + EXTOP, \ + NULL, \ + }; \ + return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ + } + +static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn) +{ + tcg_gen_addi_i64(rn, rn, 1u << 31); + tcg_gen_extrh_i64_i32(rd, rn); +} + +DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) +DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) + +static bool do_long_3d(DisasContext *s, arg_3diff *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + /* + * 3-regs different lengths, long operations. + * These perform an operation on two inputs that returns a double-width + * result, and then possibly perform an accumulation operation of + * that result into the double-width destination. + */ + TCGv_i64 rd0, rd1, tmp; + TCGv_i32 rn, rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd0 = tcg_temp_new_i64(); + rd1 = tcg_temp_new_i64(); + + rn = neon_load_reg(a->vn, 0); + rm = neon_load_reg(a->vm, 0); + opfn(rd0, rn, rm); + tcg_temp_free_i32(rn); + tcg_temp_free_i32(rm); + + rn = neon_load_reg(a->vn, 1); + rm = neon_load_reg(a->vm, 1); + opfn(rd1, rn, rm); + tcg_temp_free_i32(rn); + tcg_temp_free_i32(rm); + + /* Don't store results until after all loads: they might overlap */ + if (accfn) { + tmp = tcg_temp_new_i64(); + neon_load_reg64(tmp, a->vd); + accfn(tmp, tmp, rd0); + neon_store_reg64(tmp, a->vd); + neon_load_reg64(tmp, a->vd + 1); + accfn(tmp, tmp, rd1); + neon_store_reg64(tmp, a->vd + 1); + tcg_temp_free_i64(tmp); + } else { + neon_store_reg64(rd0, a->vd); + neon_store_reg64(rd1, a->vd + 1); + } + + tcg_temp_free_i64(rd0); + tcg_temp_free_i64(rd1); + + return true; +} + +static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(); + TCGv_i32 hi = tcg_temp_new_i32(); + + tcg_gen_muls2_i32(lo, hi, rn, rm); + tcg_gen_concat_i32_i64(rd, lo, hi); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); +} + +static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(); + TCGv_i32 hi = tcg_temp_new_i32(); + + tcg_gen_mulu2_i32(lo, hi, rn, rm); + tcg_gen_concat_i32_i64(rd, lo, hi); + + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); +} + +static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_s8, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_u8, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL(INSN,MULL,ACC) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + gen_helper_neon_##MULL##8, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + gen_helper_neon_##ACC##l_u16, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL(VMLAL_S,mull_s,add) +DO_VMLAL(VMLAL_U,mull_u,add) +DO_VMLAL(VMLSL_S,mull_s,sub) +DO_VMLAL(VMLSL_U,mull_u,sub) + +static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_helper_neon_mull_s16(rd, rn, rm); + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); +} + +static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_mull_s32(rd, rn, rm); + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); +} + +static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); +} + +static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); +} + +static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_negl_u32(rm, rm); + gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); +} + +static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_neg_i64(rm, rm); + gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); +} + +static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) +{ + gen_helper_gvec_3 *fn_gvec; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + switch (a->size) { + case 0: + fn_gvec = gen_helper_neon_pmull_h; + break; + case 2: + if (!dc_isar_feature(aa32_pmull, s)) { + return false; + } + fn_gvec = gen_helper_gvec_pmull_q; + break; + default: + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0), + neon_reg_offset(a->vn, 0), + neon_reg_offset(a->vm, 0), + 16, 16, 0, fn_gvec); + return true; +} + +static void gen_neon_dup_low16(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(var, var); + tcg_gen_shli_i32(tmp, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +static void gen_neon_dup_high16(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_andi_i32(var, var, 0xffff0000); + tcg_gen_shri_i32(tmp, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +static inline TCGv_i32 neon_get_scalar(int size, int reg) +{ + TCGv_i32 tmp; + if (size == 1) { + tmp = neon_load_reg(reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tmp); + } else { + gen_neon_dup_low16(tmp); + } + } else { + tmp = neon_load_reg(reg & 15, reg >> 4); + } + return tmp; +} + +static bool do_2scalar(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) +{ + /* + * Two registers and a scalar: perform an operation between + * the input elements and the scalar, and then possibly + * perform an accumulation operation of that result into the + * destination. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(a->size, a->vm); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(a->vn, pass); + opfn(tmp, tmp, scalar); + if (accfn) { + TCGv_i32 rd = neon_load_reg(a->vd, pass); + accfn(tmp, rd, tmp); + tcg_temp_free_i32(rd); + } + neon_store_reg(a->vd, pass, tmp); + } + tcg_temp_free_i32(scalar); + return true; +} + +static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_add_u16, + tcg_gen_add_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_sub_u16, + tcg_gen_sub_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +/* + * Rather than have a float-specific version of do_2scalar just for + * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into + * a NeonGenTwoOpFn. + */ +#define WRAP_FP_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \ + { \ + TCGv_ptr fpstatus = get_fpstatus_ptr(1); \ + FUNC(rd, rn, rm, fpstatus); \ + tcg_temp_free_ptr(fpstatus); \ + } + +WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) +WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) +WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) + +static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_add, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_sub, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) +WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) +WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) +WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) + +static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQDMULH_16, + gen_VQDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQRDMULH_16, + gen_VQRDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, + NeonGenThreeOpEnvFn *opfn) +{ + /* + * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn + * performs a kind of fused op-then-accumulate using a helper + * function that takes all of rd, rn and the scalar at once. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!dc_isar_feature(aa32_rdm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(a->size, a->vm); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 rn = neon_load_reg(a->vn, pass); + TCGv_i32 rd = neon_load_reg(a->vd, pass); + opfn(rd, cpu_env, rn, scalar, rd); + tcg_temp_free_i32(rn); + neon_store_reg(a->vd, pass, rd); + } + tcg_temp_free_i32(scalar); + + return true; +} + +static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlah_s16, + gen_helper_neon_qrdmlah_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlsh_s16, + gen_helper_neon_qrdmlsh_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + /* + * Two registers and a scalar, long operations: perform an + * operation on the input elements and the scalar which produces + * a double-width result, and then possibly perform an accumulation + * operation of that result into the destination. + */ + TCGv_i32 scalar, rn; + TCGv_i64 rn0_64, rn1_64; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(a->size, a->vm); + + /* Load all inputs before writing any outputs, in case of overlap */ + rn = neon_load_reg(a->vn, 0); + rn0_64 = tcg_temp_new_i64(); + opfn(rn0_64, rn, scalar); + tcg_temp_free_i32(rn); + + rn = neon_load_reg(a->vn, 1); + rn1_64 = tcg_temp_new_i64(); + opfn(rn1_64, rn, scalar); + tcg_temp_free_i32(rn); + tcg_temp_free_i32(scalar); + + if (accfn) { + TCGv_i64 t64 = tcg_temp_new_i64(); + neon_load_reg64(t64, a->vd); + accfn(t64, t64, rn0_64); + neon_store_reg64(t64, a->vd); + neon_load_reg64(t64, a->vd + 1); + accfn(t64, t64, rn1_64); + neon_store_reg64(t64, a->vd + 1); + tcg_temp_free_i64(t64); + } else { + neon_store_reg64(rn0_64, a->vd); + neon_store_reg64(rn1_64, a->vd + 1); + } + tcg_temp_free_i64(rn0_64); + tcg_temp_free_i64(rn1_64); + return true; +} + +static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL_2SC(INSN, MULL, ACC) \ + static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + NULL, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + NULL, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL_2SC(VMLAL_S, mull_s, add) +DO_VMLAL_2SC(VMLAL_U, mull_u, add) +DO_VMLAL_2SC(VMLSL_S, mull_s, sub) +DO_VMLAL_2SC(VMLSL_U, mull_u, sub) + +static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VEXT(DisasContext *s, arg_VEXT *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (a->imm > 7 && !a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (!a->q) { + /* Extract 64 bits from <Vm:Vn> */ + TCGv_i64 left, right, dest; + + left = tcg_temp_new_i64(); + right = tcg_temp_new_i64(); + dest = tcg_temp_new_i64(); + + neon_load_reg64(right, a->vn); + neon_load_reg64(left, a->vm); + tcg_gen_extract2_i64(dest, right, left, a->imm * 8); + neon_store_reg64(dest, a->vd); + + tcg_temp_free_i64(left); + tcg_temp_free_i64(right); + tcg_temp_free_i64(dest); + } else { + /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ + TCGv_i64 left, middle, right, destleft, destright; + + left = tcg_temp_new_i64(); + middle = tcg_temp_new_i64(); + right = tcg_temp_new_i64(); + destleft = tcg_temp_new_i64(); + destright = tcg_temp_new_i64(); + + if (a->imm < 8) { + neon_load_reg64(right, a->vn); + neon_load_reg64(middle, a->vn + 1); + tcg_gen_extract2_i64(destright, right, middle, a->imm * 8); + neon_load_reg64(left, a->vm); + tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8); + } else { + neon_load_reg64(right, a->vn + 1); + neon_load_reg64(middle, a->vm); + tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8); + neon_load_reg64(left, a->vm + 1); + tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8); + } + + neon_store_reg64(destright, a->vd); + neon_store_reg64(destleft, a->vd + 1); + + tcg_temp_free_i64(destright); + tcg_temp_free_i64(destleft); + tcg_temp_free_i64(right); + tcg_temp_free_i64(middle); + tcg_temp_free_i64(left); + } + return true; +} + +static bool trans_VTBL(DisasContext *s, arg_VTBL *a) +{ + int n; + TCGv_i32 tmp, tmp2, tmp3, tmp4; + TCGv_ptr ptr1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + n = a->len + 1; + if ((a->vn + n) > 32) { + /* + * This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. + */ + return false; + } + n <<= 3; + if (a->op) { + tmp = neon_load_reg(a->vd, 0); + } else { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } + tmp2 = neon_load_reg(a->vm, 0); + ptr1 = vfp_reg_ptr(true, a->vn); + tmp4 = tcg_const_i32(n); + gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4); + tcg_temp_free_i32(tmp); + if (a->op) { + tmp = neon_load_reg(a->vd, 1); + } else { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } + tmp3 = neon_load_reg(a->vm, 1); + gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4); + tcg_temp_free_i32(tmp4); + tcg_temp_free_ptr(ptr1); + neon_store_reg(a->vd, 0, tmp2); + neon_store_reg(a->vd, 1, tmp3); + tcg_temp_free_i32(tmp); + return true; +} + +static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0), + neon_element_offset(a->vm, a->index, a->size), + a->q ? 16 : 8, a->q ? 16 : 8); + return true; +} diff --git a/target/arm/translate.c b/target/arm/translate.c index bcdfec3..6d18892 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -377,43 +377,6 @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var) tcg_gen_ext16s_i32(dest, var); } -/* 32x32->64 multiply. Marks inputs as dead. */ -static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(); - TCGv_i32 hi = tcg_temp_new_i32(); - TCGv_i64 ret; - - tcg_gen_mulu2_i32(lo, hi, a, b); - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - - ret = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(ret, lo, hi); - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); - - return ret; -} - -static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(); - TCGv_i32 hi = tcg_temp_new_i32(); - TCGv_i64 ret; - - tcg_gen_muls2_i32(lo, hi, a, b); - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - - ret = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(ret, lo, hi); - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); - - return ret; -} - /* Swap low and high halfwords. */ static void gen_swap_half(TCGv_i32 var) { @@ -2624,24 +2587,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) -static void gen_neon_dup_low16(TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - -static void gen_neon_dup_high16(TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_andi_i32(var, var, 0xffff0000); - tcg_gen_shri_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static inline bool use_goto_tb(DisasContext *s, target_ulong dest) { #ifndef CONFIG_USER_ONLY @@ -2991,55 +2936,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 -static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1) -{ - switch (size) { - case 0: gen_helper_neon_add_u8(t0, t0, t1); break; - case 1: gen_helper_neon_add_u16(t0, t0, t1); break; - case 2: tcg_gen_add_i32(t0, t0, t1); break; - default: abort(); - } -} - -static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) -{ - switch (size) { - case 0: gen_helper_neon_sub_u8(t0, t1, t0); break; - case 1: gen_helper_neon_sub_u16(t0, t1, t0); break; - case 2: tcg_gen_sub_i32(t0, t1, t0); break; - default: return; - } -} - -static TCGv_i32 neon_load_scratch(int scratch) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - return tmp; -} - -static void neon_store_scratch(int scratch, TCGv_i32 var) -{ - tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - tcg_temp_free_i32(var); -} - -static inline TCGv_i32 neon_get_scalar(int size, int reg) -{ - TCGv_i32 tmp; - if (size == 1) { - tmp = neon_load_reg(reg & 7, reg >> 4); - if (reg & 8) { - gen_neon_dup_high16(tmp); - } else { - gen_neon_dup_low16(tmp); - } - } else { - tmp = neon_load_reg(reg & 15, reg >> 4); - } - return tmp; -} - static int gen_neon_unzip(int rd, int rm, int size, int q) { TCGv_ptr pd, pm; @@ -3231,68 +3127,6 @@ static inline void gen_neon_addl(int size) } } -static inline void gen_neon_subl(int size) -{ - switch (size) { - case 0: gen_helper_neon_subl_u16(CPU_V001); break; - case 1: gen_helper_neon_subl_u32(CPU_V001); break; - case 2: tcg_gen_sub_i64(CPU_V001); break; - default: abort(); - } -} - -static inline void gen_neon_negl(TCGv_i64 var, int size) -{ - switch (size) { - case 0: gen_helper_neon_negl_u16(var, var); break; - case 1: gen_helper_neon_negl_u32(var, var); break; - case 2: - tcg_gen_neg_i64(var, var); - break; - default: abort(); - } -} - -static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) -{ - switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; - default: abort(); - } -} - -static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, - int size, int u) -{ - TCGv_i64 tmp; - - switch ((size << 1) | u) { - case 0: gen_helper_neon_mull_s8(dest, a, b); break; - case 1: gen_helper_neon_mull_u8(dest, a, b); break; - case 2: gen_helper_neon_mull_s16(dest, a, b); break; - case 3: gen_helper_neon_mull_u16(dest, a, b); break; - case 4: - tmp = gen_muls_i64_i32(a, b); - tcg_gen_mov_i64(dest, tmp); - tcg_temp_free_i64(tmp); - break; - case 5: - tmp = gen_mulu_i64_i32(a, b); - tcg_gen_mov_i64(dest, tmp); - tcg_temp_free_i64(tmp); - break; - default: abort(); - } - - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. - Don't forget to clean them now. */ - if (size < 2) { - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - } -} - static void gen_neon_narrow_op(int op, int u, int size, TCGv_i32 dest, TCGv_i64 src) { @@ -5191,15 +5025,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) { int op; int q; - int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; + int rd, rm, rd_ofs, rm_ofs; int size; int pass; int u; int vec_size; - uint32_t imm; - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1; - TCGv_i64 tmp64; + TCGv_i32 tmp, tmp2, tmp3; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return 1; @@ -5220,12 +5051,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) q = (insn & (1 << 6)) != 0; u = (insn >> 24) & 1; VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); VFP_DREG_M(rm, insn); size = (insn >> 20) & 3; vec_size = q ? 16 : 8; rd_ofs = neon_reg_offset(rd, 0); - rn_ofs = neon_reg_offset(rn, 0); rm_ofs = neon_reg_offset(rm, 0); if ((insn & (1 << 23)) == 0) { @@ -5236,454 +5065,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } else { /* (insn & 0x00800010 == 0x00800000) */ if (size != 3) { - op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { - /* Three registers of different lengths. */ - int src1_wide; - int src2_wide; - int prewiden; - /* undefreq: bit 0 : UNDEF if size == 0 - * bit 1 : UNDEF if size == 1 - * bit 2 : UNDEF if size == 2 - * bit 3 : UNDEF if U == 1 - * Note that [2:0] set implies 'always UNDEF' - */ - int undefreq; - /* prewiden, src1_wide, src2_wide, undefreq */ - static const int neon_3reg_wide[16][4] = { - {1, 0, 0, 0}, /* VADDL */ - {1, 1, 0, 0}, /* VADDW */ - {1, 0, 0, 0}, /* VSUBL */ - {1, 1, 0, 0}, /* VSUBW */ - {0, 1, 1, 0}, /* VADDHN */ - {0, 0, 0, 0}, /* VABAL */ - {0, 1, 1, 0}, /* VSUBHN */ - {0, 0, 0, 0}, /* VABDL */ - {0, 0, 0, 0}, /* VMLAL */ - {0, 0, 0, 9}, /* VQDMLAL */ - {0, 0, 0, 0}, /* VMLSL */ - {0, 0, 0, 9}, /* VQDMLSL */ - {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 9}, /* VQDMULL */ - {0, 0, 0, 0xa}, /* Polynomial VMULL */ - {0, 0, 0, 7}, /* Reserved: always UNDEF */ - }; - - prewiden = neon_3reg_wide[op][0]; - src1_wide = neon_3reg_wide[op][1]; - src2_wide = neon_3reg_wide[op][2]; - undefreq = neon_3reg_wide[op][3]; - - if ((undefreq & (1 << size)) || - ((undefreq & 8) && u)) { - return 1; - } - if ((src1_wide && (rn & 1)) || - (src2_wide && (rm & 1)) || - (!src2_wide && (rd & 1))) { - return 1; - } - - /* Handle polynomial VMULL in a single pass. */ - if (op == 14) { - if (size == 0) { - /* VMULL.P8 */ - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_neon_pmull_h); - } else { - /* VMULL.P64 */ - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; - } - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_gvec_pmull_q); - } - return 0; - } - - /* Avoid overlapping operands. Wide source operands are - always aligned so will never overlap with wide - destinations in problematic ways. */ - if (rd == rm && !src2_wide) { - tmp = neon_load_reg(rm, 1); - neon_store_scratch(2, tmp); - } else if (rd == rn && !src1_wide) { - tmp = neon_load_reg(rn, 1); - neon_store_scratch(2, tmp); - } - tmp3 = NULL; - for (pass = 0; pass < 2; pass++) { - if (src1_wide) { - neon_load_reg64(cpu_V0, rn + pass); - tmp = NULL; - } else { - if (pass == 1 && rd == rn) { - tmp = neon_load_scratch(2); - } else { - tmp = neon_load_reg(rn, pass); - } - if (prewiden) { - gen_neon_widen(cpu_V0, tmp, size, u); - } - } - if (src2_wide) { - neon_load_reg64(cpu_V1, rm + pass); - tmp2 = NULL; - } else { - if (pass == 1 && rd == rm) { - tmp2 = neon_load_scratch(2); - } else { - tmp2 = neon_load_reg(rm, pass); - } - if (prewiden) { - gen_neon_widen(cpu_V1, tmp2, size, u); - } - } - switch (op) { - case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ - gen_neon_addl(size); - break; - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ - gen_neon_subl(size); - break; - case 5: case 7: /* VABAL, VABDL */ - switch ((size << 1) | u) { - case 0: - gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2); - break; - case 1: - gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2); - break; - case 2: - gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2); - break; - case 3: - gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2); - break; - case 4: - gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2); - break; - case 5: - gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2); - break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); - break; - case 8: case 9: case 10: case 11: case 12: case 13: - /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ - gen_neon_mull(cpu_V0, tmp, tmp2, size, u); - break; - default: /* 15 is RESERVED: caught earlier */ - abort(); - } - if (op == 13) { - /* VQDMULL */ - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - neon_store_reg64(cpu_V0, rd + pass); - } else if (op == 5 || (op >= 8 && op <= 11)) { - /* Accumulate. */ - neon_load_reg64(cpu_V1, rd + pass); - switch (op) { - case 10: /* VMLSL */ - gen_neon_negl(cpu_V0, size); - /* Fall through */ - case 5: case 8: /* VABAL, VMLAL */ - gen_neon_addl(size); - break; - case 9: case 11: /* VQDMLAL, VQDMLSL */ - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - if (op == 11) { - gen_neon_negl(cpu_V0, size); - } - gen_neon_addl_saturate(cpu_V0, cpu_V1, size); - break; - default: - abort(); - } - neon_store_reg64(cpu_V0, rd + pass); - } else if (op == 4 || op == 6) { - /* Narrowing operation. */ - tmp = tcg_temp_new_i32(); - if (!u) { - switch (size) { - case 0: - gen_helper_neon_narrow_high_u8(tmp, cpu_V0); - break; - case 1: - gen_helper_neon_narrow_high_u16(tmp, cpu_V0); - break; - case 2: - tcg_gen_extrh_i64_i32(tmp, cpu_V0); - break; - default: abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0); - break; - case 1: - gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0); - break; - case 2: - tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31); - tcg_gen_extrh_i64_i32(tmp, cpu_V0); - break; - default: abort(); - } - } - if (pass == 0) { - tmp3 = tmp; - } else { - neon_store_reg(rd, 0, tmp3); - neon_store_reg(rd, 1, tmp); - } - } else { - /* Write back the result. */ - neon_store_reg64(cpu_V0, rd + pass); - } - } - } else { - /* Two registers and a scalar. NB that for ops of this form - * the ARM ARM labels bit 24 as Q, but it is in our variable - * 'u', not 'q'. - */ - if (size == 0) { - return 1; - } - switch (op) { - case 1: /* Float VMLA scalar */ - case 5: /* Floating point VMLS scalar */ - case 9: /* Floating point VMUL scalar */ - if (size == 1) { - return 1; - } - /* fall through */ - case 0: /* Integer VMLA scalar */ - case 4: /* Integer VMLS scalar */ - case 8: /* Integer VMUL scalar */ - case 12: /* VQDMULH scalar */ - case 13: /* VQRDMULH scalar */ - if (u && ((rd | rn) & 1)) { - return 1; - } - tmp = neon_get_scalar(size, rm); - neon_store_scratch(0, tmp); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_scratch(0); - tmp2 = neon_load_reg(rn, pass); - if (op == 12) { - if (size == 1) { - gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); - } - } else if (op == 13) { - if (size == 1) { - gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); - } - } else if (op & 1) { - TCGv_ptr fpstatus = get_fpstatus_ptr(1); - gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(fpstatus); - } else { - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: abort(); - } - } - tcg_temp_free_i32(tmp2); - if (op < 8) { - /* Accumulate. */ - tmp2 = neon_load_reg(rd, pass); - switch (op) { - case 0: - gen_neon_add(size, tmp, tmp2); - break; - case 1: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(1); - gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(fpstatus); - break; - } - case 4: - gen_neon_rsb(size, tmp, tmp2); - break; - case 5: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(1); - gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); - tcg_temp_free_ptr(fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); - } - break; - case 3: /* VQDMLAL scalar */ - case 7: /* VQDMLSL scalar */ - case 11: /* VQDMULL scalar */ - if (u == 1) { - return 1; - } - /* fall through */ - case 2: /* VMLAL sclar */ - case 6: /* VMLSL scalar */ - case 10: /* VMULL scalar */ - if (rd & 1) { - return 1; - } - tmp2 = neon_get_scalar(size, rm); - /* We need a copy of tmp2 because gen_neon_mull - * deletes it during pass 0. */ - tmp4 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp4, tmp2); - tmp3 = neon_load_reg(rn, 1); - - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(rn, 0); - } else { - tmp = tmp3; - tmp2 = tmp4; - } - gen_neon_mull(cpu_V0, tmp, tmp2, size, u); - if (op != 11) { - neon_load_reg64(cpu_V1, rd + pass); - } - switch (op) { - case 6: - gen_neon_negl(cpu_V0, size); - /* Fall through */ - case 2: - gen_neon_addl(size); - break; - case 3: case 7: - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - if (op == 7) { - gen_neon_negl(cpu_V0, size); - } - gen_neon_addl_saturate(cpu_V0, cpu_V1, size); - break; - case 10: - /* no-op */ - break; - case 11: - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - break; - default: - abort(); - } - neon_store_reg64(cpu_V0, rd + pass); - } - break; - case 14: /* VQRDMLAH scalar */ - case 15: /* VQRDMLSH scalar */ - { - NeonGenThreeOpEnvFn *fn; - - if (!dc_isar_feature(aa32_rdm, s)) { - return 1; - } - if (u && ((rd | rn) & 1)) { - return 1; - } - if (op == 14) { - if (size == 1) { - fn = gen_helper_neon_qrdmlah_s16; - } else { - fn = gen_helper_neon_qrdmlah_s32; - } - } else { - if (size == 1) { - fn = gen_helper_neon_qrdmlsh_s16; - } else { - fn = gen_helper_neon_qrdmlsh_s32; - } - } - - tmp2 = neon_get_scalar(size, rm); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_reg(rn, pass); - tmp3 = neon_load_reg(rd, pass); - fn(tmp, cpu_env, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - neon_store_reg(rd, pass, tmp); - } - tcg_temp_free_i32(tmp2); - } - break; - default: - g_assert_not_reached(); - } - } + /* + * Three registers of different lengths, or two registers and + * a scalar: handled by decodetree + */ + return 1; } else { /* size == 3 */ if (!u) { - /* Extract. */ - imm = (insn >> 8) & 0xf; - - if (imm > 7 && !q) - return 1; - - if (q && ((rd | rn | rm) & 1)) { - return 1; - } - - if (imm == 0) { - neon_load_reg64(cpu_V0, rn); - if (q) { - neon_load_reg64(cpu_V1, rn + 1); - } - } else if (imm == 8) { - neon_load_reg64(cpu_V0, rn + 1); - if (q) { - neon_load_reg64(cpu_V1, rm); - } - } else if (q) { - tmp64 = tcg_temp_new_i64(); - if (imm < 8) { - neon_load_reg64(cpu_V0, rn); - neon_load_reg64(tmp64, rn + 1); - } else { - neon_load_reg64(cpu_V0, rn + 1); - neon_load_reg64(tmp64, rm); - } - tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8); - tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8)); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); - if (imm < 8) { - neon_load_reg64(cpu_V1, rm); - } else { - neon_load_reg64(cpu_V1, rm + 1); - imm -= 8; - } - tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); - tcg_gen_shri_i64(tmp64, tmp64, imm * 8); - tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64); - tcg_temp_free_i64(tmp64); - } else { - /* BUGFIX */ - neon_load_reg64(cpu_V0, rn); - tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8); - neon_load_reg64(cpu_V1, rm); - tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); - } - neon_store_reg64(cpu_V0, rd); - if (q) { - neon_store_reg64(cpu_V1, rd + 1); - } + /* Extract: handled by decodetree */ + return 1; } else if ((insn & (1 << 11)) == 0) { /* Two register misc. */ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); @@ -6184,62 +5574,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } break; } - } else if ((insn & (1 << 10)) == 0) { - /* VTBL, VTBX. */ - int n = ((insn >> 8) & 3) + 1; - if ((rn + n) > 32) { - /* This is UNPREDICTABLE; we choose to UNDEF to avoid the - * helper function running off the end of the register file. - */ - return 1; - } - n <<= 3; - if (insn & (1 << 6)) { - tmp = neon_load_reg(rd, 0); - } else { - tmp = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp, 0); - } - tmp2 = neon_load_reg(rm, 0); - ptr1 = vfp_reg_ptr(true, rn); - tmp5 = tcg_const_i32(n); - gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5); - tcg_temp_free_i32(tmp); - if (insn & (1 << 6)) { - tmp = neon_load_reg(rd, 1); - } else { - tmp = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp, 0); - } - tmp3 = neon_load_reg(rm, 1); - gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5); - tcg_temp_free_i32(tmp5); - tcg_temp_free_ptr(ptr1); - neon_store_reg(rd, 0, tmp2); - neon_store_reg(rd, 1, tmp3); - tcg_temp_free_i32(tmp); - } else if ((insn & 0x380) == 0) { - /* VDUP */ - int element; - MemOp size; - - if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { - return 1; - } - if (insn & (1 << 16)) { - size = MO_8; - element = (insn >> 17) & 7; - } else if (insn & (1 << 17)) { - size = MO_16; - element = (insn >> 18) & 3; - } else { - size = MO_32; - element = (insn >> 19) & 1; - } - tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0), - neon_element_offset(rm, element, size), - q ? 16 : 8, q ? 16 : 8); } else { + /* VTBL, VTBX, VDUP: handled by decodetree */ return 1; } } diff --git a/target/arm/translate.h b/target/arm/translate.h index c937dfe..62ed5c4 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -371,6 +371,7 @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); +typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); |