diff options
28 files changed, 4263 insertions, 915 deletions
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst new file mode 100644 index 0000000..45f891e --- /dev/null +++ b/docs/system/arm/aspeed.rst @@ -0,0 +1,85 @@ +Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``) +================================================================== + +The QEMU Aspeed machines model BMCs of various OpenPOWER systems and +Aspeed evaluation boards. They are based on different releases of the +Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the +AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600 +with dual cores ARM Cortex A7 CPUs (1.2GHz). + +The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C, +etc. + +AST2400 SoC based machines : + +- ``palmetto-bmc`` OpenPOWER Palmetto POWER8 BMC + +AST2500 SoC based machines : + +- ``ast2500-evb`` Aspeed AST2500 Evaluation board +- ``romulus-bmc`` OpenPOWER Romulus POWER9 BMC +- ``witherspoon-bmc`` OpenPOWER Witherspoon POWER9 BMC +- ``sonorapass-bmc`` OCP SonoraPass BMC +- ``swift-bmc`` OpenPOWER Swift BMC POWER9 + +AST2600 SoC based machines : + +- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7) +- ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC + +Supported devices +----------------- + + * SMP (for the AST2600 Cortex-A7) + * Interrupt Controller (VIC) + * Timer Controller + * RTC Controller + * I2C Controller + * System Control Unit (SCU) + * SRAM mapping + * X-DMA Controller (basic interface) + * Static Memory Controller (SMC or FMC) - Only SPI Flash support + * SPI Memory Controller + * USB 2.0 Controller + * SD/MMC storage controllers + * SDRAM controller (dummy interface for basic settings and training) + * Watchdog Controller + * GPIO Controller (Master only) + * UART + * Ethernet controllers + + +Missing devices +--------------- + + * Coprocessor support + * ADC (out of tree implementation) + * PWM and Fan Controller + * LPC Bus Controller + * Slave GPIO Controller + * Super I/O Controller + * Hash/Crypto Engine + * PCI-Express 1 Controller + * Graphic Display Controller + * PECI Controller + * MCTP Controller + * Mailbox Controller + * Virtual UART + * eSPI Controller + * I3C Controller + +Boot options +------------ + +The Aspeed machines can be started using the -kernel option to load a +Linux kernel or from a firmare image which can be downloaded from the +OpenPOWER jenkins : + + https://openpower.xyz/ + +The image should be attached as an MTD drive. Run : + +.. code-block:: bash + + $ qemu-system-arm -M romulus-bmc -nic user \ + -drive file=flash-romulus,format=raw,if=mtd -nographic diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index dce384c..1bd477a 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -81,6 +81,7 @@ undocumented; you can get a complete list by running arm/realview arm/versatile arm/vexpress + arm/aspeed arm/musicpal arm/nseries arm/orangepi diff --git a/hw/adc/stm32f2xx_adc.c b/hw/adc/stm32f2xx_adc.c index 4f9d485..01a0b14 100644 --- a/hw/adc/stm32f2xx_adc.c +++ b/hw/adc/stm32f2xx_adc.c @@ -246,6 +246,8 @@ static const MemoryRegionOps stm32f2xx_adc_ops = { .read = stm32f2xx_adc_read, .write = stm32f2xx_adc_write, .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, }; static const VMStateDescription vmstate_stm32f2xx_adc = { @@ -278,7 +280,7 @@ static void stm32f2xx_adc_init(Object *obj) sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); memory_region_init_io(&s->mmio, obj, &stm32f2xx_adc_ops, s, - TYPE_STM32F2XX_ADC, 0xFF); + TYPE_STM32F2XX_ADC, 0x100); sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); } diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index f1bcc14..cca5b5a 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -125,6 +125,17 @@ static void bcm2835_peripherals_init(Object *obj) OBJECT(&s->sdhci.sdbus)); object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", OBJECT(&s->sdhost.sdbus)); + + /* Mphi */ + sysbus_init_child_obj(obj, "mphi", &s->mphi, sizeof(s->mphi), + TYPE_BCM2835_MPHI); + + /* DWC2 */ + sysbus_init_child_obj(obj, "dwc2", &s->dwc2, sizeof(s->dwc2), + TYPE_DWC2_USB); + + object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr", + OBJECT(&s->gpu_bus_mr)); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -360,6 +371,32 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus"); + /* Mphi */ + object_property_set_bool(OBJECT(&s->mphi), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, MPHI_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mphi), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->mphi), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_HOSTPORT)); + + /* DWC2 */ + object_property_set_bool(OBJECT(&s->dwc2), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, USB_OTG_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dwc2), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->dwc2), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_USB)); + create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40); create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000); create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000); @@ -373,7 +410,6 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) create_unimp(s, &s->otp, "bcm2835-otp", OTP_OFFSET, 0x80); create_unimp(s, &s->dbus, "bcm2835-dbus", DBUS_OFFSET, 0x8000); create_unimp(s, &s->ave0, "bcm2835-ave0", AVE0_OFFSET, 0x8000); - create_unimp(s, &s->dwc2, "dwc-usb2", USB_OTG_OFFSET, 0x1000); create_unimp(s, &s->sdramc, "bcm2835-sdramc", SDRAMC_OFFSET, 0x100); } diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index 336c9ba..e649f89 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -26,6 +26,7 @@ #include "sysemu/blockdev.h" #include "sysemu/qtest.h" #include "qemu/cutils.h" +#include "qemu/log.h" static struct { hwaddr io_base; @@ -112,7 +113,9 @@ static uint64_t pxa2xx_pm_read(void *opaque, hwaddr addr, return s->pm_regs[addr >> 2]; default: fail: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -143,8 +146,9 @@ static void pxa2xx_pm_write(void *opaque, hwaddr addr, s->pm_regs[addr >> 2] = value; break; } - - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -185,7 +189,9 @@ static uint64_t pxa2xx_cm_read(void *opaque, hwaddr addr, return s->cm_regs[CCCR >> 2] | (3 << 28); default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -210,7 +216,9 @@ static void pxa2xx_cm_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -415,7 +423,9 @@ static uint64_t pxa2xx_mm_read(void *opaque, hwaddr addr, return s->mm_regs[addr >> 2]; /* fall through */ default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -434,7 +444,9 @@ static void pxa2xx_mm_write(void *opaque, hwaddr addr, } default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -641,7 +653,9 @@ static uint64_t pxa2xx_ssp_read(void *opaque, hwaddr addr, case SSACD: return s->ssacd; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -733,7 +747,9 @@ static void pxa2xx_ssp_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -995,7 +1011,9 @@ static uint64_t pxa2xx_rtc_read(void *opaque, hwaddr addr, else return s->last_swcr; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1101,7 +1119,9 @@ static void pxa2xx_rtc_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1354,7 +1374,9 @@ static uint64_t pxa2xx_i2c_read(void *opaque, hwaddr addr, s->ibmr = 0; return s->ibmr; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1427,7 +1449,9 @@ static void pxa2xx_i2c_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1628,7 +1652,9 @@ static uint64_t pxa2xx_i2s_read(void *opaque, hwaddr addr, } return 0; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1685,7 +1711,9 @@ static void pxa2xx_i2s_write(void *opaque, hwaddr addr, } break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1870,7 +1898,9 @@ static uint64_t pxa2xx_fir_read(void *opaque, hwaddr addr, case ICFOR: return s->rx_len; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1922,7 +1952,9 @@ static void pxa2xx_fir_write(void *opaque, hwaddr addr, case ICFOR: break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } diff --git a/hw/input/pxa2xx_keypad.c b/hw/input/pxa2xx_keypad.c index 31862a7d..62aa6f6 100644 --- a/hw/input/pxa2xx_keypad.c +++ b/hw/input/pxa2xx_keypad.c @@ -12,7 +12,7 @@ */ #include "qemu/osdep.h" -#include "hw/hw.h" +#include "qemu/log.h" #include "hw/irq.h" #include "migration/vmstate.h" #include "hw/arm/pxa.h" @@ -233,7 +233,9 @@ static uint64_t pxa2xx_keypad_read(void *opaque, hwaddr offset, return s->kpkdi; break; default: - hw_error("%s: Bad offset " REG_FMT "\n", __func__, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } return 0; @@ -280,7 +282,9 @@ static void pxa2xx_keypad_write(void *opaque, hwaddr offset, break; default: - hw_error("%s: Bad offset " REG_FMT "\n", __func__, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } } diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index b25181b..60a9d80 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -56,6 +56,7 @@ common-obj-$(CONFIG_OMAP) += omap_l4.o common-obj-$(CONFIG_OMAP) += omap_sdrc.o common-obj-$(CONFIG_OMAP) += omap_tap.o common-obj-$(CONFIG_RASPI) += bcm2835_mbox.o +common-obj-$(CONFIG_RASPI) += bcm2835_mphi.o common-obj-$(CONFIG_RASPI) += bcm2835_property.o common-obj-$(CONFIG_RASPI) += bcm2835_rng.o common-obj-$(CONFIG_RASPI) += bcm2835_thermal.o diff --git a/hw/misc/bcm2835_mphi.c b/hw/misc/bcm2835_mphi.c new file mode 100644 index 0000000..0428e10 --- /dev/null +++ b/hw/misc/bcm2835_mphi.c @@ -0,0 +1,191 @@ +/* + * BCM2835 SOC MPHI emulation + * + * Very basic emulation, only providing the FIQ interrupt needed to + * allow the dwc-otg USB host controller driver in the Raspbian kernel + * to function. + * + * Copyright (c) 2020 Paul Zimmerman <pauldzim@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/misc/bcm2835_mphi.h" +#include "migration/vmstate.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" + +static inline void mphi_raise_irq(BCM2835MphiState *s) +{ + qemu_set_irq(s->irq, 1); +} + +static inline void mphi_lower_irq(BCM2835MphiState *s) +{ + qemu_set_irq(s->irq, 0); +} + +static uint64_t mphi_reg_read(void *ptr, hwaddr addr, unsigned size) +{ + BCM2835MphiState *s = ptr; + uint32_t val = 0; + + switch (addr) { + case 0x28: /* outdda */ + val = s->outdda; + break; + case 0x2c: /* outddb */ + val = s->outddb; + break; + case 0x4c: /* ctrl */ + val = s->ctrl; + val |= 1 << 17; + break; + case 0x50: /* intstat */ + val = s->intstat; + break; + case 0x1f0: /* swirq_set */ + val = s->swirq; + break; + case 0x1f4: /* swirq_clr */ + val = s->swirq; + break; + default: + qemu_log_mask(LOG_UNIMP, "read from unknown register"); + break; + } + + return val; +} + +static void mphi_reg_write(void *ptr, hwaddr addr, uint64_t val, unsigned size) +{ + BCM2835MphiState *s = ptr; + int do_irq = 0; + + switch (addr) { + case 0x28: /* outdda */ + s->outdda = val; + break; + case 0x2c: /* outddb */ + s->outddb = val; + if (val & (1 << 29)) { + do_irq = 1; + } + break; + case 0x4c: /* ctrl */ + s->ctrl = val; + if (val & (1 << 16)) { + do_irq = -1; + } + break; + case 0x50: /* intstat */ + s->intstat = val; + if (val & ((1 << 16) | (1 << 29))) { + do_irq = -1; + } + break; + case 0x1f0: /* swirq_set */ + s->swirq |= val; + do_irq = 1; + break; + case 0x1f4: /* swirq_clr */ + s->swirq &= ~val; + do_irq = -1; + break; + default: + qemu_log_mask(LOG_UNIMP, "write to unknown register"); + return; + } + + if (do_irq > 0) { + mphi_raise_irq(s); + } else if (do_irq < 0) { + mphi_lower_irq(s); + } +} + +static const MemoryRegionOps mphi_mmio_ops = { + .read = mphi_reg_read, + .write = mphi_reg_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void mphi_reset(DeviceState *dev) +{ + BCM2835MphiState *s = BCM2835_MPHI(dev); + + s->outdda = 0; + s->outddb = 0; + s->ctrl = 0; + s->intstat = 0; + s->swirq = 0; +} + +static void mphi_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + BCM2835MphiState *s = BCM2835_MPHI(dev); + + sysbus_init_irq(sbd, &s->irq); +} + +static void mphi_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + BCM2835MphiState *s = BCM2835_MPHI(obj); + + memory_region_init_io(&s->iomem, obj, &mphi_mmio_ops, s, "mphi", MPHI_MMIO_SIZE); + sysbus_init_mmio(sbd, &s->iomem); +} + +const VMStateDescription vmstate_mphi_state = { + .name = "mphi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(outdda, BCM2835MphiState), + VMSTATE_UINT32(outddb, BCM2835MphiState), + VMSTATE_UINT32(ctrl, BCM2835MphiState), + VMSTATE_UINT32(intstat, BCM2835MphiState), + VMSTATE_UINT32(swirq, BCM2835MphiState), + VMSTATE_END_OF_LIST() + } +}; + +static void mphi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mphi_realize; + dc->reset = mphi_reset; + dc->vmsd = &vmstate_mphi_state; +} + +static const TypeInfo bcm2835_mphi_type_info = { + .name = TYPE_BCM2835_MPHI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835MphiState), + .instance_init = mphi_init, + .class_init = mphi_class_init, +}; + +static void bcm2835_mphi_register_types(void) +{ + type_register_static(&bcm2835_mphi_type_info); +} + +type_init(bcm2835_mphi_register_types) diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c index 2dd9a63..43b2f14 100644 --- a/hw/ssi/imx_spi.c +++ b/hw/ssi/imx_spi.c @@ -182,7 +182,7 @@ static void imx_spi_flush_txfifo(IMXSPIState *s) rx = 0; - while (tx_burst) { + while (tx_burst > 0) { uint8_t byte = tx & 0xff; DPRINTF("writing 0x%02x\n", (uint32_t)byte); @@ -206,7 +206,7 @@ static void imx_spi_flush_txfifo(IMXSPIState *s) if (fifo32_is_full(&s->rx_fifo)) { s->regs[ECSPI_STATREG] |= ECSPI_STATREG_RO; } else { - fifo32_push(&s->rx_fifo, (uint8_t)rx); + fifo32_push(&s->rx_fifo, rx); } if (s->burst_length <= 0) { diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig index 464348b..d4d8c37 100644 --- a/hw/usb/Kconfig +++ b/hw/usb/Kconfig @@ -46,6 +46,11 @@ config USB_MUSB bool select USB +config USB_DWC2 + bool + default y + select USB + config TUSB6010 bool select USB_MUSB diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 66835e5..fa5c3fa 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -12,6 +12,7 @@ common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o +common-obj-$(CONFIG_USB_DWC2) += hcd-dwc2.o common-obj-$(CONFIG_TUSB6010) += tusb6010.o common-obj-$(CONFIG_IMX) += chipidea.o diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index 4eba475..a5204b6 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -229,6 +229,9 @@ static void usb_msd_copy_data(MSDState *s, USBPacket *p) usb_packet_copy(p, scsi_req_get_buf(s->req) + s->scsi_off, len); s->scsi_len -= len; s->scsi_off += len; + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->scsi_len == 0 || s->data_len == 0) { scsi_req_continue(s->req); @@ -303,6 +306,9 @@ static void usb_msd_command_complete(SCSIRequest *req, uint32_t status, size_t r if (s->data_len) { int len = (p->iov.size - p->actual_length); usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; } if (s->data_len == 0) { @@ -469,6 +475,9 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket *p) int len = p->iov.size - p->actual_length; if (len) { usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->data_len == 0) { s->mode = USB_MSDM_CSW; @@ -528,13 +537,17 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket *p) int len = p->iov.size - p->actual_length; if (len) { usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->data_len == 0) { s->mode = USB_MSDM_CSW; } } } - if (p->actual_length < p->iov.size) { + if (p->actual_length < p->iov.size && (p->short_not_ok || + s->scsi_len >= p->ep->max_packet_size)) { DPRINTF("Deferring packet %p [wait data-in]\n", p); s->packet = p; p->status = USB_RET_ASYNC; diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c new file mode 100644 index 0000000..72cbd05 --- /dev/null +++ b/hw/usb/hcd-dwc2.c @@ -0,0 +1,1417 @@ +/* + * dwc-hsotg (dwc2) USB host controller emulation + * + * Based on hw/usb/hcd-ehci.c and hw/usb/hcd-ohci.c + * + * Note that to use this emulation with the dwc-otg driver in the + * Raspbian kernel, you must pass the option "dwc_otg.fiq_fsm_enable=0" + * on the kernel command line. + * + * Some useful documentation used to develop this emulation can be + * found online (as of April 2020) at: + * + * http://www.capital-micro.com/PDF/CME-M7_Family_User_Guide_EN.pdf + * which has a pretty complete description of the controller starting + * on page 370. + * + * https://sourceforge.net/p/wive-ng/wive-ng-mt/ci/master/tree/docs/DataSheets/RT3050_5x_V2.0_081408_0902.pdf + * which has a description of the controller registers starting on + * page 130. + * + * Copyright (c) 2020 Paul Zimmerman <pauldzim@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/usb/dwc2-regs.h" +#include "hw/usb/hcd-dwc2.h" +#include "migration/vmstate.h" +#include "trace.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "hw/qdev-properties.h" + +#define USB_HZ_FS 12000000 +#define USB_HZ_HS 96000000 +#define USB_FRMINTVL 12000 + +/* nifty macros from Arnon's EHCI version */ +#define get_field(data, field) \ + (((data) & field##_MASK) >> field##_SHIFT) + +#define set_field(data, newval, field) do { \ + uint32_t val = *(data); \ + val &= ~field##_MASK; \ + val |= ((newval) << field##_SHIFT) & field##_MASK; \ + *(data) = val; \ +} while (0) + +#define get_bit(data, bitmask) \ + (!!((data) & (bitmask))) + +/* update irq line */ +static inline void dwc2_update_irq(DWC2State *s) +{ + static int oldlevel; + int level = 0; + + if ((s->gintsts & s->gintmsk) && (s->gahbcfg & GAHBCFG_GLBL_INTR_EN)) { + level = 1; + } + if (level != oldlevel) { + oldlevel = level; + trace_usb_dwc2_update_irq(level); + qemu_set_irq(s->irq, level); + } +} + +/* flag interrupt condition */ +static inline void dwc2_raise_global_irq(DWC2State *s, uint32_t intr) +{ + if (!(s->gintsts & intr)) { + s->gintsts |= intr; + trace_usb_dwc2_raise_global_irq(intr); + dwc2_update_irq(s); + } +} + +static inline void dwc2_lower_global_irq(DWC2State *s, uint32_t intr) +{ + if (s->gintsts & intr) { + s->gintsts &= ~intr; + trace_usb_dwc2_lower_global_irq(intr); + dwc2_update_irq(s); + } +} + +static inline void dwc2_raise_host_irq(DWC2State *s, uint32_t host_intr) +{ + if (!(s->haint & host_intr)) { + s->haint |= host_intr; + s->haint &= 0xffff; + trace_usb_dwc2_raise_host_irq(host_intr); + if (s->haint & s->haintmsk) { + dwc2_raise_global_irq(s, GINTSTS_HCHINT); + } + } +} + +static inline void dwc2_lower_host_irq(DWC2State *s, uint32_t host_intr) +{ + if (s->haint & host_intr) { + s->haint &= ~host_intr; + trace_usb_dwc2_lower_host_irq(host_intr); + if (!(s->haint & s->haintmsk)) { + dwc2_lower_global_irq(s, GINTSTS_HCHINT); + } + } +} + +static inline void dwc2_update_hc_irq(DWC2State *s, int index) +{ + uint32_t host_intr = 1 << (index >> 3); + + if (s->hreg1[index + 2] & s->hreg1[index + 3]) { + dwc2_raise_host_irq(s, host_intr); + } else { + dwc2_lower_host_irq(s, host_intr); + } +} + +/* set a timer for EOF */ +static void dwc2_eof_timer(DWC2State *s) +{ + timer_mod(s->eof_timer, s->sof_time + s->usb_frame_time); +} + +/* Set a timer for EOF and generate SOF event */ +static void dwc2_sof(DWC2State *s) +{ + s->sof_time += s->usb_frame_time; + trace_usb_dwc2_sof(s->sof_time); + dwc2_eof_timer(s); + dwc2_raise_global_irq(s, GINTSTS_SOF); +} + +/* Do frame processing on frame boundary */ +static void dwc2_frame_boundary(void *opaque) +{ + DWC2State *s = opaque; + int64_t now; + uint16_t frcnt; + + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* Frame boundary, so do EOF stuff here */ + + /* Increment frame number */ + frcnt = (uint16_t)((now - s->sof_time) / s->fi); + s->frame_number = (s->frame_number + frcnt) & 0xffff; + s->hfnum = s->frame_number & HFNUM_MAX_FRNUM; + + /* Do SOF stuff here */ + dwc2_sof(s); +} + +/* Start sending SOF tokens on the USB bus */ +static void dwc2_bus_start(DWC2State *s) +{ + trace_usb_dwc2_bus_start(); + s->sof_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + dwc2_eof_timer(s); +} + +/* Stop sending SOF tokens on the USB bus */ +static void dwc2_bus_stop(DWC2State *s) +{ + trace_usb_dwc2_bus_stop(); + timer_del(s->eof_timer); +} + +static USBDevice *dwc2_find_device(DWC2State *s, uint8_t addr) +{ + USBDevice *dev; + + trace_usb_dwc2_find_device(addr); + + if (!(s->hprt0 & HPRT0_ENA)) { + trace_usb_dwc2_port_disabled(0); + } else { + dev = usb_find_device(&s->uport, addr); + if (dev != NULL) { + trace_usb_dwc2_device_found(0); + return dev; + } + } + + trace_usb_dwc2_device_not_found(); + return NULL; +} + +static const char *pstatus[] = { + "USB_RET_SUCCESS", "USB_RET_NODEV", "USB_RET_NAK", "USB_RET_STALL", + "USB_RET_BABBLE", "USB_RET_IOERROR", "USB_RET_ASYNC", + "USB_RET_ADD_TO_QUEUE", "USB_RET_REMOVE_FROM_QUEUE" +}; + +static uint32_t pintr[] = { + HCINTMSK_XFERCOMPL, HCINTMSK_XACTERR, HCINTMSK_NAK, HCINTMSK_STALL, + HCINTMSK_BBLERR, HCINTMSK_XACTERR, HCINTMSK_XACTERR, HCINTMSK_XACTERR, + HCINTMSK_XACTERR +}; + +static const char *types[] = { + "Ctrl", "Isoc", "Bulk", "Intr" +}; + +static const char *dirs[] = { + "Out", "In" +}; + +static void dwc2_handle_packet(DWC2State *s, uint32_t devadr, USBDevice *dev, + USBEndpoint *ep, uint32_t index, bool send) +{ + DWC2Packet *p; + uint32_t hcchar = s->hreg1[index]; + uint32_t hctsiz = s->hreg1[index + 4]; + uint32_t hcdma = s->hreg1[index + 5]; + uint32_t chan, epnum, epdir, eptype, mps, pid, pcnt, len, tlen, intr = 0; + uint32_t tpcnt, stsidx, actual = 0; + bool do_intr = false, done = false; + + epnum = get_field(hcchar, HCCHAR_EPNUM); + epdir = get_bit(hcchar, HCCHAR_EPDIR); + eptype = get_field(hcchar, HCCHAR_EPTYPE); + mps = get_field(hcchar, HCCHAR_MPS); + pid = get_field(hctsiz, TSIZ_SC_MC_PID); + pcnt = get_field(hctsiz, TSIZ_PKTCNT); + len = get_field(hctsiz, TSIZ_XFERSIZE); + assert(len <= DWC2_MAX_XFER_SIZE); + chan = index >> 3; + p = &s->packet[chan]; + + trace_usb_dwc2_handle_packet(chan, dev, &p->packet, epnum, types[eptype], + dirs[epdir], mps, len, pcnt); + + if (eptype == USB_ENDPOINT_XFER_CONTROL && pid == TSIZ_SC_MC_PID_SETUP) { + pid = USB_TOKEN_SETUP; + } else { + pid = epdir ? USB_TOKEN_IN : USB_TOKEN_OUT; + } + + if (send) { + tlen = len; + if (p->small) { + if (tlen > mps) { + tlen = mps; + } + } + + if (pid != USB_TOKEN_IN) { + trace_usb_dwc2_memory_read(hcdma, tlen); + if (dma_memory_read(&s->dma_as, hcdma, + s->usb_buf[chan], tlen) != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dma_memory_read failed\n", + __func__); + } + } + + usb_packet_init(&p->packet); + usb_packet_setup(&p->packet, pid, ep, 0, hcdma, + pid != USB_TOKEN_IN, true); + usb_packet_addbuf(&p->packet, s->usb_buf[chan], tlen); + p->async = DWC2_ASYNC_NONE; + usb_handle_packet(dev, &p->packet); + } else { + tlen = p->len; + } + + stsidx = -p->packet.status; + assert(stsidx < sizeof(pstatus) / sizeof(*pstatus)); + actual = p->packet.actual_length; + trace_usb_dwc2_packet_status(pstatus[stsidx], actual); + +babble: + if (p->packet.status != USB_RET_SUCCESS && + p->packet.status != USB_RET_NAK && + p->packet.status != USB_RET_STALL && + p->packet.status != USB_RET_ASYNC) { + trace_usb_dwc2_packet_error(pstatus[stsidx]); + } + + if (p->packet.status == USB_RET_ASYNC) { + trace_usb_dwc2_async_packet(&p->packet, chan, dev, epnum, + dirs[epdir], tlen); + usb_device_flush_ep_queue(dev, ep); + assert(p->async != DWC2_ASYNC_INFLIGHT); + p->devadr = devadr; + p->epnum = epnum; + p->epdir = epdir; + p->mps = mps; + p->pid = pid; + p->index = index; + p->pcnt = pcnt; + p->len = tlen; + p->async = DWC2_ASYNC_INFLIGHT; + p->needs_service = false; + return; + } + + if (p->packet.status == USB_RET_SUCCESS) { + if (actual > tlen) { + p->packet.status = USB_RET_BABBLE; + goto babble; + } + + if (pid == USB_TOKEN_IN) { + trace_usb_dwc2_memory_write(hcdma, actual); + if (dma_memory_write(&s->dma_as, hcdma, s->usb_buf[chan], + actual) != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dma_memory_write failed\n", + __func__); + } + } + + tpcnt = actual / mps; + if (actual % mps) { + tpcnt++; + if (pid == USB_TOKEN_IN) { + done = true; + } + } + + pcnt -= tpcnt < pcnt ? tpcnt : pcnt; + set_field(&hctsiz, pcnt, TSIZ_PKTCNT); + len -= actual < len ? actual : len; + set_field(&hctsiz, len, TSIZ_XFERSIZE); + s->hreg1[index + 4] = hctsiz; + hcdma += actual; + s->hreg1[index + 5] = hcdma; + + if (!pcnt || len == 0 || actual == 0) { + done = true; + } + } else { + intr |= pintr[stsidx]; + if (p->packet.status == USB_RET_NAK && + (eptype == USB_ENDPOINT_XFER_CONTROL || + eptype == USB_ENDPOINT_XFER_BULK)) { + /* + * for ctrl/bulk, automatically retry on NAK, + * but send the interrupt anyway + */ + intr &= ~HCINTMSK_RESERVED14_31; + s->hreg1[index + 2] |= intr; + do_intr = true; + } else { + intr |= HCINTMSK_CHHLTD; + done = true; + } + } + + usb_packet_cleanup(&p->packet); + + if (done) { + hcchar &= ~HCCHAR_CHENA; + s->hreg1[index] = hcchar; + if (!(intr & HCINTMSK_CHHLTD)) { + intr |= HCINTMSK_CHHLTD | HCINTMSK_XFERCOMPL; + } + intr &= ~HCINTMSK_RESERVED14_31; + s->hreg1[index + 2] |= intr; + p->needs_service = false; + trace_usb_dwc2_packet_done(pstatus[stsidx], actual, len, pcnt); + dwc2_update_hc_irq(s, index); + return; + } + + p->devadr = devadr; + p->epnum = epnum; + p->epdir = epdir; + p->mps = mps; + p->pid = pid; + p->index = index; + p->pcnt = pcnt; + p->len = len; + p->needs_service = true; + trace_usb_dwc2_packet_next(pstatus[stsidx], len, pcnt); + if (do_intr) { + dwc2_update_hc_irq(s, index); + } +} + +/* Attach or detach a device on root hub */ + +static const char *speeds[] = { + "low", "full", "high" +}; + +static void dwc2_attach(USBPort *port) +{ + DWC2State *s = port->opaque; + int hispd = 0; + + trace_usb_dwc2_attach(port); + assert(port->index == 0); + + if (!port->dev || !port->dev->attached) { + return; + } + + assert(port->dev->speed <= USB_SPEED_HIGH); + trace_usb_dwc2_attach_speed(speeds[port->dev->speed]); + s->hprt0 &= ~HPRT0_SPD_MASK; + + switch (port->dev->speed) { + case USB_SPEED_LOW: + s->hprt0 |= HPRT0_SPD_LOW_SPEED << HPRT0_SPD_SHIFT; + break; + case USB_SPEED_FULL: + s->hprt0 |= HPRT0_SPD_FULL_SPEED << HPRT0_SPD_SHIFT; + break; + case USB_SPEED_HIGH: + s->hprt0 |= HPRT0_SPD_HIGH_SPEED << HPRT0_SPD_SHIFT; + hispd = 1; + break; + } + + if (hispd) { + s->usb_frame_time = NANOSECONDS_PER_SECOND / 8000; /* 125000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_HS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_HS; /* 10.4 */ + } else { + s->usb_bit_time = 1; + } + } else { + s->usb_frame_time = NANOSECONDS_PER_SECOND / 1000; /* 1000000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_FS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_FS; /* 83.3 */ + } else { + s->usb_bit_time = 1; + } + } + + s->fi = USB_FRMINTVL - 1; + s->hprt0 |= HPRT0_CONNDET | HPRT0_CONNSTS; + + dwc2_bus_start(s); + dwc2_raise_global_irq(s, GINTSTS_PRTINT); +} + +static void dwc2_detach(USBPort *port) +{ + DWC2State *s = port->opaque; + + trace_usb_dwc2_detach(port); + assert(port->index == 0); + + dwc2_bus_stop(s); + + s->hprt0 &= ~(HPRT0_SPD_MASK | HPRT0_SUSP | HPRT0_ENA | HPRT0_CONNSTS); + s->hprt0 |= HPRT0_CONNDET | HPRT0_ENACHG; + + dwc2_raise_global_irq(s, GINTSTS_PRTINT); +} + +static void dwc2_child_detach(USBPort *port, USBDevice *child) +{ + trace_usb_dwc2_child_detach(port, child); + assert(port->index == 0); +} + +static void dwc2_wakeup(USBPort *port) +{ + DWC2State *s = port->opaque; + + trace_usb_dwc2_wakeup(port); + assert(port->index == 0); + + if (s->hprt0 & HPRT0_SUSP) { + s->hprt0 |= HPRT0_RES; + dwc2_raise_global_irq(s, GINTSTS_PRTINT); + } + + qemu_bh_schedule(s->async_bh); +} + +static void dwc2_async_packet_complete(USBPort *port, USBPacket *packet) +{ + DWC2State *s = port->opaque; + DWC2Packet *p; + USBDevice *dev; + USBEndpoint *ep; + + assert(port->index == 0); + p = container_of(packet, DWC2Packet, packet); + dev = dwc2_find_device(s, p->devadr); + ep = usb_ep_get(dev, p->pid, p->epnum); + trace_usb_dwc2_async_packet_complete(port, packet, p->index >> 3, dev, + p->epnum, dirs[p->epdir], p->len); + assert(p->async == DWC2_ASYNC_INFLIGHT); + + if (packet->status == USB_RET_REMOVE_FROM_QUEUE) { + usb_cancel_packet(packet); + usb_packet_cleanup(packet); + return; + } + + dwc2_handle_packet(s, p->devadr, dev, ep, p->index, false); + + p->async = DWC2_ASYNC_FINISHED; + qemu_bh_schedule(s->async_bh); +} + +static USBPortOps dwc2_port_ops = { + .attach = dwc2_attach, + .detach = dwc2_detach, + .child_detach = dwc2_child_detach, + .wakeup = dwc2_wakeup, + .complete = dwc2_async_packet_complete, +}; + +static uint32_t dwc2_get_frame_remaining(DWC2State *s) +{ + uint32_t fr = 0; + int64_t tks; + + tks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->sof_time; + if (tks < 0) { + tks = 0; + } + + /* avoid muldiv if possible */ + if (tks >= s->usb_frame_time) { + goto out; + } + if (tks < s->usb_bit_time) { + fr = s->fi; + goto out; + } + + /* tks = number of ns since SOF, divided by 83 (fs) or 10 (hs) */ + tks = tks / s->usb_bit_time; + if (tks >= (int64_t)s->fi) { + goto out; + } + + /* remaining = frame interval minus tks */ + fr = (uint32_t)((int64_t)s->fi - tks); + +out: + return fr; +} + +static void dwc2_work_bh(void *opaque) +{ + DWC2State *s = opaque; + DWC2Packet *p; + USBDevice *dev; + USBEndpoint *ep; + int64_t t_now, expire_time; + int chan; + bool found = false; + + trace_usb_dwc2_work_bh(); + if (s->working) { + return; + } + s->working = true; + + t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + chan = s->next_chan; + + do { + p = &s->packet[chan]; + if (p->needs_service) { + dev = dwc2_find_device(s, p->devadr); + ep = usb_ep_get(dev, p->pid, p->epnum); + trace_usb_dwc2_work_bh_service(s->next_chan, chan, dev, p->epnum); + dwc2_handle_packet(s, p->devadr, dev, ep, p->index, true); + found = true; + } + if (++chan == DWC2_NB_CHAN) { + chan = 0; + } + if (found) { + s->next_chan = chan; + trace_usb_dwc2_work_bh_next(chan); + } + } while (chan != s->next_chan); + + if (found) { + expire_time = t_now + NANOSECONDS_PER_SECOND / 4000; + timer_mod(s->frame_timer, expire_time); + } + s->working = false; +} + +static void dwc2_enable_chan(DWC2State *s, uint32_t index) +{ + USBDevice *dev; + USBEndpoint *ep; + uint32_t hcchar; + uint32_t hctsiz; + uint32_t devadr, epnum, epdir, eptype, pid, len; + DWC2Packet *p; + + assert((index >> 3) < DWC2_NB_CHAN); + p = &s->packet[index >> 3]; + hcchar = s->hreg1[index]; + hctsiz = s->hreg1[index + 4]; + devadr = get_field(hcchar, HCCHAR_DEVADDR); + epnum = get_field(hcchar, HCCHAR_EPNUM); + epdir = get_bit(hcchar, HCCHAR_EPDIR); + eptype = get_field(hcchar, HCCHAR_EPTYPE); + pid = get_field(hctsiz, TSIZ_SC_MC_PID); + len = get_field(hctsiz, TSIZ_XFERSIZE); + + dev = dwc2_find_device(s, devadr); + + trace_usb_dwc2_enable_chan(index >> 3, dev, &p->packet, epnum); + if (dev == NULL) { + return; + } + + if (eptype == USB_ENDPOINT_XFER_CONTROL && pid == TSIZ_SC_MC_PID_SETUP) { + pid = USB_TOKEN_SETUP; + } else { + pid = epdir ? USB_TOKEN_IN : USB_TOKEN_OUT; + } + + ep = usb_ep_get(dev, pid, epnum); + + /* + * Hack: Networking doesn't like us delivering large transfers, it kind + * of works but the latency is horrible. So if the transfer is <= the mtu + * size, we take that as a hint that this might be a network transfer, + * and do the transfer packet-by-packet. + */ + if (len > 1536) { + p->small = false; + } else { + p->small = true; + } + + dwc2_handle_packet(s, devadr, dev, ep, index, true); + qemu_bh_schedule(s->async_bh); +} + +static const char *glbregnm[] = { + "GOTGCTL ", "GOTGINT ", "GAHBCFG ", "GUSBCFG ", "GRSTCTL ", + "GINTSTS ", "GINTMSK ", "GRXSTSR ", "GRXSTSP ", "GRXFSIZ ", + "GNPTXFSIZ", "GNPTXSTS ", "GI2CCTL ", "GPVNDCTL ", "GGPIO ", + "GUID ", "GSNPSID ", "GHWCFG1 ", "GHWCFG2 ", "GHWCFG3 ", + "GHWCFG4 ", "GLPMCFG ", "GPWRDN ", "GDFIFOCFG", "GADPCTL ", + "GREFCLK ", "GINTMSK2 ", "GINTSTS2 " +}; + +static uint64_t dwc2_glbreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr <= GINTSTS2); + val = s->glbreg[index]; + + switch (addr) { + case GRSTCTL: + /* clear any self-clearing bits that were set */ + val &= ~(GRSTCTL_TXFFLSH | GRSTCTL_RXFFLSH | GRSTCTL_IN_TKNQ_FLSH | + GRSTCTL_FRMCNTRRST | GRSTCTL_HSFTRST | GRSTCTL_CSFTRST); + s->glbreg[index] = val; + break; + default: + break; + } + + trace_usb_dwc2_glbreg_read(addr, glbregnm[index], val); + return val; +} + +static void dwc2_glbreg_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + int iflg = 0; + + assert(addr <= GINTSTS2); + mmio = &s->glbreg[index]; + old = *mmio; + + switch (addr) { + case GOTGCTL: + /* don't allow setting of read-only bits */ + val &= ~(GOTGCTL_MULT_VALID_BC_MASK | GOTGCTL_BSESVLD | + GOTGCTL_ASESVLD | GOTGCTL_DBNC_SHORT | GOTGCTL_CONID_B | + GOTGCTL_HSTNEGSCS | GOTGCTL_SESREQSCS); + /* don't allow clearing of read-only bits */ + val |= old & (GOTGCTL_MULT_VALID_BC_MASK | GOTGCTL_BSESVLD | + GOTGCTL_ASESVLD | GOTGCTL_DBNC_SHORT | GOTGCTL_CONID_B | + GOTGCTL_HSTNEGSCS | GOTGCTL_SESREQSCS); + break; + case GAHBCFG: + if ((val & GAHBCFG_GLBL_INTR_EN) && !(old & GAHBCFG_GLBL_INTR_EN)) { + iflg = 1; + } + break; + case GRSTCTL: + val |= GRSTCTL_AHBIDLE; + val &= ~GRSTCTL_DMAREQ; + if (!(old & GRSTCTL_TXFFLSH) && (val & GRSTCTL_TXFFLSH)) { + /* TODO - TX fifo flush */ + qemu_log_mask(LOG_UNIMP, "Tx FIFO flush not implemented\n"); + } + if (!(old & GRSTCTL_RXFFLSH) && (val & GRSTCTL_RXFFLSH)) { + /* TODO - RX fifo flush */ + qemu_log_mask(LOG_UNIMP, "Rx FIFO flush not implemented\n"); + } + if (!(old & GRSTCTL_IN_TKNQ_FLSH) && (val & GRSTCTL_IN_TKNQ_FLSH)) { + /* TODO - device IN token queue flush */ + qemu_log_mask(LOG_UNIMP, "Token queue flush not implemented\n"); + } + if (!(old & GRSTCTL_FRMCNTRRST) && (val & GRSTCTL_FRMCNTRRST)) { + /* TODO - host frame counter reset */ + qemu_log_mask(LOG_UNIMP, "Frame counter reset not implemented\n"); + } + if (!(old & GRSTCTL_HSFTRST) && (val & GRSTCTL_HSFTRST)) { + /* TODO - host soft reset */ + qemu_log_mask(LOG_UNIMP, "Host soft reset not implemented\n"); + } + if (!(old & GRSTCTL_CSFTRST) && (val & GRSTCTL_CSFTRST)) { + /* TODO - core soft reset */ + qemu_log_mask(LOG_UNIMP, "Core soft reset not implemented\n"); + } + /* don't allow clearing of self-clearing bits */ + val |= old & (GRSTCTL_TXFFLSH | GRSTCTL_RXFFLSH | + GRSTCTL_IN_TKNQ_FLSH | GRSTCTL_FRMCNTRRST | + GRSTCTL_HSFTRST | GRSTCTL_CSFTRST); + break; + case GINTSTS: + /* clear the write-1-to-clear bits */ + val |= ~old; + val = ~val; + /* don't allow clearing of read-only bits */ + val |= old & (GINTSTS_PTXFEMP | GINTSTS_HCHINT | GINTSTS_PRTINT | + GINTSTS_OEPINT | GINTSTS_IEPINT | GINTSTS_GOUTNAKEFF | + GINTSTS_GINNAKEFF | GINTSTS_NPTXFEMP | GINTSTS_RXFLVL | + GINTSTS_OTGINT | GINTSTS_CURMODE_HOST); + iflg = 1; + break; + case GINTMSK: + iflg = 1; + break; + default: + break; + } + + trace_usb_dwc2_glbreg_write(addr, glbregnm[index], orig, old, val); + *mmio = val; + + if (iflg) { + dwc2_update_irq(s); + } +} + +static uint64_t dwc2_fszreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr == HPTXFSIZ); + val = s->fszreg[index]; + + trace_usb_dwc2_fszreg_read(addr, val); + return val; +} + +static void dwc2_fszreg_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + + assert(addr == HPTXFSIZ); + mmio = &s->fszreg[index]; + old = *mmio; + + trace_usb_dwc2_fszreg_write(addr, orig, old, val); + *mmio = val; +} + +static const char *hreg0nm[] = { + "HCFG ", "HFIR ", "HFNUM ", "<rsvd> ", "HPTXSTS ", + "HAINT ", "HAINTMSK ", "HFLBADDR ", "<rsvd> ", "<rsvd> ", + "<rsvd> ", "<rsvd> ", "<rsvd> ", "<rsvd> ", "<rsvd> ", + "<rsvd> ", "HPRT0 " +}; + +static uint64_t dwc2_hreg0_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= HCFG && addr <= HPRT0); + val = s->hreg0[index]; + + switch (addr) { + case HFNUM: + val = (dwc2_get_frame_remaining(s) << HFNUM_FRREM_SHIFT) | + (s->hfnum << HFNUM_FRNUM_SHIFT); + break; + default: + break; + } + + trace_usb_dwc2_hreg0_read(addr, hreg0nm[index], val); + return val; +} + +static void dwc2_hreg0_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + USBDevice *dev = s->uport.dev; + uint64_t orig = val; + uint32_t *mmio; + uint32_t tval, told, old; + int prst = 0; + int iflg = 0; + + assert(addr >= HCFG && addr <= HPRT0); + mmio = &s->hreg0[index]; + old = *mmio; + + switch (addr) { + case HFIR: + break; + case HFNUM: + case HPTXSTS: + case HAINT: + qemu_log_mask(LOG_GUEST_ERROR, "%s: write to read-only register\n", + __func__); + return; + case HAINTMSK: + val &= 0xffff; + break; + case HPRT0: + /* don't allow clearing of read-only bits */ + val |= old & (HPRT0_SPD_MASK | HPRT0_LNSTS_MASK | HPRT0_OVRCURRACT | + HPRT0_CONNSTS); + /* don't allow clearing of self-clearing bits */ + val |= old & (HPRT0_SUSP | HPRT0_RES); + /* don't allow setting of self-setting bits */ + if (!(old & HPRT0_ENA) && (val & HPRT0_ENA)) { + val &= ~HPRT0_ENA; + } + /* clear the write-1-to-clear bits */ + tval = val & (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + told = old & (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + tval |= ~told; + tval = ~tval; + tval &= (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + val &= ~(HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + val |= tval; + if (!(val & HPRT0_RST) && (old & HPRT0_RST)) { + if (dev && dev->attached) { + val |= HPRT0_ENA | HPRT0_ENACHG; + prst = 1; + } + } + if (val & (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_CONNDET)) { + iflg = 1; + } else { + iflg = -1; + } + break; + default: + break; + } + + if (prst) { + trace_usb_dwc2_hreg0_write(addr, hreg0nm[index], orig, old, + val & ~HPRT0_CONNDET); + trace_usb_dwc2_hreg0_action("call usb_port_reset"); + usb_port_reset(&s->uport); + val &= ~HPRT0_CONNDET; + } else { + trace_usb_dwc2_hreg0_write(addr, hreg0nm[index], orig, old, val); + } + + *mmio = val; + + if (iflg > 0) { + trace_usb_dwc2_hreg0_action("enable PRTINT"); + dwc2_raise_global_irq(s, GINTSTS_PRTINT); + } else if (iflg < 0) { + trace_usb_dwc2_hreg0_action("disable PRTINT"); + dwc2_lower_global_irq(s, GINTSTS_PRTINT); + } +} + +static const char *hreg1nm[] = { + "HCCHAR ", "HCSPLT ", "HCINT ", "HCINTMSK", "HCTSIZ ", "HCDMA ", + "<rsvd> ", "HCDMAB " +}; + +static uint64_t dwc2_hreg1_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= HCCHAR(0) && addr <= HCDMAB(DWC2_NB_CHAN - 1)); + val = s->hreg1[index]; + + trace_usb_dwc2_hreg1_read(addr, hreg1nm[index & 7], addr >> 5, val); + return val; +} + +static void dwc2_hreg1_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + int iflg = 0; + int enflg = 0; + int disflg = 0; + + assert(addr >= HCCHAR(0) && addr <= HCDMAB(DWC2_NB_CHAN - 1)); + mmio = &s->hreg1[index]; + old = *mmio; + + switch (HSOTG_REG(0x500) + (addr & 0x1c)) { + case HCCHAR(0): + if ((val & HCCHAR_CHDIS) && !(old & HCCHAR_CHDIS)) { + val &= ~(HCCHAR_CHENA | HCCHAR_CHDIS); + disflg = 1; + } else { + val |= old & HCCHAR_CHDIS; + if ((val & HCCHAR_CHENA) && !(old & HCCHAR_CHENA)) { + val &= ~HCCHAR_CHDIS; + enflg = 1; + } else { + val |= old & HCCHAR_CHENA; + } + } + break; + case HCINT(0): + /* clear the write-1-to-clear bits */ + val |= ~old; + val = ~val; + val &= ~HCINTMSK_RESERVED14_31; + iflg = 1; + break; + case HCINTMSK(0): + val &= ~HCINTMSK_RESERVED14_31; + iflg = 1; + break; + case HCDMAB(0): + qemu_log_mask(LOG_GUEST_ERROR, "%s: write to read-only register\n", + __func__); + return; + default: + break; + } + + trace_usb_dwc2_hreg1_write(addr, hreg1nm[index & 7], index >> 3, orig, + old, val); + *mmio = val; + + if (disflg) { + /* set ChHltd in HCINT */ + s->hreg1[(index & ~7) + 2] |= HCINTMSK_CHHLTD; + iflg = 1; + } + + if (enflg) { + dwc2_enable_chan(s, index & ~7); + } + + if (iflg) { + dwc2_update_hc_irq(s, index & ~7); + } +} + +static const char *pcgregnm[] = { + "PCGCTL ", "PCGCCTL1 " +}; + +static uint64_t dwc2_pcgreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= PCGCTL && addr <= PCGCCTL1); + val = s->pcgreg[index]; + + trace_usb_dwc2_pcgreg_read(addr, pcgregnm[index], val); + return val; +} + +static void dwc2_pcgreg_write(void *ptr, hwaddr addr, int index, + uint64_t val, unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + + assert(addr >= PCGCTL && addr <= PCGCCTL1); + mmio = &s->pcgreg[index]; + old = *mmio; + + trace_usb_dwc2_pcgreg_write(addr, pcgregnm[index], orig, old, val); + *mmio = val; +} + +static uint64_t dwc2_hsotg_read(void *ptr, hwaddr addr, unsigned size) +{ + uint64_t val; + + switch (addr) { + case HSOTG_REG(0x000) ... HSOTG_REG(0x0fc): + val = dwc2_glbreg_read(ptr, addr, (addr - HSOTG_REG(0x000)) >> 2, size); + break; + case HSOTG_REG(0x100): + val = dwc2_fszreg_read(ptr, addr, (addr - HSOTG_REG(0x100)) >> 2, size); + break; + case HSOTG_REG(0x104) ... HSOTG_REG(0x3fc): + /* Gadget-mode registers, just return 0 for now */ + val = 0; + break; + case HSOTG_REG(0x400) ... HSOTG_REG(0x4fc): + val = dwc2_hreg0_read(ptr, addr, (addr - HSOTG_REG(0x400)) >> 2, size); + break; + case HSOTG_REG(0x500) ... HSOTG_REG(0x7fc): + val = dwc2_hreg1_read(ptr, addr, (addr - HSOTG_REG(0x500)) >> 2, size); + break; + case HSOTG_REG(0x800) ... HSOTG_REG(0xdfc): + /* Gadget-mode registers, just return 0 for now */ + val = 0; + break; + case HSOTG_REG(0xe00) ... HSOTG_REG(0xffc): + val = dwc2_pcgreg_read(ptr, addr, (addr - HSOTG_REG(0xe00)) >> 2, size); + break; + default: + g_assert_not_reached(); + } + + return val; +} + +static void dwc2_hsotg_write(void *ptr, hwaddr addr, uint64_t val, + unsigned size) +{ + switch (addr) { + case HSOTG_REG(0x000) ... HSOTG_REG(0x0fc): + dwc2_glbreg_write(ptr, addr, (addr - HSOTG_REG(0x000)) >> 2, val, size); + break; + case HSOTG_REG(0x100): + dwc2_fszreg_write(ptr, addr, (addr - HSOTG_REG(0x100)) >> 2, val, size); + break; + case HSOTG_REG(0x104) ... HSOTG_REG(0x3fc): + /* Gadget-mode registers, do nothing for now */ + break; + case HSOTG_REG(0x400) ... HSOTG_REG(0x4fc): + dwc2_hreg0_write(ptr, addr, (addr - HSOTG_REG(0x400)) >> 2, val, size); + break; + case HSOTG_REG(0x500) ... HSOTG_REG(0x7fc): + dwc2_hreg1_write(ptr, addr, (addr - HSOTG_REG(0x500)) >> 2, val, size); + break; + case HSOTG_REG(0x800) ... HSOTG_REG(0xdfc): + /* Gadget-mode registers, do nothing for now */ + break; + case HSOTG_REG(0xe00) ... HSOTG_REG(0xffc): + dwc2_pcgreg_write(ptr, addr, (addr - HSOTG_REG(0xe00)) >> 2, val, size); + break; + default: + g_assert_not_reached(); + } +} + +static const MemoryRegionOps dwc2_mmio_hsotg_ops = { + .read = dwc2_hsotg_read, + .write = dwc2_hsotg_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t dwc2_hreg2_read(void *ptr, hwaddr addr, unsigned size) +{ + /* TODO - implement FIFOs to support slave mode */ + trace_usb_dwc2_hreg2_read(addr, addr >> 12, 0); + qemu_log_mask(LOG_UNIMP, "FIFO read not implemented\n"); + return 0; +} + +static void dwc2_hreg2_write(void *ptr, hwaddr addr, uint64_t val, + unsigned size) +{ + uint64_t orig = val; + + /* TODO - implement FIFOs to support slave mode */ + trace_usb_dwc2_hreg2_write(addr, addr >> 12, orig, 0, val); + qemu_log_mask(LOG_UNIMP, "FIFO write not implemented\n"); +} + +static const MemoryRegionOps dwc2_mmio_hreg2_ops = { + .read = dwc2_hreg2_read, + .write = dwc2_hreg2_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void dwc2_wakeup_endpoint(USBBus *bus, USBEndpoint *ep, + unsigned int stream) +{ + DWC2State *s = container_of(bus, DWC2State, bus); + + trace_usb_dwc2_wakeup_endpoint(ep, stream); + + /* TODO - do something here? */ + qemu_bh_schedule(s->async_bh); +} + +static USBBusOps dwc2_bus_ops = { + .wakeup_endpoint = dwc2_wakeup_endpoint, +}; + +static void dwc2_work_timer(void *opaque) +{ + DWC2State *s = opaque; + + trace_usb_dwc2_work_timer(); + qemu_bh_schedule(s->async_bh); +} + +static void dwc2_reset_enter(Object *obj, ResetType type) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + int i; + + trace_usb_dwc2_reset_enter(); + + if (c->parent_phases.enter) { + c->parent_phases.enter(obj, type); + } + + timer_del(s->frame_timer); + qemu_bh_cancel(s->async_bh); + + if (s->uport.dev && s->uport.dev->attached) { + usb_detach(&s->uport); + } + + dwc2_bus_stop(s); + + s->gotgctl = GOTGCTL_BSESVLD | GOTGCTL_ASESVLD | GOTGCTL_CONID_B; + s->gotgint = 0; + s->gahbcfg = 0; + s->gusbcfg = 5 << GUSBCFG_USBTRDTIM_SHIFT; + s->grstctl = GRSTCTL_AHBIDLE; + s->gintsts = GINTSTS_CONIDSTSCHNG | GINTSTS_PTXFEMP | GINTSTS_NPTXFEMP | + GINTSTS_CURMODE_HOST; + s->gintmsk = 0; + s->grxstsr = 0; + s->grxstsp = 0; + s->grxfsiz = 1024; + s->gnptxfsiz = 1024 << FIFOSIZE_DEPTH_SHIFT; + s->gnptxsts = (4 << FIFOSIZE_DEPTH_SHIFT) | 1024; + s->gi2cctl = GI2CCTL_I2CDATSE0 | GI2CCTL_ACK; + s->gpvndctl = 0; + s->ggpio = 0; + s->guid = 0; + s->gsnpsid = 0x4f54294a; + s->ghwcfg1 = 0; + s->ghwcfg2 = (8 << GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT) | + (4 << GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT) | + (4 << GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT) | + GHWCFG2_DYNAMIC_FIFO | + GHWCFG2_PERIO_EP_SUPPORTED | + ((DWC2_NB_CHAN - 1) << GHWCFG2_NUM_HOST_CHAN_SHIFT) | + (GHWCFG2_INT_DMA_ARCH << GHWCFG2_ARCHITECTURE_SHIFT) | + (GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST << GHWCFG2_OP_MODE_SHIFT); + s->ghwcfg3 = (4096 << GHWCFG3_DFIFO_DEPTH_SHIFT) | + (4 << GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT) | + (4 << GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT); + s->ghwcfg4 = 0; + s->glpmcfg = 0; + s->gpwrdn = GPWRDN_PWRDNRSTN; + s->gdfifocfg = 0; + s->gadpctl = 0; + s->grefclk = 0; + s->gintmsk2 = 0; + s->gintsts2 = 0; + + s->hptxfsiz = 500 << FIFOSIZE_DEPTH_SHIFT; + + s->hcfg = 2 << HCFG_RESVALID_SHIFT; + s->hfir = 60000; + s->hfnum = 0x3fff; + s->hptxsts = (16 << TXSTS_QSPCAVAIL_SHIFT) | 32768; + s->haint = 0; + s->haintmsk = 0; + s->hprt0 = 0; + + memset(s->hreg1, 0, sizeof(s->hreg1)); + memset(s->pcgreg, 0, sizeof(s->pcgreg)); + + s->sof_time = 0; + s->frame_number = 0; + s->fi = USB_FRMINTVL - 1; + s->next_chan = 0; + s->working = false; + + for (i = 0; i < DWC2_NB_CHAN; i++) { + s->packet[i].needs_service = false; + } +} + +static void dwc2_reset_hold(Object *obj) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + + trace_usb_dwc2_reset_hold(); + + if (c->parent_phases.hold) { + c->parent_phases.hold(obj); + } + + dwc2_update_irq(s); +} + +static void dwc2_reset_exit(Object *obj) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + + trace_usb_dwc2_reset_exit(); + + if (c->parent_phases.exit) { + c->parent_phases.exit(obj); + } + + s->hprt0 = HPRT0_PWR; + if (s->uport.dev && s->uport.dev->attached) { + usb_attach(&s->uport); + usb_device_reset(s->uport.dev); + } +} + +static void dwc2_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + DWC2State *s = DWC2_USB(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "dma-mr", &err); + if (err) { + error_setg(errp, "dwc2: required dma-mr link not found: %s", + error_get_pretty(err)); + return; + } + assert(obj != NULL); + + s->dma_mr = MEMORY_REGION(obj); + address_space_init(&s->dma_as, s->dma_mr, "dwc2"); + + usb_bus_new(&s->bus, sizeof(s->bus), &dwc2_bus_ops, dev); + usb_register_port(&s->bus, &s->uport, s, 0, &dwc2_port_ops, + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL | + (s->usb_version == 2 ? USB_SPEED_MASK_HIGH : 0)); + s->uport.dev = 0; + + s->usb_frame_time = NANOSECONDS_PER_SECOND / 1000; /* 1000000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_FS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_FS; /* 83.3 */ + } else { + s->usb_bit_time = 1; + } + + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); + s->async_bh = qemu_bh_new(dwc2_work_bh, s); + + sysbus_init_irq(sbd, &s->irq); +} + +static void dwc2_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + DWC2State *s = DWC2_USB(obj); + + memory_region_init(&s->container, obj, "dwc2", DWC2_MMIO_SIZE); + sysbus_init_mmio(sbd, &s->container); + + memory_region_init_io(&s->hsotg, obj, &dwc2_mmio_hsotg_ops, s, + "dwc2-io", 4 * KiB); + memory_region_add_subregion(&s->container, 0x0000, &s->hsotg); + + memory_region_init_io(&s->fifos, obj, &dwc2_mmio_hreg2_ops, s, + "dwc2-fifo", 64 * KiB); + memory_region_add_subregion(&s->container, 0x1000, &s->fifos); +} + +static const VMStateDescription vmstate_dwc2_state_packet = { + .name = "dwc2/packet", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(devadr, DWC2Packet), + VMSTATE_UINT32(epnum, DWC2Packet), + VMSTATE_UINT32(epdir, DWC2Packet), + VMSTATE_UINT32(mps, DWC2Packet), + VMSTATE_UINT32(pid, DWC2Packet), + VMSTATE_UINT32(index, DWC2Packet), + VMSTATE_UINT32(pcnt, DWC2Packet), + VMSTATE_UINT32(len, DWC2Packet), + VMSTATE_INT32(async, DWC2Packet), + VMSTATE_BOOL(small, DWC2Packet), + VMSTATE_BOOL(needs_service, DWC2Packet), + VMSTATE_END_OF_LIST() + }, +}; + +const VMStateDescription vmstate_dwc2_state = { + .name = "dwc2", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(glbreg, DWC2State, + DWC2_GLBREG_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(fszreg, DWC2State, + DWC2_FSZREG_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(hreg0, DWC2State, + DWC2_HREG0_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(hreg1, DWC2State, + DWC2_HREG1_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(pcgreg, DWC2State, + DWC2_PCGREG_SIZE / sizeof(uint32_t)), + + VMSTATE_TIMER_PTR(eof_timer, DWC2State), + VMSTATE_TIMER_PTR(frame_timer, DWC2State), + VMSTATE_INT64(sof_time, DWC2State), + VMSTATE_INT64(usb_frame_time, DWC2State), + VMSTATE_INT64(usb_bit_time, DWC2State), + VMSTATE_UINT32(usb_version, DWC2State), + VMSTATE_UINT16(frame_number, DWC2State), + VMSTATE_UINT16(fi, DWC2State), + VMSTATE_UINT16(next_chan, DWC2State), + VMSTATE_BOOL(working, DWC2State), + + VMSTATE_STRUCT_ARRAY(packet, DWC2State, DWC2_NB_CHAN, 1, + vmstate_dwc2_state_packet, DWC2Packet), + VMSTATE_UINT8_2DARRAY(usb_buf, DWC2State, DWC2_NB_CHAN, + DWC2_MAX_XFER_SIZE), + + VMSTATE_END_OF_LIST() + } +}; + +static Property dwc2_usb_properties[] = { + DEFINE_PROP_UINT32("usb_version", DWC2State, usb_version, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void dwc2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + DWC2Class *c = DWC2_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + dc->realize = dwc2_realize; + dc->vmsd = &vmstate_dwc2_state; + set_bit(DEVICE_CATEGORY_USB, dc->categories); + device_class_set_props(dc, dwc2_usb_properties); + resettable_class_set_parent_phases(rc, dwc2_reset_enter, dwc2_reset_hold, + dwc2_reset_exit, &c->parent_phases); +} + +static const TypeInfo dwc2_usb_type_info = { + .name = TYPE_DWC2_USB, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(DWC2State), + .instance_init = dwc2_init, + .class_size = sizeof(DWC2Class), + .class_init = dwc2_class_init, +}; + +static void dwc2_usb_register_types(void) +{ + type_register_static(&dwc2_usb_type_info); +} + +type_init(dwc2_usb_register_types) diff --git a/hw/usb/hcd-dwc2.h b/hw/usb/hcd-dwc2.h new file mode 100644 index 0000000..4ba809a --- /dev/null +++ b/hw/usb/hcd-dwc2.h @@ -0,0 +1,190 @@ +/* + * dwc-hsotg (dwc2) USB host controller state definitions + * + * Based on hw/usb/hcd-ehci.h + * + * Copyright (c) 2020 Paul Zimmerman <pauldzim@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef HW_USB_DWC2_H +#define HW_USB_DWC2_H + +#include "qemu/timer.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/usb.h" +#include "sysemu/dma.h" + +#define DWC2_MMIO_SIZE 0x11000 + +#define DWC2_NB_CHAN 8 /* Number of host channels */ +#define DWC2_MAX_XFER_SIZE 65536 /* Max transfer size expected in HCTSIZ */ + +typedef struct DWC2Packet DWC2Packet; +typedef struct DWC2State DWC2State; +typedef struct DWC2Class DWC2Class; + +enum async_state { + DWC2_ASYNC_NONE = 0, + DWC2_ASYNC_INITIALIZED, + DWC2_ASYNC_INFLIGHT, + DWC2_ASYNC_FINISHED, +}; + +struct DWC2Packet { + USBPacket packet; + uint32_t devadr; + uint32_t epnum; + uint32_t epdir; + uint32_t mps; + uint32_t pid; + uint32_t index; + uint32_t pcnt; + uint32_t len; + int32_t async; + bool small; + bool needs_service; +}; + +struct DWC2State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + USBBus bus; + qemu_irq irq; + MemoryRegion *dma_mr; + AddressSpace dma_as; + MemoryRegion container; + MemoryRegion hsotg; + MemoryRegion fifos; + + union { +#define DWC2_GLBREG_SIZE 0x70 + uint32_t glbreg[DWC2_GLBREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t gotgctl; /* 00 */ + uint32_t gotgint; /* 04 */ + uint32_t gahbcfg; /* 08 */ + uint32_t gusbcfg; /* 0c */ + uint32_t grstctl; /* 10 */ + uint32_t gintsts; /* 14 */ + uint32_t gintmsk; /* 18 */ + uint32_t grxstsr; /* 1c */ + uint32_t grxstsp; /* 20 */ + uint32_t grxfsiz; /* 24 */ + uint32_t gnptxfsiz; /* 28 */ + uint32_t gnptxsts; /* 2c */ + uint32_t gi2cctl; /* 30 */ + uint32_t gpvndctl; /* 34 */ + uint32_t ggpio; /* 38 */ + uint32_t guid; /* 3c */ + uint32_t gsnpsid; /* 40 */ + uint32_t ghwcfg1; /* 44 */ + uint32_t ghwcfg2; /* 48 */ + uint32_t ghwcfg3; /* 4c */ + uint32_t ghwcfg4; /* 50 */ + uint32_t glpmcfg; /* 54 */ + uint32_t gpwrdn; /* 58 */ + uint32_t gdfifocfg; /* 5c */ + uint32_t gadpctl; /* 60 */ + uint32_t grefclk; /* 64 */ + uint32_t gintmsk2; /* 68 */ + uint32_t gintsts2; /* 6c */ + }; + }; + + union { +#define DWC2_FSZREG_SIZE 0x04 + uint32_t fszreg[DWC2_FSZREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t hptxfsiz; /* 100 */ + }; + }; + + union { +#define DWC2_HREG0_SIZE 0x44 + uint32_t hreg0[DWC2_HREG0_SIZE / sizeof(uint32_t)]; + struct { + uint32_t hcfg; /* 400 */ + uint32_t hfir; /* 404 */ + uint32_t hfnum; /* 408 */ + uint32_t rsvd0; /* 40c */ + uint32_t hptxsts; /* 410 */ + uint32_t haint; /* 414 */ + uint32_t haintmsk; /* 418 */ + uint32_t hflbaddr; /* 41c */ + uint32_t rsvd1[8]; /* 420-43c */ + uint32_t hprt0; /* 440 */ + }; + }; + +#define DWC2_HREG1_SIZE (0x20 * DWC2_NB_CHAN) + uint32_t hreg1[DWC2_HREG1_SIZE / sizeof(uint32_t)]; + +#define hcchar(_ch) hreg1[((_ch) << 3) + 0] /* 500, 520, ... */ +#define hcsplt(_ch) hreg1[((_ch) << 3) + 1] /* 504, 524, ... */ +#define hcint(_ch) hreg1[((_ch) << 3) + 2] /* 508, 528, ... */ +#define hcintmsk(_ch) hreg1[((_ch) << 3) + 3] /* 50c, 52c, ... */ +#define hctsiz(_ch) hreg1[((_ch) << 3) + 4] /* 510, 530, ... */ +#define hcdma(_ch) hreg1[((_ch) << 3) + 5] /* 514, 534, ... */ +#define hcdmab(_ch) hreg1[((_ch) << 3) + 7] /* 51c, 53c, ... */ + + union { +#define DWC2_PCGREG_SIZE 0x08 + uint32_t pcgreg[DWC2_PCGREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t pcgctl; /* e00 */ + uint32_t pcgcctl1; /* e04 */ + }; + }; + + /* TODO - implement FIFO registers for slave mode */ +#define DWC2_HFIFO_SIZE (0x1000 * DWC2_NB_CHAN) + + /* + * Internal state + */ + QEMUTimer *eof_timer; + QEMUTimer *frame_timer; + QEMUBH *async_bh; + int64_t sof_time; + int64_t usb_frame_time; + int64_t usb_bit_time; + uint32_t usb_version; + uint16_t frame_number; + uint16_t fi; + uint16_t next_chan; + bool working; + USBPort uport; + DWC2Packet packet[DWC2_NB_CHAN]; /* one packet per chan */ + uint8_t usb_buf[DWC2_NB_CHAN][DWC2_MAX_XFER_SIZE]; /* one buffer per chan */ +}; + +struct DWC2Class { + /*< private >*/ + SysBusDeviceClass parent_class; + ResettablePhases parent_phases; + + /*< public >*/ +}; + +#define TYPE_DWC2_USB "dwc2-usb" +#define DWC2_USB(obj) \ + OBJECT_CHECK(DWC2State, (obj), TYPE_DWC2_USB) +#define DWC2_CLASS(klass) \ + OBJECT_CLASS_CHECK(DWC2Class, (klass), TYPE_DWC2_USB) +#define DWC2_GET_CLASS(obj) \ + OBJECT_GET_CLASS(DWC2Class, (obj), TYPE_DWC2_USB) + +#endif diff --git a/hw/usb/trace-events b/hw/usb/trace-events index 1c24d82..5817ce4 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -176,6 +176,56 @@ usb_xhci_xfer_error(void *xfer, uint32_t ret) "%p: ret %d" usb_xhci_unimplemented(const char *item, int nr) "%s (0x%x)" usb_xhci_enforced_limit(const char *item) "%s" +# hcd-dwc2.c +usb_dwc2_update_irq(uint32_t level) "level=%d" +usb_dwc2_raise_global_irq(uint32_t intr) "0x%08x" +usb_dwc2_lower_global_irq(uint32_t intr) "0x%08x" +usb_dwc2_raise_host_irq(uint32_t intr) "0x%04x" +usb_dwc2_lower_host_irq(uint32_t intr) "0x%04x" +usb_dwc2_sof(int64_t next) "next SOF %" PRId64 +usb_dwc2_bus_start(void) "start SOFs" +usb_dwc2_bus_stop(void) "stop SOFs" +usb_dwc2_find_device(uint8_t addr) "%d" +usb_dwc2_port_disabled(uint32_t pnum) "port %d disabled" +usb_dwc2_device_found(uint32_t pnum) "device found on port %d" +usb_dwc2_device_not_found(void) "device not found" +usb_dwc2_handle_packet(uint32_t chan, void *dev, void *pkt, uint32_t ep, const char *type, const char *dir, uint32_t mps, uint32_t len, uint32_t pcnt) "ch %d dev %p pkt %p ep %d type %s dir %s mps %d len %d pcnt %d" +usb_dwc2_memory_read(uint32_t addr, uint32_t len) "addr %d len %d" +usb_dwc2_packet_status(const char *status, uint32_t len) "status %s len %d" +usb_dwc2_packet_error(const char *status) "ERROR %s" +usb_dwc2_async_packet(void *pkt, uint32_t chan, void *dev, uint32_t ep, const char *dir, uint32_t len) "pkt %p ch %d dev %p ep %d %s len %d" +usb_dwc2_memory_write(uint32_t addr, uint32_t len) "addr %d len %d" +usb_dwc2_packet_done(const char *status, uint32_t actual, uint32_t len, uint32_t pcnt) "status %s actual %d len %d pcnt %d" +usb_dwc2_packet_next(const char *status, uint32_t len, uint32_t pcnt) "status %s len %d pcnt %d" +usb_dwc2_attach(void *port) "port %p" +usb_dwc2_attach_speed(const char *speed) "%s-speed device attached" +usb_dwc2_detach(void *port) "port %p" +usb_dwc2_child_detach(void *port, void *child) "port %p child %p" +usb_dwc2_wakeup(void *port) "port %p" +usb_dwc2_async_packet_complete(void *port, void *pkt, uint32_t chan, void *dev, uint32_t ep, const char *dir, uint32_t len) "port %p packet %p ch %d dev %p ep %d %s len %d" +usb_dwc2_work_bh(void) "" +usb_dwc2_work_bh_service(uint32_t first, uint32_t current, void *dev, uint32_t ep) "first %d servicing %d dev %p ep %d" +usb_dwc2_work_bh_next(uint32_t chan) "next %d" +usb_dwc2_enable_chan(uint32_t chan, void *dev, void *pkt, uint32_t ep) "ch %d dev %p pkt %p ep %d" +usb_dwc2_glbreg_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_glbreg_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_fszreg_read(uint64_t addr, uint32_t val) " 0x%04" PRIx64 " HPTXFSIZ val 0x%08x" +usb_dwc2_fszreg_write(uint64_t addr, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " HPTXFSIZ val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg0_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_hreg0_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) " 0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg1_read(uint64_t addr, const char *reg, uint64_t chan, uint32_t val) " 0x%04" PRIx64 " %s%" PRId64 " val 0x%08x" +usb_dwc2_hreg1_write(uint64_t addr, const char *reg, uint64_t chan, uint64_t val, uint32_t old, uint64_t result) " 0x%04" PRIx64 " %s%" PRId64 " val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_pcgreg_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_pcgreg_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg2_read(uint64_t addr, uint64_t fifo, uint32_t val) " 0x%04" PRIx64 " FIFO%" PRId64 " val 0x%08x" +usb_dwc2_hreg2_write(uint64_t addr, uint64_t fifo, uint64_t val, uint32_t old, uint64_t result) " 0x%04" PRIx64 " FIFO%" PRId64 " val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg0_action(const char *s) "%s" +usb_dwc2_wakeup_endpoint(void *ep, uint32_t stream) "endp %p stream %d" +usb_dwc2_work_timer(void) "" +usb_dwc2_reset_enter(void) "=== RESET enter ===" +usb_dwc2_reset_hold(void) "=== RESET hold ===" +usb_dwc2_reset_exit(void) "=== RESET exit ===" + # desc.c usb_desc_device(int addr, int len, int ret) "dev %d query device, len %d, ret %d" usb_desc_device_qualifier(int addr, int len, int ret) "dev %d query device qualifier, len %d, ret %d" diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 2e8655a..48a0ad1 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -21,11 +21,13 @@ #include "hw/misc/bcm2835_property.h" #include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" +#include "hw/misc/bcm2835_mphi.h" #include "hw/misc/bcm2835_thermal.h" #include "hw/sd/sdhci.h" #include "hw/sd/bcm2835_sdhost.h" #include "hw/gpio/bcm2835_gpio.h" #include "hw/timer/bcm2835_systmr.h" +#include "hw/usb/hcd-dwc2.h" #include "hw/misc/unimp.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" @@ -42,6 +44,7 @@ typedef struct BCM2835PeripheralState { qemu_irq irq, fiq; BCM2835SystemTimerState systmr; + BCM2835MphiState mphi; UnimplementedDeviceState armtmr; UnimplementedDeviceState cprman; UnimplementedDeviceState a2w; @@ -65,7 +68,7 @@ typedef struct BCM2835PeripheralState { UnimplementedDeviceState ave0; UnimplementedDeviceState bscsl; UnimplementedDeviceState smi; - UnimplementedDeviceState dwc2; + DWC2State dwc2; UnimplementedDeviceState sdramc; } BCM2835PeripheralState; diff --git a/include/hw/misc/bcm2835_mphi.h b/include/hw/misc/bcm2835_mphi.h new file mode 100644 index 0000000..e084314 --- /dev/null +++ b/include/hw/misc/bcm2835_mphi.h @@ -0,0 +1,44 @@ +/* + * BCM2835 SOC MPHI state definitions + * + * Copyright (c) 2020 Paul Zimmerman <pauldzim@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef HW_MISC_BCM2835_MPHI_H +#define HW_MISC_BCM2835_MPHI_H + +#include "hw/irq.h" +#include "hw/sysbus.h" + +#define MPHI_MMIO_SIZE 0x1000 + +typedef struct BCM2835MphiState BCM2835MphiState; + +struct BCM2835MphiState { + SysBusDevice parent_obj; + qemu_irq irq; + MemoryRegion iomem; + + uint32_t outdda; + uint32_t outddb; + uint32_t ctrl; + uint32_t intstat; + uint32_t swirq; +}; + +#define TYPE_BCM2835_MPHI "bcm2835-mphi" + +#define BCM2835_MPHI(obj) \ + OBJECT_CHECK(BCM2835MphiState, (obj), TYPE_BCM2835_MPHI) + +#endif diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h new file mode 100644 index 0000000..40af23a --- /dev/null +++ b/include/hw/usb/dwc2-regs.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Imported from the Linux kernel file drivers/usb/dwc2/hw.h, commit + * a89bae709b3492b478480a2c9734e7e9393b279c ("usb: dwc2: Move + * UTMI_PHY_DATA defines closer") + * + * hw.h - DesignWare HS OTG Controller hardware definitions + * + * Copyright 2004-2013 Synopsys, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the above-listed copyright holders may not be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __DWC2_HW_H__ +#define __DWC2_HW_H__ + +#define HSOTG_REG(x) (x) + +#define GOTGCTL HSOTG_REG(0x000) +#define GOTGCTL_CHIRPEN BIT(27) +#define GOTGCTL_MULT_VALID_BC_MASK (0x1f << 22) +#define GOTGCTL_MULT_VALID_BC_SHIFT 22 +#define GOTGCTL_OTGVER BIT(20) +#define GOTGCTL_BSESVLD BIT(19) +#define GOTGCTL_ASESVLD BIT(18) +#define GOTGCTL_DBNC_SHORT BIT(17) +#define GOTGCTL_CONID_B BIT(16) +#define GOTGCTL_DBNCE_FLTR_BYPASS BIT(15) +#define GOTGCTL_DEVHNPEN BIT(11) +#define GOTGCTL_HSTSETHNPEN BIT(10) +#define GOTGCTL_HNPREQ BIT(9) +#define GOTGCTL_HSTNEGSCS BIT(8) +#define GOTGCTL_SESREQ BIT(1) +#define GOTGCTL_SESREQSCS BIT(0) + +#define GOTGINT HSOTG_REG(0x004) +#define GOTGINT_DBNCE_DONE BIT(19) +#define GOTGINT_A_DEV_TOUT_CHG BIT(18) +#define GOTGINT_HST_NEG_DET BIT(17) +#define GOTGINT_HST_NEG_SUC_STS_CHNG BIT(9) +#define GOTGINT_SES_REQ_SUC_STS_CHNG BIT(8) +#define GOTGINT_SES_END_DET BIT(2) + +#define GAHBCFG HSOTG_REG(0x008) +#define GAHBCFG_AHB_SINGLE BIT(23) +#define GAHBCFG_NOTI_ALL_DMA_WRIT BIT(22) +#define GAHBCFG_REM_MEM_SUPP BIT(21) +#define GAHBCFG_P_TXF_EMP_LVL BIT(8) +#define GAHBCFG_NP_TXF_EMP_LVL BIT(7) +#define GAHBCFG_DMA_EN BIT(5) +#define GAHBCFG_HBSTLEN_MASK (0xf << 1) +#define GAHBCFG_HBSTLEN_SHIFT 1 +#define GAHBCFG_HBSTLEN_SINGLE 0 +#define GAHBCFG_HBSTLEN_INCR 1 +#define GAHBCFG_HBSTLEN_INCR4 3 +#define GAHBCFG_HBSTLEN_INCR8 5 +#define GAHBCFG_HBSTLEN_INCR16 7 +#define GAHBCFG_GLBL_INTR_EN BIT(0) +#define GAHBCFG_CTRL_MASK (GAHBCFG_P_TXF_EMP_LVL | \ + GAHBCFG_NP_TXF_EMP_LVL | \ + GAHBCFG_DMA_EN | \ + GAHBCFG_GLBL_INTR_EN) + +#define GUSBCFG HSOTG_REG(0x00C) +#define GUSBCFG_FORCEDEVMODE BIT(30) +#define GUSBCFG_FORCEHOSTMODE BIT(29) +#define GUSBCFG_TXENDDELAY BIT(28) +#define GUSBCFG_ICTRAFFICPULLREMOVE BIT(27) +#define GUSBCFG_ICUSBCAP BIT(26) +#define GUSBCFG_ULPI_INT_PROT_DIS BIT(25) +#define GUSBCFG_INDICATORPASSTHROUGH BIT(24) +#define GUSBCFG_INDICATORCOMPLEMENT BIT(23) +#define GUSBCFG_TERMSELDLPULSE BIT(22) +#define GUSBCFG_ULPI_INT_VBUS_IND BIT(21) +#define GUSBCFG_ULPI_EXT_VBUS_DRV BIT(20) +#define GUSBCFG_ULPI_CLK_SUSP_M BIT(19) +#define GUSBCFG_ULPI_AUTO_RES BIT(18) +#define GUSBCFG_ULPI_FS_LS BIT(17) +#define GUSBCFG_OTG_UTMI_FS_SEL BIT(16) +#define GUSBCFG_PHY_LP_CLK_SEL BIT(15) +#define GUSBCFG_USBTRDTIM_MASK (0xf << 10) +#define GUSBCFG_USBTRDTIM_SHIFT 10 +#define GUSBCFG_HNPCAP BIT(9) +#define GUSBCFG_SRPCAP BIT(8) +#define GUSBCFG_DDRSEL BIT(7) +#define GUSBCFG_PHYSEL BIT(6) +#define GUSBCFG_FSINTF BIT(5) +#define GUSBCFG_ULPI_UTMI_SEL BIT(4) +#define GUSBCFG_PHYIF16 BIT(3) +#define GUSBCFG_PHYIF8 (0 << 3) +#define GUSBCFG_TOUTCAL_MASK (0x7 << 0) +#define GUSBCFG_TOUTCAL_SHIFT 0 +#define GUSBCFG_TOUTCAL_LIMIT 0x7 +#define GUSBCFG_TOUTCAL(_x) ((_x) << 0) + +#define GRSTCTL HSOTG_REG(0x010) +#define GRSTCTL_AHBIDLE BIT(31) +#define GRSTCTL_DMAREQ BIT(30) +#define GRSTCTL_TXFNUM_MASK (0x1f << 6) +#define GRSTCTL_TXFNUM_SHIFT 6 +#define GRSTCTL_TXFNUM_LIMIT 0x1f +#define GRSTCTL_TXFNUM(_x) ((_x) << 6) +#define GRSTCTL_TXFFLSH BIT(5) +#define GRSTCTL_RXFFLSH BIT(4) +#define GRSTCTL_IN_TKNQ_FLSH BIT(3) +#define GRSTCTL_FRMCNTRRST BIT(2) +#define GRSTCTL_HSFTRST BIT(1) +#define GRSTCTL_CSFTRST BIT(0) + +#define GINTSTS HSOTG_REG(0x014) +#define GINTMSK HSOTG_REG(0x018) +#define GINTSTS_WKUPINT BIT(31) +#define GINTSTS_SESSREQINT BIT(30) +#define GINTSTS_DISCONNINT BIT(29) +#define GINTSTS_CONIDSTSCHNG BIT(28) +#define GINTSTS_LPMTRANRCVD BIT(27) +#define GINTSTS_PTXFEMP BIT(26) +#define GINTSTS_HCHINT BIT(25) +#define GINTSTS_PRTINT BIT(24) +#define GINTSTS_RESETDET BIT(23) +#define GINTSTS_FET_SUSP BIT(22) +#define GINTSTS_INCOMPL_IP BIT(21) +#define GINTSTS_INCOMPL_SOOUT BIT(21) +#define GINTSTS_INCOMPL_SOIN BIT(20) +#define GINTSTS_OEPINT BIT(19) +#define GINTSTS_IEPINT BIT(18) +#define GINTSTS_EPMIS BIT(17) +#define GINTSTS_RESTOREDONE BIT(16) +#define GINTSTS_EOPF BIT(15) +#define GINTSTS_ISOUTDROP BIT(14) +#define GINTSTS_ENUMDONE BIT(13) +#define GINTSTS_USBRST BIT(12) +#define GINTSTS_USBSUSP BIT(11) +#define GINTSTS_ERLYSUSP BIT(10) +#define GINTSTS_I2CINT BIT(9) +#define GINTSTS_ULPI_CK_INT BIT(8) +#define GINTSTS_GOUTNAKEFF BIT(7) +#define GINTSTS_GINNAKEFF BIT(6) +#define GINTSTS_NPTXFEMP BIT(5) +#define GINTSTS_RXFLVL BIT(4) +#define GINTSTS_SOF BIT(3) +#define GINTSTS_OTGINT BIT(2) +#define GINTSTS_MODEMIS BIT(1) +#define GINTSTS_CURMODE_HOST BIT(0) + +#define GRXSTSR HSOTG_REG(0x01C) +#define GRXSTSP HSOTG_REG(0x020) +#define GRXSTS_FN_MASK (0x7f << 25) +#define GRXSTS_FN_SHIFT 25 +#define GRXSTS_PKTSTS_MASK (0xf << 17) +#define GRXSTS_PKTSTS_SHIFT 17 +#define GRXSTS_PKTSTS_GLOBALOUTNAK 1 +#define GRXSTS_PKTSTS_OUTRX 2 +#define GRXSTS_PKTSTS_HCHIN 2 +#define GRXSTS_PKTSTS_OUTDONE 3 +#define GRXSTS_PKTSTS_HCHIN_XFER_COMP 3 +#define GRXSTS_PKTSTS_SETUPDONE 4 +#define GRXSTS_PKTSTS_DATATOGGLEERR 5 +#define GRXSTS_PKTSTS_SETUPRX 6 +#define GRXSTS_PKTSTS_HCHHALTED 7 +#define GRXSTS_HCHNUM_MASK (0xf << 0) +#define GRXSTS_HCHNUM_SHIFT 0 +#define GRXSTS_DPID_MASK (0x3 << 15) +#define GRXSTS_DPID_SHIFT 15 +#define GRXSTS_BYTECNT_MASK (0x7ff << 4) +#define GRXSTS_BYTECNT_SHIFT 4 +#define GRXSTS_EPNUM_MASK (0xf << 0) +#define GRXSTS_EPNUM_SHIFT 0 + +#define GRXFSIZ HSOTG_REG(0x024) +#define GRXFSIZ_DEPTH_MASK (0xffff << 0) +#define GRXFSIZ_DEPTH_SHIFT 0 + +#define GNPTXFSIZ HSOTG_REG(0x028) +/* Use FIFOSIZE_* constants to access this register */ + +#define GNPTXSTS HSOTG_REG(0x02C) +#define GNPTXSTS_NP_TXQ_TOP_MASK (0x7f << 24) +#define GNPTXSTS_NP_TXQ_TOP_SHIFT 24 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_MASK (0xff << 16) +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_SHIFT 16 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_GET(_v) (((_v) >> 16) & 0xff) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_MASK (0xffff << 0) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_SHIFT 0 +#define GNPTXSTS_NP_TXF_SPC_AVAIL_GET(_v) (((_v) >> 0) & 0xffff) + +#define GI2CCTL HSOTG_REG(0x0030) +#define GI2CCTL_BSYDNE BIT(31) +#define GI2CCTL_RW BIT(30) +#define GI2CCTL_I2CDATSE0 BIT(28) +#define GI2CCTL_I2CDEVADDR_MASK (0x3 << 26) +#define GI2CCTL_I2CDEVADDR_SHIFT 26 +#define GI2CCTL_I2CSUSPCTL BIT(25) +#define GI2CCTL_ACK BIT(24) +#define GI2CCTL_I2CEN BIT(23) +#define GI2CCTL_ADDR_MASK (0x7f << 16) +#define GI2CCTL_ADDR_SHIFT 16 +#define GI2CCTL_REGADDR_MASK (0xff << 8) +#define GI2CCTL_REGADDR_SHIFT 8 +#define GI2CCTL_RWDATA_MASK (0xff << 0) +#define GI2CCTL_RWDATA_SHIFT 0 + +#define GPVNDCTL HSOTG_REG(0x0034) +#define GGPIO HSOTG_REG(0x0038) +#define GGPIO_STM32_OTG_GCCFG_PWRDWN BIT(16) + +#define GUID HSOTG_REG(0x003c) +#define GSNPSID HSOTG_REG(0x0040) +#define GHWCFG1 HSOTG_REG(0x0044) +#define GSNPSID_ID_MASK GENMASK(31, 16) + +#define GHWCFG2 HSOTG_REG(0x0048) +#define GHWCFG2_OTG_ENABLE_IC_USB BIT(31) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_MASK (0x1f << 26) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT 26 +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_MASK (0x3 << 24) +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT 24 +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_MASK (0x3 << 22) +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT 22 +#define GHWCFG2_MULTI_PROC_INT BIT(20) +#define GHWCFG2_DYNAMIC_FIFO BIT(19) +#define GHWCFG2_PERIO_EP_SUPPORTED BIT(18) +#define GHWCFG2_NUM_HOST_CHAN_MASK (0xf << 14) +#define GHWCFG2_NUM_HOST_CHAN_SHIFT 14 +#define GHWCFG2_NUM_DEV_EP_MASK (0xf << 10) +#define GHWCFG2_NUM_DEV_EP_SHIFT 10 +#define GHWCFG2_FS_PHY_TYPE_MASK (0x3 << 8) +#define GHWCFG2_FS_PHY_TYPE_SHIFT 8 +#define GHWCFG2_FS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_FS_PHY_TYPE_DEDICATED 1 +#define GHWCFG2_FS_PHY_TYPE_SHARED_UTMI 2 +#define GHWCFG2_FS_PHY_TYPE_SHARED_ULPI 3 +#define GHWCFG2_HS_PHY_TYPE_MASK (0x3 << 6) +#define GHWCFG2_HS_PHY_TYPE_SHIFT 6 +#define GHWCFG2_HS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_HS_PHY_TYPE_UTMI 1 +#define GHWCFG2_HS_PHY_TYPE_ULPI 2 +#define GHWCFG2_HS_PHY_TYPE_UTMI_ULPI 3 +#define GHWCFG2_POINT2POINT BIT(5) +#define GHWCFG2_ARCHITECTURE_MASK (0x3 << 3) +#define GHWCFG2_ARCHITECTURE_SHIFT 3 +#define GHWCFG2_SLAVE_ONLY_ARCH 0 +#define GHWCFG2_EXT_DMA_ARCH 1 +#define GHWCFG2_INT_DMA_ARCH 2 +#define GHWCFG2_OP_MODE_MASK (0x7 << 0) +#define GHWCFG2_OP_MODE_SHIFT 0 +#define GHWCFG2_OP_MODE_HNP_SRP_CAPABLE 0 +#define GHWCFG2_OP_MODE_SRP_ONLY_CAPABLE 1 +#define GHWCFG2_OP_MODE_NO_HNP_SRP_CAPABLE 2 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_DEVICE 3 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_DEVICE 4 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_HOST 5 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST 6 +#define GHWCFG2_OP_MODE_UNDEFINED 7 + +#define GHWCFG3 HSOTG_REG(0x004c) +#define GHWCFG3_DFIFO_DEPTH_MASK (0xffff << 16) +#define GHWCFG3_DFIFO_DEPTH_SHIFT 16 +#define GHWCFG3_OTG_LPM_EN BIT(15) +#define GHWCFG3_BC_SUPPORT BIT(14) +#define GHWCFG3_OTG_ENABLE_HSIC BIT(13) +#define GHWCFG3_ADP_SUPP BIT(12) +#define GHWCFG3_SYNCH_RESET_TYPE BIT(11) +#define GHWCFG3_OPTIONAL_FEATURES BIT(10) +#define GHWCFG3_VENDOR_CTRL_IF BIT(9) +#define GHWCFG3_I2C BIT(8) +#define GHWCFG3_OTG_FUNC BIT(7) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_MASK (0x7 << 4) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT 4 +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_MASK (0xf << 0) +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT 0 + +#define GHWCFG4 HSOTG_REG(0x0050) +#define GHWCFG4_DESC_DMA_DYN BIT(31) +#define GHWCFG4_DESC_DMA BIT(30) +#define GHWCFG4_NUM_IN_EPS_MASK (0xf << 26) +#define GHWCFG4_NUM_IN_EPS_SHIFT 26 +#define GHWCFG4_DED_FIFO_EN BIT(25) +#define GHWCFG4_DED_FIFO_SHIFT 25 +#define GHWCFG4_SESSION_END_FILT_EN BIT(24) +#define GHWCFG4_B_VALID_FILT_EN BIT(23) +#define GHWCFG4_A_VALID_FILT_EN BIT(22) +#define GHWCFG4_VBUS_VALID_FILT_EN BIT(21) +#define GHWCFG4_IDDIG_FILT_EN BIT(20) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_MASK (0xf << 16) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_SHIFT 16 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK (0x3 << 14) +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT 14 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8 0 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_16 1 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16 2 +#define GHWCFG4_ACG_SUPPORTED BIT(12) +#define GHWCFG4_IPG_ISOC_SUPPORTED BIT(11) +#define GHWCFG4_SERVICE_INTERVAL_SUPPORTED BIT(10) +#define GHWCFG4_XHIBER BIT(7) +#define GHWCFG4_HIBER BIT(6) +#define GHWCFG4_MIN_AHB_FREQ BIT(5) +#define GHWCFG4_POWER_OPTIMIZ BIT(4) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK (0xf << 0) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT 0 + +#define GLPMCFG HSOTG_REG(0x0054) +#define GLPMCFG_INVSELHSIC BIT(31) +#define GLPMCFG_HSICCON BIT(30) +#define GLPMCFG_RSTRSLPSTS BIT(29) +#define GLPMCFG_ENBESL BIT(28) +#define GLPMCFG_LPM_RETRYCNT_STS_MASK (0x7 << 25) +#define GLPMCFG_LPM_RETRYCNT_STS_SHIFT 25 +#define GLPMCFG_SNDLPM BIT(24) +#define GLPMCFG_RETRY_CNT_MASK (0x7 << 21) +#define GLPMCFG_RETRY_CNT_SHIFT 21 +#define GLPMCFG_LPM_REJECT_CTRL_CONTROL BIT(21) +#define GLPMCFG_LPM_ACCEPT_CTRL_ISOC BIT(22) +#define GLPMCFG_LPM_CHNL_INDX_MASK (0xf << 17) +#define GLPMCFG_LPM_CHNL_INDX_SHIFT 17 +#define GLPMCFG_L1RESUMEOK BIT(16) +#define GLPMCFG_SLPSTS BIT(15) +#define GLPMCFG_COREL1RES_MASK (0x3 << 13) +#define GLPMCFG_COREL1RES_SHIFT 13 +#define GLPMCFG_HIRD_THRES_MASK (0x1f << 8) +#define GLPMCFG_HIRD_THRES_SHIFT 8 +#define GLPMCFG_HIRD_THRES_EN (0x10 << 8) +#define GLPMCFG_ENBLSLPM BIT(7) +#define GLPMCFG_BREMOTEWAKE BIT(6) +#define GLPMCFG_HIRD_MASK (0xf << 2) +#define GLPMCFG_HIRD_SHIFT 2 +#define GLPMCFG_APPL1RES BIT(1) +#define GLPMCFG_LPMCAP BIT(0) + +#define GPWRDN HSOTG_REG(0x0058) +#define GPWRDN_MULT_VAL_ID_BC_MASK (0x1f << 24) +#define GPWRDN_MULT_VAL_ID_BC_SHIFT 24 +#define GPWRDN_ADP_INT BIT(23) +#define GPWRDN_BSESSVLD BIT(22) +#define GPWRDN_IDSTS BIT(21) +#define GPWRDN_LINESTATE_MASK (0x3 << 19) +#define GPWRDN_LINESTATE_SHIFT 19 +#define GPWRDN_STS_CHGINT_MSK BIT(18) +#define GPWRDN_STS_CHGINT BIT(17) +#define GPWRDN_SRP_DET_MSK BIT(16) +#define GPWRDN_SRP_DET BIT(15) +#define GPWRDN_CONNECT_DET_MSK BIT(14) +#define GPWRDN_CONNECT_DET BIT(13) +#define GPWRDN_DISCONN_DET_MSK BIT(12) +#define GPWRDN_DISCONN_DET BIT(11) +#define GPWRDN_RST_DET_MSK BIT(10) +#define GPWRDN_RST_DET BIT(9) +#define GPWRDN_LNSTSCHG_MSK BIT(8) +#define GPWRDN_LNSTSCHG BIT(7) +#define GPWRDN_DIS_VBUS BIT(6) +#define GPWRDN_PWRDNSWTCH BIT(5) +#define GPWRDN_PWRDNRSTN BIT(4) +#define GPWRDN_PWRDNCLMP BIT(3) +#define GPWRDN_RESTORE BIT(2) +#define GPWRDN_PMUACTV BIT(1) +#define GPWRDN_PMUINTSEL BIT(0) + +#define GDFIFOCFG HSOTG_REG(0x005c) +#define GDFIFOCFG_EPINFOBASE_MASK (0xffff << 16) +#define GDFIFOCFG_EPINFOBASE_SHIFT 16 +#define GDFIFOCFG_GDFIFOCFG_MASK (0xffff << 0) +#define GDFIFOCFG_GDFIFOCFG_SHIFT 0 + +#define ADPCTL HSOTG_REG(0x0060) +#define ADPCTL_AR_MASK (0x3 << 27) +#define ADPCTL_AR_SHIFT 27 +#define ADPCTL_ADP_TMOUT_INT_MSK BIT(26) +#define ADPCTL_ADP_SNS_INT_MSK BIT(25) +#define ADPCTL_ADP_PRB_INT_MSK BIT(24) +#define ADPCTL_ADP_TMOUT_INT BIT(23) +#define ADPCTL_ADP_SNS_INT BIT(22) +#define ADPCTL_ADP_PRB_INT BIT(21) +#define ADPCTL_ADPENA BIT(20) +#define ADPCTL_ADPRES BIT(19) +#define ADPCTL_ENASNS BIT(18) +#define ADPCTL_ENAPRB BIT(17) +#define ADPCTL_RTIM_MASK (0x7ff << 6) +#define ADPCTL_RTIM_SHIFT 6 +#define ADPCTL_PRB_PER_MASK (0x3 << 4) +#define ADPCTL_PRB_PER_SHIFT 4 +#define ADPCTL_PRB_DELTA_MASK (0x3 << 2) +#define ADPCTL_PRB_DELTA_SHIFT 2 +#define ADPCTL_PRB_DSCHRG_MASK (0x3 << 0) +#define ADPCTL_PRB_DSCHRG_SHIFT 0 + +#define GREFCLK HSOTG_REG(0x0064) +#define GREFCLK_REFCLKPER_MASK (0x1ffff << 15) +#define GREFCLK_REFCLKPER_SHIFT 15 +#define GREFCLK_REF_CLK_MODE BIT(14) +#define GREFCLK_SOF_CNT_WKUP_ALERT_MASK (0x3ff) +#define GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT 0 + +#define GINTMSK2 HSOTG_REG(0x0068) +#define GINTMSK2_WKUP_ALERT_INT_MSK BIT(0) + +#define GINTSTS2 HSOTG_REG(0x006c) +#define GINTSTS2_WKUP_ALERT_INT BIT(0) + +#define HPTXFSIZ HSOTG_REG(0x100) +/* Use FIFOSIZE_* constants to access this register */ + +#define DPTXFSIZN(_a) HSOTG_REG(0x104 + (((_a) - 1) * 4)) +/* Use FIFOSIZE_* constants to access this register */ + +/* These apply to the GNPTXFSIZ, HPTXFSIZ and DPTXFSIZN registers */ +#define FIFOSIZE_DEPTH_MASK (0xffff << 16) +#define FIFOSIZE_DEPTH_SHIFT 16 +#define FIFOSIZE_STARTADDR_MASK (0xffff << 0) +#define FIFOSIZE_STARTADDR_SHIFT 0 +#define FIFOSIZE_DEPTH_GET(_x) (((_x) >> 16) & 0xffff) + +/* Device mode registers */ + +#define DCFG HSOTG_REG(0x800) +#define DCFG_DESCDMA_EN BIT(23) +#define DCFG_EPMISCNT_MASK (0x1f << 18) +#define DCFG_EPMISCNT_SHIFT 18 +#define DCFG_EPMISCNT_LIMIT 0x1f +#define DCFG_EPMISCNT(_x) ((_x) << 18) +#define DCFG_IPG_ISOC_SUPPORDED BIT(17) +#define DCFG_PERFRINT_MASK (0x3 << 11) +#define DCFG_PERFRINT_SHIFT 11 +#define DCFG_PERFRINT_LIMIT 0x3 +#define DCFG_PERFRINT(_x) ((_x) << 11) +#define DCFG_DEVADDR_MASK (0x7f << 4) +#define DCFG_DEVADDR_SHIFT 4 +#define DCFG_DEVADDR_LIMIT 0x7f +#define DCFG_DEVADDR(_x) ((_x) << 4) +#define DCFG_NZ_STS_OUT_HSHK BIT(2) +#define DCFG_DEVSPD_MASK (0x3 << 0) +#define DCFG_DEVSPD_SHIFT 0 +#define DCFG_DEVSPD_HS 0 +#define DCFG_DEVSPD_FS 1 +#define DCFG_DEVSPD_LS 2 +#define DCFG_DEVSPD_FS48 3 + +#define DCTL HSOTG_REG(0x804) +#define DCTL_SERVICE_INTERVAL_SUPPORTED BIT(19) +#define DCTL_PWRONPRGDONE BIT(11) +#define DCTL_CGOUTNAK BIT(10) +#define DCTL_SGOUTNAK BIT(9) +#define DCTL_CGNPINNAK BIT(8) +#define DCTL_SGNPINNAK BIT(7) +#define DCTL_TSTCTL_MASK (0x7 << 4) +#define DCTL_TSTCTL_SHIFT 4 +#define DCTL_GOUTNAKSTS BIT(3) +#define DCTL_GNPINNAKSTS BIT(2) +#define DCTL_SFTDISCON BIT(1) +#define DCTL_RMTWKUPSIG BIT(0) + +#define DSTS HSOTG_REG(0x808) +#define DSTS_SOFFN_MASK (0x3fff << 8) +#define DSTS_SOFFN_SHIFT 8 +#define DSTS_SOFFN_LIMIT 0x3fff +#define DSTS_SOFFN(_x) ((_x) << 8) +#define DSTS_ERRATICERR BIT(3) +#define DSTS_ENUMSPD_MASK (0x3 << 1) +#define DSTS_ENUMSPD_SHIFT 1 +#define DSTS_ENUMSPD_HS 0 +#define DSTS_ENUMSPD_FS 1 +#define DSTS_ENUMSPD_LS 2 +#define DSTS_ENUMSPD_FS48 3 +#define DSTS_SUSPSTS BIT(0) + +#define DIEPMSK HSOTG_REG(0x810) +#define DIEPMSK_NAKMSK BIT(13) +#define DIEPMSK_BNAININTRMSK BIT(9) +#define DIEPMSK_TXFIFOUNDRNMSK BIT(8) +#define DIEPMSK_TXFIFOEMPTY BIT(7) +#define DIEPMSK_INEPNAKEFFMSK BIT(6) +#define DIEPMSK_INTKNEPMISMSK BIT(5) +#define DIEPMSK_INTKNTXFEMPMSK BIT(4) +#define DIEPMSK_TIMEOUTMSK BIT(3) +#define DIEPMSK_AHBERRMSK BIT(2) +#define DIEPMSK_EPDISBLDMSK BIT(1) +#define DIEPMSK_XFERCOMPLMSK BIT(0) + +#define DOEPMSK HSOTG_REG(0x814) +#define DOEPMSK_BNAMSK BIT(9) +#define DOEPMSK_BACK2BACKSETUP BIT(6) +#define DOEPMSK_STSPHSERCVDMSK BIT(5) +#define DOEPMSK_OUTTKNEPDISMSK BIT(4) +#define DOEPMSK_SETUPMSK BIT(3) +#define DOEPMSK_AHBERRMSK BIT(2) +#define DOEPMSK_EPDISBLDMSK BIT(1) +#define DOEPMSK_XFERCOMPLMSK BIT(0) + +#define DAINT HSOTG_REG(0x818) +#define DAINTMSK HSOTG_REG(0x81C) +#define DAINT_OUTEP_SHIFT 16 +#define DAINT_OUTEP(_x) (1 << ((_x) + 16)) +#define DAINT_INEP(_x) (1 << (_x)) + +#define DTKNQR1 HSOTG_REG(0x820) +#define DTKNQR2 HSOTG_REG(0x824) +#define DTKNQR3 HSOTG_REG(0x830) +#define DTKNQR4 HSOTG_REG(0x834) +#define DIEPEMPMSK HSOTG_REG(0x834) + +#define DVBUSDIS HSOTG_REG(0x828) +#define DVBUSPULSE HSOTG_REG(0x82C) + +#define DIEPCTL0 HSOTG_REG(0x900) +#define DIEPCTL(_a) HSOTG_REG(0x900 + ((_a) * 0x20)) + +#define DOEPCTL0 HSOTG_REG(0xB00) +#define DOEPCTL(_a) HSOTG_REG(0xB00 + ((_a) * 0x20)) + +/* EP0 specialness: + * bits[29..28] - reserved (no SetD0PID, SetD1PID) + * bits[25..22] - should always be zero, this isn't a periodic endpoint + * bits[10..0] - MPS setting different for EP0 + */ +#define D0EPCTL_MPS_MASK (0x3 << 0) +#define D0EPCTL_MPS_SHIFT 0 +#define D0EPCTL_MPS_64 0 +#define D0EPCTL_MPS_32 1 +#define D0EPCTL_MPS_16 2 +#define D0EPCTL_MPS_8 3 + +#define DXEPCTL_EPENA BIT(31) +#define DXEPCTL_EPDIS BIT(30) +#define DXEPCTL_SETD1PID BIT(29) +#define DXEPCTL_SETODDFR BIT(29) +#define DXEPCTL_SETD0PID BIT(28) +#define DXEPCTL_SETEVENFR BIT(28) +#define DXEPCTL_SNAK BIT(27) +#define DXEPCTL_CNAK BIT(26) +#define DXEPCTL_TXFNUM_MASK (0xf << 22) +#define DXEPCTL_TXFNUM_SHIFT 22 +#define DXEPCTL_TXFNUM_LIMIT 0xf +#define DXEPCTL_TXFNUM(_x) ((_x) << 22) +#define DXEPCTL_STALL BIT(21) +#define DXEPCTL_SNP BIT(20) +#define DXEPCTL_EPTYPE_MASK (0x3 << 18) +#define DXEPCTL_EPTYPE_CONTROL (0x0 << 18) +#define DXEPCTL_EPTYPE_ISO (0x1 << 18) +#define DXEPCTL_EPTYPE_BULK (0x2 << 18) +#define DXEPCTL_EPTYPE_INTERRUPT (0x3 << 18) + +#define DXEPCTL_NAKSTS BIT(17) +#define DXEPCTL_DPID BIT(16) +#define DXEPCTL_EOFRNUM BIT(16) +#define DXEPCTL_USBACTEP BIT(15) +#define DXEPCTL_NEXTEP_MASK (0xf << 11) +#define DXEPCTL_NEXTEP_SHIFT 11 +#define DXEPCTL_NEXTEP_LIMIT 0xf +#define DXEPCTL_NEXTEP(_x) ((_x) << 11) +#define DXEPCTL_MPS_MASK (0x7ff << 0) +#define DXEPCTL_MPS_SHIFT 0 +#define DXEPCTL_MPS_LIMIT 0x7ff +#define DXEPCTL_MPS(_x) ((_x) << 0) + +#define DIEPINT(_a) HSOTG_REG(0x908 + ((_a) * 0x20)) +#define DOEPINT(_a) HSOTG_REG(0xB08 + ((_a) * 0x20)) +#define DXEPINT_SETUP_RCVD BIT(15) +#define DXEPINT_NYETINTRPT BIT(14) +#define DXEPINT_NAKINTRPT BIT(13) +#define DXEPINT_BBLEERRINTRPT BIT(12) +#define DXEPINT_PKTDRPSTS BIT(11) +#define DXEPINT_BNAINTR BIT(9) +#define DXEPINT_TXFIFOUNDRN BIT(8) +#define DXEPINT_OUTPKTERR BIT(8) +#define DXEPINT_TXFEMP BIT(7) +#define DXEPINT_INEPNAKEFF BIT(6) +#define DXEPINT_BACK2BACKSETUP BIT(6) +#define DXEPINT_INTKNEPMIS BIT(5) +#define DXEPINT_STSPHSERCVD BIT(5) +#define DXEPINT_INTKNTXFEMP BIT(4) +#define DXEPINT_OUTTKNEPDIS BIT(4) +#define DXEPINT_TIMEOUT BIT(3) +#define DXEPINT_SETUP BIT(3) +#define DXEPINT_AHBERR BIT(2) +#define DXEPINT_EPDISBLD BIT(1) +#define DXEPINT_XFERCOMPL BIT(0) + +#define DIEPTSIZ0 HSOTG_REG(0x910) +#define DIEPTSIZ0_PKTCNT_MASK (0x3 << 19) +#define DIEPTSIZ0_PKTCNT_SHIFT 19 +#define DIEPTSIZ0_PKTCNT_LIMIT 0x3 +#define DIEPTSIZ0_PKTCNT(_x) ((_x) << 19) +#define DIEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DIEPTSIZ0_XFERSIZE_SHIFT 0 +#define DIEPTSIZ0_XFERSIZE_LIMIT 0x7f +#define DIEPTSIZ0_XFERSIZE(_x) ((_x) << 0) + +#define DOEPTSIZ0 HSOTG_REG(0xB10) +#define DOEPTSIZ0_SUPCNT_MASK (0x3 << 29) +#define DOEPTSIZ0_SUPCNT_SHIFT 29 +#define DOEPTSIZ0_SUPCNT_LIMIT 0x3 +#define DOEPTSIZ0_SUPCNT(_x) ((_x) << 29) +#define DOEPTSIZ0_PKTCNT BIT(19) +#define DOEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DOEPTSIZ0_XFERSIZE_SHIFT 0 + +#define DIEPTSIZ(_a) HSOTG_REG(0x910 + ((_a) * 0x20)) +#define DOEPTSIZ(_a) HSOTG_REG(0xB10 + ((_a) * 0x20)) +#define DXEPTSIZ_MC_MASK (0x3 << 29) +#define DXEPTSIZ_MC_SHIFT 29 +#define DXEPTSIZ_MC_LIMIT 0x3 +#define DXEPTSIZ_MC(_x) ((_x) << 29) +#define DXEPTSIZ_PKTCNT_MASK (0x3ff << 19) +#define DXEPTSIZ_PKTCNT_SHIFT 19 +#define DXEPTSIZ_PKTCNT_LIMIT 0x3ff +#define DXEPTSIZ_PKTCNT_GET(_v) (((_v) >> 19) & 0x3ff) +#define DXEPTSIZ_PKTCNT(_x) ((_x) << 19) +#define DXEPTSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define DXEPTSIZ_XFERSIZE_SHIFT 0 +#define DXEPTSIZ_XFERSIZE_LIMIT 0x7ffff +#define DXEPTSIZ_XFERSIZE_GET(_v) (((_v) >> 0) & 0x7ffff) +#define DXEPTSIZ_XFERSIZE(_x) ((_x) << 0) + +#define DIEPDMA(_a) HSOTG_REG(0x914 + ((_a) * 0x20)) +#define DOEPDMA(_a) HSOTG_REG(0xB14 + ((_a) * 0x20)) + +#define DTXFSTS(_a) HSOTG_REG(0x918 + ((_a) * 0x20)) + +#define PCGCTL HSOTG_REG(0x0e00) +#define PCGCTL_IF_DEV_MODE BIT(31) +#define PCGCTL_P2HD_PRT_SPD_MASK (0x3 << 29) +#define PCGCTL_P2HD_PRT_SPD_SHIFT 29 +#define PCGCTL_P2HD_DEV_ENUM_SPD_MASK (0x3 << 27) +#define PCGCTL_P2HD_DEV_ENUM_SPD_SHIFT 27 +#define PCGCTL_MAC_DEV_ADDR_MASK (0x7f << 20) +#define PCGCTL_MAC_DEV_ADDR_SHIFT 20 +#define PCGCTL_MAX_TERMSEL BIT(19) +#define PCGCTL_MAX_XCVRSELECT_MASK (0x3 << 17) +#define PCGCTL_MAX_XCVRSELECT_SHIFT 17 +#define PCGCTL_PORT_POWER BIT(16) +#define PCGCTL_PRT_CLK_SEL_MASK (0x3 << 14) +#define PCGCTL_PRT_CLK_SEL_SHIFT 14 +#define PCGCTL_ESS_REG_RESTORED BIT(13) +#define PCGCTL_EXTND_HIBER_SWITCH BIT(12) +#define PCGCTL_EXTND_HIBER_PWRCLMP BIT(11) +#define PCGCTL_ENBL_EXTND_HIBER BIT(10) +#define PCGCTL_RESTOREMODE BIT(9) +#define PCGCTL_RESETAFTSUSP BIT(8) +#define PCGCTL_DEEP_SLEEP BIT(7) +#define PCGCTL_PHY_IN_SLEEP BIT(6) +#define PCGCTL_ENBL_SLEEP_GATING BIT(5) +#define PCGCTL_RSTPDWNMODULE BIT(3) +#define PCGCTL_PWRCLMP BIT(2) +#define PCGCTL_GATEHCLK BIT(1) +#define PCGCTL_STOPPCLK BIT(0) + +#define PCGCCTL1 HSOTG_REG(0xe04) +#define PCGCCTL1_TIMER (0x3 << 1) +#define PCGCCTL1_GATEEN BIT(0) + +#define EPFIFO(_a) HSOTG_REG(0x1000 + ((_a) * 0x1000)) + +/* Host Mode Registers */ + +#define HCFG HSOTG_REG(0x0400) +#define HCFG_MODECHTIMEN BIT(31) +#define HCFG_PERSCHEDENA BIT(26) +#define HCFG_FRLISTEN_MASK (0x3 << 24) +#define HCFG_FRLISTEN_SHIFT 24 +#define HCFG_FRLISTEN_8 (0 << 24) +#define FRLISTEN_8_SIZE 8 +#define HCFG_FRLISTEN_16 BIT(24) +#define FRLISTEN_16_SIZE 16 +#define HCFG_FRLISTEN_32 (2 << 24) +#define FRLISTEN_32_SIZE 32 +#define HCFG_FRLISTEN_64 (3 << 24) +#define FRLISTEN_64_SIZE 64 +#define HCFG_DESCDMA BIT(23) +#define HCFG_RESVALID_MASK (0xff << 8) +#define HCFG_RESVALID_SHIFT 8 +#define HCFG_ENA32KHZ BIT(7) +#define HCFG_FSLSSUPP BIT(2) +#define HCFG_FSLSPCLKSEL_MASK (0x3 << 0) +#define HCFG_FSLSPCLKSEL_SHIFT 0 +#define HCFG_FSLSPCLKSEL_30_60_MHZ 0 +#define HCFG_FSLSPCLKSEL_48_MHZ 1 +#define HCFG_FSLSPCLKSEL_6_MHZ 2 + +#define HFIR HSOTG_REG(0x0404) +#define HFIR_FRINT_MASK (0xffff << 0) +#define HFIR_FRINT_SHIFT 0 +#define HFIR_RLDCTRL BIT(16) + +#define HFNUM HSOTG_REG(0x0408) +#define HFNUM_FRREM_MASK (0xffff << 16) +#define HFNUM_FRREM_SHIFT 16 +#define HFNUM_FRNUM_MASK (0xffff << 0) +#define HFNUM_FRNUM_SHIFT 0 +#define HFNUM_MAX_FRNUM 0x3fff + +#define HPTXSTS HSOTG_REG(0x0410) +#define TXSTS_QTOP_ODD BIT(31) +#define TXSTS_QTOP_CHNEP_MASK (0xf << 27) +#define TXSTS_QTOP_CHNEP_SHIFT 27 +#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) +#define TXSTS_QTOP_TOKEN_SHIFT 25 +#define TXSTS_QTOP_TERMINATE BIT(24) +#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_SHIFT 16 +#define TXSTS_FSPCAVAIL_MASK (0xffff << 0) +#define TXSTS_FSPCAVAIL_SHIFT 0 + +#define HAINT HSOTG_REG(0x0414) +#define HAINTMSK HSOTG_REG(0x0418) +#define HFLBADDR HSOTG_REG(0x041c) + +#define HPRT0 HSOTG_REG(0x0440) +#define HPRT0_SPD_MASK (0x3 << 17) +#define HPRT0_SPD_SHIFT 17 +#define HPRT0_SPD_HIGH_SPEED 0 +#define HPRT0_SPD_FULL_SPEED 1 +#define HPRT0_SPD_LOW_SPEED 2 +#define HPRT0_TSTCTL_MASK (0xf << 13) +#define HPRT0_TSTCTL_SHIFT 13 +#define HPRT0_PWR BIT(12) +#define HPRT0_LNSTS_MASK (0x3 << 10) +#define HPRT0_LNSTS_SHIFT 10 +#define HPRT0_RST BIT(8) +#define HPRT0_SUSP BIT(7) +#define HPRT0_RES BIT(6) +#define HPRT0_OVRCURRCHG BIT(5) +#define HPRT0_OVRCURRACT BIT(4) +#define HPRT0_ENACHG BIT(3) +#define HPRT0_ENA BIT(2) +#define HPRT0_CONNDET BIT(1) +#define HPRT0_CONNSTS BIT(0) + +#define HCCHAR(_ch) HSOTG_REG(0x0500 + 0x20 * (_ch)) +#define HCCHAR_CHENA BIT(31) +#define HCCHAR_CHDIS BIT(30) +#define HCCHAR_ODDFRM BIT(29) +#define HCCHAR_DEVADDR_MASK (0x7f << 22) +#define HCCHAR_DEVADDR_SHIFT 22 +#define HCCHAR_MULTICNT_MASK (0x3 << 20) +#define HCCHAR_MULTICNT_SHIFT 20 +#define HCCHAR_EPTYPE_MASK (0x3 << 18) +#define HCCHAR_EPTYPE_SHIFT 18 +#define HCCHAR_LSPDDEV BIT(17) +#define HCCHAR_EPDIR BIT(15) +#define HCCHAR_EPNUM_MASK (0xf << 11) +#define HCCHAR_EPNUM_SHIFT 11 +#define HCCHAR_MPS_MASK (0x7ff << 0) +#define HCCHAR_MPS_SHIFT 0 + +#define HCSPLT(_ch) HSOTG_REG(0x0504 + 0x20 * (_ch)) +#define HCSPLT_SPLTENA BIT(31) +#define HCSPLT_COMPSPLT BIT(16) +#define HCSPLT_XACTPOS_MASK (0x3 << 14) +#define HCSPLT_XACTPOS_SHIFT 14 +#define HCSPLT_XACTPOS_MID 0 +#define HCSPLT_XACTPOS_END 1 +#define HCSPLT_XACTPOS_BEGIN 2 +#define HCSPLT_XACTPOS_ALL 3 +#define HCSPLT_HUBADDR_MASK (0x7f << 7) +#define HCSPLT_HUBADDR_SHIFT 7 +#define HCSPLT_PRTADDR_MASK (0x7f << 0) +#define HCSPLT_PRTADDR_SHIFT 0 + +#define HCINT(_ch) HSOTG_REG(0x0508 + 0x20 * (_ch)) +#define HCINTMSK(_ch) HSOTG_REG(0x050c + 0x20 * (_ch)) +#define HCINTMSK_RESERVED14_31 (0x3ffff << 14) +#define HCINTMSK_FRM_LIST_ROLL BIT(13) +#define HCINTMSK_XCS_XACT BIT(12) +#define HCINTMSK_BNA BIT(11) +#define HCINTMSK_DATATGLERR BIT(10) +#define HCINTMSK_FRMOVRUN BIT(9) +#define HCINTMSK_BBLERR BIT(8) +#define HCINTMSK_XACTERR BIT(7) +#define HCINTMSK_NYET BIT(6) +#define HCINTMSK_ACK BIT(5) +#define HCINTMSK_NAK BIT(4) +#define HCINTMSK_STALL BIT(3) +#define HCINTMSK_AHBERR BIT(2) +#define HCINTMSK_CHHLTD BIT(1) +#define HCINTMSK_XFERCOMPL BIT(0) + +#define HCTSIZ(_ch) HSOTG_REG(0x0510 + 0x20 * (_ch)) +#define TSIZ_DOPNG BIT(31) +#define TSIZ_SC_MC_PID_MASK (0x3 << 29) +#define TSIZ_SC_MC_PID_SHIFT 29 +#define TSIZ_SC_MC_PID_DATA0 0 +#define TSIZ_SC_MC_PID_DATA2 1 +#define TSIZ_SC_MC_PID_DATA1 2 +#define TSIZ_SC_MC_PID_MDATA 3 +#define TSIZ_SC_MC_PID_SETUP 3 +#define TSIZ_PKTCNT_MASK (0x3ff << 19) +#define TSIZ_PKTCNT_SHIFT 19 +#define TSIZ_NTD_MASK (0xff << 8) +#define TSIZ_NTD_SHIFT 8 +#define TSIZ_SCHINFO_MASK (0xff << 0) +#define TSIZ_SCHINFO_SHIFT 0 +#define TSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define TSIZ_XFERSIZE_SHIFT 0 + +#define HCDMA(_ch) HSOTG_REG(0x0514 + 0x20 * (_ch)) + +#define HCDMAB(_ch) HSOTG_REG(0x051c + 0x20 * (_ch)) + +#define HCFIFO(_ch) HSOTG_REG(0x1000 + 0x1000 * (_ch)) + +/** + * struct dwc2_dma_desc - DMA descriptor structure, + * used for both host and gadget modes + * + * @status: DMA descriptor status quadlet + * @buf: DMA descriptor data buffer pointer + * + * DMA Descriptor structure contains two quadlets: + * Status quadlet and Data buffer pointer. + */ +struct dwc2_dma_desc { + uint32_t status; + uint32_t buf; +} __packed; + +/* Host Mode DMA descriptor status quadlet */ + +#define HOST_DMA_A BIT(31) +#define HOST_DMA_STS_MASK (0x3 << 28) +#define HOST_DMA_STS_SHIFT 28 +#define HOST_DMA_STS_PKTERR BIT(28) +#define HOST_DMA_EOL BIT(26) +#define HOST_DMA_IOC BIT(25) +#define HOST_DMA_SUP BIT(24) +#define HOST_DMA_ALT_QTD BIT(23) +#define HOST_DMA_QTD_OFFSET_MASK (0x3f << 17) +#define HOST_DMA_QTD_OFFSET_SHIFT 17 +#define HOST_DMA_ISOC_NBYTES_MASK (0xfff << 0) +#define HOST_DMA_ISOC_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_MASK (0x1ffff << 0) +#define HOST_DMA_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_LIMIT 131071 + +/* Device Mode DMA descriptor status quadlet */ + +#define DEV_DMA_BUFF_STS_MASK (0x3 << 30) +#define DEV_DMA_BUFF_STS_SHIFT 30 +#define DEV_DMA_BUFF_STS_HREADY 0 +#define DEV_DMA_BUFF_STS_DMABUSY 1 +#define DEV_DMA_BUFF_STS_DMADONE 2 +#define DEV_DMA_BUFF_STS_HBUSY 3 +#define DEV_DMA_STS_MASK (0x3 << 28) +#define DEV_DMA_STS_SHIFT 28 +#define DEV_DMA_STS_SUCC 0 +#define DEV_DMA_STS_BUFF_FLUSH 1 +#define DEV_DMA_STS_BUFF_ERR 3 +#define DEV_DMA_L BIT(27) +#define DEV_DMA_SHORT BIT(26) +#define DEV_DMA_IOC BIT(25) +#define DEV_DMA_SR BIT(24) +#define DEV_DMA_MTRF BIT(23) +#define DEV_DMA_ISOC_PID_MASK (0x3 << 23) +#define DEV_DMA_ISOC_PID_SHIFT 23 +#define DEV_DMA_ISOC_PID_DATA0 0 +#define DEV_DMA_ISOC_PID_DATA2 1 +#define DEV_DMA_ISOC_PID_DATA1 2 +#define DEV_DMA_ISOC_PID_MDATA 3 +#define DEV_DMA_ISOC_FRNUM_MASK (0x7ff << 12) +#define DEV_DMA_ISOC_FRNUM_SHIFT 12 +#define DEV_DMA_ISOC_TX_NBYTES_MASK (0xfff << 0) +#define DEV_DMA_ISOC_TX_NBYTES_LIMIT 0xfff +#define DEV_DMA_ISOC_RX_NBYTES_MASK (0x7ff << 0) +#define DEV_DMA_ISOC_RX_NBYTES_LIMIT 0x7ff +#define DEV_DMA_ISOC_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_MASK (0xffff << 0) +#define DEV_DMA_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_LIMIT 0xffff + +#define MAX_DMA_DESC_NUM_GENERIC 64 +#define MAX_DMA_DESC_NUM_HS_ISOC 256 + +#endif /* __DWC2_HW_H__ */ diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index f800266..c76806d 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -13,7 +13,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -22,25 +24,35 @@ union CRYPTO_STATE { }; #ifdef HOST_WORDS_BIGENDIAN -#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) -#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else -#define CR_ST_BYTE(state, i) (state.bytes[i]) -#define CR_ST_WORD(state, i) (state.words[i]) +#define CR_ST_BYTE(state, i) ((state).bytes[i]) +#define CR_ST_WORD(state, i) ((state).words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. + */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -54,7 +66,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese(vd + i, vn + i, vm + i, decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -190,13 +213,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - assert(decrypt < 2); - for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = mc[decrypt][CR_ST_BYTE(st, i)] ^ @@ -209,6 +228,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc(vd + i, vm + i, decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -228,52 +258,77 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) +void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) +{ + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t d0, d1; + + d0 = d[1] ^ d[0] ^ m[0]; + d1 = n[0] ^ d[1] ^ m[1]; + d[0] = d0; + d[1] = d1; + + clear_tail_16(vd, desc); +} + +static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, + uint64_t *rm, uint32_t desc, + uint32_t (*fn)(union CRYPTO_STATE *d)) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + int i; - if (op == 3) { /* sha1su0 */ - d.l[0] ^= d.l[1] ^ m.l[0]; - d.l[1] ^= n.l[0] ^ m.l[1]; - } else { - int i; - - for (i = 0; i < 4; i++) { - uint32_t t; - - switch (op) { - case 0: /* sha1c */ - t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 1: /* sha1p */ - t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 2: /* sha1m */ - t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - default: - g_assert_not_reached(); - } - t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) - + CR_ST_WORD(m, i); - - CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); - CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); - CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); - CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); - CR_ST_WORD(d, 0) = t; - } + for (i = 0; i < 4; i++) { + uint32_t t = fn(&d); + + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; } rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); +} + +static uint32_t do_sha1c(union CRYPTO_STATE *d) +{ + return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); +} + +static uint32_t do_sha1p(union CRYPTO_STATE *d) +{ + return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); } -void HELPER(crypto_sha1h)(void *vd, void *vm) +static uint32_t do_sha1m(union CRYPTO_STATE *d) +{ + return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); +} + +void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -284,9 +339,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm) rd[0] = m.l[0]; rd[1] = m.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha1su1)(void *vd, void *vm) +void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -300,6 +357,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -327,7 +386,7 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -358,9 +417,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -383,9 +444,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su0)(void *vd, void *vm) +void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -399,9 +462,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -417,6 +482,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -453,7 +520,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -466,9 +533,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -481,9 +550,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -495,9 +566,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -505,9 +578,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -531,9 +606,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -551,17 +628,18 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, - uint32_t opcode) +static inline void QEMU_ALWAYS_INLINE +crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, + uint32_t desc, uint32_t opcode) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t imm2 = simd_data(desc); uint32_t t; assert(imm2 < 4); @@ -576,7 +654,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, /* SM3TT2B */ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); } else { - g_assert_not_reached(); + qemu_build_not_reached(); } t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); @@ -601,8 +679,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); } +#define DO_SM3TT(NAME, OPCODE) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } + +DO_SM3TT(crypto_sm3tt1a, 0) +DO_SM3TT(crypto_sm3tt1b, 1) +DO_SM3TT(crypto_sm3tt2a, 2) +DO_SM3TT(crypto_sm3tt2b, 3) + +#undef DO_SM3TT + static uint8_t const sm4_sbox[] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, @@ -638,12 +729,10 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; for (i = 0; i < 4; i++) { @@ -665,11 +754,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; @@ -693,3 +789,24 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/target/arm/helper.h b/target/arm/helper.h index 49336dc..2a20c81 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -510,29 +510,40 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 8beb1db..bd1b0e1 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -165,14 +165,16 @@ VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0 VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same -SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp +@3same_crypto .... .... .... .... .... .... .... .... \ + &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1 + +SHA1C_3s 1111 001 0 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA1P_3s 1111 001 0 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA1M_3s 1111 001 0 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA1SU0_3s 1111 001 0 0 . 11 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp @@ -199,3 +201,199 @@ VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp + +###################################################################### +# 2-reg-and-shift grouping: +# 1111 001 U 1 D immH:3 immL:3 Vd:4 opc:4 L Q M 1 Vm:4 +###################################################################### +&2reg_shift vm vd q shift size + +# Right shifts are encoded as N - shift, where N is the element size in bits. +%neon_rshift_i6 16:6 !function=rsub_64 +%neon_rshift_i5 16:5 !function=rsub_32 +%neon_rshift_i4 16:4 !function=rsub_16 +%neon_rshift_i3 16:3 !function=rsub_8 + +@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6 +@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5 +@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4 +@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3 + +@2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 +@2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 +@2reg_shl_h .... ... . . . 01 shift:4 .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 +@2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 + +# Narrowing right shifts: here the Q bit is part of the opcode decode +@2reg_shrn_d .... ... . . . 1 ..... .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \ + shift=%neon_rshift_i5 +@2reg_shrn_s .... ... . . . 01 .... .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \ + shift=%neon_rshift_i4 +@2reg_shrn_h .... ... . . . 001 ... .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \ + shift=%neon_rshift_i3 + +# Long left shifts: again Q is part of opcode decode +@2reg_shll_s .... ... . . . 1 shift:5 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 +@2reg_shll_h .... ... . . . 01 shift:4 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 +@2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0 + +# We use size=0 for fp32 and size=1 for fp16 to match the 3-same encodings. +@2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i5 + +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b + +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b + +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b + +VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b + +VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b + +VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b + +VSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d +VSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s +VSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h + +VRSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d +VRSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s +VRSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h + +VQSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d +VQSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s +VQSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h + +VQRSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d +VQRSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s +VQRSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h + +# VQSHRN with signed input +VQSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d +VQSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s +VQSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h + +# VQRSHRN with signed input +VQRSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d +VQRSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s +VQRSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h + +# VQSHRN with unsigned input +VQSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d +VQSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s +VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h + +# VQRSHRN with unsigned input +VQRSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d +VQRSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s +VQRSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h + +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b + +VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s +VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h +VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b + +# VCVT fixed<->float conversions +# TODO: FP16 fixed<->float conversions are opc==0b1100 and 0b1101 +VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt +VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt +VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt +VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt + +###################################################################### +# 1-reg-and-modified-immediate grouping: +# 1111 001 i 1 D 000 imm:3 Vd:4 cmode:4 0 Q op 1 Vm:4 +###################################################################### + +&1reg_imm vd q imm cmode op + +%asimd_imm_value 24:1 16:3 0:4 + +@1reg_imm .... ... . . . ... ... .... .... . q:1 . . .... \ + &1reg_imm imm=%asimd_imm_value vd=%vd_dp + +# The cmode/op bits here decode VORR/VBIC/VMOV/VMNV, but +# not in a way we can conveniently represent in decodetree without +# a lot of repetition: +# VORR: op=0, (cmode & 1) && cmode < 12 +# VBIC: op=1, (cmode & 1) && cmode < 12 +# VMOV: everything else +# So we have a single decode line and check the cmode/op in the +# trans function. +Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 874f3eb..a0e72ad 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -571,6 +571,15 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, is_q ? 16 : 8, vec_full_reg_size(s)); } +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, + int rn, int data, gen_helper_gvec_2 *fn) +{ + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); +} + /* Expand a 3-operand operation using an out-of-line helper. */ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int data, gen_helper_gvec_3 *fn) @@ -13403,9 +13412,8 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13415,19 +13423,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13437,16 +13445,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(decrypt); - - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_i32(tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13462,8 +13465,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpFn *genfn; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + gen_helper_gvec_3 *genfn; bool feature; if (size != 0) { @@ -13473,10 +13475,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1C */ + genfn = gen_helper_crypto_sha1c; + feature = dc_isar_feature(aa64_sha1, s); + break; case 1: /* SHA1P */ + genfn = gen_helper_crypto_sha1p; + feature = dc_isar_feature(aa64_sha1, s); + break; case 2: /* SHA1M */ + genfn = gen_helper_crypto_sha1m; + feature = dc_isar_feature(aa64_sha1, s); + break; case 3: /* SHA1SU0 */ - genfn = NULL; + genfn = gen_helper_crypto_sha1su0; feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ @@ -13504,24 +13515,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - if (genfn) { - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - } else { - TCGv_i32 tcg_opcode = tcg_const_i32(opcode); - - gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr, - tcg_rm_ptr, tcg_opcode); - tcg_temp_free_i32(tcg_opcode); - } - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } /* Crypto two-reg SHA @@ -13536,9 +13530,8 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpFn *genfn; + gen_helper_gvec_2 *genfn; bool feature; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -13571,14 +13564,33 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); +} - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} - genfn(tcg_rd_ptr, tcg_rn_ptr); +static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); } /* Crypto three-reg SHA512 @@ -13595,25 +13607,26 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; + GVecGen3Fn *gvecfn = NULL; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecfn = gen_gvec_rax1; break; default: g_assert_not_reached(); @@ -13622,15 +13635,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -13647,41 +13660,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (genfn) { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res[0]); - tcg_temp_free_i64(tcg_res[1]); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -13696,18 +13678,14 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -13723,13 +13701,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_rd_ptr, tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); + } } /* Crypto four-register @@ -13885,13 +13866,15 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) { + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, + gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, + }; int opcode = extract32(insn, 10, 2); int imm2 = extract32(insn, 12, 2); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - TCGv_i32 tcg_imm2, tcg_opcode; if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); @@ -13902,20 +13885,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - tcg_imm2 = tcg_const_i32(imm2); - tcg_opcode = tcg_const_i32(opcode); - - gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, - tcg_opcode); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); - tcg_temp_free_i32(tcg_imm2); - tcg_temp_free_i32(tcg_opcode); + gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); } /* C3.6 Data processing - SIMD, inc Crypto diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index f02fbb6..da0f59a 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -115,4 +115,7 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 3fe65a0..664d361 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -31,6 +31,24 @@ static inline int plus1(DisasContext *s, int x) return x + 1; } +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + /* Include the generated Neon decoder */ #include "decode-neon-dp.inc.c" #include "decode-neon-ls.inc.c" @@ -661,12 +679,14 @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE) DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) DO_3SAME_CMP(VCEQ, TCG_COND_EQ) -static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) -{ - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, - 0, gen_helper_gvec_pmul_b); -} +#define WRAP_OOL_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ + } + +WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) { @@ -691,144 +711,34 @@ static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) -static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - TCGv_i32 tmp; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha1, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - tmp = tcg_const_i32(a->optype); - gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp); - tcg_temp_free_i32(tmp); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} - -static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256h(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} - -static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} - -static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; +#define DO_SHA1(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha1, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ } - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } +DO_SHA1(SHA1C, gen_helper_crypto_sha1c) +DO_SHA1(SHA1P, gen_helper_crypto_sha1p) +DO_SHA1(SHA1M, gen_helper_crypto_sha1m) +DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) - if (!vfp_access_check(s)) { - return true; +#define DO_SHA2(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha2, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ } - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} +DO_SHA2(SHA256H, gen_helper_crypto_sha256h) +DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) +DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) #define DO_3SAME_64(INSN, FUNC) \ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ @@ -1310,3 +1220,609 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) + +static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) +{ + /* Handle a 2-reg-shift insn which can be vectorized. */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); + return true; +} + +#define DO_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_vector_2sh(s, a, FUNC); \ + } \ + +DO_2SH(VSHL, tcg_gen_gvec_shli) +DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. */ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} + +static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpEnvFn *fn) +{ + /* + * 2-reg-and-shift operations, size == 3 case, where the + * function needs to be passed cpu_env. + */ + TCGv_i64 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i64(dup_const(a->size, a->shift)); + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + neon_load_reg64(tmp, a->vm + pass); + fn(tmp, cpu_env, tmp, constimm); + neon_store_reg64(tmp, a->vd + pass); + } + tcg_temp_free_i64(constimm); + return true; +} + +static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpEnvFn *fn) +{ + /* + * 2-reg-and-shift operations, size < 3 case, where the + * helper needs to be passed cpu_env. + */ + TCGv_i32 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i32(dup_const(a->size, a->shift)); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(a->vm, pass); + fn(tmp, cpu_env, tmp, constimm); + neon_store_reg(a->vd, pass, tmp); + } + tcg_temp_free_i32(constimm); + return true; +} + +#define DO_2SHIFT_ENV(INSN, FUNC) \ + static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ + } \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##FUNC##8, \ + gen_helper_neon_##FUNC##16, \ + gen_helper_neon_##FUNC##32, \ + }; \ + assert(a->size < ARRAY_SIZE(fns)); \ + return do_2shift_env_32(s, a, fns[a->size]); \ + } + +DO_2SHIFT_ENV(VQSHLU, qshlu_s) +DO_2SHIFT_ENV(VQSHL_U, qshl_u) +DO_2SHIFT_ENV(VQSHL_S, qshl_s) + +static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ + TCGv_i64 constimm, rm1, rm2; + TCGv_i32 rd; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count. + */ + constimm = tcg_const_i64(-a->shift); + rm1 = tcg_temp_new_i64(); + rm2 = tcg_temp_new_i64(); + + /* Load both inputs first to avoid potential overwrite if rm == rd */ + neon_load_reg64(rm1, a->vm); + neon_load_reg64(rm2, a->vm + 1); + + shiftfn(rm1, rm1, constimm); + rd = tcg_temp_new_i32(); + narrowfn(rd, cpu_env, rm1); + neon_store_reg(a->vd, 0, rd); + + shiftfn(rm2, rm2, constimm); + rd = tcg_temp_new_i32(); + narrowfn(rd, cpu_env, rm2); + neon_store_reg(a->vd, 1, rd); + + tcg_temp_free_i64(rm1); + tcg_temp_free_i64(rm2); + tcg_temp_free_i64(constimm); + + return true; +} + +static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ + TCGv_i32 constimm, rm1, rm2, rm3, rm4; + TCGv_i64 rtmp; + uint32_t imm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count + * duplicated into each lane of the immediate value. + */ + if (a->size == 1) { + imm = (uint16_t)(-a->shift); + imm |= imm << 16; + } else { + /* size == 2 */ + imm = -a->shift; + } + constimm = tcg_const_i32(imm); + + /* Load all inputs first to avoid potential overwrite */ + rm1 = neon_load_reg(a->vm, 0); + rm2 = neon_load_reg(a->vm, 1); + rm3 = neon_load_reg(a->vm + 1, 0); + rm4 = neon_load_reg(a->vm + 1, 1); + rtmp = tcg_temp_new_i64(); + + shiftfn(rm1, rm1, constimm); + shiftfn(rm2, rm2, constimm); + + tcg_gen_concat_i32_i64(rtmp, rm1, rm2); + tcg_temp_free_i32(rm2); + + narrowfn(rm1, cpu_env, rtmp); + neon_store_reg(a->vd, 0, rm1); + + shiftfn(rm3, rm3, constimm); + shiftfn(rm4, rm4, constimm); + tcg_temp_free_i32(constimm); + + tcg_gen_concat_i32_i64(rtmp, rm3, rm4); + tcg_temp_free_i32(rm4); + + narrowfn(rm3, cpu_env, rtmp); + tcg_temp_free_i64(rtmp); + neon_store_reg(a->vd, 1, rm3); + return true; +} + +#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ + } +#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ + } + +static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + tcg_gen_extrl_i64_i32(dest, src); +} + +static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u16(dest, src); +} + +static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u8(dest, src); +} + +DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) +DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16) +DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) +DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) +DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) + +DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) +DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) + +DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) + +static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenWidenFn *widenfn, bool u) +{ + TCGv_i64 tmp; + TCGv_i32 rm0, rm1; + uint64_t widen_mask = 0; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is a widen-and-shift operation. The shift is always less + * than the width of the source type, so after widening the input + * vector we can simply shift the whole 64-bit widened register, + * and then clear the potential overflow bits resulting from left + * bits of the narrow input appearing as right bits of the left + * neighbour narrow input. Calculate a mask of bits to clear. + */ + if ((a->shift != 0) && (a->size < 2 || u)) { + int esize = 8 << a->size; + widen_mask = MAKE_64BIT_MASK(0, esize); + widen_mask >>= esize - a->shift; + widen_mask = dup_const(a->size + 1, widen_mask); + } + + rm0 = neon_load_reg(a->vm, 0); + rm1 = neon_load_reg(a->vm, 1); + tmp = tcg_temp_new_i64(); + + widenfn(tmp, rm0); + if (a->shift != 0) { + tcg_gen_shli_i64(tmp, tmp, a->shift); + tcg_gen_andi_i64(tmp, tmp, ~widen_mask); + } + neon_store_reg64(tmp, a->vd); + + widenfn(tmp, rm1); + if (a->shift != 0) { + tcg_gen_shli_i64(tmp, tmp, a->shift); + tcg_gen_andi_i64(tmp, tmp, ~widen_mask); + } + neon_store_reg64(tmp, a->vd + 1); + tcg_temp_free_i64(tmp); + return true; +} + +static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + NeonGenWidenFn *widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], false); +} + +static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + NeonGenWidenFn *widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], true); +} + +static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoSingleOPFn *fn) +{ + /* FP operations in 2-reg-and-shift group */ + TCGv_i32 tmp, shiftv; + TCGv_ptr fpstatus; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpstatus = get_fpstatus_ptr(1); + shiftv = tcg_const_i32(a->shift); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(a->vm, pass); + fn(tmp, tmp, shiftv, fpstatus); + neon_store_reg(a->vd, pass, tmp); + } + tcg_temp_free_ptr(fpstatus); + tcg_temp_free_i32(shiftv); + return true; +} + +#define DO_FP_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_fp_2sh(s, a, FUNC); \ + } + +DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) +DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) +DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) +DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) + +static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* + * Expand the encoded constant. + * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. + */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This is the only case where the top and bottom 32 bits + * of the encoded constant differ. + */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + +static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, + GVecGen2iFn *fn) +{ + uint64_t imm; + int reg_ofs, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + reg_ofs = neon_reg_offset(a->vd, 0); + vec_size = a->q ? 16 : 8; + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); + return true; +} + +static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + GVecGen2iFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + /* for op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + fn = gen_VMOV_1r; + } + return do_1reg_imm(s, a, fn); +} diff --git a/target/arm/translate.c b/target/arm/translate.c index c829611..bcdfec3 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -3011,29 +3011,6 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) } } -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) - static TCGv_i32 neon_load_scratch(int scratch) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -3224,40 +3201,6 @@ static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src) } } -static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift, - int q, int u) -{ - if (q) { - if (u) { - switch (size) { - case 1: gen_helper_neon_rshl_u16(var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_rshl_s16(var, var, shift); break; - case 2: gen_helper_neon_rshl_s32(var, var, shift); break; - default: abort(); - } - } - } else { - if (u) { - switch (size) { - case 1: gen_helper_neon_shl_u16(var, var, shift); break; - case 2: gen_ushl_i32(var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_shl_s16(var, var, shift); break; - case 2: gen_sshl_i32(var, var, shift); break; - default: abort(); - } - } - } -} - static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u) { if (u) { @@ -5250,14 +5193,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) int q; int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; - int shift; int pass; - int count; int u; int vec_size; uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1, ptr2; + TCGv_ptr ptr1; TCGv_i64 tmp64; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -5291,433 +5232,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) /* Three register same length: handled by decodetree */ return 1; } else if (insn & (1 << 4)) { - if ((insn & 0x00380080) != 0) { - /* Two registers and shift. */ - op = (insn >> 8) & 0xf; - if (insn & (1 << 7)) { - /* 64-bit shift. */ - if (op > 7) { - return 1; - } - size = 3; - } else { - size = 2; - while ((insn & (1 << (size + 19))) == 0) - size--; - } - shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 8) { - /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ - if (q && ((rd | rm) & 1)) { - return 1; - } - if (!u && (op == 4 || op == 6)) { - return 1; - } - /* Right shifts are encoded as N - shift, where N is the - element size in bits. */ - if (op <= 4) { - shift = shift - (1 << (size + 3)); - } - - switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size, - vec_size, 0); - } else { - tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_usra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_ssra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 2: /* VRSHR */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_urshr(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_srshr(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 3: /* VRSRA */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_ursra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_srsra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - gen_gvec_sri(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - return 0; - - case 5: /* VSHL, VSLI */ - if (u) { /* VSLI */ - gen_gvec_sli(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { /* VSHL */ - tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - } - - if (size == 3) { - count = q + 1; - } else { - count = q ? 4: 2; - } - - /* To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. - */ - imm = dup_const(size, shift); - - for (pass = 0; pass < count; pass++) { - if (size == 3) { - neon_load_reg64(cpu_V0, rm + pass); - tcg_gen_movi_i64(cpu_V1, imm); - switch (op) { - case 6: /* VQSHLU */ - gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - break; - case 7: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qshl_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; - default: - g_assert_not_reached(); - } - neon_store_reg64(cpu_V0, rd + pass); - } else { /* size < 3 */ - /* Operands in T0 and T1. */ - tmp = neon_load_reg(rm, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp2, imm); - switch (op) { - case 6: /* VQSHLU */ - switch (size) { - case 0: - gen_helper_neon_qshlu_s8(tmp, cpu_env, - tmp, tmp2); - break; - case 1: - gen_helper_neon_qshlu_s16(tmp, cpu_env, - tmp, tmp2); - break; - case 2: - gen_helper_neon_qshlu_s32(tmp, cpu_env, - tmp, tmp2); - break; - default: - abort(); - } - break; - case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - default: - g_assert_not_reached(); - } - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } - } /* for pass */ - } else if (op < 10) { - /* Shift by immediate and narrow: - VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ - int input_unsigned = (op == 8) ? !u : u; - if (rm & 1) { - return 1; - } - shift = shift - (1 << (size + 3)); - size++; - if (size == 3) { - tmp64 = tcg_const_i64(shift); - neon_load_reg64(cpu_V0, rm); - neon_load_reg64(cpu_V1, rm + 1); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 in; - if (pass == 0) { - in = cpu_V0; - } else { - in = cpu_V1; - } - if (q) { - if (input_unsigned) { - gen_helper_neon_rshl_u64(cpu_V0, in, tmp64); - } else { - gen_helper_neon_rshl_s64(cpu_V0, in, tmp64); - } - } else { - if (input_unsigned) { - gen_ushl_i64(cpu_V0, in, tmp64); - } else { - gen_sshl_i64(cpu_V0, in, tmp64); - } - } - tmp = tcg_temp_new_i32(); - gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); - neon_store_reg(rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i64(tmp64); - } else { - if (size == 1) { - imm = (uint16_t)shift; - imm |= imm << 16; - } else { - /* size == 2 */ - imm = (uint32_t)shift; - } - tmp2 = tcg_const_i32(imm); - tmp4 = neon_load_reg(rm + 1, 0); - tmp5 = neon_load_reg(rm + 1, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(rm, 0); - } else { - tmp = tmp4; - } - gen_neon_shift_narrow(size, tmp, tmp2, q, - input_unsigned); - if (pass == 0) { - tmp3 = neon_load_reg(rm, 1); - } else { - tmp3 = tmp5; - } - gen_neon_shift_narrow(size, tmp3, tmp2, q, - input_unsigned); - tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(tmp3); - tmp = tcg_temp_new_i32(); - gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); - neon_store_reg(rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i32(tmp2); - } - } else if (op == 10) { - /* VSHLL, VMOVL */ - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(rm, 0); - tmp2 = neon_load_reg(rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - - gen_neon_widen(cpu_V0, tmp, size, u); - - if (shift != 0) { - /* The shift is less than the width of the source - type, so we can just shift the whole register. */ - tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); - /* Widen the result of shift: we need to clear - * the potential overflow bits resulting from - * left bits of the narrow input appearing as - * right bits of left the neighbour narrow - * input. */ - if (size < 2 || !u) { - uint64_t imm64; - if (size == 0) { - imm = (0xffu >> (8 - shift)); - imm |= imm << 16; - } else if (size == 1) { - imm = 0xffff >> (16 - shift); - } else { - /* size == 2 */ - imm = 0xffffffff >> (32 - shift); - } - if (size < 2) { - imm64 = imm | (((uint64_t)imm) << 32); - } else { - imm64 = imm; - } - tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64); - } - } - neon_store_reg64(cpu_V0, rd + pass); - } - } else if (op >= 14) { - /* VCVT fixed-point. */ - TCGv_ptr fpst; - TCGv_i32 shiftv; - VFPGenFixPointFn *fn; - - if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { - return 1; - } - - if (!(op & 1)) { - if (u) { - fn = gen_helper_vfp_ultos; - } else { - fn = gen_helper_vfp_sltos; - } - } else { - if (u) { - fn = gen_helper_vfp_touls_round_to_zero; - } else { - fn = gen_helper_vfp_tosls_round_to_zero; - } - } - - /* We have already masked out the must-be-1 top bit of imm6, - * hence this 32-shift where the ARM ARM has 64-imm6. - */ - shift = 32 - shift; - fpst = get_fpstatus_ptr(1); - shiftv = tcg_const_i32(shift); - for (pass = 0; pass < (q ? 4 : 2); pass++) { - TCGv_i32 tmpf = neon_load_reg(rm, pass); - fn(tmpf, tmpf, shiftv, fpst); - neon_store_reg(rd, pass, tmpf); - } - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(shiftv); - } else { - return 1; - } - } else { /* (insn & 0x00380080) == 0 */ - int invert, reg_ofs, vec_size; - - if (q && (rd & 1)) { - return 1; - } - - op = (insn >> 8) & 0xf; - /* One register and immediate. */ - imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); - invert = (insn & (1 << 5)) != 0; - /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. - */ - switch (op) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) { - imm = ~imm; - } - break; - case 15: - if (invert) { - return 1; - } - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); - break; - } - if (invert) { - imm = ~imm; - } - - reg_ofs = neon_reg_offset(rd, 0); - vec_size = q ? 16 : 8; - - if (op & 1 && op < 12) { - if (invert) { - /* The immediate value has already been inverted, - * so BIC becomes AND. - */ - tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } else { - tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } - } else { - /* VMOV, VMVN. */ - if (op == 14 && invert) { - TCGv_i64 t64 = tcg_temp_new_i64(); - - for (pass = 0; pass <= q; ++pass) { - uint64_t val = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << (n + pass * 8))) { - val |= 0xffull << (n * 8); - } - } - tcg_gen_movi_i64(t64, val); - neon_store_reg64(t64, rd + pass); - } - tcg_temp_free_i64(t64); - } else { - tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size, - vec_size, imm); - } - } - } + /* Two registers and shift or reg and imm: handled by decodetree */ + return 1; } else { /* (insn & 0x00800010 == 0x00800000) */ if (size != 3) { op = (insn >> 8) & 0xf; @@ -6350,34 +5866,30 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(extract32(insn, 6, 1)); - + /* + * Bit 6 is the lowest opcode bit; it distinguishes + * between encryption (AESE/AESMC) and decryption + * (AESD/AESIMC). + */ if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(ptr1, ptr2, tmp3); + tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd), + vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aese); } else { - gen_helper_crypto_aesmc(ptr1, ptr2, tmp3); + tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aesmc); } - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - - gen_helper_crypto_sha1h(ptr1, ptr2); - - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); + tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0, + gen_helper_crypto_sha1h); break; case NEON_2RM_SHA1SU1: if ((rm | rd) & 1) { @@ -6391,17 +5903,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - if (q) { - gen_helper_crypto_sha256su0(ptr1, ptr2); - } else { - gen_helper_crypto_sha1su1(ptr1, ptr2); - } - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); + tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0, + q ? gen_helper_crypto_sha256su0 + : gen_helper_crypto_sha1su1); break; - case NEON_2RM_VMVN: tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); break; diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 50a4992..7d76412 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -22,7 +22,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ @@ -36,16 +36,6 @@ #define H4(x) (x) #endif -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = vd + opr_sz; - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, int16_t src3, uint32_t *sat) diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h new file mode 100644 index 0000000..00a8277 --- /dev/null +++ b/target/arm/vec_internal.h @@ -0,0 +1,33 @@ +/* + * ARM AdvSIMD / SVE Vector Helpers + * + * Copyright (c) 2020 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TARGET_ARM_VEC_INTERNALS_H +#define TARGET_ARM_VEC_INTERNALS_H + +static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) +{ + uint64_t *d = vd + opr_sz; + uintptr_t i; + + for (i = opr_sz; i < max_sz; i += 8) { + *d++ = 0; + } +} + +#endif /* TARGET_ARM_VEC_INTERNALS_H */ diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py index 12725d4..3f3aa0c 100644 --- a/tests/acceptance/boot_linux_console.py +++ b/tests/acceptance/boot_linux_console.py @@ -308,6 +308,32 @@ class BootLinuxConsole(LinuxKernelTest): console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) + def test_aarch64_xlnx_versal_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:xlnx-versal-virt + :avocado: tags=device:pl011 + :avocado: tags=device:arm_gicv3 + """ + kernel_url = ('http://ports.ubuntu.com/ubuntu-ports/dists/' + 'bionic-updates/main/installer-arm64/current/images/' + 'netboot/ubuntu-installer/arm64/linux') + kernel_hash = '5bfc54cf7ed8157d93f6e5b0241e727b6dc22c50' + kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) + + initrd_url = ('http://ports.ubuntu.com/ubuntu-ports/dists/' + 'bionic-updates/main/installer-arm64/current/images/' + 'netboot/ubuntu-installer/arm64/initrd.gz') + initrd_hash = 'd385d3e88d53e2004c5d43cbe668b458a094f772' + initrd_path = self.fetch_asset(initrd_url, asset_hash=initrd_hash) + + self.vm.set_console() + self.vm.add_args('-m', '2G', + '-kernel', kernel_path, + '-initrd', initrd_path) + self.vm.launch() + self.wait_for_console_pattern('Checked W+X mappings: passed') + def test_arm_virt(self): """ :avocado: tags=arch:arm @@ -379,13 +405,18 @@ class BootLinuxConsole(LinuxKernelTest): self.vm.set_console() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + - serial_kernel_cmdline[uart_id]) + serial_kernel_cmdline[uart_id] + + ' root=/dev/mmcblk0p2 rootwait ' + + 'dwc_otg.fiq_fsm_enable=0') self.vm.add_args('-kernel', kernel_path, '-dtb', dtb_path, - '-append', kernel_command_line) + '-append', kernel_command_line, + '-device', 'usb-kbd') self.vm.launch() console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) + console_pattern = 'Product: QEMU USB Keyboard' + self.wait_for_console_pattern(console_pattern) def test_arm_raspi2_uart0(self): """ |