author     Richard Henderson <richard.henderson@linaro.org>    2023-06-19 16:32:25 +0200
committer  Richard Henderson <richard.henderson@linaro.org>    2023-06-19 16:32:25 +0200
commit     48ab886d3da4f3ab94f79f6c0f8b4535b446bbfd
tree       b3c5e674788806a0c0c7b303ad236db359e9f4bd
parent     e3660cc1e3cb136af50c0eaaeac27943c2438d1d
parent     074259c0f2ac40042dce766d870318cc22f388eb
Merge tag 'pull-target-arm-20230619' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* Fix return value from LDSMIN/LDSMAX 8/16 bit atomics
* Return correct result for LDG when ATA=0
* Conversion of system insns, loads and stores to decodetree
* hw/intc/allwinner-a10-pic: Handle IRQ levels other than 0 or 1
* hw/sd/allwinner-sdhost: Don't send non-boolean IRQ line levels
* hw/timer/nrf51_timer: Don't lose time when timer is queried in tight loop
* hw/arm/Kconfig: sbsa-ref uses Bochs display
* imx_serial: set wake bit when we receive a data byte
* docs: sbsa: document board to firmware interface
* hw/misc/bcm2835_property: avoid hard-coded constants
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmSQZd0ZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3lvoEACHH2dWWb1WAMB4GSZbM0PA
# kStY9PO7Ex87BRN6cX2T6qv40eWvZsLsgJn/igDmuv9kXIuejgw5Ri36I+Jce0ZN
# +d2DyrsEH/GlIDcl86HnbG1WGB27uAu0imE8kiokNymsFbyvfLZrByi03rwPRxkp
# fBVK2aFXTq1cZhjo3/43ySbF4/09ajci8uHPtnLla+WpZzoxP38GZ8qsY6WdxgEv
# +ap1h2641DDCpkqqan+tEbFUczJ8QrSvUoofreOJhEAnAuqlRX8V4eiiK9McUX+P
# LLUYUAMeTf9Ts2YRuJd9eUvTmxJo2WBiXFpxSvOfu5YOR5pBiDkDrGLkbY5bUvNu
# Qte/O0gEG0GBwZptCnUWJtF1DoMDAnPjB3JjuBkAo0N5ch7G/McoGfNYEaNEbb6N
# uKetTzlR4s0Zxv/SGxow+/kEkiDNCwna2mni563bz+L7+sRJWFEORErcNHCWckkk
# 1W+C1S+pKv9EZvO4lcvJgZus6i5VlWjEOm0IrRcYO+dbA1F7T3j4miIu8JYYIPFu
# IPyZytawpwq8irxTD0Z1hpsjrbkfOMb3hEbmtK4ruSCBRMBA3Zj2cd1ZrL9A00JE
# xC7rLXWxUAOxEXlJ0mDLMU3XGcp5j6wbMtin9odYR0ccXOHaV8dplzLNgAusXtWO
# GqKcq+m7oeSklKl/YIJsuQ==
# =5BGp
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 19 Jun 2023 04:27:41 PM CEST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
* tag 'pull-target-arm-20230619' of https://git.linaro.org/people/pmaydell/qemu-arm: (33 commits)
hw/misc/bcm2835_property: Handle CORE_CLK_ID firmware property
hw/misc/bcm2835_property: Replace magic frequency values by definitions
hw/misc/bcm2835_property: Use 'raspberrypi-fw-defs.h' definitions
hw/arm/raspi: Import Linux raspi definitions as 'raspberrypi-fw-defs.h'
docs: sbsa: document board to firmware interface
imx_serial: set wake bit when we receive a data byte
hw/arm/Kconfig: sbsa-ref uses Bochs display
hw/timer/nrf51_timer: Don't lose time when timer is queried in tight loop
hw/sd/allwinner-sdhost: Don't send non-boolean IRQ line levels
hw/intc/allwinner-a10-pic: Handle IRQ levels other than 0 or 1
target/arm: Convert load/store tags insns to decodetree
target/arm: Convert load/store single structure to decodetree
target/arm: Convert load/store (multiple structures) to decodetree
target/arm: Convert LDAPR/STLR (imm) to decodetree
target/arm: Convert load (pointer auth) insns to decodetree
target/arm: Convert atomic memory ops to decodetree
target/arm: Convert LDR/STR reg+reg to decodetree
target/arm: Convert LDR/STR with 12-bit immediate to decodetree
target/arm: Convert ld/st reg+imm9 insns to decodetree
target/arm: Convert load/store-pair to decodetree
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
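
Most of this pull is the conversion of the A64 system, load and store instructions to
decodetree. As background for reading the a64.decode and translate-a64.c hunks below:
each line in a64.decode gives an instruction's 32-bit pattern (fixed bits plus named
fields), and the decoder generated from it by scripts/decodetree.py calls a matching
trans_<NAME>() hook in translate-a64.c; the hook returns true once it has handled the
instruction, and false to signal that the pattern does not apply (decode then continues
down the remaining paths, typically ending in an unallocated-encoding exception). A
minimal pair from this series, WFI, shows the shape:

    # a64.decode: pattern line consumed by scripts/decodetree.py
    WFI          1101 0101 0000 0011 0010 0000 011 11111

    /* translate-a64.c: hook called by the generated decoder */
    static bool trans_WFI(DisasContext *s, arg_WFI *a)
    {
        s->base.is_jmp = DISAS_WFI;
        return true;
    }

Both snippets appear verbatim in the diff below; only the pairing between them is being
pointed out here.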
-rw-r--r--  docs/system/arm/sbsa.rst               |   38
-rw-r--r--  hw/arm/Kconfig                         |    1
-rw-r--r--  hw/char/imx_serial.c                   |    5
-rw-r--r--  hw/intc/allwinner-a10-pic.c            |    2
-rw-r--r--  hw/misc/bcm2835_property.c             |  112
-rw-r--r--  hw/sd/allwinner-sdhost.c               |    2
-rw-r--r--  hw/timer/nrf51_timer.c                 |    7
-rw-r--r--  include/hw/arm/raspi_platform.h        |   10
-rw-r--r--  include/hw/char/imx_serial.h           |    1
-rw-r--r--  include/hw/misc/raspberrypi-fw-defs.h  |  163
-rw-r--r--  target/arm/tcg/a64.decode              |  403
-rw-r--r--  target/arm/tcg/translate-a64.c         | 3089
12 files changed, 2042 insertions, 1791 deletions
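
One of the smaller fixes is easiest to see with numbers. In the hw/timer/nrf51_timer.c
hunk below, update_counter() previously resynchronised with 's->update_counter_ns = now',
throwing away whatever fraction of a tick had elapsed past the last whole tick, so a guest
polling the counter in a tight loop made the timer fall behind. After the change the sync
point advances only by whole ticks. With illustrative numbers (not from the patch): if a
tick is 1000 ns and a query arrives 2500 ns after the last resync, ticks = 2, so
update_counter_ns advances by ticks_to_ns(s, 2) = 2000 ns and the remaining 500 ns is
still accounted for at the next query rather than silently dropped.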
diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst
index 016776a..f571fe6 100644
--- a/docs/system/arm/sbsa.rst
+++ b/docs/system/arm/sbsa.rst
@@ -6,12 +6,7 @@ any real hardware the ``sbsa-ref`` board intends to look like real
 hardware. The `Server Base System Architecture
 <https://developer.arm.com/documentation/den0029/latest>`_ defines a
 minimum base line of hardware support and importantly how the firmware
-reports that to any operating system. It is a static system that
-reports a very minimal DT to the firmware for non-discoverable
-information about components affected by the qemu command line (i.e.
-cpus and memory). As a result it must have a firmware specifically
-built to expect a certain hardware layout (as you would in a real
-machine).
+reports that to any operating system.
 
 It is intended to be a machine for developing firmware and testing
 standards compliance with operating systems.
@@ -19,7 +14,7 @@ standards compliance with operating systems.
 Supported devices
 """""""""""""""""
 
-The sbsa-ref board supports:
+The ``sbsa-ref`` board supports:
 
  - A configurable number of AArch64 CPUs
  - GIC version 3
@@ -30,3 +25,32 @@ The sbsa-ref board supports:
  - Bochs display adapter on PCIe bus
  - A generic SBSA watchdog device
 
+
+Board to firmware interface
+"""""""""""""""""""""""""""
+
+``sbsa-ref`` is a static system that reports a very minimal devicetree to the
+firmware for non-discoverable information about system components. This
+includes both internal hardware and parts affected by the qemu command line
+(i.e. CPUs and memory). As a result it must have a firmware specifically built
+to expect a certain hardware layout (as you would in a real machine).
+
+DeviceTree information
+''''''''''''''''''''''
+
+The devicetree provided by the board model to the firmware is not intended
+to be a complete compliant DT. It currently reports:
+
+ - CPUs
+ - memory
+ - platform version
+ - GIC addresses
+
+The platform version is only for informing platform firmware about
+what kind of ``sbsa-ref`` board it is running on. It is neither
+a QEMU versioned machine type nor a reflection of the level of the
+SBSA/SystemReady SR support provided.
+
+The ``machine-version-major`` value is updated when changes breaking
+fw compatibility are introduced. The ``machine-version-minor`` value
+is updated when features are added that don't break fw compatibility.
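
As a rough sketch of how firmware might consume the machine-version-major and
machine-version-minor properties documented above, the following assumes the two cells
sit on the devicetree root node and that the firmware links against libfdt; it is
illustrative only and not taken from any sbsa-ref firmware:

    #include <stdint.h>
    #include <libfdt.h>

    /* Read the sbsa-ref platform version (assumed to live on the root node). */
    static int sbsa_platform_version(const void *fdt, uint32_t *major, uint32_t *minor)
    {
        int root = fdt_path_offset(fdt, "/");
        const fdt32_t *prop;

        if (root < 0) {
            return root;                  /* propagate the libfdt error code */
        }
        prop = fdt_getprop(fdt, root, "machine-version-major", NULL);
        if (!prop) {
            return -FDT_ERR_NOTFOUND;
        }
        *major = fdt32_to_cpu(*prop);
        prop = fdt_getprop(fdt, root, "machine-version-minor", NULL);
        if (!prop) {
            return -FDT_ERR_NOTFOUND;
        }
        *minor = fdt32_to_cpu(*prop);
        return 0;
    }

Following the compatibility rule above, firmware could gate its layout assumptions on the
major value and treat minor bumps as purely additive.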
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 2159de3..7de17d1 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -268,6 +268,7 @@ config SBSA_REF select PL061 # GPIO select USB_EHCI_SYSBUS select WDT_SBSA + select BOCHS_DISPLAY config SABRELITE bool diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c index ee1375e..1b75a89 100644 --- a/hw/char/imx_serial.c +++ b/hw/char/imx_serial.c @@ -80,7 +80,7 @@ static void imx_update(IMXSerialState *s) * TCEN and TXDC are both bit 3 * RDR and DREN are both bit 0 */ - mask |= s->ucr4 & (UCR4_TCEN | UCR4_DREN); + mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN); usr2 = s->usr2 & mask; @@ -321,6 +321,9 @@ static void imx_put_data(void *opaque, uint32_t value) static void imx_receive(void *opaque, const uint8_t *buf, int size) { + IMXSerialState *s = (IMXSerialState *)opaque; + + s->usr2 |= USR2_WAKE; imx_put_data(opaque, *buf); } diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c index 4875e68..d0bf8d5 100644 --- a/hw/intc/allwinner-a10-pic.c +++ b/hw/intc/allwinner-a10-pic.c @@ -51,7 +51,7 @@ static void aw_a10_pic_set_irq(void *opaque, int irq, int level) AwA10PICState *s = opaque; uint32_t *pending_reg = &s->irq_pending[irq / 32]; - *pending_reg = deposit32(*pending_reg, irq % 32, 1, level); + *pending_reg = deposit32(*pending_reg, irq % 32, 1, !!level); aw_a10_pic_update(s); } diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 251b3d8..4ed9faa 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -12,10 +12,12 @@ #include "migration/vmstate.h" #include "hw/irq.h" #include "hw/misc/bcm2835_mbox_defs.h" +#include "hw/misc/raspberrypi-fw-defs.h" #include "sysemu/dma.h" #include "qemu/log.h" #include "qemu/module.h" #include "trace.h" +#include "hw/arm/raspi_platform.h" /* https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface */ @@ -51,48 +53,48 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) /* @(value + 8) : Request/response indicator */ resplen = 0; switch (tag) { - case 0x00000000: /* End tag */ + case RPI_FWREQ_PROPERTY_END: break; - case 0x00000001: /* Get firmware revision */ + case RPI_FWREQ_GET_FIRMWARE_REVISION: stl_le_phys(&s->dma_as, value + 12, 346337); resplen = 4; break; - case 0x00010001: /* Get board model */ + case RPI_FWREQ_GET_BOARD_MODEL: qemu_log_mask(LOG_UNIMP, "bcm2835_property: 0x%08x get board model NYI\n", tag); resplen = 4; break; - case 0x00010002: /* Get board revision */ + case RPI_FWREQ_GET_BOARD_REVISION: stl_le_phys(&s->dma_as, value + 12, s->board_rev); resplen = 4; break; - case 0x00010003: /* Get board MAC address */ + case RPI_FWREQ_GET_BOARD_MAC_ADDRESS: resplen = sizeof(s->macaddr.a); dma_memory_write(&s->dma_as, value + 12, s->macaddr.a, resplen, MEMTXATTRS_UNSPECIFIED); break; - case 0x00010004: /* Get board serial */ + case RPI_FWREQ_GET_BOARD_SERIAL: qemu_log_mask(LOG_UNIMP, "bcm2835_property: 0x%08x get board serial NYI\n", tag); resplen = 8; break; - case 0x00010005: /* Get ARM memory */ + case RPI_FWREQ_GET_ARM_MEMORY: /* base */ stl_le_phys(&s->dma_as, value + 12, 0); /* size */ stl_le_phys(&s->dma_as, value + 16, s->fbdev->vcram_base); resplen = 8; break; - case 0x00010006: /* Get VC memory */ + case RPI_FWREQ_GET_VC_MEMORY: /* base */ stl_le_phys(&s->dma_as, value + 12, s->fbdev->vcram_base); /* size */ stl_le_phys(&s->dma_as, value + 16, s->fbdev->vcram_size); resplen = 8; break; - case 0x00028001: /* Set power state */ + case RPI_FWREQ_SET_POWER_STATE: /* Assume that 
whatever device they asked for exists, * and we'll just claim we set it to the desired state */ @@ -103,38 +105,42 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) /* Clocks */ - case 0x00030001: /* Get clock state */ + case RPI_FWREQ_GET_CLOCK_STATE: stl_le_phys(&s->dma_as, value + 16, 0x1); resplen = 8; break; - case 0x00038001: /* Set clock state */ + case RPI_FWREQ_SET_CLOCK_STATE: qemu_log_mask(LOG_UNIMP, "bcm2835_property: 0x%08x set clock state NYI\n", tag); resplen = 8; break; - case 0x00030002: /* Get clock rate */ - case 0x00030004: /* Get max clock rate */ - case 0x00030007: /* Get min clock rate */ + case RPI_FWREQ_GET_CLOCK_RATE: + case RPI_FWREQ_GET_MAX_CLOCK_RATE: + case RPI_FWREQ_GET_MIN_CLOCK_RATE: switch (ldl_le_phys(&s->dma_as, value + 12)) { - case 1: /* EMMC */ - stl_le_phys(&s->dma_as, value + 16, 50000000); + case RPI_FIRMWARE_EMMC_CLK_ID: + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_EMMC_CLK_RATE); break; - case 2: /* UART */ - stl_le_phys(&s->dma_as, value + 16, 3000000); + case RPI_FIRMWARE_UART_CLK_ID: + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_UART_CLK_RATE); + break; + case RPI_FIRMWARE_CORE_CLK_ID: + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_CORE_CLK_RATE); break; default: - stl_le_phys(&s->dma_as, value + 16, 700000000); + stl_le_phys(&s->dma_as, value + 16, + RPI_FIRMWARE_DEFAULT_CLK_RATE); break; } resplen = 8; break; - case 0x00038002: /* Set clock rate */ - case 0x00038004: /* Set max clock rate */ - case 0x00038007: /* Set min clock rate */ + case RPI_FWREQ_SET_CLOCK_RATE: + case RPI_FWREQ_SET_MAX_CLOCK_RATE: + case RPI_FWREQ_SET_MIN_CLOCK_RATE: qemu_log_mask(LOG_UNIMP, "bcm2835_property: 0x%08x set clock rate NYI\n", tag); @@ -143,121 +149,121 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) /* Temperature */ - case 0x00030006: /* Get temperature */ + case RPI_FWREQ_GET_TEMPERATURE: stl_le_phys(&s->dma_as, value + 16, 25000); resplen = 8; break; - case 0x0003000A: /* Get max temperature */ + case RPI_FWREQ_GET_MAX_TEMPERATURE: stl_le_phys(&s->dma_as, value + 16, 99000); resplen = 8; break; /* Frame buffer */ - case 0x00040001: /* Allocate buffer */ + case RPI_FWREQ_FRAMEBUFFER_ALLOCATE: stl_le_phys(&s->dma_as, value + 12, fbconfig.base); stl_le_phys(&s->dma_as, value + 16, bcm2835_fb_get_size(&fbconfig)); resplen = 8; break; - case 0x00048001: /* Release buffer */ + case RPI_FWREQ_FRAMEBUFFER_RELEASE: resplen = 0; break; - case 0x00040002: /* Blank screen */ + case RPI_FWREQ_FRAMEBUFFER_BLANK: resplen = 4; break; - case 0x00044003: /* Test physical display width/height */ - case 0x00044004: /* Test virtual display width/height */ + case RPI_FWREQ_FRAMEBUFFER_TEST_PHYSICAL_WIDTH_HEIGHT: + case RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_WIDTH_HEIGHT: resplen = 8; break; - case 0x00048003: /* Set physical display width/height */ + case RPI_FWREQ_FRAMEBUFFER_SET_PHYSICAL_WIDTH_HEIGHT: fbconfig.xres = ldl_le_phys(&s->dma_as, value + 12); fbconfig.yres = ldl_le_phys(&s->dma_as, value + 16); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040003: /* Get physical display width/height */ + case RPI_FWREQ_FRAMEBUFFER_GET_PHYSICAL_WIDTH_HEIGHT: stl_le_phys(&s->dma_as, value + 12, fbconfig.xres); stl_le_phys(&s->dma_as, value + 16, fbconfig.yres); resplen = 8; break; - case 0x00048004: /* Set virtual display width/height */ + case RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_WIDTH_HEIGHT: fbconfig.xres_virtual = ldl_le_phys(&s->dma_as, value + 12); 
fbconfig.yres_virtual = ldl_le_phys(&s->dma_as, value + 16); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040004: /* Get virtual display width/height */ + case RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_WIDTH_HEIGHT: stl_le_phys(&s->dma_as, value + 12, fbconfig.xres_virtual); stl_le_phys(&s->dma_as, value + 16, fbconfig.yres_virtual); resplen = 8; break; - case 0x00044005: /* Test depth */ + case RPI_FWREQ_FRAMEBUFFER_TEST_DEPTH: resplen = 4; break; - case 0x00048005: /* Set depth */ + case RPI_FWREQ_FRAMEBUFFER_SET_DEPTH: fbconfig.bpp = ldl_le_phys(&s->dma_as, value + 12); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040005: /* Get depth */ + case RPI_FWREQ_FRAMEBUFFER_GET_DEPTH: stl_le_phys(&s->dma_as, value + 12, fbconfig.bpp); resplen = 4; break; - case 0x00044006: /* Test pixel order */ + case RPI_FWREQ_FRAMEBUFFER_TEST_PIXEL_ORDER: resplen = 4; break; - case 0x00048006: /* Set pixel order */ + case RPI_FWREQ_FRAMEBUFFER_SET_PIXEL_ORDER: fbconfig.pixo = ldl_le_phys(&s->dma_as, value + 12); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040006: /* Get pixel order */ + case RPI_FWREQ_FRAMEBUFFER_GET_PIXEL_ORDER: stl_le_phys(&s->dma_as, value + 12, fbconfig.pixo); resplen = 4; break; - case 0x00044007: /* Test pixel alpha */ + case RPI_FWREQ_FRAMEBUFFER_TEST_ALPHA_MODE: resplen = 4; break; - case 0x00048007: /* Set alpha */ + case RPI_FWREQ_FRAMEBUFFER_SET_ALPHA_MODE: fbconfig.alpha = ldl_le_phys(&s->dma_as, value + 12); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040007: /* Get alpha */ + case RPI_FWREQ_FRAMEBUFFER_GET_ALPHA_MODE: stl_le_phys(&s->dma_as, value + 12, fbconfig.alpha); resplen = 4; break; - case 0x00040008: /* Get pitch */ + case RPI_FWREQ_FRAMEBUFFER_GET_PITCH: stl_le_phys(&s->dma_as, value + 12, bcm2835_fb_get_pitch(&fbconfig)); resplen = 4; break; - case 0x00044009: /* Test virtual offset */ + case RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_OFFSET: resplen = 8; break; - case 0x00048009: /* Set virtual offset */ + case RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_OFFSET: fbconfig.xoffset = ldl_le_phys(&s->dma_as, value + 12); fbconfig.yoffset = ldl_le_phys(&s->dma_as, value + 16); bcm2835_fb_validate_config(&fbconfig); fbconfig_updated = true; /* fall through */ - case 0x00040009: /* Get virtual offset */ + case RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_OFFSET: stl_le_phys(&s->dma_as, value + 12, fbconfig.xoffset); stl_le_phys(&s->dma_as, value + 16, fbconfig.yoffset); resplen = 8; break; - case 0x0004000a: /* Get/Test/Set overscan */ - case 0x0004400a: - case 0x0004800a: + case RPI_FWREQ_FRAMEBUFFER_GET_OVERSCAN: + case RPI_FWREQ_FRAMEBUFFER_TEST_OVERSCAN: + case RPI_FWREQ_FRAMEBUFFER_SET_OVERSCAN: stl_le_phys(&s->dma_as, value + 12, 0); stl_le_phys(&s->dma_as, value + 16, 0); stl_le_phys(&s->dma_as, value + 20, 0); stl_le_phys(&s->dma_as, value + 24, 0); resplen = 16; break; - case 0x0004800b: /* Set palette */ + case RPI_FWREQ_FRAMEBUFFER_SET_PALETTE: offset = ldl_le_phys(&s->dma_as, value + 12); length = ldl_le_phys(&s->dma_as, value + 16); n = 0; @@ -270,18 +276,18 @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) stl_le_phys(&s->dma_as, value + 12, 0); resplen = 4; break; - case 0x00040013: /* Get number of displays */ + case RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS: stl_le_phys(&s->dma_as, value + 12, 1); resplen = 4; break; - case 0x00060001: /* Get DMA channels */ + case 
RPI_FWREQ_GET_DMA_CHANNELS: /* channels 2-5 */ stl_le_phys(&s->dma_as, value + 12, 0x003C); resplen = 4; break; - case 0x00050001: /* Get command line */ + case RPI_FWREQ_GET_COMMAND_LINE: /* * We follow the firmware behaviour: no NUL terminator is * written to the buffer, and if the buffer is too short diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c index 286e009..1a576d6 100644 --- a/hw/sd/allwinner-sdhost.c +++ b/hw/sd/allwinner-sdhost.c @@ -193,7 +193,7 @@ static void allwinner_sdhost_update_irq(AwSdHostState *s) } trace_allwinner_sdhost_update_irq(irq); - qemu_set_irq(s->irq, irq); + qemu_set_irq(s->irq, !!irq); } static void allwinner_sdhost_update_transfer_cnt(AwSdHostState *s, diff --git a/hw/timer/nrf51_timer.c b/hw/timer/nrf51_timer.c index 42be79c..50c6772 100644 --- a/hw/timer/nrf51_timer.c +++ b/hw/timer/nrf51_timer.c @@ -45,7 +45,12 @@ static uint32_t update_counter(NRF51TimerState *s, int64_t now) uint32_t ticks = ns_to_ticks(s, now - s->update_counter_ns); s->counter = (s->counter + ticks) % BIT(bitwidths[s->bitmode]); - s->update_counter_ns = now; + /* + * Only advance the sync time to the timestamp of the last tick, + * not all the way to 'now', so we don't lose time if we do + * multiple resyncs in a single tick. + */ + s->update_counter_ns += ticks_to_ns(s, ticks); return ticks; } diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h index 4a56dd4..ede98e6 100644 --- a/include/hw/arm/raspi_platform.h +++ b/include/hw/arm/raspi_platform.h @@ -170,4 +170,14 @@ #define INTERRUPT_ILLEGAL_TYPE0 6 #define INTERRUPT_ILLEGAL_TYPE1 7 +/* Clock rates */ +#define RPI_FIRMWARE_EMMC_CLK_RATE 50000000 +#define RPI_FIRMWARE_UART_CLK_RATE 3000000 +/* + * TODO: this is really SoC-specific; we might want to + * set it per-SoC if it turns out any guests care. 
+ */ +#define RPI_FIRMWARE_CORE_CLK_RATE 350000000 +#define RPI_FIRMWARE_DEFAULT_CLK_RATE 700000000 + #endif diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h index 91c9894..b823f94 100644 --- a/include/hw/char/imx_serial.h +++ b/include/hw/char/imx_serial.h @@ -71,6 +71,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) #define UCR4_DREN BIT(0) /* Receive Data Ready interrupt enable */ #define UCR4_TCEN BIT(3) /* TX complete interrupt enable */ +#define UCR4_WKEN BIT(7) /* WAKE interrupt enable */ #define UTS1_TXEMPTY (1<<6) #define UTS1_RXEMPTY (1<<5) diff --git a/include/hw/misc/raspberrypi-fw-defs.h b/include/hw/misc/raspberrypi-fw-defs.h new file mode 100644 index 0000000..4551fe7 --- /dev/null +++ b/include/hw/misc/raspberrypi-fw-defs.h @@ -0,0 +1,163 @@ +/* + * Raspberry Pi firmware definitions + * + * Copyright (C) 2022 Auriga LLC, based on Linux kernel + * `include/soc/bcm2835/raspberrypi-firmware.h` (Copyright © 2015 Broadcom) + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ +#define INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ + +#include "qemu/osdep.h" + +enum rpi_firmware_property_tag { + RPI_FWREQ_PROPERTY_END = 0, + RPI_FWREQ_GET_FIRMWARE_REVISION = 0x00000001, + RPI_FWREQ_GET_FIRMWARE_VARIANT = 0x00000002, + RPI_FWREQ_GET_FIRMWARE_HASH = 0x00000003, + + RPI_FWREQ_SET_CURSOR_INFO = 0x00008010, + RPI_FWREQ_SET_CURSOR_STATE = 0x00008011, + + RPI_FWREQ_GET_BOARD_MODEL = 0x00010001, + RPI_FWREQ_GET_BOARD_REVISION = 0x00010002, + RPI_FWREQ_GET_BOARD_MAC_ADDRESS = 0x00010003, + RPI_FWREQ_GET_BOARD_SERIAL = 0x00010004, + RPI_FWREQ_GET_ARM_MEMORY = 0x00010005, + RPI_FWREQ_GET_VC_MEMORY = 0x00010006, + RPI_FWREQ_GET_CLOCKS = 0x00010007, + RPI_FWREQ_GET_POWER_STATE = 0x00020001, + RPI_FWREQ_GET_TIMING = 0x00020002, + RPI_FWREQ_SET_POWER_STATE = 0x00028001, + RPI_FWREQ_GET_CLOCK_STATE = 0x00030001, + RPI_FWREQ_GET_CLOCK_RATE = 0x00030002, + RPI_FWREQ_GET_VOLTAGE = 0x00030003, + RPI_FWREQ_GET_MAX_CLOCK_RATE = 0x00030004, + RPI_FWREQ_GET_MAX_VOLTAGE = 0x00030005, + RPI_FWREQ_GET_TEMPERATURE = 0x00030006, + RPI_FWREQ_GET_MIN_CLOCK_RATE = 0x00030007, + RPI_FWREQ_GET_MIN_VOLTAGE = 0x00030008, + RPI_FWREQ_GET_TURBO = 0x00030009, + RPI_FWREQ_GET_MAX_TEMPERATURE = 0x0003000a, + RPI_FWREQ_GET_STC = 0x0003000b, + RPI_FWREQ_ALLOCATE_MEMORY = 0x0003000c, + RPI_FWREQ_LOCK_MEMORY = 0x0003000d, + RPI_FWREQ_UNLOCK_MEMORY = 0x0003000e, + RPI_FWREQ_RELEASE_MEMORY = 0x0003000f, + RPI_FWREQ_EXECUTE_CODE = 0x00030010, + RPI_FWREQ_EXECUTE_QPU = 0x00030011, + RPI_FWREQ_SET_ENABLE_QPU = 0x00030012, + RPI_FWREQ_GET_DISPMANX_RESOURCE_MEM_HANDLE = 0x00030014, + RPI_FWREQ_GET_EDID_BLOCK = 0x00030020, + RPI_FWREQ_GET_CUSTOMER_OTP = 0x00030021, + RPI_FWREQ_GET_EDID_BLOCK_DISPLAY = 0x00030023, + RPI_FWREQ_GET_DOMAIN_STATE = 0x00030030, + RPI_FWREQ_GET_THROTTLED = 0x00030046, + RPI_FWREQ_GET_CLOCK_MEASURED = 0x00030047, + RPI_FWREQ_NOTIFY_REBOOT = 0x00030048, + RPI_FWREQ_SET_CLOCK_STATE = 0x00038001, + RPI_FWREQ_SET_CLOCK_RATE = 0x00038002, + RPI_FWREQ_SET_VOLTAGE = 0x00038003, + RPI_FWREQ_SET_MAX_CLOCK_RATE = 0x00038004, + RPI_FWREQ_SET_MIN_CLOCK_RATE = 0x00038007, + RPI_FWREQ_SET_TURBO = 0x00038009, + RPI_FWREQ_SET_CUSTOMER_OTP = 0x00038021, + RPI_FWREQ_SET_DOMAIN_STATE = 0x00038030, + RPI_FWREQ_GET_GPIO_STATE = 0x00030041, + RPI_FWREQ_SET_GPIO_STATE = 0x00038041, + RPI_FWREQ_SET_SDHOST_CLOCK = 0x00038042, + RPI_FWREQ_GET_GPIO_CONFIG = 0x00030043, + RPI_FWREQ_SET_GPIO_CONFIG = 0x00038043, + RPI_FWREQ_GET_PERIPH_REG = 0x00030045, + 
RPI_FWREQ_SET_PERIPH_REG = 0x00038045, + RPI_FWREQ_GET_POE_HAT_VAL = 0x00030049, + RPI_FWREQ_SET_POE_HAT_VAL = 0x00038049, + RPI_FWREQ_SET_POE_HAT_VAL_OLD = 0x00030050, + RPI_FWREQ_NOTIFY_XHCI_RESET = 0x00030058, + RPI_FWREQ_GET_REBOOT_FLAGS = 0x00030064, + RPI_FWREQ_SET_REBOOT_FLAGS = 0x00038064, + RPI_FWREQ_NOTIFY_DISPLAY_DONE = 0x00030066, + + /* Dispmanx TAGS */ + RPI_FWREQ_FRAMEBUFFER_ALLOCATE = 0x00040001, + RPI_FWREQ_FRAMEBUFFER_BLANK = 0x00040002, + RPI_FWREQ_FRAMEBUFFER_GET_PHYSICAL_WIDTH_HEIGHT = 0x00040003, + RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_WIDTH_HEIGHT = 0x00040004, + RPI_FWREQ_FRAMEBUFFER_GET_DEPTH = 0x00040005, + RPI_FWREQ_FRAMEBUFFER_GET_PIXEL_ORDER = 0x00040006, + RPI_FWREQ_FRAMEBUFFER_GET_ALPHA_MODE = 0x00040007, + RPI_FWREQ_FRAMEBUFFER_GET_PITCH = 0x00040008, + RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_OFFSET = 0x00040009, + RPI_FWREQ_FRAMEBUFFER_GET_OVERSCAN = 0x0004000a, + RPI_FWREQ_FRAMEBUFFER_GET_PALETTE = 0x0004000b, + RPI_FWREQ_FRAMEBUFFER_GET_LAYER = 0x0004000c, + RPI_FWREQ_FRAMEBUFFER_GET_TRANSFORM = 0x0004000d, + RPI_FWREQ_FRAMEBUFFER_GET_VSYNC = 0x0004000e, + RPI_FWREQ_FRAMEBUFFER_GET_TOUCHBUF = 0x0004000f, + RPI_FWREQ_FRAMEBUFFER_GET_GPIOVIRTBUF = 0x00040010, + RPI_FWREQ_FRAMEBUFFER_RELEASE = 0x00048001, + RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_ID = 0x00040016, + RPI_FWREQ_FRAMEBUFFER_SET_DISPLAY_NUM = 0x00048013, + RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS = 0x00040013, + RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_SETTINGS = 0x00040014, + RPI_FWREQ_FRAMEBUFFER_TEST_PHYSICAL_WIDTH_HEIGHT = 0x00044003, + RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_WIDTH_HEIGHT = 0x00044004, + RPI_FWREQ_FRAMEBUFFER_TEST_DEPTH = 0x00044005, + RPI_FWREQ_FRAMEBUFFER_TEST_PIXEL_ORDER = 0x00044006, + RPI_FWREQ_FRAMEBUFFER_TEST_ALPHA_MODE = 0x00044007, + RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_OFFSET = 0x00044009, + RPI_FWREQ_FRAMEBUFFER_TEST_OVERSCAN = 0x0004400a, + RPI_FWREQ_FRAMEBUFFER_TEST_PALETTE = 0x0004400b, + RPI_FWREQ_FRAMEBUFFER_TEST_LAYER = 0x0004400c, + RPI_FWREQ_FRAMEBUFFER_TEST_TRANSFORM = 0x0004400d, + RPI_FWREQ_FRAMEBUFFER_TEST_VSYNC = 0x0004400e, + RPI_FWREQ_FRAMEBUFFER_SET_PHYSICAL_WIDTH_HEIGHT = 0x00048003, + RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_WIDTH_HEIGHT = 0x00048004, + RPI_FWREQ_FRAMEBUFFER_SET_DEPTH = 0x00048005, + RPI_FWREQ_FRAMEBUFFER_SET_PIXEL_ORDER = 0x00048006, + RPI_FWREQ_FRAMEBUFFER_SET_ALPHA_MODE = 0x00048007, + RPI_FWREQ_FRAMEBUFFER_SET_PITCH = 0x00048008, + RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_OFFSET = 0x00048009, + RPI_FWREQ_FRAMEBUFFER_SET_OVERSCAN = 0x0004800a, + RPI_FWREQ_FRAMEBUFFER_SET_PALETTE = 0x0004800b, + + RPI_FWREQ_FRAMEBUFFER_SET_TOUCHBUF = 0x0004801f, + RPI_FWREQ_FRAMEBUFFER_SET_GPIOVIRTBUF = 0x00048020, + RPI_FWREQ_FRAMEBUFFER_SET_VSYNC = 0x0004800e, + RPI_FWREQ_FRAMEBUFFER_SET_LAYER = 0x0004800c, + RPI_FWREQ_FRAMEBUFFER_SET_TRANSFORM = 0x0004800d, + RPI_FWREQ_FRAMEBUFFER_SET_BACKLIGHT = 0x0004800f, + + RPI_FWREQ_VCHIQ_INIT = 0x00048010, + + RPI_FWREQ_SET_PLANE = 0x00048015, + RPI_FWREQ_GET_DISPLAY_TIMING = 0x00040017, + RPI_FWREQ_SET_TIMING = 0x00048017, + RPI_FWREQ_GET_DISPLAY_CFG = 0x00040018, + RPI_FWREQ_SET_DISPLAY_POWER = 0x00048019, + RPI_FWREQ_GET_COMMAND_LINE = 0x00050001, + RPI_FWREQ_GET_DMA_CHANNELS = 0x00060001, +}; + +enum rpi_firmware_clk_id { + RPI_FIRMWARE_EMMC_CLK_ID = 1, + RPI_FIRMWARE_UART_CLK_ID, + RPI_FIRMWARE_ARM_CLK_ID, + RPI_FIRMWARE_CORE_CLK_ID, + RPI_FIRMWARE_V3D_CLK_ID, + RPI_FIRMWARE_H264_CLK_ID, + RPI_FIRMWARE_ISP_CLK_ID, + RPI_FIRMWARE_SDRAM_CLK_ID, + RPI_FIRMWARE_PIXEL_CLK_ID, + RPI_FIRMWARE_PWM_CLK_ID, + RPI_FIRMWARE_HEVC_CLK_ID, + 
RPI_FIRMWARE_EMMC2_CLK_ID, + RPI_FIRMWARE_M2MC_CLK_ID, + RPI_FIRMWARE_PIXEL_BVB_CLK_ID, + RPI_FIRMWARE_VEC_CLK_ID, + RPI_FIRMWARE_NUM_CLK_ID, +}; + +#endif /* INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ */ diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 12a310d..ef64a3f 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -150,3 +150,406 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB # the processor is in halting debug state (which we don't implement). # The pattern is listed here as documentation. # DRPS 1101011 0101 11111 000000 11111 00000 + +# Hint instruction group +{ + [ + YIELD 1101 0101 0000 0011 0010 0000 001 11111 + WFE 1101 0101 0000 0011 0010 0000 010 11111 + WFI 1101 0101 0000 0011 0010 0000 011 11111 + # We implement WFE to never block, so our SEV/SEVL are NOPs + # SEV 1101 0101 0000 0011 0010 0000 100 11111 + # SEVL 1101 0101 0000 0011 0010 0000 101 11111 + # Our DGL is a NOP because we don't merge memory accesses anyway. + # DGL 1101 0101 0000 0011 0010 0000 110 11111 + XPACLRI 1101 0101 0000 0011 0010 0000 111 11111 + PACIA1716 1101 0101 0000 0011 0010 0001 000 11111 + PACIB1716 1101 0101 0000 0011 0010 0001 010 11111 + AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111 + AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111 + ESB 1101 0101 0000 0011 0010 0010 000 11111 + PACIAZ 1101 0101 0000 0011 0010 0011 000 11111 + PACIASP 1101 0101 0000 0011 0010 0011 001 11111 + PACIBZ 1101 0101 0000 0011 0010 0011 010 11111 + PACIBSP 1101 0101 0000 0011 0010 0011 011 11111 + AUTIAZ 1101 0101 0000 0011 0010 0011 100 11111 + AUTIASP 1101 0101 0000 0011 0010 0011 101 11111 + AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111 + AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111 + ] + # The canonical NOP has CRm == op2 == 0, but all of the space + # that isn't specifically allocated to an instruction must NOP + NOP 1101 0101 0000 0011 0010 ---- --- 11111 +} + +# Barriers + +CLREX 1101 0101 0000 0011 0011 ---- 010 11111 +DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111 +ISB 1101 0101 0000 0011 0011 ---- 110 11111 +SB 1101 0101 0000 0011 0011 0000 111 11111 + +# PSTATE + +CFINV 1101 0101 0000 0 000 0100 0000 000 11111 +XAFLAG 1101 0101 0000 0 000 0100 0000 001 11111 +AXFLAG 1101 0101 0000 0 000 0100 0000 010 11111 + +# These are architecturally all "MSR (immediate)"; we decode the destination +# register too because there is no commonality in our implementation. +@msr_i .... .... .... . ... .... imm:4 ... ..... +MSR_i_UAO 1101 0101 0000 0 000 0100 .... 011 11111 @msr_i +MSR_i_PAN 1101 0101 0000 0 000 0100 .... 100 11111 @msr_i +MSR_i_SPSEL 1101 0101 0000 0 000 0100 .... 101 11111 @msr_i +MSR_i_SBSS 1101 0101 0000 0 011 0100 .... 001 11111 @msr_i +MSR_i_DIT 1101 0101 0000 0 011 0100 .... 010 11111 @msr_i +MSR_i_TCO 1101 0101 0000 0 011 0100 .... 100 11111 @msr_i +MSR_i_DAIFSET 1101 0101 0000 0 011 0100 .... 110 11111 @msr_i +MSR_i_DAIFCLEAR 1101 0101 0000 0 011 0100 .... 111 11111 @msr_i +MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111 + +# MRS, MSR (register), SYS, SYSL. These are all essentially the +# same instruction as far as QEMU is concerned. +# NB: op0 is bits [20:19], but op0=0b00 is other insns, so we have +# to hand-decode it. +SYS 1101 0101 00 l:1 01 op1:3 crn:4 crm:4 op2:3 rt:5 op0=1 +SYS 1101 0101 00 l:1 10 op1:3 crn:4 crm:4 op2:3 rt:5 op0=2 +SYS 1101 0101 00 l:1 11 op1:3 crn:4 crm:4 op2:3 rt:5 op0=3 + +# Exception generation + +@i16 .... .... ... imm:16 ... .. 
&i +SVC 1101 0100 000 ................ 000 01 @i16 +HVC 1101 0100 000 ................ 000 10 @i16 +SMC 1101 0100 000 ................ 000 11 @i16 +BRK 1101 0100 001 ................ 000 00 @i16 +HLT 1101 0100 010 ................ 000 00 @i16 +# These insns always UNDEF unless in halting debug state, which +# we don't implement. So we don't need to decode them. The patterns +# are listed here as documentation. +# DCPS1 1101 0100 101 ................ 000 01 @i16 +# DCPS2 1101 0100 101 ................ 000 10 @i16 +# DCPS3 1101 0100 101 ................ 000 11 @i16 + +# Loads and stores + +&stxr rn rt rt2 rs sz lasr +&stlr rn rt sz lasr +@stxr sz:2 ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr +@stlr sz:2 ...... ... ..... lasr:1 ..... rn:5 rt:5 &stlr +%imm1_30_p2 30:1 !function=plus_2 +@stxp .. ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr sz=%imm1_30_p2 +STXR .. 001000 000 ..... . ..... ..... ..... @stxr # inc STLXR +LDXR .. 001000 010 ..... . ..... ..... ..... @stxr # inc LDAXR +STLR .. 001000 100 11111 . 11111 ..... ..... @stlr # inc STLLR +LDAR .. 001000 110 11111 . 11111 ..... ..... @stlr # inc LDLAR + +STXP 1 . 001000 001 ..... . ..... ..... ..... @stxp # inc STLXP +LDXP 1 . 001000 011 ..... . ..... ..... ..... @stxp # inc LDAXP + +# CASP, CASPA, CASPAL, CASPL (we don't decode the bits that determine +# acquire/release semantics because QEMU's cmpxchg always has those) +CASP 0 . 001000 0 - 1 rs:5 - 11111 rn:5 rt:5 sz=%imm1_30_p2 +# CAS, CASA, CASAL, CASL +CAS sz:2 001000 1 - 1 rs:5 - 11111 rn:5 rt:5 + +&ldlit rt imm sz sign +@ldlit .. ... . .. ................... rt:5 &ldlit imm=%imm19 + +LD_lit 00 011 0 00 ................... ..... @ldlit sz=2 sign=0 +LD_lit 01 011 0 00 ................... ..... @ldlit sz=3 sign=0 +LD_lit 10 011 0 00 ................... ..... @ldlit sz=2 sign=1 +LD_lit_v 00 011 1 00 ................... ..... @ldlit sz=2 sign=0 +LD_lit_v 01 011 1 00 ................... ..... @ldlit sz=3 sign=0 +LD_lit_v 10 011 1 00 ................... ..... @ldlit sz=4 sign=0 + +# PRFM +NOP 11 011 0 00 ------------------- ----- + +&ldstpair rt2 rt rn imm sz sign w p +@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair + +# STNP, LDNP: Signed offset, non-temporal hint. We don't emulate caches +# so we ignore hints about data access patterns, and handle these like +# plain signed offset. +STP 00 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP 10 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +# STP and LDP: post-indexed +STP 00 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 00 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 01 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=1 w=1 +STP 10 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP 10 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 00 101 1 001 0 ....... ..... 
..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP_v 00 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +STP_v 01 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP_v 01 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 10 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 +LDP_v 10 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 + +# STP and LDP: offset +STP 00 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 01 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=0 +STP 10 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +# STP and LDP: pre-indexed +STP 00 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 00 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 01 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=1 +STP 10 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP 10 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 00 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP_v 00 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +STP_v 01 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP_v 01 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 10 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 +LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 + +# STGP: store tag and pair +STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STGP 01 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 + +# Load/store register (unscaled immediate) +&ldst_imm rt rn imm sz sign w p unpriv ext +@ldst_imm .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 +@ldst_imm_pre .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=1 +@ldst_imm_post .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=1 w=1 +@ldst_imm_user .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=1 p=0 w=0 + +STR_i sz:2 111 0 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 00 ..... ..... 
@ldst_imm sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=1 + +# PRFM : prefetch memory: a no-op for QEMU +NOP 11 111 0 00 10 0 --------- 00 ----- ----- + +STR_v_i sz:2 111 1 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 + +STR_v_i sz:2 111 1 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 + +STR_v_i sz:2 111 1 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 11 ..... ..... 
@ldst_imm_pre sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 + +# Load/store with an unsigned 12 bit immediate, which is scaled by the +# element size. The function gets the sz:imm and returns the scaled immediate. +%uimm_scaled 10:12 sz:3 !function=uimm_scaled + +@ldst_uimm .. ... . .. .. ............ rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 imm=%uimm_scaled + +STR_i sz:2 111 0 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 +LDR_i 00 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=0 +LDR_i 01 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=1 +LDR_i 10 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=2 +LDR_i 11 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=3 +LDR_i 00 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=0 +LDR_i 01 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=1 +LDR_i 10 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=2 +LDR_i 00 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=0 +LDR_i 01 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=1 + +# PRFM +NOP 11 111 0 01 10 ------------ ----- ----- + +STR_v_i sz:2 111 1 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 +STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 +LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 + +# Load/store with register offset +&ldst rm rn rt sign ext sz opt s +@ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst +STR sz:2 111 0 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +LDR 00 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=0 +LDR 01 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=1 +LDR 10 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=2 +LDR 11 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=3 +LDR 00 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=0 +LDR 01 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=1 +LDR 10 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=2 +LDR 00 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=0 +LDR 01 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=1 + +# PRFM +NOP 11 111 0 00 10 1 ----- -1- - 10 ----- ----- + +STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 +LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 + +# Atomic memory operations +&atomic rs rn rt a r sz +@atomic sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic +LDADD .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic +LDCLR .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic +LDEOR .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic +LDSET .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic +LDSMAX .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic +LDSMIN .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic +LDUMAX .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic +LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic +SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... 
@atomic + +LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 + +# Load/store register (pointer authentication) + +# LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous +%ldra_imm 22:s1 12:9 !function=times_2 + +LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm + +&ldapr_stlr_i rn rt imm sz sign ext +@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i +STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 +LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 +LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0 +LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=1 +LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2 +LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 +LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 + +# Load/store multiple structures +# The 4-bit opcode in [15:12] encodes repeat count and structure elements +&ldst_mult rm rn rt sz q p rpt selem +@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult +ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4 +ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 +ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 +ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 + +LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4 +LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 +LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 +LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 + +# Load/store single structure +&ldst_single rm rn rt p selem index scale + +%ldst_single_selem 13:1 21:1 !function=plus_1 + +%ldst_single_index_b 30:1 10:3 +%ldst_single_index_h 30:1 11:2 +%ldst_single_index_s 30:1 12:1 + +@ldst_single_b .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=0 selem=%ldst_single_selem \ + index=%ldst_single_index_b +@ldst_single_h .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=1 selem=%ldst_single_selem \ + index=%ldst_single_index_h +@ldst_single_s .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=2 selem=%ldst_single_selem \ + index=%ldst_single_index_s +@ldst_single_d . index:1 ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=3 selem=%ldst_single_selem + +ST_single 0 . 001101 . 0 . ..... 00 . ... ..... ..... @ldst_single_b +ST_single 0 . 001101 . 0 . ..... 01 . ..0 ..... ..... @ldst_single_h +ST_single 0 . 001101 . 0 . ..... 10 . .00 ..... ..... @ldst_single_s +ST_single 0 . 001101 . 0 . ..... 10 . 001 ..... ..... @ldst_single_d + +LD_single 0 . 001101 . 1 . ..... 00 . ... ..... ..... @ldst_single_b +LD_single 0 . 001101 . 1 . ..... 01 . ..0 ..... ..... @ldst_single_h +LD_single 0 . 001101 . 1 . ..... 10 . .00 ..... ..... @ldst_single_s +LD_single 0 . 001101 . 1 . ..... 10 . 001 ..... 
..... @ldst_single_d + +# Replicating load case +LD_single_repl 0 q:1 001101 p:1 1 . rm:5 11 . 0 scale:2 rn:5 rt:5 selem=%ldst_single_selem + +%tag_offset 12:s9 !function=scale_by_log2_tag_granule +&ldst_tag rn rt imm p w +@ldst_tag ........ .. . ......... .. rn:5 rt:5 &ldst_tag imm=%tag_offset +@ldst_tag_mult ........ .. . 000000000 .. rn:5 rt:5 &ldst_tag imm=0 + +STZGM 11011001 00 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 +STG 11011001 00 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STG 11011001 00 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STG 11011001 00 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +LDG 11011001 01 1 ......... 00 ..... ..... @ldst_tag p=0 w=0 +STZG 11011001 01 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STZG 11011001 01 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STZG 11011001 01 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +STGM 11011001 10 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 +ST2G 11011001 10 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +ST2G 11011001 10 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +ST2G 11011001 10 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +LDGM 11011001 11 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 +STZ2G 11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STZ2G 11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STZ2G 11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index aa93f37..3baab6a 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -47,6 +47,28 @@ enum a64_shift_type { }; /* + * Helpers for extracting complex instruction fields + */ + +/* + * For load/store with an unsigned 12 bit immediate scaled by the element + * size. The input has the immediate field in bits [14:3] and the element + * size in [2:0]. + */ +static int uimm_scaled(DisasContext *s, int x) +{ + unsigned imm = x >> 3; + unsigned scale = extract32(x, 0, 3); + return imm << scale; +} + +/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ +static int scale_by_log2_tag_granule(DisasContext *s, int x) +{ + return x << LOG2_TAG_GRANULE; +} + +/* * Include the generated decoders. */ @@ -1649,201 +1671,239 @@ static bool trans_ERETA(DisasContext *s, arg_reta *a) return true; } -/* HINT instruction group, including various allocated HINTs */ -static void handle_hint(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static bool trans_NOP(DisasContext *s, arg_NOP *a) { - unsigned int selector = crm << 3 | op2; + return true; +} - if (op1 != 3) { - unallocated_encoding(s); - return; +static bool trans_YIELD(DisasContext *s, arg_YIELD *a) +{ + /* + * When running in MTTCG we don't generate jumps to the yield and + * WFE helpers as it won't affect the scheduling of other vCPUs. + * If we wanted to more completely model WFE/SEV so we don't busy + * spin unnecessarily we would need to do something more involved. + */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_YIELD; } + return true; +} - switch (selector) { - case 0b00000: /* NOP */ - break; - case 0b00011: /* WFI */ - s->base.is_jmp = DISAS_WFI; - break; - case 0b00001: /* YIELD */ - /* When running in MTTCG we don't generate jumps to the yield and - * WFE helpers as it won't affect the scheduling of other vCPUs. - * If we wanted to more completely model WFE/SEV so we don't busy - * spin unnecessarily we would need to do something more involved. 
+static bool trans_WFI(DisasContext *s, arg_WFI *a) +{ + s->base.is_jmp = DISAS_WFI; + return true; +} + +static bool trans_WFE(DisasContext *s, arg_WFI *a) +{ + /* + * When running in MTTCG we don't generate jumps to the yield and + * WFE helpers as it won't affect the scheduling of other vCPUs. + * If we wanted to more completely model WFE/SEV so we don't busy + * spin unnecessarily we would need to do something more involved. + */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_WFE; + } + return true; +} + +static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) +{ + if (s->pauth_active) { + gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); + } + return true; +} + +static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_ESB(DisasContext *s, arg_ESB *a) +{ + /* Without RAS, we must implement this as NOP. */ + if (dc_isar_feature(aa64_ras, s)) { + /* + * QEMU does not have a source of physical SErrors, + * so we are only concerned with virtual SErrors. + * The pseudocode in the ARM for this case is + * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then + * AArch64.vESBOperation(); + * Most of the condition can be evaluated at translation time. + * Test for EL2 present, and defer test for SEL2 to runtime. */ - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - s->base.is_jmp = DISAS_YIELD; - } - break; - case 0b00010: /* WFE */ - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - s->base.is_jmp = DISAS_WFE; - } - break; - case 0b00100: /* SEV */ - case 0b00101: /* SEVL */ - case 0b00110: /* DGH */ - /* we treat all as NOP at least for now */ - break; - case 0b00111: /* XPACLRI */ - if (s->pauth_active) { - gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); - } - break; - case 0b01000: /* PACIA1716 */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01010: /* PACIB1716 */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01100: /* AUTIA1716 */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01110: /* AUTIB1716 */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); + if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { + gen_helper_vesb(cpu_env); } - break; - case 0b10000: /* ESB */ - /* Without RAS, we must implement this as NOP. */ - if (dc_isar_feature(aa64_ras, s)) { - /* - * QEMU does not have a source of physical SErrors, - * so we are only concerned with virtual SErrors. - * The pseudocode in the ARM for this case is - * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then - * AArch64.vESBOperation(); - * Most of the condition can be evaluated at translation time. - * Test for EL2 present, and defer test for SEL2 to runtime. 
- */ - if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { - gen_helper_vesb(cpu_env); - } - } - break; - case 0b11000: /* PACIAZ */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], - tcg_constant_i64(0)); - } - break; - case 0b11001: /* PACIASP */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - case 0b11010: /* PACIBZ */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], - tcg_constant_i64(0)); - } - break; - case 0b11011: /* PACIBSP */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - case 0b11100: /* AUTIAZ */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], - tcg_constant_i64(0)); - } - break; - case 0b11101: /* AUTIASP */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - case 0b11110: /* AUTIBZ */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], - tcg_constant_i64(0)); - } - break; - case 0b11111: /* AUTIBSP */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - default: - /* default specified as NOP equivalent */ - break; } + return true; +} + +static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); + } + return true; } -static void gen_clrex(DisasContext *s, uint32_t insn) +static bool trans_CLREX(DisasContext *s, arg_CLREX *a) { tcg_gen_movi_i64(cpu_exclusive_addr, -1); + return true; } -/* CLREX, DSB, DMB, ISB */ -static void handle_sync(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) { + /* We handle DSB and DMB the same way */ TCGBar bar; - if (op1 != 3) { - unallocated_encoding(s); - return; + switch (a->types) { + case 1: /* MBReqTypes_Reads */ + bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; + break; + case 2: /* MBReqTypes_Writes */ + bar = TCG_BAR_SC | TCG_MO_ST_ST; + break; + default: /* MBReqTypes_All */ + bar = TCG_BAR_SC | TCG_MO_ALL; + break; } + tcg_gen_mb(bar); + return true; +} - switch (op2) { - case 2: /* CLREX */ - gen_clrex(s, 
insn); - return; - case 4: /* DSB */ - case 5: /* DMB */ - switch (crm & 3) { - case 1: /* MBReqTypes_Reads */ - bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; - break; - case 2: /* MBReqTypes_Writes */ - bar = TCG_BAR_SC | TCG_MO_ST_ST; - break; - default: /* MBReqTypes_All */ - bar = TCG_BAR_SC | TCG_MO_ALL; - break; - } - tcg_gen_mb(bar); - return; - case 6: /* ISB */ - /* We need to break the TB after this insn to execute - * a self-modified code correctly and also to take - * any pending interrupts immediately. - */ - reset_btype(s); - gen_goto_tb(s, 0, 4); - return; +static bool trans_ISB(DisasContext *s, arg_ISB *a) +{ + /* + * We need to break the TB after this insn to execute + * self-modifying code correctly and also to take + * any pending interrupts immediately. + */ + reset_btype(s); + gen_goto_tb(s, 0, 4); + return true; +} - case 7: /* SB */ - if (crm != 0 || !dc_isar_feature(aa64_sb, s)) { - goto do_unallocated; - } - /* - * TODO: There is no speculation barrier opcode for TCG; - * MB and end the TB instead. - */ - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); - gen_goto_tb(s, 0, 4); - return; +static bool trans_SB(DisasContext *s, arg_SB *a) +{ + if (!dc_isar_feature(aa64_sb, s)) { + return false; + } + /* + * TODO: There is no speculation barrier opcode for TCG; + * MB and end the TB instead. + */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + gen_goto_tb(s, 0, 4); + return true; +} - default: - do_unallocated: - unallocated_encoding(s); - return; +static bool trans_CFINV(DisasContext *s, arg_CFINV *a) +{ + if (!dc_isar_feature(aa64_condm_4, s)) { + return false; } + tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); + return true; } -static void gen_xaflag(void) +static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) { - TCGv_i32 z = tcg_temp_new_i32(); + TCGv_i32 z; + + if (!dc_isar_feature(aa64_condm_5, s)) { + return false; + } + + z = tcg_temp_new_i32(); tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); @@ -1867,10 +1927,16 @@ static void gen_xaflag(void) /* C | Z */ tcg_gen_or_i32(cpu_CF, cpu_CF, z); + + return true; } -static void gen_axflag(void) +static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) { + if (!dc_isar_feature(aa64_condm_5, s)) { + return false; + } + tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ @@ -1879,150 +1945,134 @@ static void gen_axflag(void) tcg_gen_movi_i32(cpu_NF, 0); tcg_gen_movi_i32(cpu_VF, 0); + + return true; } -/* MSR (immediate) - move immediate to processor state field */ -static void handle_msr_i(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) { - int op = op1 << 3 | op2; - - /* End the TB by default, chaining is ok. 
*/ + if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_UAO); + } else { + clear_pstate_bits(PSTATE_UAO); + } + gen_rebuild_hflags(s); s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - switch (op) { - case 0x00: /* CFINV */ - if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) { - goto do_unallocated; - } - tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); - s->base.is_jmp = DISAS_NEXT; - break; +static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_PAN); + } else { + clear_pstate_bits(PSTATE_PAN); + } + gen_rebuild_hflags(s); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x01: /* XAFlag */ - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { - goto do_unallocated; - } - gen_xaflag(); - s->base.is_jmp = DISAS_NEXT; - break; +static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) +{ + if (s->current_el == 0) { + return false; + } + gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(a->imm & PSTATE_SP)); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x02: /* AXFlag */ - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { - goto do_unallocated; - } - gen_axflag(); - s->base.is_jmp = DISAS_NEXT; - break; +static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_ssbs, s)) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_SSBS); + } else { + clear_pstate_bits(PSTATE_SSBS); + } + /* Don't need to rebuild hflags since SSBS is a nop */ + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x03: /* UAO */ - if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_UAO); - } else { - clear_pstate_bits(PSTATE_UAO); - } - gen_rebuild_hflags(s); - break; +static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_dit, s)) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_DIT); + } else { + clear_pstate_bits(PSTATE_DIT); + } + /* There's no need to rebuild hflags because DIT is a nop */ + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x04: /* PAN */ - if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_PAN); +static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) +{ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (a->imm & 1) { + set_pstate_bits(PSTATE_TCO); } else { - clear_pstate_bits(PSTATE_PAN); + clear_pstate_bits(PSTATE_TCO); } gen_rebuild_hflags(s); - break; - - case 0x05: /* SPSel */ - if (s->current_el == 0) { - goto do_unallocated; - } - gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP)); - break; - - case 0x19: /* SSBS */ - if (!dc_isar_feature(aa64_ssbs, s)) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_SSBS); - } else { - clear_pstate_bits(PSTATE_SSBS); - } - /* Don't need to rebuild hflags since SSBS is a nop */ - break; - - case 0x1a: /* DIT */ - if (!dc_isar_feature(aa64_dit, s)) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_DIT); - } else { - clear_pstate_bits(PSTATE_DIT); - } - /* There's no need to rebuild hflags because DIT is a nop */ - break; - - case 0x1e: /* DAIFSet */ - gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm)); - break; + /* Many factors, including TCO, go into MTE_ACTIVE. 
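All of the MSR (immediate) handlers above share one shape: bit 0 of the CRm immediate selects whether a single PSTATE bit is set or cleared, after which hflags are rebuilt if the bit feeds into them. The same update on a plain flags word (a sketch only; PSTATE.PAN architecturally sits at bit 22):

    #include <stdint.h>

    #define PSTATE_PAN_BIT (1u << 22)          /* PSTATE.PAN bit position */

    static uint32_t msr_imm_pan_sketch(uint32_t pstate, unsigned crm_imm)
    {
        if (crm_imm & 1) {                     /* MSR PAN, #1 */
            return pstate | PSTATE_PAN_BIT;
        }
        return pstate & ~PSTATE_PAN_BIT;       /* MSR PAN, #0 */
    }
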
*/ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + return true; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + return true; + } else { + /* Insn not present */ + return false; + } +} - case 0x1f: /* DAIFClear */ - gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm)); - /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE_EXIT; - break; +static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) +{ + gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(a->imm)); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x1c: /* TCO */ - if (dc_isar_feature(aa64_mte, s)) { - /* Full MTE is enabled -- set the TCO bit as directed. */ - if (crm & 1) { - set_pstate_bits(PSTATE_TCO); - } else { - clear_pstate_bits(PSTATE_TCO); - } - gen_rebuild_hflags(s); - /* Many factors, including TCO, go into MTE_ACTIVE. */ - s->base.is_jmp = DISAS_UPDATE_NOCHAIN; - } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { - /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ - s->base.is_jmp = DISAS_NEXT; - } else { - goto do_unallocated; - } - break; +static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) +{ + gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(a->imm)); + /* Exit the cpu loop to re-evaluate pending IRQs. */ + s->base.is_jmp = DISAS_UPDATE_EXIT; + return true; +} - case 0x1b: /* SVCR* */ - if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) { - goto do_unallocated; - } - if (sme_access_check(s)) { - int old = s->pstate_sm | (s->pstate_za << 1); - int new = (crm & 1) * 3; - int msk = (crm >> 1) & 3; +static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) +{ + if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { + return false; + } + if (sme_access_check(s)) { + int old = s->pstate_sm | (s->pstate_za << 1); + int new = a->imm * 3; - if ((old ^ new) & msk) { - /* At least one bit changes. */ - gen_helper_set_svcr(cpu_env, tcg_constant_i32(new), - tcg_constant_i32(msk)); - } else { - s->base.is_jmp = DISAS_NEXT; - } + if ((old ^ new) & a->mask) { + /* At least one bit changes. */ + gen_helper_set_svcr(cpu_env, tcg_constant_i32(new), + tcg_constant_i32(a->mask)); + s->base.is_jmp = DISAS_TOO_MANY; } - break; - - default: - do_unallocated: - unallocated_encoding(s); - return; } + return true; } static void gen_get_nzcv(TCGv_i64 tcg_rt) @@ -2094,7 +2144,7 @@ static void gen_sysreg_undef(DisasContext *s, bool isread, * These are all essentially the same insn in 'read' and 'write' * versions, with varying op0 fields. 
*/ -static void handle_sys(DisasContext *s, uint32_t insn, bool isread, +static void handle_sys(DisasContext *s, bool isread, unsigned int op0, unsigned int op1, unsigned int op2, unsigned int crn, unsigned int crm, unsigned int rt) { @@ -2279,164 +2329,83 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } } -/* System - * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 - * +---------------------+---+-----+-----+-------+-------+-----+------+ - * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | - * +---------------------+---+-----+-----+-------+-------+-----+------+ - */ -static void disas_system(DisasContext *s, uint32_t insn) -{ - unsigned int l, op0, op1, crn, crm, op2, rt; - l = extract32(insn, 21, 1); - op0 = extract32(insn, 19, 2); - op1 = extract32(insn, 16, 3); - crn = extract32(insn, 12, 4); - crm = extract32(insn, 8, 4); - op2 = extract32(insn, 5, 3); - rt = extract32(insn, 0, 5); - - if (op0 == 0) { - if (l || rt != 31) { - unallocated_encoding(s); - return; - } - switch (crn) { - case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ - handle_hint(s, insn, op1, op2, crm); - break; - case 3: /* CLREX, DSB, DMB, ISB */ - handle_sync(s, insn, op1, op2, crm); - break; - case 4: /* MSR (immediate) */ - handle_msr_i(s, insn, op1, op2, crm); - break; - default: - unallocated_encoding(s); - break; - } - return; - } - handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); +static bool trans_SYS(DisasContext *s, arg_SYS *a) +{ + handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); + return true; } -/* Exception generation - * - * 31 24 23 21 20 5 4 2 1 0 - * +-----------------+-----+------------------------+-----+----+ - * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | - * +-----------------------+------------------------+----------+ - */ -static void disas_exc(DisasContext *s, uint32_t insn) +static bool trans_SVC(DisasContext *s, arg_i *a) { - int opc = extract32(insn, 21, 3); - int op2_ll = extract32(insn, 0, 5); - int imm16 = extract32(insn, 5, 16); - uint32_t syndrome; + /* + * For SVC, HVC and SMC we advance the single-step state + * machine before taking the exception. This is architecturally + * mandated, to ensure that single-stepping a system call + * instruction works properly. + */ + uint32_t syndrome = syn_aa64_svc(a->imm); + if (s->fgt_svc) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); + return true; + } + gen_ss_advance(s); + gen_exception_insn(s, 4, EXCP_SWI, syndrome); + return true; +} - switch (opc) { - case 0: - /* For SVC, HVC and SMC we advance the single-step state - * machine before taking the exception. This is architecturally - * mandated, to ensure that single-stepping a system call - * instruction works properly. - */ - switch (op2_ll) { - case 1: /* SVC */ - syndrome = syn_aa64_svc(imm16); - if (s->fgt_svc) { - gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); - break; - } - gen_ss_advance(s); - gen_exception_insn(s, 4, EXCP_SWI, syndrome); - break; - case 2: /* HVC */ - if (s->current_el == 0) { - unallocated_encoding(s); - break; - } - /* The pre HVC helper handles cases when HVC gets trapped - * as an undefined insn by runtime configuration. 
- */ - gen_a64_update_pc(s, 0); - gen_helper_pre_hvc(cpu_env); - gen_ss_advance(s); - gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); - break; - case 3: /* SMC */ - if (s->current_el == 0) { - unallocated_encoding(s); - break; - } - gen_a64_update_pc(s, 0); - gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); - gen_ss_advance(s); - gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); - break; - default: - unallocated_encoding(s); - break; - } - break; - case 1: - if (op2_ll != 0) { - unallocated_encoding(s); - break; - } - /* BRK */ - gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); - break; - case 2: - if (op2_ll != 0) { - unallocated_encoding(s); - break; - } - /* HLT. This has two purposes. - * Architecturally, it is an external halting debug instruction. - * Since QEMU doesn't implement external debug, we treat this as - * it is required for halting debug disabled: it will UNDEF. - * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. - */ - if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) { - gen_exception_internal_insn(s, EXCP_SEMIHOST); - } else { - unallocated_encoding(s); - } - break; - case 5: - if (op2_ll < 1 || op2_ll > 3) { - unallocated_encoding(s); - break; - } - /* DCPS1, DCPS2, DCPS3 */ +static bool trans_HVC(DisasContext *s, arg_i *a) +{ + if (s->current_el == 0) { unallocated_encoding(s); - break; - default: + return true; + } + /* + * The pre HVC helper handles cases when HVC gets trapped + * as an undefined insn by runtime configuration. + */ + gen_a64_update_pc(s, 0); + gen_helper_pre_hvc(cpu_env); + /* Architecture requires ss advance before we do the actual work */ + gen_ss_advance(s); + gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2); + return true; +} + +static bool trans_SMC(DisasContext *s, arg_i *a) +{ + if (s->current_el == 0) { unallocated_encoding(s); - break; + return true; } + gen_a64_update_pc(s, 0); + gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(a->imm))); + /* Architecture requires ss advance before we do the actual work */ + gen_ss_advance(s); + gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); + return true; } -/* Branches, exception generating and system instructions */ -static void disas_b_exc_sys(DisasContext *s, uint32_t insn) +static bool trans_BRK(DisasContext *s, arg_i *a) { - switch (extract32(insn, 25, 7)) { - case 0x6a: /* Exception generation / System */ - if (insn & (1 << 24)) { - if (extract32(insn, 22, 2) == 0) { - disas_system(s, insn); - } else { - unallocated_encoding(s); - } - } else { - disas_exc(s, insn); - } - break; - default: + gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); + return true; +} + +static bool trans_HLT(DisasContext *s, arg_i *a) +{ + /* + * HLT. This has two purposes. + * Architecturally, it is an external halting debug instruction. + * Since QEMU doesn't implement external debug, we treat this as + * it is required for halting debug disabled: it will UNDEF. + * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. + */ + if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { + gen_exception_internal_insn(s, EXCP_SEMIHOST); + } else { unallocated_encoding(s); - break; } + return true; } /* @@ -2689,904 +2658,685 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, } } -/* Update the Sixty-Four bit (SF) registersize. This logic is derived +/* + * Compute the ISS.SF bit for syndrome information if an exception + * is taken on a load or store. 
This indicates whether the instruction + * is accessing a 32-bit or 64-bit register. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings). */ -static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) +static bool ldst_iss_sf(int size, bool sign, bool ext) { - int opc0 = extract32(opc, 0, 1); - int regsize; - if (is_signed) { - regsize = opc0 ? 32 : 64; + if (sign) { + /* + * Signed loads are 64 bit results if we are not going to + * do a zero-extend from 32 to 64 after the load. + * (For a store, sign and ext are always false.) + */ + return !ext; } else { - regsize = size == 3 ? 64 : 32; + /* Unsigned loads/stores work at the specified size */ + return size == MO_64; } - return regsize == 64; } -/* Load/store exclusive - * - * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 - * +-----+-------------+----+---+----+------+----+-------+------+------+ - * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | - * +-----+-------------+----+---+----+------+----+-------+------+------+ - * - * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit - * L: 0 -> store, 1 -> load - * o2: 0 -> exclusive, 1 -> not - * o1: 0 -> single register, 1 -> register pair - * o0: 1 -> load-acquire/store-release, 0 -> not - */ -static void disas_ldst_excl(DisasContext *s, uint32_t insn) +static bool trans_STXR(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + if (a->lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + } + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); + return true; +} + +static bool trans_LDXR(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); + if (a->lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + } + return true; +} + +static bool trans_STLR(DisasContext *s, arg_stlr *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rt2 = extract32(insn, 10, 5); - int rs = extract32(insn, 16, 5); - int is_lasr = extract32(insn, 15, 1); - int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; - int size = extract32(insn, 30, 2); TCGv_i64 clean_addr; MemOp memop; + bool iss_sf = ldst_iss_sf(a->sz, false, false); - switch (o2_L_o1_o0) { - case 0x0: /* STXR */ - case 0x1: /* STLXR */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - } - gen_store_exclusive(s, rs, rt, rt2, rn, size, false); - return; + /* + * StoreLORelease is the same as Store-Release for QEMU, but + * needs the feature-test. + */ + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { + return false; + } + /* Generate ISS for non-exclusive accesses including LASR. 
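To make the ldst_iss_sf() rule above concrete, here it is restated on plain values together with a few evaluations (MO_64 == 3, matching QEMU's MemOp sizes; this mirrors the helper, it is not a replacement for it):

    #include <stdbool.h>

    enum { MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3 };  /* log2 of access size */

    static bool iss_sf_sketch(int size, bool sign, bool ext)
    {
        if (sign) {
            /* Signed load: 64-bit destination unless the result is
             * zero-extended into a 32-bit (W) destination. */
            return !ext;
        }
        return size == MO_64;       /* unsigned: SF only for 8-byte accesses */
    }

    /* LDRSW Xt: size=MO_32, sign=1, ext=0 -> SF=1 (64-bit register)
     * LDRSH Wt: size=MO_16, sign=1, ext=1 -> SF=0 (32-bit register)
     * STR   Wt: size=MO_32, sign=0, ext=0 -> SF=0
     * LDR   Xt: size=MO_64, sign=0, ext=0 -> SF=1 */
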
*/ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + memop = check_ordered_align(s, a->rn, 0, true, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), + true, a->rn != 31, memop); + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, + iss_sf, a->lasr); + return true; +} - case 0x4: /* LDXR */ - case 0x5: /* LDAXR */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - gen_load_exclusive(s, rt, rt2, rn, size, false); - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } - return; +static bool trans_LDAR(DisasContext *s, arg_stlr *a) +{ + TCGv_i64 clean_addr; + MemOp memop; + bool iss_sf = ldst_iss_sf(a->sz, false, false); - case 0x8: /* STLLR */ - if (!dc_isar_feature(aa64_lor, s)) { - break; - } - /* StoreLORelease is the same as Store-Release for QEMU. */ - /* fall through */ - case 0x9: /* STLR */ - /* Generate ISS for non-exclusive accesses including LASR. */ - if (rn == 31) { - gen_check_sp_alignment(s); - } + /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { + return false; + } + /* Generate ISS for non-exclusive accesses including LASR. */ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + memop = check_ordered_align(s, a->rn, 0, false, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), + false, a->rn != 31, memop); + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, + a->rt, iss_sf, a->lasr); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; +} + +static bool trans_STXP(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + if (a->lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - memop = check_ordered_align(s, rn, 0, true, size); - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, memop); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt, - disas_ldst_compute_iss_sf(size, false, 0), is_lasr); - return; + } + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); + return true; +} - case 0xc: /* LDLAR */ - if (!dc_isar_feature(aa64_lor, s)) { - break; - } - /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ - /* fall through */ - case 0xd: /* LDAR */ - /* Generate ISS for non-exclusive accesses including LASR. 
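The barrier placement in trans_STLR()/trans_LDAR() above (barrier before a store-release, barrier after a load-acquire) models the usual release/acquire pairing. In portable C11 terms the guest-visible ordering being emulated corresponds roughly to the following; this is an analogy, not the TCG implementation:

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t cell;

    static void stlr_like(uint64_t v)
    {
        /* STLR: earlier accesses are ordered before the store. */
        atomic_store_explicit(&cell, v, memory_order_release);
    }

    static uint64_t ldar_like(void)
    {
        /* LDAR: the load is ordered before later accesses. */
        return atomic_load_explicit(&cell, memory_order_acquire);
    }
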
*/ - if (rn == 31) { - gen_check_sp_alignment(s); - } - memop = check_ordered_align(s, rn, 0, false, size); - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, memop); - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true, - rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); +static bool trans_LDXP(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); + if (a->lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - return; + } + return true; +} - case 0x2: case 0x3: /* CASP / STXP */ - if (size & 2) { /* STXP / STLXP */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - } - gen_store_exclusive(s, rs, rt, rt2, rn, size, true); - return; - } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { - /* CASP / CASPL */ - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); - return; - } - break; +static bool trans_CASP(DisasContext *s, arg_CASP *a) +{ + if (!dc_isar_feature(aa64_atomics, s)) { + return false; + } + if (((a->rt | a->rs) & 1) != 0) { + return false; + } - case 0x6: case 0x7: /* CASPA / LDXP */ - if (size & 2) { /* LDXP / LDAXP */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - gen_load_exclusive(s, rt, rt2, rn, size, true); - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } - return; - } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { - /* CASPA / CASPAL */ - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); - return; - } - break; + gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); + return true; +} - case 0xa: /* CAS */ - case 0xb: /* CASL */ - case 0xe: /* CASA */ - case 0xf: /* CASAL */ - if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { - gen_compare_and_swap(s, rs, rt, rn, size); - return; - } - break; +static bool trans_CAS(DisasContext *s, arg_CAS *a) +{ + if (!dc_isar_feature(aa64_atomics, s)) { + return false; } - unallocated_encoding(s); + gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); + return true; } -/* - * Load register (literal) - * - * 31 30 29 27 26 25 24 23 5 4 0 - * +-----+-------+---+-----+-------------------+-------+ - * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | - * +-----+-------+---+-----+-------------------+-------+ - * - * V: 1 -> vector (simd/fp) - * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, - * 10-> 32 bit signed, 11 -> prefetch - * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) - */ -static void disas_ld_lit(DisasContext *s, uint32_t insn) +static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) { - int rt = extract32(insn, 0, 5); - int64_t imm = sextract32(insn, 5, 19) << 2; - bool is_vector = extract32(insn, 26, 1); - int opc = extract32(insn, 30, 2); - bool is_signed = false; - int size = 2; - TCGv_i64 tcg_rt, clean_addr; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); + TCGv_i64 tcg_rt = cpu_reg(s, a->rt); + TCGv_i64 clean_addr = tcg_temp_new_i64(); + MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); + + gen_pc_plus_diff(s, clean_addr, a->imm); + do_gpr_ld(s, tcg_rt, clean_addr, memop, + false, true, a->rt, iss_sf, false); + return true; +} + +static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) +{ + /* Load register (literal), vector version */ + TCGv_i64 clean_addr; MemOp memop; - if (is_vector) { - if (opc == 3) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; + } + memop = 
finalize_memop_asimd(s, a->sz); + clean_addr = tcg_temp_new_i64(); + gen_pc_plus_diff(s, clean_addr, a->imm); + do_fp_ld(s, a->rt, clean_addr, memop); + return true; +} + +static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + uint64_t offset, bool is_store, MemOp mop) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); + } + + *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store, + (a->w || a->rn != 31), 2 << a->sz, mop); +} + +static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, + TCGv_i64 dirty_addr, uint64_t offset) +{ + if (a->w) { + if (a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - size = 2 + opc; - if (!fp_access_check(s)) { - return; + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); + } +} + +static bool trans_STP(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + MemOp mop = finalize_memop(s, a->sz); + + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); + /* + * We built mop above for the single logical access -- rebuild it + * now for the paired operation. + * + * With LSE2, non-sign-extending pairs are treated atomically if + * aligned, and if unaligned one of the pair will be completely + * within a 16-byte block and that element will be atomic. + * Otherwise each element is separately atomic. + * In all cases, issue one operation with the correct atomicity. + */ + mop = a->sz + 1; + if (s->align_mem) { + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); + } + mop = finalize_memop_pair(s, mop); + if (a->sz == 2) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + if (s->be_data == MO_LE) { + tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); } - memop = finalize_memop_asimd(s, size); + tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); } else { - if (opc == 3) { - /* PRFM (literal) : prefetch */ - return; + TCGv_i128 tmp = tcg_temp_new_i128(); + + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); } - size = 2 + extract32(opc, 0, 1); - is_signed = extract32(opc, 1, 1); - memop = finalize_memop(s, size + is_signed * MO_SIGN); + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); } + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - tcg_rt = cpu_reg(s, rt); +static bool trans_LDP(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + MemOp mop = finalize_memop(s, a->sz); - clean_addr = tcg_temp_new_i64(); - gen_pc_plus_diff(s, clean_addr, imm); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); - if (is_vector) { - do_fp_ld(s, rt, clean_addr, memop); + /* + * We built mop above for the single logical access -- rebuild it + * now for the paired operation. + * + * With LSE2, non-sign-extending pairs are treated atomically if + * aligned, and if unaligned one of the pair will be completely + * within a 16-byte block and that element will be atomic. + * Otherwise each element is separately atomic. + * In all cases, issue one operation with the correct atomicity. 
+ * + * This treats sign-extending loads like zero-extending loads, + * since that reuses the most code below. + */ + mop = a->sz + 1; + if (s->align_mem) { + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); + } + mop = finalize_memop_pair(s, mop); + if (a->sz == 2) { + int o2 = s->be_data == MO_LE ? 32 : 0; + int o1 = o2 ^ 32; + + tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); + if (a->sign) { + tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); + } else { + tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); + } } else { - /* Only unsigned 32bit loads target 32bit registers. */ - bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false); + TCGv_i128 tmp = tcg_temp_new_i128(); + + tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); + } else { + tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); + } } + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; } -/* - * LDNP (Load Pair - non-temporal hint) - * LDP (Load Pair - non vector) - * LDPSW (Load Pair Signed Word - non vector) - * STNP (Store Pair - non-temporal hint) - * STP (Store Pair - non vector) - * LDNP (Load Pair of SIMD&FP - non-temporal hint) - * LDP (Load Pair of SIMD&FP) - * STNP (Store Pair of SIMD&FP - non-temporal hint) - * STP (Store Pair of SIMD&FP) - * - * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 - * +-----+-------+---+---+-------+---+-----------------------------+ - * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | - * +-----+-------+---+---+-------+---+-------+-------+------+------+ - * - * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW/STGP 01 - * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit - * V: 0 -> GPR, 1 -> Vector - * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, - * 10 -> signed offset, 11 -> pre-index - * L: 0 -> Store 1 -> Load - * - * Rt, Rt2 = GPR or SIMD registers to be stored - * Rn = general purpose register containing address - * imm7 = signed offset (multiple of 4 or 8 depending on size) - */ -static void disas_ldst_pair(DisasContext *s, uint32_t insn) +static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rt2 = extract32(insn, 10, 5); - uint64_t offset = sextract64(insn, 15, 7); - int index = extract32(insn, 23, 2); - bool is_vector = extract32(insn, 26, 1); - bool is_load = extract32(insn, 22, 1); - int opc = extract32(insn, 30, 2); - bool is_signed = false; - bool postindex = false; - bool wback = false; - bool set_tag = false; + uint64_t offset = a->imm << a->sz; TCGv_i64 clean_addr, dirty_addr; MemOp mop; - int size; - if (opc == 3) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; } - if (is_vector) { - size = 2 + opc; - } else if (opc == 1 && !is_load) { - /* STGP */ - if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { - unallocated_encoding(s); - return; - } - size = 3; - set_tag = true; - } else { - size = 2 + extract32(opc, 1, 1); - is_signed = extract32(opc, 0, 1); - if (!is_load && is_signed) { - unallocated_encoding(s); - return; - } - } + /* LSE2 does not merge FP pairs; leave these as separate operations. 
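The sz==2 (32-bit element) paths of trans_STP()/trans_LDP() above issue one 64-bit access and then pack or unpack the two W registers; the o1/o2 offsets simply select which half each register occupies for the guest endianness. The same arithmetic on plain integers (a sketch, independent of TCG):

    #include <stdbool.h>
    #include <stdint.h>

    /* STP Wt, Wt2: combine the pair into the single 64-bit store value. */
    static uint64_t stp32_combine(uint32_t rt, uint32_t rt2, bool big_endian)
    {
        return big_endian ? ((uint64_t)rt << 32) | rt2
                          : ((uint64_t)rt2 << 32) | rt;
    }

    /* LDP Wt, Wt2: split the 64-bit value loaded by the single access. */
    static void ldp32_split(uint64_t v, bool big_endian,
                            uint32_t *rt, uint32_t *rt2)
    {
        int o2 = big_endian ? 0 : 32;   /* bit offset of Rt2 within the access */
        int o1 = o2 ^ 32;               /* bit offset of Rt */
        *rt  = (uint32_t)(v >> o1);
        *rt2 = (uint32_t)(v >> o2);
    }
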
*/ + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); + do_fp_st(s, a->rt, clean_addr, mop); + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); + do_fp_st(s, a->rt2, clean_addr, mop); + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - switch (index) { - case 1: /* post-index */ - postindex = true; - wback = true; - break; - case 0: - /* signed offset with "non-temporal" hint. Since we don't emulate - * caches we don't care about hints to the cache system about - * data access patterns, and handle this identically to plain - * signed offset. - */ - if (is_signed) { - /* There is no non-temporal-hint version of LDPSW */ - unallocated_encoding(s); - return; - } - postindex = false; - break; - case 2: /* signed offset, rn not updated */ - postindex = false; - break; - case 3: /* pre-index */ - postindex = false; - wback = true; - break; - } +static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr; + MemOp mop; - if (is_vector && !fp_access_check(s)) { - return; + if (!fp_access_check(s)) { + return true; } - offset <<= (set_tag ? LOG2_TAG_GRANULE : size); + /* LSE2 does not merge FP pairs; leave these as separate operations. */ + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); + do_fp_ld(s, a->rt, clean_addr, mop); + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); + do_fp_ld(s, a->rt2, clean_addr, mop); + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - if (rn == 31) { - gen_check_sp_alignment(s); +static bool trans_STGP(DisasContext *s, arg_ldstpair *a) +{ + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + uint64_t offset = a->imm << LOG2_TAG_GRANULE; + MemOp mop; + TCGv_i128 tmp; + + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + return false; } - dirty_addr = read_cpu_reg_sp(s, rn, 1); - if (!postindex) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + if (a->rn == 31) { + gen_check_sp_alignment(s); } - if (set_tag) { - if (!s->ata) { - /* - * TODO: We could rely on the stores below, at least for - * system mode, if we arrange to add MO_ALIGN_16. - */ - gen_helper_stg_stub(cpu_env, dirty_addr); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); - } else { - gen_helper_stg(cpu_env, dirty_addr, dirty_addr); - } + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - if (is_vector) { - mop = finalize_memop_asimd(s, size); + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); } else { - mop = finalize_memop(s, size); + gen_helper_stg(cpu_env, dirty_addr, dirty_addr); } - clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, - (wback || rn != 31) && !set_tag, - 2 << size, mop); - if (is_vector) { - /* LSE2 does not merge FP pairs; leave these as separate operations. 
*/ - if (is_load) { - do_fp_ld(s, rt, clean_addr, mop); - } else { - do_fp_st(s, rt, clean_addr, mop); - } - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - if (is_load) { - do_fp_ld(s, rt2, clean_addr, mop); - } else { - do_fp_st(s, rt2, clean_addr, mop); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); + mop = finalize_memop(s, a->sz); + clean_addr = gen_mte_checkN(s, dirty_addr, true, false, 2 << a->sz, mop); - /* - * We built mop above for the single logical access -- rebuild it - * now for the paired operation. - * - * With LSE2, non-sign-extending pairs are treated atomically if - * aligned, and if unaligned one of the pair will be completely - * within a 16-byte block and that element will be atomic. - * Otherwise each element is separately atomic. - * In all cases, issue one operation with the correct atomicity. - * - * This treats sign-extending loads like zero-extending loads, - * since that reuses the most code below. - */ - mop = size + 1; - if (s->align_mem) { - mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8); - } - mop = finalize_memop_pair(s, mop); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); - if (is_load) { - if (size == 2) { - int o2 = s->be_data == MO_LE ? 32 : 0; - int o1 = o2 ^ 32; + assert(a->sz == 3); - tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); - if (is_signed) { - tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); - tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); - } else { - tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); - tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); - } - } else { - TCGv_i128 tmp = tcg_temp_new_i128(); + tmp = tcg_temp_new_i128(); + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); + } + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); - tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); - if (s->be_data == MO_LE) { - tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); - } else { - tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); - } - } - } else { - if (size == 2) { - TCGv_i64 tmp = tcg_temp_new_i64(); + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - if (s->be_data == MO_LE) { - tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); - } else { - tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); - } - tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); - } else { - TCGv_i128 tmp = tcg_temp_new_i128(); +static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + uint64_t offset, bool is_store, MemOp mop) +{ + int memidx; - if (s->be_data == MO_LE) { - tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); - } else { - tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); - } - tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); - } - } + if (a->rn == 31) { + gen_check_sp_alignment(s); } - if (wback) { - if (postindex) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - } - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } + memidx = a->unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); + *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, + a->w || a->rn != 31, + mop, a->unpriv, memidx); } -/* - * Load/store (immediate post-indexed) - * Load/store (immediate pre-indexed) - * Load/store (unscaled immediate) - * - * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 - * +----+-------+---+-----+-----+---+--------+-----+------+------+ - * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | - * +----+-------+---+-----+-----+---+--------+-----+------+------+ - * - * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback) - 10 -> unprivileged - * V = 0 -> non-vector - * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - */ -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, + TCGv_i64 dirty_addr, uint64_t offset) { - int rn = extract32(insn, 5, 5); - int imm9 = sextract32(insn, 12, 9); - int idx = extract32(insn, 10, 2); - bool is_signed = false; - bool is_store = false; - bool is_extended = false; - bool is_unpriv = (idx == 2); - bool iss_valid; - bool post_index; - bool writeback; - int memidx; - MemOp memop; - TCGv_i64 clean_addr, dirty_addr; - - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4 || is_unpriv) { - unallocated_encoding(s); - return; - } - is_store = ((opc & 1) == 0); - if (!fp_access_check(s)) { - return; - } - memop = finalize_memop_asimd(s, size); - } else { - if (size == 3 && opc == 2) { - /* PRFM - prefetch */ - if (idx != 0) { - unallocated_encoding(s); - return; - } - return; - } - if (opc == 3 && size > 1) { - unallocated_encoding(s); - return; + if (a->w) { + if (a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - is_store = (opc == 0); - is_signed = !is_store && extract32(opc, 1, 1); - is_extended = (size < 3) && extract32(opc, 0, 1); - memop = finalize_memop(s, size + is_signed * MO_SIGN); + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); } +} - switch (idx) { - case 0: - case 2: - post_index = false; - writeback = false; - break; - case 1: - post_index = true; - writeback = true; - break; - case 3: - post_index = false; - writeback = true; - break; - default: - g_assert_not_reached(); - } +static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) +{ + bool iss_sf, iss_valid = !a->w; + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); - iss_valid = !is_vector && !writeback; + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); - if (rn == 31) { - gen_check_sp_alignment(s); - } + tcg_rt = cpu_reg(s, a->rt); + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); - dirty_addr = read_cpu_reg_sp(s, rn, 1); - if (!post_index) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); - } + do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, + iss_valid, a->rt, iss_sf, false); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} - memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); +static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) +{ + bool iss_sf, iss_valid = !a->w; + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + int memidx = a->unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); - clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, - writeback || rn != 31, - size, is_unpriv, memidx); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, memop); - } else { - do_fp_ld(s, rt, clean_addr, memop); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); + tcg_rt = cpu_reg(s, a->rt); + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); - if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx, - iss_valid, rt, iss_sf, false); - } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop, - is_extended, memidx, - iss_valid, rt, iss_sf, false); - } - } + do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, + a->ext, memidx, iss_valid, a->rt, iss_sf, false); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} - if (writeback) { - TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); - if (post_index) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); - } - tcg_gen_mov_i64(tcg_rn, dirty_addr); +static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) +{ + TCGv_i64 clean_addr, dirty_addr; + MemOp mop; + + if (!fp_access_check(s)) { + return true; } + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); + do_fp_st(s, a->rt, clean_addr, mop); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; } -/* - * Load/store (register offset) - * - * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ - * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ - * - * For non-vector: - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - * For vector: - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated - * opc<0>: 0 -> store, 1 -> load - * V: 1 -> vector/simd - * opt: extend encoding (see DecodeRegExtend) - * S: if S=1 then scale (essentially index by sizeof(size)) - * Rt: register to transfer into/out of - * Rn: address register or SP for base - * Rm: offset register or ZR for offset - */ -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) { - int rn = extract32(insn, 5, 5); - int shift = extract32(insn, 12, 1); - int rm = extract32(insn, 16, 5); - int opt = extract32(insn, 13, 3); - bool is_signed = false; - bool is_store = false; - bool is_extended = false; - TCGv_i64 tcg_rm, clean_addr, dirty_addr; - MemOp memop; + TCGv_i64 clean_addr, dirty_addr; + MemOp mop; - if (extract32(opt, 1, 1) == 0) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; } + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); + do_fp_ld(s, a->rt, clean_addr, mop); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4) { - unallocated_encoding(s); - return; - } - is_store = !extract32(opc, 0, 1); - if (!fp_access_check(s)) { - return; - } - } else { - if (size == 3 && opc == 2) { - /* PRFM - prefetch */ - return; - } - if (opc == 3 && size > 
1) { - unallocated_encoding(s); - return; - } - is_store = (opc == 0); - is_signed = !is_store && extract32(opc, 1, 1); - is_extended = (size < 3) && extract32(opc, 0, 1); - } +static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + bool is_store, MemOp memop) +{ + TCGv_i64 tcg_rm; - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + + tcg_rm = read_cpu_reg(s, a->rm, 1); + ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); + + tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); + *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); +} - tcg_rm = read_cpu_reg(s, rm, 1); - ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); +static bool trans_LDR(DisasContext *s, arg_ldst *a) +{ + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); + MemOp memop; - tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); + if (extract32(a->opt, 1, 1) == 0) { + return false; + } - memop = finalize_memop(s, size + is_signed * MO_SIGN); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop); + memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); + tcg_rt = cpu_reg(s, a->rt); + do_gpr_ld(s, tcg_rt, clean_addr, memop, + a->ext, true, a->rt, iss_sf, false); + return true; +} - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, memop); - } else { - do_fp_ld(s, rt, clean_addr, memop); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); +static bool trans_STR(DisasContext *s, arg_ldst *a) +{ + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); + MemOp memop; - if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, memop, - true, rt, iss_sf, false); - } else { - do_gpr_ld(s, tcg_rt, clean_addr, memop, - is_extended, true, rt, iss_sf, false); - } + if (extract32(a->opt, 1, 1) == 0) { + return false; } + + memop = finalize_memop(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); + tcg_rt = cpu_reg(s, a->rt); + do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); + return true; } -/* - * Load/store (unsigned immediate) - * - * 31 30 29 27 26 25 24 23 22 21 10 9 5 - * +----+-------+---+-----+-----+------------+-------+------+ - * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | - * +----+-------+---+-----+-----+------------+-------+------+ - * - * For non-vector: - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - * For vector: - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated - * opc<0>: 0 -> store, 1 -> load - * Rn: base address register (inc SP) - * Rt: target register - */ -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static bool trans_LDR_v(DisasContext *s, arg_ldst *a) { - int rn = extract32(insn, 5, 5); - unsigned int imm12 = extract32(insn, 10, 12); - unsigned int offset; TCGv_i64 clean_addr, dirty_addr; - bool is_store; - bool is_signed = false; - bool is_extended = false; MemOp memop; - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4) { - unallocated_encoding(s); - return; - } - is_store = !extract32(opc, 0, 1); - if (!fp_access_check(s)) { - return; - } - } else { - if (size == 
3 && opc == 2) { - /* PRFM - prefetch */ - return; - } - if (opc == 3 && size > 1) { - unallocated_encoding(s); - return; - } - is_store = (opc == 0); - is_signed = !is_store && extract32(opc, 1, 1); - is_extended = (size < 3) && extract32(opc, 0, 1); + if (extract32(a->opt, 1, 1) == 0) { + return false; } - if (rn == 31) { - gen_check_sp_alignment(s); + if (!fp_access_check(s)) { + return true; } - dirty_addr = read_cpu_reg_sp(s, rn, 1); - offset = imm12 << size; - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - - memop = finalize_memop(s, size + is_signed * MO_SIGN); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop); - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, memop); - } else { - do_fp_ld(s, rt, clean_addr, memop); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); - if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false); - } else { - do_gpr_ld(s, tcg_rt, clean_addr, memop, - is_extended, true, rt, iss_sf, false); - } - } + memop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); + do_fp_ld(s, a->rt, clean_addr, memop); + return true; } -/* Atomic memory operations - * - * 31 30 27 26 24 22 21 16 15 12 10 5 0 - * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ - * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | - * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ - * - * Rt: the result register - * Rn: base address or SP - * Rs: the source register for the operation - * V: vector flag (always 0 as of v8.3) - * A: acquire flag - * R: release flag - */ -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static bool trans_STR_v(DisasContext *s, arg_ldst *a) { - int rs = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int o3_opc = extract32(insn, 12, 4); - bool r = extract32(insn, 22, 1); - bool a = extract32(insn, 23, 1); - TCGv_i64 tcg_rs, tcg_rt, clean_addr; - AtomicThreeOpFn *fn = NULL; - MemOp mop = size; + TCGv_i64 clean_addr, dirty_addr; + MemOp memop; - if (is_vector || !dc_isar_feature(aa64_atomics, s)) { - unallocated_encoding(s); - return; - } - switch (o3_opc) { - case 000: /* LDADD */ - fn = tcg_gen_atomic_fetch_add_i64; - break; - case 001: /* LDCLR */ - fn = tcg_gen_atomic_fetch_and_i64; - break; - case 002: /* LDEOR */ - fn = tcg_gen_atomic_fetch_xor_i64; - break; - case 003: /* LDSET */ - fn = tcg_gen_atomic_fetch_or_i64; - break; - case 004: /* LDSMAX */ - fn = tcg_gen_atomic_fetch_smax_i64; - mop |= MO_SIGN; - break; - case 005: /* LDSMIN */ - fn = tcg_gen_atomic_fetch_smin_i64; - mop |= MO_SIGN; - break; - case 006: /* LDUMAX */ - fn = tcg_gen_atomic_fetch_umax_i64; - break; - case 007: /* LDUMIN */ - fn = tcg_gen_atomic_fetch_umin_i64; - break; - case 010: /* SWP */ - fn = tcg_gen_atomic_xchg_i64; - break; - case 014: /* LDAPR, LDAPRH, LDAPRB */ - if (!dc_isar_feature(aa64_rcpc_8_3, s) || - rs != 31 || a != 1 || r != 0) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; + if (extract32(a->opt, 1, 1) == 0) { + return false; } - if (rn == 31) { - gen_check_sp_alignment(s); + if (!fp_access_check(s)) { + return true; } - mop = check_atomic_align(s, rn, mop); - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop); + memop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, 
&dirty_addr, true, memop); + do_fp_st(s, a->rt, clean_addr, memop); + return true; +} - if (o3_opc == 014) { - /* - * LDAPR* are a special case because they are a simple load, not a - * fetch-and-do-something op. - * The architectural consistency requirements here are weaker than - * full load-acquire (we only need "load-acquire processor consistent"), - * but we choose to implement them as full LDAQ. - */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false, - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - return; - } - tcg_rs = read_cpu_reg(s, rs, true); - tcg_rt = cpu_reg(s, rt); +static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, + int sign, bool invert) +{ + MemOp mop = a->sz | sign; + TCGv_i64 clean_addr, tcg_rs, tcg_rt; - if (o3_opc == 1) { /* LDCLR */ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, mop); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + tcg_rs = read_cpu_reg(s, a->rs, true); + tcg_rt = cpu_reg(s, a->rt); + if (invert) { tcg_gen_not_i64(tcg_rs, tcg_rs); } - - /* The tcg atomic primitives are all full barriers. Therefore we + /* + * The tcg atomic primitives are all full barriers. Therefore we * can ignore the Acquire and Release bits of this instruction. */ fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); - if ((mop & MO_SIGN) && size != MO_64) { - tcg_gen_ext32u_i64(tcg_rt, tcg_rt); + if (mop & MO_SIGN) { + switch (a->sz) { + case MO_8: + tcg_gen_ext8u_i64(tcg_rt, tcg_rt); + break; + case MO_16: + tcg_gen_ext16u_i64(tcg_rt, tcg_rt); + break; + case MO_32: + tcg_gen_ext32u_i64(tcg_rt, tcg_rt); + break; + case MO_64: + break; + default: + g_assert_not_reached(); + } } + return true; } -/* - * PAC memory operations - * - * 31 30 27 26 24 22 21 12 11 10 5 0 - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ - * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ - * - * Rt: the result register - * Rn: base address or SP - * V: vector flag (always 0 as of v8.3) - * M: clear for key DA, set for key DB - * W: pre-indexing flag - * S: sign for imm9. 
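do_atomic_ld() above funnels every LSE read-modify-write through a single fetch-and-op; the only twist is the invert flag (used for LDCLR, registered just below), which complements the source so a fetch-and-AND clears the requested bits. The guest-visible operations correspond roughly to these C11 forms (an analogy for the plain, non-acquire/release variants, not the TCG codegen):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t mem;

    /* LDCLR Xs, Xt, [Xn]: Xt = old value, memory = old & ~Xs. */
    static uint64_t ldclr_like(uint64_t xs)
    {
        return atomic_fetch_and_explicit(&mem, ~xs, memory_order_relaxed);
    }

    /* LDADD Xs, Xt, [Xn]: Xt = old value, memory = old + Xs. */
    static uint64_t ldadd_like(uint64_t xs)
    {
        return atomic_fetch_add_explicit(&mem, xs, memory_order_relaxed);
    }
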
- */ -static void disas_ldst_pac(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) +TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) +TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) +TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) +TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) +TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) +TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) +TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) +TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) + +static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) +{ + bool iss_sf = ldst_iss_sf(a->sz, false, false); + TCGv_i64 clean_addr; + MemOp mop; + + if (!dc_isar_feature(aa64_atomics, s) || + !dc_isar_feature(aa64_rcpc_8_3, s)) { + return false; + } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + /* + * LDAPR* are a special case because they are a simple load, not a + * fetch-and-do-something op. + * The architectural consistency requirements here are weaker than + * full load-acquire (we only need "load-acquire processor consistent"), + * but we choose to implement them as full LDAQ. + */ + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, + true, a->rt, iss_sf, true); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; +} + +static bool trans_LDRA(DisasContext *s, arg_LDRA *a) { - int rn = extract32(insn, 5, 5); - bool is_wback = extract32(insn, 11, 1); - bool use_key_a = !extract32(insn, 23, 1); - int offset; TCGv_i64 clean_addr, dirty_addr, tcg_rt; MemOp memop; - if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { - unallocated_encoding(s); - return; + /* Load with pointer authentication */ + if (!dc_isar_feature(aa64_pauth, s)) { + return false; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); if (s->pauth_active) { - if (use_key_a) { + if (!a->m) { gen_helper_autda(dirty_addr, cpu_env, dirty_addr, tcg_constant_i64(0)); } else { @@ -3595,241 +3345,194 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, } } - /* Form the 10-bit signed, scaled offset. */ - offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); - offset = sextract32(offset << size, 0, 10 + size); - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); - memop = finalize_memop(s, size); + memop = finalize_memop(s, MO_64); /* Note that "clean" and "dirty" here refer to TBI not PAC. 
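In the removed disas_ldst_pac() above, the LDRA* byte offset is assembled from the S bit and imm9, scaled by the 8-byte access size and sign-extended; with decodetree the same value now arrives pre-computed in a->imm. Worked through on plain integers (a sketch of the arithmetic only):

    #include <stdint.h>

    /* Byte offset for LDRAA/LDRAB from S (1 bit) and imm9 (9 bits). */
    static int64_t ldra_offset_sketch(unsigned s_bit, unsigned imm9)
    {
        unsigned raw = ((s_bit & 1) << 9) | (imm9 & 0x1ff);  /* 10-bit S:imm9 */
        int64_t off = (raw & 0x200) ? (int64_t)raw - 0x400   /* sign-extend */
                                    : (int64_t)raw;
        return off * 8;                                      /* scale by size */
    }

    /* Example: S=1, imm9=0x1ff -> raw 0x3ff -> -1 -> offset -8 bytes. */
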
*/ clean_addr = gen_mte_check1(s, dirty_addr, false, - is_wback || rn != 31, memop); + a->w || a->rn != 31, memop); - tcg_rt = cpu_reg(s, rt); + tcg_rt = cpu_reg(s, a->rt); do_gpr_ld(s, tcg_rt, clean_addr, memop, - /* extend */ false, /* iss_valid */ !is_wback, - /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); + /* extend */ false, /* iss_valid */ !a->w, + /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); - if (is_wback) { - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); + if (a->w) { + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); } + return true; } -/* - * LDAPR/STLR (unscaled immediate) - * - * 31 30 24 22 21 12 10 5 0 - * +------+-------------+-----+---+--------+-----+----+-----+ - * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | - * +------+-------------+-----+---+--------+-----+----+-----+ - * - * Rt: source or destination register - * Rn: base register - * imm9: unscaled immediate offset - * opc: 00: STLUR*, 01/10/11: various LDAPUR* - * size: size of load/store - */ -static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) +static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int offset = sextract32(insn, 12, 9); - int opc = extract32(insn, 22, 2); - int size = extract32(insn, 30, 2); TCGv_i64 clean_addr, dirty_addr; - bool is_store = false; - bool extend = false; - bool iss_sf; - MemOp mop = size; + MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); if (!dc_isar_feature(aa64_rcpc_8_4, s)) { - unallocated_encoding(s); - return; - } - - switch (opc) { - case 0: /* STLURB */ - is_store = true; - break; - case 1: /* LDAPUR* */ - break; - case 2: /* LDAPURS* 64-bit variant */ - if (size == 3) { - unallocated_encoding(s); - return; - } - mop |= MO_SIGN; - break; - case 3: /* LDAPURS* 32-bit variant */ - if (size > 1) { - unallocated_encoding(s); - return; - } - mop |= MO_SIGN; - extend = true; /* zero-extend 32->64 after signed load */ - break; - default: - g_assert_not_reached(); + return false; } - iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); - - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - mop = check_ordered_align(s, rn, offset, is_store, mop); - - dirty_addr = read_cpu_reg_sp(s, rn, 1); - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + mop = check_ordered_align(s, a->rn, a->imm, false, mop); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); clean_addr = clean_data_tbi(s, dirty_addr); - if (is_store) { - /* Store-Release semantics */ - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); - } else { - /* - * Load-AcquirePC semantics; we implement as the slightly more - * restrictive Load-Acquire. - */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, - extend, true, rt, iss_sf, true); - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } + /* + * Load-AcquirePC semantics; we implement as the slightly more + * restrictive Load-Acquire. 
+ */ + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, + a->rt, iss_sf, true); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; } -/* Load/store register (all forms) */ -static void disas_ldst_reg(DisasContext *s, uint32_t insn) +static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) { - int rt = extract32(insn, 0, 5); - int opc = extract32(insn, 22, 2); - bool is_vector = extract32(insn, 26, 1); - int size = extract32(insn, 30, 2); + TCGv_i64 clean_addr, dirty_addr; + MemOp mop = a->sz; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); - switch (extract32(insn, 24, 2)) { - case 0: - if (extract32(insn, 21, 1) == 0) { - /* Load/store register (unscaled immediate) - * Load/store immediate pre/post-indexed - * Load/store register unprivileged - */ - disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); - return; - } - switch (extract32(insn, 10, 2)) { - case 0: - disas_ldst_atomic(s, insn, size, rt, is_vector); - return; - case 2: - disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); - return; - default: - disas_ldst_pac(s, insn, size, rt, is_vector); - return; - } - break; - case 1: - disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); - return; + if (!dc_isar_feature(aa64_rcpc_8_4, s)) { + return false; } - unallocated_encoding(s); + + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + mop = check_ordered_align(s, a->rn, a->imm, true, mop); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); + clean_addr = clean_data_tbi(s, dirty_addr); + + /* Store-Release semantics */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); + return true; } -/* AdvSIMD load/store multiple structures - * - * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 - * +---+---+---------------+---+-------------+--------+------+------+------+ - * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | - * +---+---+---------------+---+-------------+--------+------+------+------+ - * - * AdvSIMD load/store multiple structures (post-indexed) - * - * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---------------+---+---+---------+--------+------+------+------+ - * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | - * +---+---+---------------+---+---+---------+--------+------+------+------+ - * - * Rt: first (or only) SIMD&FP register to be transferred - * Rn: base address or SP - * Rm (post-index only): post-index register (when !31) or size dependent #imm - */ -static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) +static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 10, 2); - int opcode = extract32(insn, 12, 4); - bool is_store = !extract32(insn, 22, 1); - bool is_postidx = extract32(insn, 23, 1); - bool is_q = extract32(insn, 30, 1); TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian, align, mop; int total; /* total bytes */ int elements; /* elements per vector */ - int rpt; /* num iterations */ - int selem; /* structure elements */ int r; + int size = a->sz; - if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { - unallocated_encoding(s); - return; + if (!a->p && a->rm != 0) { + /* For non-postindexed accesses the Rm field must be 0 */ + return false; + } + if (size == 3 && !a->q && a->selem != 1) { + return false; + } + 
if (!fp_access_check(s)) { + return true; } - if (!is_postidx && rm != 0) { - unallocated_encoding(s); - return; + if (a->rn == 31) { + gen_check_sp_alignment(s); } - /* From the shared decode logic */ - switch (opcode) { - case 0x0: - rpt = 1; - selem = 4; - break; - case 0x2: - rpt = 4; - selem = 1; - break; - case 0x4: - rpt = 1; - selem = 3; - break; - case 0x6: - rpt = 3; - selem = 1; - break; - case 0x7: - rpt = 1; - selem = 1; - break; - case 0x8: - rpt = 1; - selem = 2; - break; - case 0xa: - rpt = 2; - selem = 1; - break; - default: - unallocated_encoding(s); - return; + /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; + if (size == 0) { + endian = MO_LE; } - if (size == 3 && !is_q && selem != 1) { - /* reserved */ - unallocated_encoding(s); - return; + total = a->rpt * a->selem * (a->q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, a->rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, + finalize_memop_asimd(s, size)); + + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + align = MO_ALIGN; + if (a->selem == 1 && endian == MO_LE) { + align = pow2_align(size); + size = 3; } + if (!s->align_mem) { + align = 0; + } + mop = endian | size | align; + elements = (a->q ? 16 : 8) >> size; + tcg_ebytes = tcg_constant_i64(1 << size); + for (r = 0; r < a->rpt; r++) { + int e; + for (e = 0; e < elements; e++) { + int xs; + for (xs = 0; xs < a->selem; xs++) { + int tt = (a->rt + r + xs) % 32; + do_vec_ld(s, tt, e, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + } + } + + /* + * For non-quad operations, setting a slice of the low 64 bits of + * the register clears the high 64 bits (in the ARM ARM pseudocode + * this is implicit in the fact that 'rval' is a 64 bit wide + * variable). For quad operations, we might still need to zero + * the high bits of SVE. + */ + for (r = 0; r < a->rpt * a->selem; r++) { + int tt = (a->rt + r) % 32; + clear_vec_high(s, a->q, tt); + } + + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); + } + } + return true; +} + +static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) +{ + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp endian, align, mop; + + int total; /* total bytes */ + int elements; /* elements per vector */ + int r; + int size = a->sz; + + if (!a->p && a->rm != 0) { + /* For non-postindexed accesses the Rm field must be 0 */ + return false; + } + if (size == 3 && !a->q && a->selem != 1) { + return false; + } if (!fp_access_check(s)) { - return; + return true; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } @@ -3839,22 +3542,22 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - total = rpt * selem * (is_q ? 16 : 8); - tcg_rn = cpu_reg_sp(s, rn); + total = a->rpt * a->selem * (a->q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, a->rn); /* * Issue the MTE check vs the logical repeat count, before we * promote consecutive little-endian elements below. 
*/ - clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, - total, finalize_memop(s, size)); + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, + finalize_memop_asimd(s, size)); /* * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ align = MO_ALIGN; - if (selem == 1 && endian == MO_LE) { + if (a->selem == 1 && endian == MO_LE) { align = pow2_align(size); size = 3; } @@ -3863,359 +3566,333 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) } mop = endian | size | align; - elements = (is_q ? 16 : 8) >> size; + elements = (a->q ? 16 : 8) >> size; tcg_ebytes = tcg_constant_i64(1 << size); - for (r = 0; r < rpt; r++) { + for (r = 0; r < a->rpt; r++) { int e; for (e = 0; e < elements; e++) { int xs; - for (xs = 0; xs < selem; xs++) { - int tt = (rt + r + xs) % 32; - if (is_store) { - do_vec_st(s, tt, e, clean_addr, mop); - } else { - do_vec_ld(s, tt, e, clean_addr, mop); - } + for (xs = 0; xs < a->selem; xs++) { + int tt = (a->rt + r + xs) % 32; + do_vec_st(s, tt, e, clean_addr, mop); tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); } } } - if (!is_store) { - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. - */ - for (r = 0; r < rpt * selem; r++) { - int tt = (rt + r) % 32; - clear_vec_high(s, is_q, tt); + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; +} + +static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) +{ + int xs, total, rt; + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; + + if (!a->p && a->rm != 0) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); - if (is_postidx) { - if (rm == 31) { + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, + total, mop); + + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + do_vec_st(s, rt, a->index, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + + if (a->p) { + if (a->rm == 31) { tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; } -/* AdvSIMD load/store single structure - * - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * - * AdvSIMD load/store single structure (post-indexed) - * - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * - * Rt: first (or only) SIMD&FP register to be transferred - * Rn: base address or SP - * Rm (post-index only): post-index register (when !31) or 
size dependent #imm - * index = encoded in Q:S:size dependent on size - * - * lane_size = encoded in R, opc - * transfer width = encoded in opc, S, size - */ -static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) +static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 10, 2); - int S = extract32(insn, 12, 1); - int opc = extract32(insn, 13, 3); - int R = extract32(insn, 21, 1); - int is_load = extract32(insn, 22, 1); - int is_postidx = extract32(insn, 23, 1); - int is_q = extract32(insn, 30, 1); - - int scale = extract32(opc, 1, 2); - int selem = (extract32(opc, 0, 1) << 1 | R) + 1; - bool replicate = false; - int index = is_q << 3 | S << 2 | size; - int xs, total; + int xs, total, rt; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp mop; - if (extract32(insn, 31, 1)) { - unallocated_encoding(s); - return; + if (!a->p && a->rm != 0) { + return false; } - if (!is_postidx && rm != 0) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; } - switch (scale) { - case 3: - if (!is_load || S) { - unallocated_encoding(s); - return; - } - scale = size; - replicate = true; - break; - case 0: - break; - case 1: - if (extract32(size, 0, 1)) { - unallocated_encoding(s); - return; - } - index >>= 1; - break; - case 2: - if (extract32(size, 1, 1)) { - unallocated_encoding(s); - return; - } - if (!extract32(size, 0, 1)) { - index >>= 2; + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); + + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, + total, mop); + + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + do_vec_ld(s, rt, a->index, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - if (S) { - unallocated_encoding(s); - return; - } - index >>= 3; - scale = 3; + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } - break; - default: - g_assert_not_reached(); } + return true; +} +static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) +{ + int xs, total, rt; + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; + + if (!a->p && a->rm != 0) { + return false; + } if (!fp_access_check(s)) { - return; + return true; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - total = selem << scale; - tcg_rn = cpu_reg_sp(s, rn); - - mop = finalize_memop(s, scale); + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); - clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, mop); - tcg_ebytes = tcg_constant_i64(1 << scale); - for (xs = 0; xs < selem; xs++) { - if (replicate) { - /* Load and replicate to all elements */ - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + /* Load and replicate to all elements */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); - tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), - (is_q + 1) * 8, vec_full_reg_size(s), - tcg_tmp); - } else { - 
/* Load/store one element per register */ - if (is_load) { - do_vec_ld(s, rt, index, clean_addr, mop); - } else { - do_vec_st(s, rt, index, clean_addr, mop); - } - } + tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); + tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), + (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp); tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); - rt = (rt + 1) % 32; } - if (is_postidx) { - if (rm == 31) { + if (a->p) { + if (a->rm == 31) { tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; } -/* - * Load/Store memory tags - * - * 31 30 29 24 22 21 12 10 5 0 - * +-----+-------------+-----+---+------+-----+------+------+ - * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | - * +-----+-------------+-----+---+------+-----+------+------+ - */ -static void disas_ldst_tag(DisasContext *s, uint32_t insn) +static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; - int op2 = extract32(insn, 10, 2); - int op1 = extract32(insn, 22, 2); - bool is_load = false, is_pair = false, is_zero = false, is_mult = false; - int index = 0; TCGv_i64 addr, clean_addr, tcg_rt; + int size = 4 << s->dcz_blocksize; - /* We checked insn bits [29:24,21] in the caller. */ - if (extract32(insn, 30, 2) != 3) { - goto do_unallocated; + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; + } + + if (a->rn == 31) { + gen_check_sp_alignment(s); } + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata) { + gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); + } /* - * @index is a tri-state variable which has 3 states: - * < 0 : post-index, writeback - * = 0 : signed offset - * > 0 : pre-index, writeback + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. */ - switch (op1) { - case 0: - if (op2 != 0) { - /* STG */ - index = op2 - 2; - } else { - /* STZGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = is_zero = true; - } - break; - case 1: - if (op2 != 0) { - /* STZG */ - is_zero = true; - index = op2 - 2; - } else { - /* LDG */ - is_load = true; - } - break; - case 2: - if (op2 != 0) { - /* ST2G */ - is_pair = true; - index = op2 - 2; - } else { - /* STGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = true; - } - break; - case 3: - if (op2 != 0) { - /* STZ2G */ - is_pair = is_zero = true; - index = op2 - 2; - } else { - /* LDGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = is_load = true; - } - break; + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_helper_dc_zva(cpu_env, clean_addr); + return true; +} - default: - do_unallocated: - unallocated_encoding(s); - return; +static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; + + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; } - if (is_mult - ? 
!dc_isar_feature(aa64_mte, s) - : !dc_isar_feature(aa64_mte_insn_reg, s)) { - goto do_unallocated; + if (a->rn == 31) { + gen_check_sp_alignment(s); } - if (rn == 31) { + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata) { + gen_helper_stgm(cpu_env, addr, tcg_rt); + } else { + MMUAccessType acc = MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + } + return true; +} + +static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; + + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; + } + + if (a->rn == 31) { gen_check_sp_alignment(s); } - addr = read_cpu_reg_sp(s, rn, true); - if (index >= 0) { - /* pre-index or signed offset */ - tcg_gen_addi_i64(addr, addr, offset); + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata) { + gen_helper_ldgm(tcg_rt, cpu_env, addr); + } else { + MMUAccessType acc = MMU_DATA_LOAD; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_rt, 0); } + return true; +} - if (is_mult) { - tcg_rt = cpu_reg(s, rt); +static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; - if (is_zero) { - int size = 4 << s->dcz_blocksize; + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + return false; + } - if (s->ata) { - gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); - } - /* - * The non-tags portion of STZGM is mostly like DC_ZVA, - * except the alignment happens before the access. - */ - clean_addr = clean_data_tbi(s, addr); - tcg_gen_andi_i64(clean_addr, clean_addr, -size); - gen_helper_dc_zva(cpu_env, clean_addr); - } else if (s->ata) { - if (is_load) { - gen_helper_ldgm(tcg_rt, cpu_env, addr); - } else { - gen_helper_stgm(cpu_env, addr, tcg_rt); - } - } else { - MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; - int size = 4 << GMID_EL1_BS; + if (a->rn == 31) { + gen_check_sp_alignment(s); + } - clean_addr = clean_data_tbi(s, addr); - tcg_gen_andi_i64(clean_addr, clean_addr, -size); - gen_probe_access(s, clean_addr, acc, size); + addr = read_cpu_reg_sp(s, a->rn, true); + if (!a->p) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, a->imm); + } - if (is_load) { - /* The result tags are zeros. */ - tcg_gen_movi_i64(tcg_rt, 0); - } + tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, a->rt); + if (s->ata) { + gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); + } else { + /* + * Tag access disabled: we must check for aborts on the load + * load from [rn+offset], and then insert a 0 tag into rt. 
+ */ + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_rt, tcg_rt); + } + + if (a->w) { + /* pre-index or post-index */ + if (a->p) { + /* post-index */ + tcg_gen_addi_i64(addr, addr, a->imm); } - return; + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); } + return true; +} - if (is_load) { - tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); - tcg_rt = cpu_reg(s, rt); - if (s->ata) { - gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); +static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) +{ + TCGv_i64 addr, tcg_rt; + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, a->rn, true); + if (!a->p) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, a->imm); + } + tcg_rt = cpu_reg_sp(s, a->rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. + */ + if (is_pair) { + gen_helper_st2g_stub(cpu_env, addr); } else { - clean_addr = clean_data_tbi(s, addr); - gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); - gen_address_with_allocation_tag0(tcg_rt, addr); + gen_helper_stg_stub(cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); + } else { + gen_helper_stg_parallel(cpu_env, addr, tcg_rt); } } else { - tcg_rt = cpu_reg_sp(s, rt); - if (!s->ata) { - /* - * For STG and ST2G, we need to check alignment and probe memory. - * TODO: For STZG and STZ2G, we could rely on the stores below, - * at least for system mode; user-only won't enforce alignment. - */ - if (is_pair) { - gen_helper_st2g_stub(cpu_env, addr); - } else { - gen_helper_stg_stub(cpu_env, addr); - } - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (is_pair) { - gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); - } else { - gen_helper_stg_parallel(cpu_env, addr, tcg_rt); - } + if (is_pair) { + gen_helper_st2g(cpu_env, addr, tcg_rt); } else { - if (is_pair) { - gen_helper_st2g(cpu_env, addr, tcg_rt); - } else { - gen_helper_stg(cpu_env, addr, tcg_rt); - } + gen_helper_stg(cpu_env, addr, tcg_rt); } } @@ -4236,54 +3913,21 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) } } - if (index != 0) { + if (a->w) { /* pre-index or post-index */ - if (index < 0) { + if (a->p) { /* post-index */ - tcg_gen_addi_i64(addr, addr, offset); + tcg_gen_addi_i64(addr, addr, a->imm); } - tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); } + return true; } -/* Loads and stores */ -static void disas_ldst(DisasContext *s, uint32_t insn) -{ - switch (extract32(insn, 24, 6)) { - case 0x08: /* Load/store exclusive */ - disas_ldst_excl(s, insn); - break; - case 0x18: case 0x1c: /* Load register (literal) */ - disas_ld_lit(s, insn); - break; - case 0x28: case 0x29: - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ - disas_ldst_pair(s, insn); - break; - case 0x38: case 0x39: - case 0x3c: case 0x3d: /* Load/store register (all forms) */ - disas_ldst_reg(s, insn); - break; - case 0x0c: /* AdvSIMD load/store multiple structures */ - disas_ldst_multiple_struct(s, insn); - break; - case 0x0d: /* AdvSIMD load/store single structure */ - disas_ldst_single_struct(s, insn); - break; - case 0x19: - if (extract32(insn, 21, 1) != 0) { - disas_ldst_tag(s, insn); - } else if (extract32(insn, 10, 2) == 0) { - 
            disas_ldst_ldapr_stlr(s, insn);
-        } else {
-            unallocated_encoding(s);
-        }
-        break;
-    default:
-        unallocated_encoding(s);
-        break;
-    }
-}
+TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
+TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
+TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
+TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
@@ -14179,15 +13823,6 @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
 {
     switch (extract32(insn, 25, 4)) {
-    case 0xa: case 0xb: /* Branch, exception generation and system insns */
-        disas_b_exc_sys(s, insn);
-        break;
-    case 0x4:
-    case 0x6:
-    case 0xc:
-    case 0xe: /* Loads and stores */
-        disas_ldst(s, insn);
-        break;
     case 0x5:
     case 0xd: /* Data processing - register */
         disas_data_proc_reg(s, insn);
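For readers following the decodetree conversion above: the decode patterns added elsewhere in this series are compiled into a generated decoder that unpacks each instruction's fields into an arg_<NAME> structure (such as the arg_atomic passed to do_atomic_ld()) and invokes the matching trans_<NAME>() hook, which returns false for an unallocated encoding so the caller can fall back to unallocated_encoding(). The TRANS_FEAT(...) lines bind such a hook to a shared helper behind an ISA-feature check, which is why LDADD, LDCLR, LDSET and the other atomics can all share do_atomic_ld() and differ only in the TCG atomic callback, the sign, and the invert flag. The standalone sketch below is not QEMU code and not part of this commit; the toy encoding, the field positions and the *_demo names are invented purely to illustrate the shape of that field-extraction-plus-dispatch pattern:

/*
 * Standalone illustration only -- not the generated decoder and not part
 * of this commit.  Field positions, the opcode check and the "_demo"
 * names are made up for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Field extraction in the style of the translator's extract32(). */
static uint32_t extract_field(uint32_t insn, int start, int length)
{
    return (insn >> start) & (~0u >> (32 - length));
}

/* Struct of decoded fields, analogous to the generated arg_atomic above. */
typedef struct {
    int rs, rn, rt, sz;
} arg_atomic_demo;

/* Hand-written hook, analogous to trans_LDADD()/do_atomic_ld() above. */
static bool trans_LDADD_demo(const arg_atomic_demo *a)
{
    printf("LDADD: rs=%d rn=%d rt=%d size=%d\n", a->rs, a->rn, a->rt, a->sz);
    return true;
}

/* Sketch of the generated dispatch for one made-up pattern. */
static bool decode_demo(uint32_t insn)
{
    if (extract_field(insn, 12, 4) == 0) {   /* pretend this opc selects LDADD */
        arg_atomic_demo a = {
            .rt = extract_field(insn, 0, 5),
            .rn = extract_field(insn, 5, 5),
            .rs = extract_field(insn, 16, 5),
            .sz = extract_field(insn, 30, 2),
        };
        return trans_LDADD_demo(&a);
    }
    return false;   /* caller falls back to unallocated_encoding() in QEMU */
}

int main(void)
{
    /* Toy instruction word: size=3, rs=1, rn=2, rt=3, opc field = 0. */
    uint32_t insn = (3u << 30) | (1u << 16) | (2u << 5) | 3u;
    return decode_demo(insn) ? 0 : 1;
}

Under that model, the body of each trans_* function in the diff is the same work the old hand decoder did (SP alignment check, MTE check, the actual TCG operation); the bit-level field extraction moves into the decode patterns, while checks that depend on feature bits or field combinations stay in the hooks, as the dc_isar_feature() and a->rm != 0 tests above show.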