diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-10-20 21:11:35 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-10-20 21:11:35 +0100 |
commit | ac793156f650ae2d77834932d72224175ee69086 (patch) | |
tree | 573940f3a8b5ad0e08ea71d2f210f776b7620909 | |
parent | 4c41341af76cfc85b5a6c0f87de4838672ab9f89 (diff) | |
parent | 8128c8e8cc9489a8387c74075974f86dc0222e7f (diff) | |
download | qemu-ac793156f650ae2d77834932d72224175ee69086.zip qemu-ac793156f650ae2d77834932d72224175ee69086.tar.gz qemu-ac793156f650ae2d77834932d72224175ee69086.tar.bz2 |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201020-1' into staging
target-arm queue:
* Fix AArch32 SMLAD incorrect setting of Q bit
* AArch32 VCVT fixed-point to float is always round-to-nearest
* strongarm: Fix 'time to transmit a char' unit comment
* Restrict APEI tables generation to the 'virt' machine
* bcm2835: minor code cleanups
* bcm2835: connect all IRQs from SYS_timer device
* correctly flush TLBs when TBI is enabled
* tests/qtest: Add npcm7xx timer test
* loads-stores.rst: add footnote that clarifies GETPC usage
* Fix reported EL for mte_check_fail
* Ignore HCR_EL2.ATA when {E2H,TGE} != 11
* microbit_i2c: Fix coredump when dump-vmstate
* nseries: Fix loading kernel image on n8x0 machines
* Implement v8.1M low-overhead-loops
# gpg: Signature made Tue 20 Oct 2020 21:10:35 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20201020-1: (29 commits)
target/arm: Implement FPSCR.LTPSIZE for M-profile LOB extension
target/arm: Allow M-profile CPUs with FP16 to set FPSCR.FP16
target/arm: Fix has_vfp/has_neon ID reg squashing for M-profile
target/arm: Implement v8.1M low-overhead-loop instructions
target/arm: Implement v8.1M branch-future insns (as NOPs)
target/arm: Don't allow BLX imm for M-profile
target/arm: Make the t32 insn[25:23]=111 group non-overlapping
target/arm: Implement v8.1M conditional-select insns
target/arm: Implement v8.1M NOCP handling
decodetree: Fix codegen for non-overlapping group inside overlapping group
hw/arm/nseries: Fix loading kernel image on n8x0 machines
microbit_i2c: Fix coredump when dump-vmstate
target/arm: Ignore HCR_EL2.ATA when {E2H,TGE} != 11
target/arm: Fix reported EL for mte_check_fail
target/arm: Remove redundant mmu_idx lookup
hw/intc/bcm2836_control: Use IRQ definitions instead of magic numbers
hw/intc/bcm2835_ic: Trace GPU/CPU IRQ handlers
loads-stores.rst: add footnote that clarifies GETPC usage
tests/qtest: Add npcm7xx timer test
target/arm: Use tlb_flush_page_bits_by_mmuidx*
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
29 files changed, 1405 insertions, 148 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 2bbbb3a..42ab79c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -409,12 +409,21 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu) tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); } +static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, + target_ulong page, target_ulong mask) +{ + page &= mask; + mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; + + return (page == (tlb_entry->addr_read & mask) || + page == (tlb_addr_write(tlb_entry) & mask) || + page == (tlb_entry->addr_code & mask)); +} + static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { - return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_addr_write(tlb_entry), page) || - tlb_hit_page(tlb_entry->addr_code, page); + return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); } /** @@ -427,31 +436,45 @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) } /* Called with tlb_c.lock held */ -static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, - target_ulong page) +static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, + target_ulong page, + target_ulong mask) { - if (tlb_hit_page_anyprot(tlb_entry, page)) { + if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); return true; } return false; } +static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, + target_ulong page) +{ + return tlb_flush_entry_mask_locked(tlb_entry, page, -1); +} + /* Called with tlb_c.lock held */ -static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, - target_ulong page) +static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, + target_ulong page, + target_ulong mask) { CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; int k; assert_cpu_is_self(env_cpu(env)); for (k = 0; k < CPU_VTLB_SIZE; k++) { - if (tlb_flush_entry_locked(&d->vtable[k], page)) { + if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) { tlb_n_used_entries_dec(env, mmu_idx); } } } +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, + target_ulong page) +{ + tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); +} + static void tlb_flush_page_locked(CPUArchState *env, int midx, target_ulong page) { @@ -666,6 +689,240 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); } +static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, + target_ulong page, unsigned bits) +{ + CPUTLBDesc *d = &env_tlb(env)->d[midx]; + CPUTLBDescFast *f = &env_tlb(env)->f[midx]; + target_ulong mask = MAKE_64BIT_MASK(0, bits); + + /* + * If @bits is smaller than the tlb size, there may be multiple entries + * within the TLB; otherwise all addresses that match under @mask hit + * the same TLB entry. + * + * TODO: Perhaps allow bits to be a few bits less than the size. + * For now, just flush the entire TLB. + */ + if (mask < f->mask) { + tlb_debug("forcing full flush midx %d (" + TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + midx, page, mask); + tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); + return; + } + + /* Check if we need to flush due to large pages. */ + if ((page & d->large_page_mask) == d->large_page_addr) { + tlb_debug("forcing full flush midx %d (" + TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + midx, d->large_page_addr, d->large_page_mask); + tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); + return; + } + + if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) { + tlb_n_used_entries_dec(env, midx); + } + tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); +} + +typedef struct { + target_ulong addr; + uint16_t idxmap; + uint16_t bits; +} TLBFlushPageBitsByMMUIdxData; + +static void +tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, + TLBFlushPageBitsByMMUIdxData d) +{ + CPUArchState *env = cpu->env_ptr; + int mmu_idx; + + assert_cpu_is_self(cpu); + + tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n", + d.addr, d.bits, d.idxmap); + + qemu_spin_lock(&env_tlb(env)->c.lock); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + if ((d.idxmap >> mmu_idx) & 1) { + tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits); + } + } + qemu_spin_unlock(&env_tlb(env)->c.lock); + + tb_flush_jmp_cache(cpu, d.addr); +} + +static bool encode_pbm_to_runon(run_on_cpu_data *out, + TLBFlushPageBitsByMMUIdxData d) +{ + /* We need 6 bits to hold to hold @bits up to 63. */ + if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) { + *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits); + return true; + } + return false; +} + +static TLBFlushPageBitsByMMUIdxData +decode_runon_to_pbm(run_on_cpu_data data) +{ + target_ulong addr_map_bits = (target_ulong) data.target_ptr; + return (TLBFlushPageBitsByMMUIdxData){ + .addr = addr_map_bits & TARGET_PAGE_MASK, + .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6, + .bits = addr_map_bits & 0x3f + }; +} + +static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data runon) +{ + tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon)); +} + +static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, + run_on_cpu_data data) +{ + TLBFlushPageBitsByMMUIdxData *d = data.host_ptr; + tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); + g_free(d); +} + +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx(cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx(cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (qemu_cpu_is_self(cpu)) { + tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); + } else if (encode_pbm_to_runon(&runon, d)) { + async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + } else { + TLBFlushPageBitsByMMUIdxData *p + = g_new(TLBFlushPageBitsByMMUIdxData, 1); + + /* Otherwise allocate a structure, freed by the worker. */ + *p = d; + async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (encode_pbm_to_runon(&runon, d)) { + flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + } else { + CPUState *dst_cpu; + TLBFlushPageBitsByMMUIdxData *p; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_run_on_cpu(dst_cpu, + tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } + } + } + + tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (encode_pbm_to_runon(&runon, d)) { + flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, + runon); + } else { + CPUState *dst_cpu; + TLBFlushPageBitsByMMUIdxData *p; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } + } + + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } +} + /* update the TLBs so that writes to code in the virtual page 'addr' can be detected */ void tlb_protect_code(ram_addr_t ram_addr) diff --git a/default-configs/devices/arm-softmmu.mak b/default-configs/devices/arm-softmmu.mak index 9a94ebd..08a3212 100644 --- a/default-configs/devices/arm-softmmu.mak +++ b/default-configs/devices/arm-softmmu.mak @@ -43,4 +43,3 @@ CONFIG_FSL_IMX7=y CONFIG_FSL_IMX6UL=y CONFIG_SEMIHOSTING=y CONFIG_ALLWINNER_H3=y -CONFIG_ACPI_APEI=y diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index 9a944ef..59c1225 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -93,7 +93,13 @@ guest CPU state in case of a guest CPU exception. This is passed to ``cpu_restore_state()``. Therefore the value should either be 0, to indicate that the guest CPU state is already synchronized, or the result of ``GETPC()`` from the top level ``HELPER(foo)`` -function, which is a return address into the generated code. +function, which is a return address into the generated code [#gpc]_. + +.. [#gpc] Note that ``GETPC()`` should be used with great care: calling + it in other functions that are *not* the top level + ``HELPER(foo)`` will cause unexpected behavior. Instead, the + value of ``GETPC()`` should be read from the helper and passed + if needed to the functions that the helper calls. Function names follow the pattern: diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index f303c6b..7d04082 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -26,6 +26,7 @@ config ARM_VIRT select ACPI_MEMORY_HOTPLUG select ACPI_HW_REDUCED select ACPI_NVDIMM + select ACPI_APEI config CHEETAH bool diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 15c5c72..48909a4 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -171,8 +171,17 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&s->peri_mr, ST_OFFSET, sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systmr), 0)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 0, - qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_ARM_IRQ, - INTERRUPT_ARM_TIMER)); + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 1, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER1)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 2, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER2)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 3, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER3)); /* UART0 */ qdev_prop_set_chr(DEVICE(&s->uart0), "chardev", serial_hd(0)); diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index e48092c..76fd7fe 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -1318,6 +1318,7 @@ static void n8x0_init(MachineState *machine, g_free(sz); exit(EXIT_FAILURE); } + binfo->ram_size = machine->ram_size; memory_region_add_subregion(get_system_memory(), OMAP2_Q2_BASE, machine->ram); diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index d7133ee..ca7c385 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -935,7 +935,7 @@ struct StrongARMUARTState { uint8_t rx_start; uint8_t rx_len; - uint64_t char_transmit_time; /* time to transmit a char in ticks*/ + uint64_t char_transmit_time; /* time to transmit a char in nanoseconds */ bool wait_break_end; QEMUTimer *rx_timeout_timer; QEMUTimer *tx_timer; diff --git a/hw/i2c/microbit_i2c.c b/hw/i2c/microbit_i2c.c index 8024739..e92f9f8 100644 --- a/hw/i2c/microbit_i2c.c +++ b/hw/i2c/microbit_i2c.c @@ -83,6 +83,7 @@ static const VMStateDescription microbit_i2c_vmstate = { .fields = (VMStateField[]) { VMSTATE_UINT32_ARRAY(regs, MicrobitI2CState, MICROBIT_I2C_NREGS), VMSTATE_UINT32(read_idx, MicrobitI2CState), + VMSTATE_END_OF_LIST() }, }; diff --git a/hw/intc/bcm2835_ic.c b/hw/intc/bcm2835_ic.c index 53ab8f5..9000d99 100644 --- a/hw/intc/bcm2835_ic.c +++ b/hw/intc/bcm2835_ic.c @@ -18,6 +18,7 @@ #include "migration/vmstate.h" #include "qemu/log.h" #include "qemu/module.h" +#include "trace.h" #define GPU_IRQS 64 #define ARM_IRQS 8 @@ -51,7 +52,6 @@ static void bcm2835_ic_update(BCM2835ICState *s) set = (s->gpu_irq_level & s->gpu_irq_enable) || (s->arm_irq_level & s->arm_irq_enable); qemu_set_irq(s->irq, set); - } static void bcm2835_ic_set_gpu_irq(void *opaque, int irq, int level) @@ -59,6 +59,7 @@ static void bcm2835_ic_set_gpu_irq(void *opaque, int irq, int level) BCM2835ICState *s = opaque; assert(irq >= 0 && irq < 64); + trace_bcm2835_ic_set_gpu_irq(irq, level); s->gpu_irq_level = deposit64(s->gpu_irq_level, irq, 1, level != 0); bcm2835_ic_update(s); } @@ -68,6 +69,7 @@ static void bcm2835_ic_set_arm_irq(void *opaque, int irq, int level) BCM2835ICState *s = opaque; assert(irq >= 0 && irq < 8); + trace_bcm2835_ic_set_cpu_irq(irq, level); s->arm_irq_level = deposit32(s->arm_irq_level, irq, 1, level != 0); bcm2835_ic_update(s); } diff --git a/hw/intc/bcm2836_control.c b/hw/intc/bcm2836_control.c index 53dba00..2ead76f 100644 --- a/hw/intc/bcm2836_control.c +++ b/hw/intc/bcm2836_control.c @@ -157,22 +157,22 @@ static void bcm2836_control_set_local_irq(void *opaque, int core, int local_irq, static void bcm2836_control_set_local_irq0(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 0, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTPSIRQ, level); } static void bcm2836_control_set_local_irq1(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 1, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTPNSIRQ, level); } static void bcm2836_control_set_local_irq2(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 2, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTHPIRQ, level); } static void bcm2836_control_set_local_irq3(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 3, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTVIRQ, level); } static void bcm2836_control_set_gpu_irq(void *opaque, int irq, int level) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 527c3f7..22782b3 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -199,3 +199,7 @@ nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg wri heathrow_write(uint64_t addr, unsigned int n, uint64_t value) "0x%"PRIx64" %u: 0x%"PRIx64 heathrow_read(uint64_t addr, unsigned int n, uint64_t value) "0x%"PRIx64" %u: 0x%"PRIx64 heathrow_set_irq(int num, int level) "set_irq: num=0x%02x level=%d" + +# bcm2835_ic.c +bcm2835_ic_set_gpu_irq(int irq, int level) "GPU irq #%d level %d" +bcm2835_ic_set_cpu_irq(int irq, int level) "CPU irq #%d level %d" diff --git a/hw/timer/bcm2835_systmr.c b/hw/timer/bcm2835_systmr.c index 3387a62..67669a5 100644 --- a/hw/timer/bcm2835_systmr.c +++ b/hw/timer/bcm2835_systmr.c @@ -28,20 +28,13 @@ REG32(COMPARE1, 0x10) REG32(COMPARE2, 0x14) REG32(COMPARE3, 0x18) -static void bcm2835_systmr_update_irq(BCM2835SystemTimerState *s) +static void bcm2835_systmr_timer_expire(void *opaque) { - bool enable = !!s->reg.status; + BCM2835SystemTimerCompare *tmr = opaque; - trace_bcm2835_systmr_irq(enable); - qemu_set_irq(s->irq, enable); -} - -static void bcm2835_systmr_update_compare(BCM2835SystemTimerState *s, - unsigned timer_index) -{ - /* TODO fow now, since neither Linux nor U-boot use these timers. */ - qemu_log_mask(LOG_UNIMP, "COMPARE register %u not implemented\n", - timer_index); + trace_bcm2835_systmr_timer_expired(tmr->id); + tmr->state->reg.ctrl_status |= 1 << tmr->id; + qemu_set_irq(tmr->irq, 1); } static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, @@ -52,7 +45,7 @@ static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, switch (offset) { case A_CTRL_STATUS: - r = s->reg.status; + r = s->reg.ctrl_status; break; case A_COMPARE0 ... A_COMPARE3: r = s->reg.compare[(offset - A_COMPARE0) >> 2]; @@ -75,19 +68,33 @@ static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, } static void bcm2835_systmr_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) + uint64_t value64, unsigned size) { BCM2835SystemTimerState *s = BCM2835_SYSTIMER(opaque); + int index; + uint32_t value = value64; + uint32_t triggers_delay_us; + uint64_t now; trace_bcm2835_systmr_write(offset, value); switch (offset) { case A_CTRL_STATUS: - s->reg.status &= ~value; /* Ack */ - bcm2835_systmr_update_irq(s); + s->reg.ctrl_status &= ~value; /* Ack */ + for (index = 0; index < ARRAY_SIZE(s->tmr); index++) { + if (extract32(value, index, 1)) { + trace_bcm2835_systmr_irq_ack(index); + qemu_set_irq(s->tmr[index].irq, 0); + } + } break; case A_COMPARE0 ... A_COMPARE3: - s->reg.compare[(offset - A_COMPARE0) >> 2] = value; - bcm2835_systmr_update_compare(s, (offset - A_COMPARE0) >> 2); + index = (offset - A_COMPARE0) >> 2; + s->reg.compare[index] = value; + now = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL); + /* Compare lower 32-bits of the free-running counter. */ + triggers_delay_us = value - now; + trace_bcm2835_systmr_run(index, triggers_delay_us); + timer_mod(&s->tmr[index].timer, now + triggers_delay_us); break; case A_COUNTER_LOW: case A_COUNTER_HIGH: @@ -125,7 +132,14 @@ static void bcm2835_systmr_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->iomem, OBJECT(dev), &bcm2835_systmr_ops, s, "bcm2835-sys-timer", 0x20); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); - sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); + + for (size_t i = 0; i < ARRAY_SIZE(s->tmr); i++) { + s->tmr[i].id = i; + s->tmr[i].state = s; + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->tmr[i].irq); + timer_init_us(&s->tmr[i].timer, QEMU_CLOCK_VIRTUAL, + bcm2835_systmr_timer_expire, &s->tmr[i]); + } } static const VMStateDescription bcm2835_systmr_vmstate = { @@ -133,8 +147,9 @@ static const VMStateDescription bcm2835_systmr_vmstate = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(reg.status, BCM2835SystemTimerState), - VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, 4), + VMSTATE_UINT32(reg.ctrl_status, BCM2835SystemTimerState), + VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, + BCM2835_SYSTIMER_COUNT), VMSTATE_END_OF_LIST() } }; diff --git a/hw/timer/trace-events b/hw/timer/trace-events index b996d99..7a4326d 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -77,9 +77,11 @@ nrf51_timer_write(uint8_t timer_id, uint64_t addr, uint32_t value, unsigned size nrf51_timer_set_count(uint8_t timer_id, uint8_t counter_id, uint32_t value) "timer %u counter %u count 0x%" PRIx32 # bcm2835_systmr.c -bcm2835_systmr_irq(bool enable) "timer irq state %u" +bcm2835_systmr_timer_expired(unsigned id) "timer #%u expired" +bcm2835_systmr_irq_ack(unsigned id) "timer #%u acked" bcm2835_systmr_read(uint64_t offset, uint64_t data) "timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 -bcm2835_systmr_write(uint64_t offset, uint64_t data) "timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 +bcm2835_systmr_write(uint64_t offset, uint32_t data) "timer write: offset 0x%" PRIx64 " data 0x%" PRIx32 +bcm2835_systmr_run(unsigned id, uint64_t delay_us) "timer #%u expiring in %"PRIu64" us" # avr_timer16.c avr_timer16_read(uint8_t addr, uint8_t value) "timer16 read addr:%u value:%u" diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 66f9b4c..4707ac1 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -251,6 +251,25 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap); * depend on when the guests translation ends the TB. */ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap); + +/** + * tlb_flush_page_bits_by_mmuidx + * @cpu: CPU whose TLB should be flushed + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of mmu indexes to flush + * @bits: number of significant bits in address + * + * Similar to tlb_flush_page_mask, but with a bitmap of indexes. + */ +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); + +/* Similarly, with broadcast and syncing. */ +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced + (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); + /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for @@ -337,6 +356,23 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap) { } +static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void +tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) +{ +} #endif /** * probe_access: diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h index 7ce8f6e..bd3097d 100644 --- a/include/hw/timer/bcm2835_systmr.h +++ b/include/hw/timer/bcm2835_systmr.h @@ -11,23 +11,32 @@ #include "hw/sysbus.h" #include "hw/irq.h" +#include "qemu/timer.h" #include "qom/object.h" #define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer" OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SystemTimerState, BCM2835_SYSTIMER) +#define BCM2835_SYSTIMER_COUNT 4 + +typedef struct { + unsigned id; + QEMUTimer timer; + qemu_irq irq; + BCM2835SystemTimerState *state; +} BCM2835SystemTimerCompare; + struct BCM2835SystemTimerState { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem; - qemu_irq irq; - struct { - uint32_t status; - uint32_t compare[4]; + uint32_t ctrl_status; + uint32_t compare[BCM2835_SYSTIMER_COUNT]; } reg; + BCM2835SystemTimerCompare tmr[BCM2835_SYSTIMER_COUNT]; }; #endif diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 60fd3b5e..c1bf3cf 100644 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -548,7 +548,7 @@ class Tree: output(ind, ' /* ', str_match_bits(innerbits, innermask), ' */\n') s.output_code(i + 4, extracted, innerbits, innermask) - output(ind, ' return false;\n') + output(ind, ' break;\n') output(ind, '}\n') # end Tree diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 0563198..07492e9 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -255,6 +255,15 @@ static void arm_cpu_reset(DeviceState *dev) uint8_t *rom; uint32_t vecbase; + if (cpu_isar_feature(aa32_lob, cpu)) { + /* + * LTPSIZE is constant 4 if MVE not implemented, and resets + * to an UNKNOWN value if MVE is implemented. We choose to + * always reset to 4. + */ + env->v7m.ltpsize = 4; + } + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { env->v7m.secure = true; } else { @@ -1429,17 +1438,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, FPSP, 0); u = FIELD_DP32(u, MVFR0, FPDP, 0); - u = FIELD_DP32(u, MVFR0, FPTRAP, 0); u = FIELD_DP32(u, MVFR0, FPDIVIDE, 0); u = FIELD_DP32(u, MVFR0, FPSQRT, 0); - u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); u = FIELD_DP32(u, MVFR0, FPROUND, 0); + if (!arm_feature(env, ARM_FEATURE_M)) { + u = FIELD_DP32(u, MVFR0, FPTRAP, 0); + u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); + } cpu->isar.mvfr0 = u; u = cpu->isar.mvfr1; u = FIELD_DP32(u, MVFR1, FPFTZ, 0); u = FIELD_DP32(u, MVFR1, FPDNAN, 0); u = FIELD_DP32(u, MVFR1, FPHP, 0); + if (arm_feature(env, ARM_FEATURE_M)) { + u = FIELD_DP32(u, MVFR1, FP16, 0); + } cpu->isar.mvfr1 = u; u = cpu->isar.mvfr2; @@ -1475,16 +1489,18 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = FIELD_DP32(u, ID_ISAR6, FHM, 0); cpu->isar.id_isar6 = u; - u = cpu->isar.mvfr1; - u = FIELD_DP32(u, MVFR1, SIMDLS, 0); - u = FIELD_DP32(u, MVFR1, SIMDINT, 0); - u = FIELD_DP32(u, MVFR1, SIMDSP, 0); - u = FIELD_DP32(u, MVFR1, SIMDHP, 0); - cpu->isar.mvfr1 = u; - - u = cpu->isar.mvfr2; - u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); - cpu->isar.mvfr2 = u; + if (!arm_feature(env, ARM_FEATURE_M)) { + u = cpu->isar.mvfr1; + u = FIELD_DP32(u, MVFR1, SIMDLS, 0); + u = FIELD_DP32(u, MVFR1, SIMDINT, 0); + u = FIELD_DP32(u, MVFR1, SIMDSP, 0); + u = FIELD_DP32(u, MVFR1, SIMDHP, 0); + cpu->isar.mvfr1 = u; + + u = cpu->isar.mvfr2; + u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); + cpu->isar.mvfr2 = u; + } } if (!cpu->has_neon && !cpu->has_vfp) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index cfff1b5..49cd5ca 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -549,6 +549,7 @@ typedef struct CPUARMState { uint32_t fpdscr[M_REG_NUM_BANKS]; uint32_t cpacr[M_REG_NUM_BANKS]; uint32_t nsacr; + int ltpsize; } v7m; /* Information associated with an exception about to be taken: @@ -1985,6 +1986,7 @@ enum arm_features { ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ + ARM_FEATURE_V8_1M, /* M profile extras only in v8.1M and later */ }; static inline int arm_feature(CPUARMState *env, int feature) @@ -3472,6 +3474,12 @@ static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; } +static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) +{ + /* (M-profile) low-overhead loops and branch future */ + return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; +} + static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; diff --git a/target/arm/helper.c b/target/arm/helper.c index cd0779f..97bb6b8 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -50,6 +50,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, #endif static void switch_mode(CPUARMState *env, int mode); +static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); static int vfp_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) { @@ -4457,6 +4458,33 @@ static int vae1_tlbmask(CPUARMState *env) } } +/* Return 56 if TBI is enabled, 64 otherwise. */ +static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, + uint64_t addr) +{ + uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; + int tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + int select = extract64(addr, 55, 1); + + return (tbi >> select) & 1 ? 56 : 64; +} + +static int vae1_tlbbits(CPUARMState *env, uint64_t addr) +{ + ARMMMUIdx mmu_idx; + + /* Only the regime of the mmu_idx below is significant. */ + if (arm_is_secure_below_el3(env)) { + mmu_idx = ARMMMUIdx_SE10_0; + } else if ((env->cp15.hcr_el2 & (HCR_E2H | HCR_TGE)) + == (HCR_E2H | HCR_TGE)) { + mmu_idx = ARMMMUIdx_E20_0; + } else { + mmu_idx = ARMMMUIdx_E10_0; + } + return tlbbits_for_regime(env, mmu_idx, addr); +} + static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -4593,8 +4621,9 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); int mask = vae1_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); } static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4608,11 +4637,12 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); int mask = vae1_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); if (tlb_force_broadcast(env)) { - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); } } @@ -4621,9 +4651,10 @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E2, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E2); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_E2, bits); } static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4631,9 +4662,10 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = tlbbits_for_regime(env, ARMMMUIdx_SE3, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_SE3); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_SE3, bits); } static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6874,10 +6906,11 @@ static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, { int el = arm_current_el(env); - if (el < 2 && - arm_feature(env, ARM_FEATURE_EL2) && - !(arm_hcr_el2_eff(env) & HCR_ATA)) { - return CP_ACCESS_TRAP_EL2; + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + uint64_t hcr = arm_hcr_el2_eff(env); + if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { + return CP_ACCESS_TRAP_EL2; + } } if (el < 3 && arm_feature(env, ARM_FEATURE_EL3) && diff --git a/target/arm/helper.h b/target/arm/helper.h index 8defd7c..774d2cd 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -213,6 +213,19 @@ DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr) DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr) +DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, ptr) + DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr) DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index ae99725..5460678 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1252,10 +1252,11 @@ static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, && !(env->cp15.scr_el3 & SCR_ATA)) { return false; } - if (el < 2 - && arm_feature(env, ARM_FEATURE_EL2) - && !(arm_hcr_el2_eff(env) & HCR_ATA)) { - return false; + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + uint64_t hcr = arm_hcr_el2_eff(env); + if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { + return false; + } } sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); return sctlr != 0; diff --git a/target/arm/m-nocp.decode b/target/arm/m-nocp.decode index 7182d7d..28c8ac6 100644 --- a/target/arm/m-nocp.decode +++ b/target/arm/m-nocp.decode @@ -29,14 +29,16 @@ # If the coprocessor is not present or disabled then we will generate # the NOCP exception; otherwise we let the insn through to the main decode. +&nocp cp + { # Special cases which do not take an early NOCP: VLLDM and VLSTM VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 0000 0000 # TODO: VSCCLRM (new in v8.1M) is similar: #VSCCLRM 1110 1100 1-01 1111 ---- 1011 ---- ---0 - NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- - NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- - # TODO: From v8.1M onwards we will also want this range to NOCP - #NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- cp=10 + NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- &nocp + NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- &nocp + # From v8.1M onwards this range will also NOCP: + NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- &nocp cp=10 } diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 5615c67..153bd1e 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -525,14 +525,10 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, reg_el = regime_el(env, arm_mmu_idx); sctlr = env->cp15.sctlr_el[reg_el]; - switch (arm_mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - el = 0; + el = arm_current_el(env); + if (el == 0) { tcf = extract64(sctlr, 38, 2); - break; - default: - el = reg_el; + } else { tcf = extract64(sctlr, 40, 2); } @@ -563,8 +559,7 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, case 2: /* Tag check fail causes asynchronous flag set. */ - mmu_idx = arm_mmu_idx_el(env, el); - if (regime_has_2_ranges(mmu_idx)) { + if (regime_has_2_ranges(arm_mmu_idx)) { select = extract64(dirty_ptr, 55, 1); } else { select = 0; diff --git a/target/arm/t32.decode b/target/arm/t32.decode index 7069d82..8152739 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -90,6 +90,9 @@ SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi } RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi +# v8.1M CSEL and friends +CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4 + # Data-processing (register-shifted register) MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \ @@ -293,8 +296,8 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm { # Group insn[25:23] = 111, which is cond=111x for the branch below, # or unconditional, which would be illegal for the branch. - { - # Hints + [ + # Hints, and CPS { YIELD 1111 0011 1010 1111 1000 0000 0000 0001 WFE 1111 0011 1010 1111 1000 0000 0000 0010 @@ -307,20 +310,18 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm # The canonical nop ends in 0000 0000, but the whole rest # of the space is "reserved hint, behaves as nop". NOP 1111 0011 1010 1111 1000 0000 ---- ---- - } - # If imod == '00' && M == '0' then SEE "Hint instructions", above. - CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \ + # If imod == '00' && M == '0' then SEE "Hint instructions", above. + CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \ &cps + } # Miscellaneous control - [ - CLREX 1111 0011 1011 1111 1000 1111 0010 1111 - DSB 1111 0011 1011 1111 1000 1111 0100 ---- - DMB 1111 0011 1011 1111 1000 1111 0101 ---- - ISB 1111 0011 1011 1111 1000 1111 0110 ---- - SB 1111 0011 1011 1111 1000 1111 0111 0000 - ] + CLREX 1111 0011 1011 1111 1000 1111 0010 1111 + DSB 1111 0011 1011 1111 1000 1111 0100 ---- + DMB 1111 0011 1011 1111 1000 1111 0101 ---- + ISB 1111 0011 1011 1111 1000 1111 0110 ---- + SB 1111 0011 1011 1111 1000 1111 0111 0000 # Note that the v7m insn overlaps both the normal and banked insn. { @@ -348,7 +349,7 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm HVC 1111 0111 1110 .... 1000 .... .... .... \ &i imm=%imm16_16_0 UDF 1111 0111 1111 ---- 1010 ---- ---- ---- - } + ] B_cond_thumb 1111 0. cond:4 ...... 10.0 ............ &ci imm=%imm21 } @@ -647,4 +648,23 @@ MRC 1110 1110 ... 1 .... .... .... ... 1 .... @mcr B 1111 0. .......... 10.1 ............ @branch24 BL 1111 0. .......... 11.1 ............ @branch24 -BLX_i 1111 0. .......... 11.0 ............ @branch24 +{ + # BLX_i is non-M-profile only + BLX_i 1111 0. .......... 11.0 ............ @branch24 + # M-profile only: loop and branch insns + [ + # All these BF insns have boff != 0b0000; we NOP them all + BF 1111 0 boff:4 ------- 1100 - ---------- 1 # BFL + BF 1111 0 boff:4 0 ------ 1110 - ---------- 1 # BFCSEL + BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF + BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX + ] + [ + # LE and WLS immediate + %lob_imm 1:10 11:1 !function=times_2 + + DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 + WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm + LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm + ] +} diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 28e0dba..a7ed9bc 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -3141,16 +3141,16 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtoh(vd, vd, shift, fpst); + gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltoh(vd, vd, shift, fpst); + gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtoh(vd, vd, shift, fpst); + gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultoh(vd, vd, shift, fpst); + gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst); @@ -3200,16 +3200,16 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtos(vd, vd, shift, fpst); + gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltos(vd, vd, shift, fpst); + gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtos(vd, vd, shift, fpst); + gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultos(vd, vd, shift, fpst); + gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst); @@ -3265,16 +3265,16 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtod(vd, vd, shift, fpst); + gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltod(vd, vd, shift, fpst); + gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtod(vd, vd, shift, fpst); + gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultod(vd, vd, shift, fpst); + gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst); @@ -3459,7 +3459,7 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) return true; } -static bool trans_NOCP(DisasContext *s, arg_NOCP *a) +static bool trans_NOCP(DisasContext *s, arg_nocp *a) { /* * Handle M-profile early check for disabled coprocessor: @@ -3472,7 +3472,11 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a) if (a->cp == 11) { a->cp = 10; } - /* TODO: in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */ + if (arm_dc_feature(s, ARM_FEATURE_V8_1M) && + (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) { + /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */ + a->cp = 10; + } if (a->cp != 10) { gen_exception_insn(s, s->pc_curr, EXCP_NOCP, @@ -3489,6 +3493,15 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a) return false; } +static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a) +{ + /* This range needs a coprocessor check for v8.1M and later only */ + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + return false; + } + return trans_NOCP(s, a); +} + static bool trans_VINS(DisasContext *s, arg_VINS *a) { TCGv_i32 rd, rm; diff --git a/target/arm/translate.c b/target/arm/translate.c index d34c1d3..38371db 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -2490,17 +2490,23 @@ static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) s->base.is_jmp = DISAS_NORETURN; } -static inline void gen_jmp (DisasContext *s, uint32_t dest) +/* Jump, specifying which TB number to use if we gen_goto_tb() */ +static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) { if (unlikely(is_singlestepping(s))) { /* An indirect jump so that we still trigger the debug exception. */ gen_set_pc_im(s, dest); s->base.is_jmp = DISAS_JUMP; } else { - gen_goto_tb(s, 0, dest); + gen_goto_tb(s, tbno, dest); } } +static inline void gen_jmp(DisasContext *s, uint32_t dest) +{ + gen_jmp_tb(s, dest, 0); +} + static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y) { if (x) @@ -7401,22 +7407,60 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) gen_smul_dual(t1, t2); if (sub) { - /* This subtraction cannot overflow. */ - tcg_gen_sub_i32(t1, t1, t2); - } else { /* - * This addition cannot overflow 32 bits; however it may - * overflow considered as a signed operation, in which case - * we must set the Q flag. + * This subtraction cannot overflow, so we can do a simple + * 32-bit subtraction and then a possible 32-bit saturating + * addition of Ra. */ - gen_helper_add_setq(t1, cpu_env, t1, t2); - } - tcg_temp_free_i32(t2); + tcg_gen_sub_i32(t1, t1, t2); + tcg_temp_free_i32(t2); - if (a->ra != 15) { - t2 = load_reg(s, a->ra); + if (a->ra != 15) { + t2 = load_reg(s, a->ra); + gen_helper_add_setq(t1, cpu_env, t1, t2); + tcg_temp_free_i32(t2); + } + } else if (a->ra == 15) { + /* Single saturation-checking addition */ gen_helper_add_setq(t1, cpu_env, t1, t2); tcg_temp_free_i32(t2); + } else { + /* + * We need to add the products and Ra together and then + * determine whether the final result overflowed. Doing + * this as two separate add-and-check-overflow steps incorrectly + * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1. + * Do all the arithmetic at 64-bits and then check for overflow. + */ + TCGv_i64 p64, q64; + TCGv_i32 t3, qf, one; + + p64 = tcg_temp_new_i64(); + q64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(p64, t1); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + load_reg_var(s, t2, a->ra); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + tcg_temp_free_i64(q64); + + tcg_gen_extr_i64_i32(t1, t2, p64); + tcg_temp_free_i64(p64); + /* + * t1 is the low half of the result which goes into Rd. + * We have overflow and must set Q if the high half (t2) + * is different from the sign-extension of t1. + */ + t3 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t3, t1, 31); + qf = load_cpu_field(QF); + one = tcg_const_i32(1); + tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf); + store_cpu_field(qf, QF); + tcg_temp_free_i32(one); + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); } store_reg(s, a->rd, t1); return true; @@ -7880,6 +7924,14 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a) { TCGv_i32 tmp; + /* + * BLX <imm> would be useless on M-profile; the encoding space + * is used for other insns from v8.1M onward, and UNDEFs before that. + */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + return false; + } + /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */ if (s->thumb && (a->imm & 2)) { return false; @@ -7925,6 +7977,109 @@ static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a) return true; } +static bool trans_BF(DisasContext *s, arg_BF *a) +{ + /* + * M-profile branch future insns. The architecture permits an + * implementation to implement these as NOPs (equivalent to + * discarding the LO_BRANCH_INFO cache immediately), and we + * take that IMPDEF option because for QEMU a "real" implementation + * would be complicated and wouldn't execute any faster. + */ + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->boff == 0) { + /* SEE "Related encodings" (loop insns) */ + return false; + } + /* Handle as NOP */ + return true; +} + +static bool trans_DLS(DisasContext *s, arg_DLS *a) +{ + /* M-profile low-overhead loop start */ + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->rn == 13 || a->rn == 15) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + + /* Not a while loop, no tail predication: just set LR to the count */ + tmp = load_reg(s, a->rn); + store_reg(s, 14, tmp); + return true; +} + +static bool trans_WLS(DisasContext *s, arg_WLS *a) +{ + /* M-profile low-overhead while-loop start */ + TCGv_i32 tmp; + TCGLabel *nextlabel; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->rn == 13 || a->rn == 15) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + if (s->condexec_mask) { + /* + * WLS in an IT block is CONSTRAINED UNPREDICTABLE; + * we choose to UNDEF, because otherwise our use of + * gen_goto_tb(1) would clash with the use of TB exit 1 + * in the dc->condjmp condition-failed codepath in + * arm_tr_tb_stop() and we'd get an assertion. + */ + return false; + } + nextlabel = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel); + tmp = load_reg(s, a->rn); + store_reg(s, 14, tmp); + gen_jmp_tb(s, s->base.pc_next, 1); + + gen_set_label(nextlabel); + gen_jmp(s, read_pc(s) + a->imm); + return true; +} + +static bool trans_LE(DisasContext *s, arg_LE *a) +{ + /* + * M-profile low-overhead loop end. The architecture permits an + * implementation to discard the LO_BRANCH_INFO cache at any time, + * and we take the IMPDEF option to never set it in the first place + * (equivalent to always discarding it immediately), because for QEMU + * a "real" implementation would be complicated and wouldn't execute + * any faster. + */ + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + + if (!a->f) { + /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */ + arm_gen_condlabel(s); + tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel); + /* Decrement LR */ + tmp = load_reg(s, 14); + tcg_gen_addi_i32(tmp, tmp, -1); + store_reg(s, 14, tmp); + } + /* Jump back to the loop start */ + gen_jmp(s, read_pc(s) - a->imm); + return true; +} + static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) { TCGv_i32 addr, tmp; @@ -8224,6 +8379,66 @@ static bool trans_IT(DisasContext *s, arg_IT *a) return true; } +/* v8.1M CSEL/CSINC/CSNEG/CSINV */ +static bool trans_CSEL(DisasContext *s, arg_CSEL *a) +{ + TCGv_i32 rn, rm, zero; + DisasCompare c; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + return false; + } + + if (a->rm == 13) { + /* SEE "Related encodings" (MVE shifts) */ + return false; + } + + if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + + /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */ + if (a->rn == 15) { + rn = tcg_const_i32(0); + } else { + rn = load_reg(s, a->rn); + } + if (a->rm == 15) { + rm = tcg_const_i32(0); + } else { + rm = load_reg(s, a->rm); + } + + switch (a->op) { + case 0: /* CSEL */ + break; + case 1: /* CSINC */ + tcg_gen_addi_i32(rm, rm, 1); + break; + case 2: /* CSINV */ + tcg_gen_not_i32(rm, rm); + break; + case 3: /* CSNEG */ + tcg_gen_neg_i32(rm, rm); + break; + default: + g_assert_not_reached(); + } + + arm_test_cc(&c, a->fcond); + zero = tcg_const_i32(0); + tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm); + arm_free_cc(&c); + tcg_temp_free_i32(zero); + + store_reg(s, a->rd, rn); + tcg_temp_free_i32(rm); + + return true; +} + /* * Legacy decoder. */ diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 5666393..01b9d85 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -174,6 +174,12 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) | (env->vfp.vec_len << 16) | (env->vfp.vec_stride << 20); + /* + * M-profile LTPSIZE overlaps A-profile Stride; whichever of the + * two is not applicable to this CPU will always be zero. + */ + fpscr |= env->v7m.ltpsize << 16; + fpscr |= vfp_get_fpscr_from_host(env); i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; @@ -194,36 +200,45 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) val &= ~FPCR_FZ16; } - if (arm_feature(env, ARM_FEATURE_M)) { + vfp_set_fpscr_to_host(env, val); + + if (!arm_feature(env, ARM_FEATURE_M)) { /* - * M profile FPSCR is RES0 for the QC, STRIDE, FZ16, LEN bits - * and also for the trapped-exception-handling bits IxE. + * Short-vector length and stride; on M-profile these bits + * are used for different purposes. + * We can't make this conditional be "if MVFR0.FPShVec != 0", + * because in v7A no-short-vector-support cores still had to + * allow Stride/Len to be written with the only effect that + * some insns are required to UNDEF if the guest sets them. + * + * TODO: if M-profile MVE implemented, set LTPSIZE. */ - val &= 0xf7c0009f; + env->vfp.vec_len = extract32(val, 16, 3); + env->vfp.vec_stride = extract32(val, 20, 2); } - vfp_set_fpscr_to_host(env, val); + if (arm_feature(env, ARM_FEATURE_NEON)) { + /* + * The bit we set within fpscr_q is arbitrary; the register as a + * whole being zero/non-zero is what counts. + * TODO: M-profile MVE also has a QC bit. + */ + env->vfp.qc[0] = val & FPCR_QC; + env->vfp.qc[1] = 0; + env->vfp.qc[2] = 0; + env->vfp.qc[3] = 0; + } /* * We don't implement trapped exception handling, so the * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) * - * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC - * (which are stored in fp_status), and the other RES0 bits - * in between, then we clear all of the low 16 bits. + * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in + * fp_status; QC, Len and Stride are stored separately earlier. + * Clear out all of those and the RES0 bits: only NZCV, AHP, DN, + * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR]. */ env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000; - env->vfp.vec_len = (val >> 16) & 7; - env->vfp.vec_stride = (val >> 20) & 3; - - /* - * The bit we set within fpscr_q is arbitrary; the register as a - * whole being zero/non-zero is what counts. - */ - env->vfp.qc[0] = val & FPCR_QC; - env->vfp.qc[1] = 0; - env->vfp.qc[2] = 0; - env->vfp.qc[3] = 0; } void vfp_set_fpscr(CPUARMState *env, uint32_t val) @@ -393,12 +408,32 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) return float64_to_float32(x, &env->vfp.fp_status); } -/* VFP3 fixed point conversion. */ +/* + * VFP3 fixed point conversion. The AArch32 versions of fix-to-float + * must always round-to-nearest; the AArch64 ones honour the FPSCR + * rounding mode. (For AArch32 Neon the standard-FPSCR is set to + * round-to-nearest so either helper will work.) AArch32 float-to-fix + * must round-to-zero. + */ #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ void *fpstp) \ { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); } +#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ + ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \ + uint32_t shift, \ + void *fpstp) \ + { \ + ftype ret; \ + float_status *fpst = fpstp; \ + FloatRoundMode oldmode = fpst->float_rounding_mode; \ + fpst->float_rounding_mode = float_round_nearest_even; \ + ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \ + fpst->float_rounding_mode = oldmode; \ + return ret; \ + } + #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \ uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ void *fpst) \ @@ -412,6 +447,7 @@ uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ #define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \ VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ float_round_to_zero, _round_to_zero) \ VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 3987f96..28d4068 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -138,6 +138,7 @@ qtests_arm = \ ['arm-cpu-features', 'microbit-test', 'm25p80-test', + 'npcm7xx_timer-test', 'test-arm-mptimer', 'boot-serial-test', 'hexloader-test'] diff --git a/tests/qtest/npcm7xx_timer-test.c b/tests/qtest/npcm7xx_timer-test.c new file mode 100644 index 0000000..f08b0cd --- /dev/null +++ b/tests/qtest/npcm7xx_timer-test.c @@ -0,0 +1,562 @@ +/* + * QTest testcase for the Nuvoton NPCM7xx Timer + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "libqtest-single.h" + +#define TIM_REF_HZ (25000000) + +/* Bits in TCSRx */ +#define CEN BIT(30) +#define IE BIT(29) +#define MODE_ONESHOT (0 << 27) +#define MODE_PERIODIC (1 << 27) +#define CRST BIT(26) +#define CACT BIT(25) +#define PRESCALE(x) (x) + +/* Registers shared between all timers in a module. */ +#define TISR 0x18 +#define WTCR 0x1c +# define WTCLK(x) ((x) << 10) + +/* Power-on default; used to re-initialize timers before each test. */ +#define TCSR_DEFAULT PRESCALE(5) + +/* Register offsets for a timer within a timer block. */ +typedef struct Timer { + unsigned int tcsr_offset; + unsigned int ticr_offset; + unsigned int tdr_offset; +} Timer; + +/* A timer block containing 5 timers. */ +typedef struct TimerBlock { + int irq_base; + uint64_t base_addr; +} TimerBlock; + +/* Testdata for testing a particular timer within a timer block. */ +typedef struct TestData { + const TimerBlock *tim; + const Timer *timer; +} TestData; + +const TimerBlock timer_block[] = { + { + .irq_base = 32, + .base_addr = 0xf0008000, + }, + { + .irq_base = 37, + .base_addr = 0xf0009000, + }, + { + .irq_base = 42, + .base_addr = 0xf000a000, + }, +}; + +const Timer timer[] = { + { + .tcsr_offset = 0x00, + .ticr_offset = 0x08, + .tdr_offset = 0x10, + }, { + .tcsr_offset = 0x04, + .ticr_offset = 0x0c, + .tdr_offset = 0x14, + }, { + .tcsr_offset = 0x20, + .ticr_offset = 0x28, + .tdr_offset = 0x30, + }, { + .tcsr_offset = 0x24, + .ticr_offset = 0x2c, + .tdr_offset = 0x34, + }, { + .tcsr_offset = 0x40, + .ticr_offset = 0x48, + .tdr_offset = 0x50, + }, +}; + +/* Returns the index of the timer block. */ +static int tim_index(const TimerBlock *tim) +{ + ptrdiff_t diff = tim - timer_block; + + g_assert(diff >= 0 && diff < ARRAY_SIZE(timer_block)); + + return diff; +} + +/* Returns the index of a timer within a timer block. */ +static int timer_index(const Timer *t) +{ + ptrdiff_t diff = t - timer; + + g_assert(diff >= 0 && diff < ARRAY_SIZE(timer)); + + return diff; +} + +/* Returns the irq line for a given timer. */ +static int tim_timer_irq(const TestData *td) +{ + return td->tim->irq_base + timer_index(td->timer); +} + +/* Register read/write accessors. */ + +static void tim_write(const TestData *td, + unsigned int offset, uint32_t value) +{ + writel(td->tim->base_addr + offset, value); +} + +static uint32_t tim_read(const TestData *td, unsigned int offset) +{ + return readl(td->tim->base_addr + offset); +} + +static void tim_write_tcsr(const TestData *td, uint32_t value) +{ + tim_write(td, td->timer->tcsr_offset, value); +} + +static uint32_t tim_read_tcsr(const TestData *td) +{ + return tim_read(td, td->timer->tcsr_offset); +} + +static void tim_write_ticr(const TestData *td, uint32_t value) +{ + tim_write(td, td->timer->ticr_offset, value); +} + +static uint32_t tim_read_ticr(const TestData *td) +{ + return tim_read(td, td->timer->ticr_offset); +} + +static uint32_t tim_read_tdr(const TestData *td) +{ + return tim_read(td, td->timer->tdr_offset); +} + +/* Returns the number of nanoseconds to count the given number of cycles. */ +static int64_t tim_calculate_step(uint32_t count, uint32_t prescale) +{ + return (1000000000LL / TIM_REF_HZ) * count * (prescale + 1); +} + +/* Returns a bitmask corresponding to the timer under test. */ +static uint32_t tim_timer_bit(const TestData *td) +{ + return BIT(timer_index(td->timer)); +} + +/* Resets all timers to power-on defaults. */ +static void tim_reset(const TestData *td) +{ + int i, j; + + /* Reset all the timers, in case a previous test left a timer running. */ + for (i = 0; i < ARRAY_SIZE(timer_block); i++) { + for (j = 0; j < ARRAY_SIZE(timer); j++) { + writel(timer_block[i].base_addr + timer[j].tcsr_offset, + CRST | TCSR_DEFAULT); + } + writel(timer_block[i].base_addr + TISR, -1); + } +} + +/* Verifies the reset state of a timer. */ +static void test_reset(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_ticr(td), ==, 0); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_cmphex(tim_read(td, WTCR), ==, WTCLK(1)); +} + +/* Verifies that CRST wins if both CEN and CRST are set. */ +static void test_reset_overrides_enable(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + /* CRST should force CEN to 0 */ + tim_write_tcsr(td, CEN | CRST | TCSR_DEFAULT); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + g_assert_cmphex(tim_read(td, TISR), ==, 0); +} + +/* Verifies the behavior when CEN is set and then cleared. */ +static void test_oneshot_enable_then_disable(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + /* Enable the timer with zero initial count, then disable it again. */ + tim_write_tcsr(td, CEN | TCSR_DEFAULT); + tim_write_tcsr(td, TCSR_DEFAULT); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + /* Timer interrupt flag should be set, but interrupts are not enabled. */ + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with prescaler 5. */ +static void test_oneshot_ps5(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 5; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Clear the interrupt flag. */ + tim_write(td, TISR, tim_timer_bit(td)); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Verify that this isn't a periodic timer. */ + clock_step(2 * tim_calculate_step(count, ps)); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with prescaler 0. */ +static void test_oneshot_ps0(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 1; + unsigned int ps = 0; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with highest prescaler. */ +static void test_oneshot_ps255(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = (1U << 24) - 1; + unsigned int ps = 255; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a oneshot timer fires an interrupt when expected. */ +static void test_oneshot_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 7; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* + * Verifies that the timer can be paused and later resumed, and it still fires + * at the right moment. + */ +static void test_pause_resume(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 1; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + + /* Pause the timer halfway to expiration. */ + clock_step(tim_calculate_step(count / 2, ps)); + tim_write_tcsr(td, IE | MODE_ONESHOT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + + /* Counter should not advance during the following step. */ + clock_step(2 * tim_calculate_step(count, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Resume the timer and run _almost_ to expiration. */ + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + clock_step(tim_calculate_step(count / 2, ps) - 1); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Now, run the rest of the way and verify that the interrupt fires. */ + clock_step(1); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that the prescaler can be changed while the timer is runnin. */ +static void test_prescaler_change(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 5; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + + /* Run a quarter of the way, and change the prescaler. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, 3 * count / 4); + ps = 2; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, 3 * count / 4); + + /* Run another quarter of the way, and change the prescaler again. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + ps = 8; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + + /* Run another quarter of the way, and change the prescaler again. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 4); + ps = 0; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, count / 4); + + /* Run almost to expiration, and verify the timer didn't fire yet. */ + clock_step(tim_calculate_step(count / 4, ps) - 1); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + /* Now, run the rest of the way and verify that the timer fires. */ + clock_step(1); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); +} + +/* Verifies that a periodic timer automatically restarts after expiration. */ +static void test_periodic_no_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 2; + unsigned int ps = 3; + int i; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_PERIODIC | PRESCALE(ps)); + + for (i = 0; i < 4; i++) { + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + tim_write(td, TISR, tim_timer_bit(td)); + + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + } +} + +/* Verifies that a periodict timer fires an interrupt every time it expires. */ +static void test_periodic_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 65535; + unsigned int ps = 2; + int i; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | IE | MODE_PERIODIC | PRESCALE(ps)); + + for (i = 0; i < 4; i++) { + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + tim_write(td, TISR, tim_timer_bit(td)); + + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + } +} + +/* + * Verifies that the timer behaves correctly when disabled right before and + * exactly when it's supposed to expire. + */ +static void test_disable_on_expiration(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 8; + unsigned int ps = 255; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + + clock_step(tim_calculate_step(count, ps) - 1); + + tim_write_tcsr(td, MODE_ONESHOT | PRESCALE(ps)); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + clock_step(1); + tim_write_tcsr(td, MODE_ONESHOT | PRESCALE(ps)); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); +} + +/* + * Constructs a name that includes the timer block, timer and testcase name, + * and adds the test to the test suite. + */ +static void tim_add_test(const char *name, const TestData *td, GTestDataFunc fn) +{ + g_autofree char *full_name; + + full_name = g_strdup_printf("npcm7xx_timer/tim[%d]/timer[%d]/%s", + tim_index(td->tim), timer_index(td->timer), + name); + qtest_add_data_func(full_name, td, fn); +} + +/* Convenience macro for adding a test with a predictable function name. */ +#define add_test(name, td) tim_add_test(#name, td, test_##name) + +int main(int argc, char **argv) +{ + TestData testdata[ARRAY_SIZE(timer_block) * ARRAY_SIZE(timer)]; + int ret; + int i, j; + + g_test_init(&argc, &argv, NULL); + g_test_set_nonfatal_assertions(); + + for (i = 0; i < ARRAY_SIZE(timer_block); i++) { + for (j = 0; j < ARRAY_SIZE(timer); j++) { + TestData *td = &testdata[i * ARRAY_SIZE(timer) + j]; + td->tim = &timer_block[i]; + td->timer = &timer[j]; + + add_test(reset, td); + add_test(reset_overrides_enable, td); + add_test(oneshot_enable_then_disable, td); + add_test(oneshot_ps5, td); + add_test(oneshot_ps0, td); + add_test(oneshot_ps255, td); + add_test(oneshot_interrupt, td); + add_test(pause_resume, td); + add_test(prescaler_change, td); + add_test(periodic_no_interrupt, td); + add_test(periodic_interrupt, td); + add_test(disable_on_expiration, td); + } + } + + qtest_start("-machine npcm750-evb"); + qtest_irq_intercept_in(global_qtest, "/machine/soc/a9mpcore/gic"); + ret = g_test_run(); + qtest_end(); + + return ret; +} |