From 5288145d716338ace0f83e3ff05c4d07715bb4f4 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 9 Oct 2020 15:47:12 +0100 Subject: target/arm: Fix SMLAD incorrect setting of Q bit The SMLAD instruction is supposed to: * signed multiply Rn[15:0] * Rm[15:0] * signed multiply Rn[31:16] * Rm[31:16] * perform a signed addition of the products and Ra * set Rd to the low 32 bits of the theoretical infinite-precision result * set the Q flag if the sign-extension of Rd would differ from the infinite-precision result (ie on overflow) Our current implementation doesn't quite do this, though: it performs an addition of the products setting Q on overflow, and then it adds Ra, again possibly setting Q. This sometimes incorrectly sets Q when the architecturally mandated only-check-for-overflow-once algorithm does not. For instance: r1 = 0x80008000; r2 = 0x80008000; r3 = 0xffffffff smlad r0, r1, r2, r3 This is (-32768 * -32768) + (-32768 * -32768) - 1 The products are both 0x4000_0000, so when added together as 32-bit signed numbers they overflow (and QEMU sets Q), but because the addition of Ra == -1 brings the total back down to 0x7fff_ffff there is no overflow for the complete operation and setting Q is incorrect. Fix this edge case by resorting to 64-bit arithmetic for the case where we need to add three values together. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201009144712.11187-1-peter.maydell@linaro.org --- target/arm/translate.c | 60 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index d34c1d3..d8729e4 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7401,22 +7401,60 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) gen_smul_dual(t1, t2); if (sub) { - /* This subtraction cannot overflow. */ - tcg_gen_sub_i32(t1, t1, t2); - } else { /* - * This addition cannot overflow 32 bits; however it may - * overflow considered as a signed operation, in which case - * we must set the Q flag. + * This subtraction cannot overflow, so we can do a simple + * 32-bit subtraction and then a possible 32-bit saturating + * addition of Ra. */ - gen_helper_add_setq(t1, cpu_env, t1, t2); - } - tcg_temp_free_i32(t2); + tcg_gen_sub_i32(t1, t1, t2); + tcg_temp_free_i32(t2); - if (a->ra != 15) { - t2 = load_reg(s, a->ra); + if (a->ra != 15) { + t2 = load_reg(s, a->ra); + gen_helper_add_setq(t1, cpu_env, t1, t2); + tcg_temp_free_i32(t2); + } + } else if (a->ra == 15) { + /* Single saturation-checking addition */ gen_helper_add_setq(t1, cpu_env, t1, t2); tcg_temp_free_i32(t2); + } else { + /* + * We need to add the products and Ra together and then + * determine whether the final result overflowed. Doing + * this as two separate add-and-check-overflow steps incorrectly + * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1. + * Do all the arithmetic at 64-bits and then check for overflow. + */ + TCGv_i64 p64, q64; + TCGv_i32 t3, qf, one; + + p64 = tcg_temp_new_i64(); + q64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(p64, t1); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + load_reg_var(s, t2, a->ra); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + tcg_temp_free_i64(q64); + + tcg_gen_extr_i64_i32(t1, t2, p64); + tcg_temp_free_i64(p64); + /* + * t1 is the low half of the result which goes into Rd. + * We have overflow and must set Q if the high half (t2) + * is different from the sign-extension of t1. + */ + t3 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t3, t1, 31); + qf = load_cpu_field(QF); + one = tcg_const_i32(1); + tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf); + store_cpu_field(qf, QF); + tcg_temp_free_i32(one); + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); } store_reg(s, a->rd, t1); return true; -- cgit v1.1 From 61db12d9f9eb36761edba4d9a414cd8dd34c512b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 13 Oct 2020 11:35:32 +0100 Subject: target/arm: AArch32 VCVT fixed-point to float is always round-to-nearest For AArch32, unlike the VCVT of integer to float, which honours the rounding mode specified by the FPSCR, VCVT of fixed-point to float is always round-to-nearest. (AArch64 fixed-point-to-float conversions always honour the FPCR rounding mode.) Implement this by providing _round_to_nearest versions of the relevant helpers which set the rounding mode temporarily when making the call to the underlying softfloat function. We only need to change the VFP VCVT instructions, because the standard- FPSCR value used by the Neon VCVT is always set to round-to-nearest, so we don't need to do the extra work of saving and restoring the rounding mode. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201013103532.13391-1-peter.maydell@linaro.org --- target/arm/helper.h | 13 +++++++++++++ target/arm/translate-vfp.c.inc | 24 ++++++++++++------------ target/arm/vfp_helper.c | 23 ++++++++++++++++++++++- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 8defd7c..774d2cd 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -213,6 +213,19 @@ DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr) DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr) DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr) +DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, ptr) +DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, ptr) + DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr) DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32) diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 28e0dba..9b11b81 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -3141,16 +3141,16 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtoh(vd, vd, shift, fpst); + gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltoh(vd, vd, shift, fpst); + gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtoh(vd, vd, shift, fpst); + gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultoh(vd, vd, shift, fpst); + gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst); @@ -3200,16 +3200,16 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtos(vd, vd, shift, fpst); + gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltos(vd, vd, shift, fpst); + gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtos(vd, vd, shift, fpst); + gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultos(vd, vd, shift, fpst); + gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst); @@ -3265,16 +3265,16 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) /* Switch on op:U:sx bits */ switch (a->opc) { case 0: - gen_helper_vfp_shtod(vd, vd, shift, fpst); + gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst); break; case 1: - gen_helper_vfp_sltod(vd, vd, shift, fpst); + gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst); break; case 2: - gen_helper_vfp_uhtod(vd, vd, shift, fpst); + gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst); break; case 3: - gen_helper_vfp_ultod(vd, vd, shift, fpst); + gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst); break; case 4: gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst); diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 5666393..abfdb6a 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -393,12 +393,32 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) return float64_to_float32(x, &env->vfp.fp_status); } -/* VFP3 fixed point conversion. */ +/* + * VFP3 fixed point conversion. The AArch32 versions of fix-to-float + * must always round-to-nearest; the AArch64 ones honour the FPSCR + * rounding mode. (For AArch32 Neon the standard-FPSCR is set to + * round-to-nearest so either helper will work.) AArch32 float-to-fix + * must round-to-zero. + */ #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ void *fpstp) \ { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); } +#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ + ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \ + uint32_t shift, \ + void *fpstp) \ + { \ + ftype ret; \ + float_status *fpst = fpstp; \ + FloatRoundMode oldmode = fpst->float_rounding_mode; \ + fpst->float_rounding_mode = float_round_nearest_even; \ + ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \ + fpst->float_rounding_mode = oldmode; \ + return ret; \ + } + #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \ uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ void *fpst) \ @@ -412,6 +432,7 @@ uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \ #define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \ VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \ +VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \ VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ float_round_to_zero, _round_to_zero) \ VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \ -- cgit v1.1 From 8ddd611a50481826a8e583b7ccdf6e1866e22c15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 14 Oct 2020 23:36:01 +0200 Subject: hw/arm/strongarm: Fix 'time to transmit a char' unit comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The time to transmit a char is expressed in nanoseconds, not in ticks. Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201014213601.205222-1-f4bug@amsat.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/strongarm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index d7133ee..ca7c385 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -935,7 +935,7 @@ struct StrongARMUARTState { uint8_t rx_start; uint8_t rx_len; - uint64_t char_transmit_time; /* time to transmit a char in ticks*/ + uint64_t char_transmit_time; /* time to transmit a char in nanoseconds */ bool wait_break_end; QEMUTimer *rx_timeout_timer; QEMUTimer *tx_timer; -- cgit v1.1 From b77a52a0c14163e4d8602c94b64bae9cf3524ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 8 Oct 2020 18:14:14 +0200 Subject: hw/arm: Restrict APEI tables generation to the 'virt' machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While APEI is a generic ACPI feature (usable by X86 and ARM64), only the 'virt' machine uses it, by enabling the RAS Virtualization. See commit 2afa8c8519: "hw/arm/virt: Introduce a RAS machine option"). Restrict the APEI tables generation code to the single user: the virt machine. If another machine wants to use it, it simply has to 'select ACPI_APEI' in its Kconfig. Fixes: aa16508f1d ("ACPI: Build related register address fields via hardware error fw_cfg blob") Acked-by: Michael S. Tsirkin Reviewed-by: Dongjiu Geng Acked-by: Laszlo Ersek Reviewed-by: Igor Mammedov Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201008161414.2672569-1-philmd@redhat.com Signed-off-by: Peter Maydell --- default-configs/devices/arm-softmmu.mak | 1 - hw/arm/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/default-configs/devices/arm-softmmu.mak b/default-configs/devices/arm-softmmu.mak index 9a94ebd..08a3212 100644 --- a/default-configs/devices/arm-softmmu.mak +++ b/default-configs/devices/arm-softmmu.mak @@ -43,4 +43,3 @@ CONFIG_FSL_IMX7=y CONFIG_FSL_IMX6UL=y CONFIG_SEMIHOSTING=y CONFIG_ALLWINNER_H3=y -CONFIG_ACPI_APEI=y diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index f303c6b..7d04082 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -26,6 +26,7 @@ config ARM_VIRT select ACPI_MEMORY_HOTPLUG select ACPI_HW_REDUCED select ACPI_NVDIMM + select ACPI_APEI config CHEETAH bool -- cgit v1.1 From f3f69362fdd957dbdc6b5bd1120347560752e4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 10 Oct 2020 22:37:06 +0200 Subject: hw/timer/bcm2835: Introduce BCM2835_SYSTIMER_COUNT definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the BCM2835_SYSTIMER_COUNT definition instead of the magic '4' value. Reviewed-by: Luc Michel Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201010203709.3116542-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/timer/bcm2835_systmr.c | 3 ++- include/hw/timer/bcm2835_systmr.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/timer/bcm2835_systmr.c b/hw/timer/bcm2835_systmr.c index 3387a62..ff8c553 100644 --- a/hw/timer/bcm2835_systmr.c +++ b/hw/timer/bcm2835_systmr.c @@ -134,7 +134,8 @@ static const VMStateDescription bcm2835_systmr_vmstate = { .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(reg.status, BCM2835SystemTimerState), - VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, 4), + VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, + BCM2835_SYSTIMER_COUNT), VMSTATE_END_OF_LIST() } }; diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h index 7ce8f6e..43df7ee 100644 --- a/include/hw/timer/bcm2835_systmr.h +++ b/include/hw/timer/bcm2835_systmr.h @@ -16,6 +16,8 @@ #define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer" OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SystemTimerState, BCM2835_SYSTIMER) +#define BCM2835_SYSTIMER_COUNT 4 + struct BCM2835SystemTimerState { /*< private >*/ SysBusDevice parent_obj; @@ -26,7 +28,7 @@ struct BCM2835SystemTimerState { struct { uint32_t status; - uint32_t compare[4]; + uint32_t compare[BCM2835_SYSTIMER_COUNT]; } reg; }; -- cgit v1.1 From cdb490da8695ec67dbc151335b31450abb9e564e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 10 Oct 2020 22:37:07 +0200 Subject: hw/timer/bcm2835: Rename variable holding CTRL_STATUS register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable holding the CTRL_STATUS register is misnamed 'status'. Rename it 'ctrl_status' to make it more obvious this register is also used to control the peripheral. Reviewed-by: Luc Michel Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201010203709.3116542-3-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/timer/bcm2835_systmr.c | 8 ++++---- include/hw/timer/bcm2835_systmr.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/timer/bcm2835_systmr.c b/hw/timer/bcm2835_systmr.c index ff8c553..b234e83 100644 --- a/hw/timer/bcm2835_systmr.c +++ b/hw/timer/bcm2835_systmr.c @@ -30,7 +30,7 @@ REG32(COMPARE3, 0x18) static void bcm2835_systmr_update_irq(BCM2835SystemTimerState *s) { - bool enable = !!s->reg.status; + bool enable = !!s->reg.ctrl_status; trace_bcm2835_systmr_irq(enable); qemu_set_irq(s->irq, enable); @@ -52,7 +52,7 @@ static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, switch (offset) { case A_CTRL_STATUS: - r = s->reg.status; + r = s->reg.ctrl_status; break; case A_COMPARE0 ... A_COMPARE3: r = s->reg.compare[(offset - A_COMPARE0) >> 2]; @@ -82,7 +82,7 @@ static void bcm2835_systmr_write(void *opaque, hwaddr offset, trace_bcm2835_systmr_write(offset, value); switch (offset) { case A_CTRL_STATUS: - s->reg.status &= ~value; /* Ack */ + s->reg.ctrl_status &= ~value; /* Ack */ bcm2835_systmr_update_irq(s); break; case A_COMPARE0 ... A_COMPARE3: @@ -133,7 +133,7 @@ static const VMStateDescription bcm2835_systmr_vmstate = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(reg.status, BCM2835SystemTimerState), + VMSTATE_UINT32(reg.ctrl_status, BCM2835SystemTimerState), VMSTATE_UINT32_ARRAY(reg.compare, BCM2835SystemTimerState, BCM2835_SYSTIMER_COUNT), VMSTATE_END_OF_LIST() diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h index 43df7ee..f15788a 100644 --- a/include/hw/timer/bcm2835_systmr.h +++ b/include/hw/timer/bcm2835_systmr.h @@ -27,7 +27,7 @@ struct BCM2835SystemTimerState { qemu_irq irq; struct { - uint32_t status; + uint32_t ctrl_status; uint32_t compare[BCM2835_SYSTIMER_COUNT]; } reg; }; -- cgit v1.1 From be95dffa326a63f6f850d389dbe358d25e8ba20b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 10 Oct 2020 22:37:08 +0200 Subject: hw/timer/bcm2835: Support the timer COMPARE registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This peripheral has 1 free-running timer and 4 compare registers. Only the free-running timer is implemented. Add support the COMPARE registers (each register is wired to an IRQ). Reference: "BCM2835 ARM Peripherals" datasheet [*] chapter 12 "System Timer": The System Timer peripheral provides four 32-bit timer channels and a single 64-bit free running counter. Each channel has an output compare register, which is compared against the 32 least significant bits of the free running counter values. When the two values match, the system timer peripheral generates a signal to indicate a match for the appropriate channel. The match signal is then fed into the interrupt controller. This peripheral is used since Linux 3.7, commit ee4af5696720 ("ARM: bcm2835: add system timer"). [*] https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Luc Michel Message-id: 20201010203709.3116542-4-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/timer/bcm2835_systmr.c | 48 +++++++++++++++++++++++++-------------- hw/timer/trace-events | 6 +++-- include/hw/timer/bcm2835_systmr.h | 11 +++++++-- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/hw/timer/bcm2835_systmr.c b/hw/timer/bcm2835_systmr.c index b234e83..67669a5 100644 --- a/hw/timer/bcm2835_systmr.c +++ b/hw/timer/bcm2835_systmr.c @@ -28,20 +28,13 @@ REG32(COMPARE1, 0x10) REG32(COMPARE2, 0x14) REG32(COMPARE3, 0x18) -static void bcm2835_systmr_update_irq(BCM2835SystemTimerState *s) +static void bcm2835_systmr_timer_expire(void *opaque) { - bool enable = !!s->reg.ctrl_status; + BCM2835SystemTimerCompare *tmr = opaque; - trace_bcm2835_systmr_irq(enable); - qemu_set_irq(s->irq, enable); -} - -static void bcm2835_systmr_update_compare(BCM2835SystemTimerState *s, - unsigned timer_index) -{ - /* TODO fow now, since neither Linux nor U-boot use these timers. */ - qemu_log_mask(LOG_UNIMP, "COMPARE register %u not implemented\n", - timer_index); + trace_bcm2835_systmr_timer_expired(tmr->id); + tmr->state->reg.ctrl_status |= 1 << tmr->id; + qemu_set_irq(tmr->irq, 1); } static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, @@ -75,19 +68,33 @@ static uint64_t bcm2835_systmr_read(void *opaque, hwaddr offset, } static void bcm2835_systmr_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) + uint64_t value64, unsigned size) { BCM2835SystemTimerState *s = BCM2835_SYSTIMER(opaque); + int index; + uint32_t value = value64; + uint32_t triggers_delay_us; + uint64_t now; trace_bcm2835_systmr_write(offset, value); switch (offset) { case A_CTRL_STATUS: s->reg.ctrl_status &= ~value; /* Ack */ - bcm2835_systmr_update_irq(s); + for (index = 0; index < ARRAY_SIZE(s->tmr); index++) { + if (extract32(value, index, 1)) { + trace_bcm2835_systmr_irq_ack(index); + qemu_set_irq(s->tmr[index].irq, 0); + } + } break; case A_COMPARE0 ... A_COMPARE3: - s->reg.compare[(offset - A_COMPARE0) >> 2] = value; - bcm2835_systmr_update_compare(s, (offset - A_COMPARE0) >> 2); + index = (offset - A_COMPARE0) >> 2; + s->reg.compare[index] = value; + now = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL); + /* Compare lower 32-bits of the free-running counter. */ + triggers_delay_us = value - now; + trace_bcm2835_systmr_run(index, triggers_delay_us); + timer_mod(&s->tmr[index].timer, now + triggers_delay_us); break; case A_COUNTER_LOW: case A_COUNTER_HIGH: @@ -125,7 +132,14 @@ static void bcm2835_systmr_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->iomem, OBJECT(dev), &bcm2835_systmr_ops, s, "bcm2835-sys-timer", 0x20); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); - sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); + + for (size_t i = 0; i < ARRAY_SIZE(s->tmr); i++) { + s->tmr[i].id = i; + s->tmr[i].state = s; + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->tmr[i].irq); + timer_init_us(&s->tmr[i].timer, QEMU_CLOCK_VIRTUAL, + bcm2835_systmr_timer_expire, &s->tmr[i]); + } } static const VMStateDescription bcm2835_systmr_vmstate = { diff --git a/hw/timer/trace-events b/hw/timer/trace-events index b996d99..7a4326d 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -77,9 +77,11 @@ nrf51_timer_write(uint8_t timer_id, uint64_t addr, uint32_t value, unsigned size nrf51_timer_set_count(uint8_t timer_id, uint8_t counter_id, uint32_t value) "timer %u counter %u count 0x%" PRIx32 # bcm2835_systmr.c -bcm2835_systmr_irq(bool enable) "timer irq state %u" +bcm2835_systmr_timer_expired(unsigned id) "timer #%u expired" +bcm2835_systmr_irq_ack(unsigned id) "timer #%u acked" bcm2835_systmr_read(uint64_t offset, uint64_t data) "timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 -bcm2835_systmr_write(uint64_t offset, uint64_t data) "timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 +bcm2835_systmr_write(uint64_t offset, uint32_t data) "timer write: offset 0x%" PRIx64 " data 0x%" PRIx32 +bcm2835_systmr_run(unsigned id, uint64_t delay_us) "timer #%u expiring in %"PRIu64" us" # avr_timer16.c avr_timer16_read(uint8_t addr, uint8_t value) "timer16 read addr:%u value:%u" diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h index f15788a..bd3097d 100644 --- a/include/hw/timer/bcm2835_systmr.h +++ b/include/hw/timer/bcm2835_systmr.h @@ -11,6 +11,7 @@ #include "hw/sysbus.h" #include "hw/irq.h" +#include "qemu/timer.h" #include "qom/object.h" #define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer" @@ -18,18 +19,24 @@ OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SystemTimerState, BCM2835_SYSTIMER) #define BCM2835_SYSTIMER_COUNT 4 +typedef struct { + unsigned id; + QEMUTimer timer; + qemu_irq irq; + BCM2835SystemTimerState *state; +} BCM2835SystemTimerCompare; + struct BCM2835SystemTimerState { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem; - qemu_irq irq; - struct { uint32_t ctrl_status; uint32_t compare[BCM2835_SYSTIMER_COUNT]; } reg; + BCM2835SystemTimerCompare tmr[BCM2835_SYSTIMER_COUNT]; }; #endif -- cgit v1.1 From 722bde6789c55f9f872026f796ecabecbec5d82b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 10 Oct 2020 22:37:09 +0200 Subject: hw/arm/bcm2835_peripherals: Correctly wire the SYS_timer IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SYS_timer is not directly wired to the ARM core, but to the SoC (peripheral) interrupt controller. Fixes: 0e5bbd74064 ("hw/arm/bcm2835_peripherals: Use the SYS_timer") Reviewed-by: Luc Michel Reviewed-by: Richard Henderson Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201010203709.3116542-5-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/bcm2835_peripherals.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 15c5c72..48909a4 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -171,8 +171,17 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&s->peri_mr, ST_OFFSET, sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systmr), 0)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 0, - qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_ARM_IRQ, - INTERRUPT_ARM_TIMER)); + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 1, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER1)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 2, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER2)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systmr), 3, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_TIMER3)); /* UART0 */ qdev_prop_set_chr(DEVICE(&s->uart0), "chardev", serial_hd(0)); -- cgit v1.1 From 3ab6e68cd035de244d9bf999900349a69939ad41 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Oct 2020 14:07:53 -0700 Subject: accel/tcg: Add tlb_flush_page_bits_by_mmuidx* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ARM, the Top Byte Ignore feature means that only 56 bits of the address are significant in the virtual address. We are required to give the entire 64-bit address to FAR_ELx on fault, which means that we do not "clean" the top byte early in TCG. This new interface allows us to flush all 256 possible aliases for a given page, currently missed by tlb_flush_page*. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Message-id: 20201016210754.818257-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- accel/tcg/cputlb.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++-- include/exec/exec-all.h | 36 +++++++ 2 files changed, 302 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 2bbbb3a..42ab79c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -409,12 +409,21 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu) tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); } +static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, + target_ulong page, target_ulong mask) +{ + page &= mask; + mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; + + return (page == (tlb_entry->addr_read & mask) || + page == (tlb_addr_write(tlb_entry) & mask) || + page == (tlb_entry->addr_code & mask)); +} + static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { - return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_addr_write(tlb_entry), page) || - tlb_hit_page(tlb_entry->addr_code, page); + return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); } /** @@ -427,31 +436,45 @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) } /* Called with tlb_c.lock held */ -static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, - target_ulong page) +static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, + target_ulong page, + target_ulong mask) { - if (tlb_hit_page_anyprot(tlb_entry, page)) { + if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); return true; } return false; } +static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, + target_ulong page) +{ + return tlb_flush_entry_mask_locked(tlb_entry, page, -1); +} + /* Called with tlb_c.lock held */ -static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, - target_ulong page) +static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, + target_ulong page, + target_ulong mask) { CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; int k; assert_cpu_is_self(env_cpu(env)); for (k = 0; k < CPU_VTLB_SIZE; k++) { - if (tlb_flush_entry_locked(&d->vtable[k], page)) { + if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) { tlb_n_used_entries_dec(env, mmu_idx); } } } +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, + target_ulong page) +{ + tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); +} + static void tlb_flush_page_locked(CPUArchState *env, int midx, target_ulong page) { @@ -666,6 +689,240 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); } +static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, + target_ulong page, unsigned bits) +{ + CPUTLBDesc *d = &env_tlb(env)->d[midx]; + CPUTLBDescFast *f = &env_tlb(env)->f[midx]; + target_ulong mask = MAKE_64BIT_MASK(0, bits); + + /* + * If @bits is smaller than the tlb size, there may be multiple entries + * within the TLB; otherwise all addresses that match under @mask hit + * the same TLB entry. + * + * TODO: Perhaps allow bits to be a few bits less than the size. + * For now, just flush the entire TLB. + */ + if (mask < f->mask) { + tlb_debug("forcing full flush midx %d (" + TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + midx, page, mask); + tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); + return; + } + + /* Check if we need to flush due to large pages. */ + if ((page & d->large_page_mask) == d->large_page_addr) { + tlb_debug("forcing full flush midx %d (" + TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + midx, d->large_page_addr, d->large_page_mask); + tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); + return; + } + + if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) { + tlb_n_used_entries_dec(env, midx); + } + tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); +} + +typedef struct { + target_ulong addr; + uint16_t idxmap; + uint16_t bits; +} TLBFlushPageBitsByMMUIdxData; + +static void +tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, + TLBFlushPageBitsByMMUIdxData d) +{ + CPUArchState *env = cpu->env_ptr; + int mmu_idx; + + assert_cpu_is_self(cpu); + + tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n", + d.addr, d.bits, d.idxmap); + + qemu_spin_lock(&env_tlb(env)->c.lock); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + if ((d.idxmap >> mmu_idx) & 1) { + tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits); + } + } + qemu_spin_unlock(&env_tlb(env)->c.lock); + + tb_flush_jmp_cache(cpu, d.addr); +} + +static bool encode_pbm_to_runon(run_on_cpu_data *out, + TLBFlushPageBitsByMMUIdxData d) +{ + /* We need 6 bits to hold to hold @bits up to 63. */ + if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) { + *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits); + return true; + } + return false; +} + +static TLBFlushPageBitsByMMUIdxData +decode_runon_to_pbm(run_on_cpu_data data) +{ + target_ulong addr_map_bits = (target_ulong) data.target_ptr; + return (TLBFlushPageBitsByMMUIdxData){ + .addr = addr_map_bits & TARGET_PAGE_MASK, + .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6, + .bits = addr_map_bits & 0x3f + }; +} + +static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data runon) +{ + tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon)); +} + +static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, + run_on_cpu_data data) +{ + TLBFlushPageBitsByMMUIdxData *d = data.host_ptr; + tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); + g_free(d); +} + +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx(cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx(cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (qemu_cpu_is_self(cpu)) { + tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); + } else if (encode_pbm_to_runon(&runon, d)) { + async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + } else { + TLBFlushPageBitsByMMUIdxData *p + = g_new(TLBFlushPageBitsByMMUIdxData, 1); + + /* Otherwise allocate a structure, freed by the worker. */ + *p = d; + async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (encode_pbm_to_runon(&runon, d)) { + flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + } else { + CPUState *dst_cpu; + TLBFlushPageBitsByMMUIdxData *p; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_run_on_cpu(dst_cpu, + tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } + } + } + + tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + TLBFlushPageBitsByMMUIdxData d; + run_on_cpu_data runon; + + /* If all bits are significant, this devolves to tlb_flush_page. */ + if (bits >= TARGET_LONG_BITS) { + tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); + return; + } + /* If no page bits are significant, this devolves to tlb_flush. */ + if (bits < TARGET_PAGE_BITS) { + tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); + return; + } + + /* This should already be page aligned */ + d.addr = addr & TARGET_PAGE_MASK; + d.idxmap = idxmap; + d.bits = bits; + + if (encode_pbm_to_runon(&runon, d)) { + flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, + runon); + } else { + CPUState *dst_cpu; + TLBFlushPageBitsByMMUIdxData *p; + + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } + } + + p = g_new(TLBFlushPageBitsByMMUIdxData, 1); + *p = d; + async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(p)); + } +} + /* update the TLBs so that writes to code in the virtual page 'addr' can be detected */ void tlb_protect_code(ram_addr_t ram_addr) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 66f9b4c..4707ac1 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -251,6 +251,25 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap); * depend on when the guests translation ends the TB. */ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap); + +/** + * tlb_flush_page_bits_by_mmuidx + * @cpu: CPU whose TLB should be flushed + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of mmu indexes to flush + * @bits: number of significant bits in address + * + * Similar to tlb_flush_page_mask, but with a bitmap of indexes. + */ +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); + +/* Similarly, with broadcast and syncing. */ +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits); +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced + (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); + /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for @@ -337,6 +356,23 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap) { } +static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void +tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) +{ +} #endif /** * probe_access: -- cgit v1.1 From ea04dce7bb4ccd3e464e5189c0d6d53510b7c212 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Oct 2020 14:07:54 -0700 Subject: target/arm: Use tlb_flush_page_bits_by_mmuidx* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When TBI is enabled in a given regime, 56 bits of the address are significant and we need to clear out any other matching virtual addresses with differing tags. The other uses of tlb_flush_page (without mmuidx) in this file are only used by aarch32 mode. Fixes: 38d931687fa1 Reported-by: Jordan Frank Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-id: 20201016210754.818257-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index cd0779f..f49b045 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -50,6 +50,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, #endif static void switch_mode(CPUARMState *env, int mode); +static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); static int vfp_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) { @@ -4457,6 +4458,33 @@ static int vae1_tlbmask(CPUARMState *env) } } +/* Return 56 if TBI is enabled, 64 otherwise. */ +static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, + uint64_t addr) +{ + uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; + int tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + int select = extract64(addr, 55, 1); + + return (tbi >> select) & 1 ? 56 : 64; +} + +static int vae1_tlbbits(CPUARMState *env, uint64_t addr) +{ + ARMMMUIdx mmu_idx; + + /* Only the regime of the mmu_idx below is significant. */ + if (arm_is_secure_below_el3(env)) { + mmu_idx = ARMMMUIdx_SE10_0; + } else if ((env->cp15.hcr_el2 & (HCR_E2H | HCR_TGE)) + == (HCR_E2H | HCR_TGE)) { + mmu_idx = ARMMMUIdx_E20_0; + } else { + mmu_idx = ARMMMUIdx_E10_0; + } + return tlbbits_for_regime(env, mmu_idx, addr); +} + static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -4593,8 +4621,9 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); int mask = vae1_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); } static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4608,11 +4637,12 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); int mask = vae1_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae1_tlbbits(env, pageaddr); if (tlb_force_broadcast(env)) { - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); } } @@ -4621,9 +4651,10 @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E2, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E2); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_E2, bits); } static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4631,9 +4662,10 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = tlbbits_for_regime(env, ARMMMUIdx_SE3, pageaddr); - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_SE3); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_SE3, bits); } static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, -- cgit v1.1 From 19d50149c857e50ccb1ee35dd4277f9d4954877f Mon Sep 17 00:00:00 2001 From: Havard Skinnemoen Date: Thu, 8 Oct 2020 16:21:49 -0700 Subject: tests/qtest: Add npcm7xx timer test This test exercises the various modes of the npcm7xx timer. In particular, it triggers the bug found by the fuzzer, as reported here: https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg02992.html It also found several other bugs, especially related to interrupt handling. The test exercises all the timers in all the timer modules, which expands to 180 test cases in total. Reviewed-by: Tyrone Ting Signed-off-by: Havard Skinnemoen Message-id: 20201008232154.94221-2-hskinnemoen@google.com Signed-off-by: Peter Maydell --- tests/qtest/meson.build | 1 + tests/qtest/npcm7xx_timer-test.c | 562 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 563 insertions(+) create mode 100644 tests/qtest/npcm7xx_timer-test.c diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 3987f96..28d4068 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -138,6 +138,7 @@ qtests_arm = \ ['arm-cpu-features', 'microbit-test', 'm25p80-test', + 'npcm7xx_timer-test', 'test-arm-mptimer', 'boot-serial-test', 'hexloader-test'] diff --git a/tests/qtest/npcm7xx_timer-test.c b/tests/qtest/npcm7xx_timer-test.c new file mode 100644 index 0000000..f08b0cd --- /dev/null +++ b/tests/qtest/npcm7xx_timer-test.c @@ -0,0 +1,562 @@ +/* + * QTest testcase for the Nuvoton NPCM7xx Timer + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "libqtest-single.h" + +#define TIM_REF_HZ (25000000) + +/* Bits in TCSRx */ +#define CEN BIT(30) +#define IE BIT(29) +#define MODE_ONESHOT (0 << 27) +#define MODE_PERIODIC (1 << 27) +#define CRST BIT(26) +#define CACT BIT(25) +#define PRESCALE(x) (x) + +/* Registers shared between all timers in a module. */ +#define TISR 0x18 +#define WTCR 0x1c +# define WTCLK(x) ((x) << 10) + +/* Power-on default; used to re-initialize timers before each test. */ +#define TCSR_DEFAULT PRESCALE(5) + +/* Register offsets for a timer within a timer block. */ +typedef struct Timer { + unsigned int tcsr_offset; + unsigned int ticr_offset; + unsigned int tdr_offset; +} Timer; + +/* A timer block containing 5 timers. */ +typedef struct TimerBlock { + int irq_base; + uint64_t base_addr; +} TimerBlock; + +/* Testdata for testing a particular timer within a timer block. */ +typedef struct TestData { + const TimerBlock *tim; + const Timer *timer; +} TestData; + +const TimerBlock timer_block[] = { + { + .irq_base = 32, + .base_addr = 0xf0008000, + }, + { + .irq_base = 37, + .base_addr = 0xf0009000, + }, + { + .irq_base = 42, + .base_addr = 0xf000a000, + }, +}; + +const Timer timer[] = { + { + .tcsr_offset = 0x00, + .ticr_offset = 0x08, + .tdr_offset = 0x10, + }, { + .tcsr_offset = 0x04, + .ticr_offset = 0x0c, + .tdr_offset = 0x14, + }, { + .tcsr_offset = 0x20, + .ticr_offset = 0x28, + .tdr_offset = 0x30, + }, { + .tcsr_offset = 0x24, + .ticr_offset = 0x2c, + .tdr_offset = 0x34, + }, { + .tcsr_offset = 0x40, + .ticr_offset = 0x48, + .tdr_offset = 0x50, + }, +}; + +/* Returns the index of the timer block. */ +static int tim_index(const TimerBlock *tim) +{ + ptrdiff_t diff = tim - timer_block; + + g_assert(diff >= 0 && diff < ARRAY_SIZE(timer_block)); + + return diff; +} + +/* Returns the index of a timer within a timer block. */ +static int timer_index(const Timer *t) +{ + ptrdiff_t diff = t - timer; + + g_assert(diff >= 0 && diff < ARRAY_SIZE(timer)); + + return diff; +} + +/* Returns the irq line for a given timer. */ +static int tim_timer_irq(const TestData *td) +{ + return td->tim->irq_base + timer_index(td->timer); +} + +/* Register read/write accessors. */ + +static void tim_write(const TestData *td, + unsigned int offset, uint32_t value) +{ + writel(td->tim->base_addr + offset, value); +} + +static uint32_t tim_read(const TestData *td, unsigned int offset) +{ + return readl(td->tim->base_addr + offset); +} + +static void tim_write_tcsr(const TestData *td, uint32_t value) +{ + tim_write(td, td->timer->tcsr_offset, value); +} + +static uint32_t tim_read_tcsr(const TestData *td) +{ + return tim_read(td, td->timer->tcsr_offset); +} + +static void tim_write_ticr(const TestData *td, uint32_t value) +{ + tim_write(td, td->timer->ticr_offset, value); +} + +static uint32_t tim_read_ticr(const TestData *td) +{ + return tim_read(td, td->timer->ticr_offset); +} + +static uint32_t tim_read_tdr(const TestData *td) +{ + return tim_read(td, td->timer->tdr_offset); +} + +/* Returns the number of nanoseconds to count the given number of cycles. */ +static int64_t tim_calculate_step(uint32_t count, uint32_t prescale) +{ + return (1000000000LL / TIM_REF_HZ) * count * (prescale + 1); +} + +/* Returns a bitmask corresponding to the timer under test. */ +static uint32_t tim_timer_bit(const TestData *td) +{ + return BIT(timer_index(td->timer)); +} + +/* Resets all timers to power-on defaults. */ +static void tim_reset(const TestData *td) +{ + int i, j; + + /* Reset all the timers, in case a previous test left a timer running. */ + for (i = 0; i < ARRAY_SIZE(timer_block); i++) { + for (j = 0; j < ARRAY_SIZE(timer); j++) { + writel(timer_block[i].base_addr + timer[j].tcsr_offset, + CRST | TCSR_DEFAULT); + } + writel(timer_block[i].base_addr + TISR, -1); + } +} + +/* Verifies the reset state of a timer. */ +static void test_reset(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_ticr(td), ==, 0); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_cmphex(tim_read(td, WTCR), ==, WTCLK(1)); +} + +/* Verifies that CRST wins if both CEN and CRST are set. */ +static void test_reset_overrides_enable(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + /* CRST should force CEN to 0 */ + tim_write_tcsr(td, CEN | CRST | TCSR_DEFAULT); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + g_assert_cmphex(tim_read(td, TISR), ==, 0); +} + +/* Verifies the behavior when CEN is set and then cleared. */ +static void test_oneshot_enable_then_disable(gconstpointer test_data) +{ + const TestData *td = test_data; + + tim_reset(td); + + /* Enable the timer with zero initial count, then disable it again. */ + tim_write_tcsr(td, CEN | TCSR_DEFAULT); + tim_write_tcsr(td, TCSR_DEFAULT); + + g_assert_cmphex(tim_read_tcsr(td), ==, TCSR_DEFAULT); + g_assert_cmphex(tim_read_tdr(td), ==, 0); + /* Timer interrupt flag should be set, but interrupts are not enabled. */ + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with prescaler 5. */ +static void test_oneshot_ps5(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 5; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Clear the interrupt flag. */ + tim_write(td, TISR, tim_timer_bit(td)); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Verify that this isn't a periodic timer. */ + clock_step(2 * tim_calculate_step(count, ps)); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with prescaler 0. */ +static void test_oneshot_ps0(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 1; + unsigned int ps = 0; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a one-shot timer fires when expected with highest prescaler. */ +static void test_oneshot_ps255(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = (1U << 24) - 1; + unsigned int ps = 255; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | PRESCALE(ps)); + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + + clock_step(tim_calculate_step(count, ps) - 1); + + g_assert_cmphex(tim_read_tcsr(td), ==, CEN | CACT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + clock_step(1); + + g_assert_cmphex(tim_read_tcsr(td), ==, PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that a oneshot timer fires an interrupt when expected. */ +static void test_oneshot_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 7; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* + * Verifies that the timer can be paused and later resumed, and it still fires + * at the right moment. + */ +static void test_pause_resume(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 1; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + + /* Pause the timer halfway to expiration. */ + clock_step(tim_calculate_step(count / 2, ps)); + tim_write_tcsr(td, IE | MODE_ONESHOT | PRESCALE(ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + + /* Counter should not advance during the following step. */ + clock_step(2 * tim_calculate_step(count, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Resume the timer and run _almost_ to expiration. */ + tim_write_tcsr(td, IE | CEN | MODE_ONESHOT | PRESCALE(ps)); + clock_step(tim_calculate_step(count / 2, ps) - 1); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + /* Now, run the rest of the way and verify that the interrupt fires. */ + clock_step(1); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); +} + +/* Verifies that the prescaler can be changed while the timer is runnin. */ +static void test_prescaler_change(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 256; + unsigned int ps = 5; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + + /* Run a quarter of the way, and change the prescaler. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, 3 * count / 4); + ps = 2; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, 3 * count / 4); + + /* Run another quarter of the way, and change the prescaler again. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + ps = 8; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, count / 2); + + /* Run another quarter of the way, and change the prescaler again. */ + clock_step(tim_calculate_step(count / 4, ps)); + g_assert_cmpuint(tim_read_tdr(td), ==, count / 4); + ps = 0; + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + /* The counter must not change. */ + g_assert_cmpuint(tim_read_tdr(td), ==, count / 4); + + /* Run almost to expiration, and verify the timer didn't fire yet. */ + clock_step(tim_calculate_step(count / 4, ps) - 1); + g_assert_cmpuint(tim_read_tdr(td), <, count); + g_assert_cmphex(tim_read(td, TISR), ==, 0); + + /* Now, run the rest of the way and verify that the timer fires. */ + clock_step(1); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); +} + +/* Verifies that a periodic timer automatically restarts after expiration. */ +static void test_periodic_no_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 2; + unsigned int ps = 3; + int i; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_PERIODIC | PRESCALE(ps)); + + for (i = 0; i < 4; i++) { + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + tim_write(td, TISR, tim_timer_bit(td)); + + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + } +} + +/* Verifies that a periodict timer fires an interrupt every time it expires. */ +static void test_periodic_interrupt(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 65535; + unsigned int ps = 2; + int i; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | IE | MODE_PERIODIC | PRESCALE(ps)); + + for (i = 0; i < 4; i++) { + clock_step_next(); + + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); + g_assert_true(qtest_get_irq(global_qtest, tim_timer_irq(td))); + + tim_write(td, TISR, tim_timer_bit(td)); + + g_assert_cmphex(tim_read(td, TISR), ==, 0); + g_assert_false(qtest_get_irq(global_qtest, tim_timer_irq(td))); + } +} + +/* + * Verifies that the timer behaves correctly when disabled right before and + * exactly when it's supposed to expire. + */ +static void test_disable_on_expiration(gconstpointer test_data) +{ + const TestData *td = test_data; + unsigned int count = 8; + unsigned int ps = 255; + + tim_reset(td); + + tim_write_ticr(td, count); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + + clock_step(tim_calculate_step(count, ps) - 1); + + tim_write_tcsr(td, MODE_ONESHOT | PRESCALE(ps)); + tim_write_tcsr(td, CEN | MODE_ONESHOT | PRESCALE(ps)); + clock_step(1); + tim_write_tcsr(td, MODE_ONESHOT | PRESCALE(ps)); + g_assert_cmphex(tim_read(td, TISR), ==, tim_timer_bit(td)); +} + +/* + * Constructs a name that includes the timer block, timer and testcase name, + * and adds the test to the test suite. + */ +static void tim_add_test(const char *name, const TestData *td, GTestDataFunc fn) +{ + g_autofree char *full_name; + + full_name = g_strdup_printf("npcm7xx_timer/tim[%d]/timer[%d]/%s", + tim_index(td->tim), timer_index(td->timer), + name); + qtest_add_data_func(full_name, td, fn); +} + +/* Convenience macro for adding a test with a predictable function name. */ +#define add_test(name, td) tim_add_test(#name, td, test_##name) + +int main(int argc, char **argv) +{ + TestData testdata[ARRAY_SIZE(timer_block) * ARRAY_SIZE(timer)]; + int ret; + int i, j; + + g_test_init(&argc, &argv, NULL); + g_test_set_nonfatal_assertions(); + + for (i = 0; i < ARRAY_SIZE(timer_block); i++) { + for (j = 0; j < ARRAY_SIZE(timer); j++) { + TestData *td = &testdata[i * ARRAY_SIZE(timer) + j]; + td->tim = &timer_block[i]; + td->timer = &timer[j]; + + add_test(reset, td); + add_test(reset_overrides_enable, td); + add_test(oneshot_enable_then_disable, td); + add_test(oneshot_ps5, td); + add_test(oneshot_ps0, td); + add_test(oneshot_ps255, td); + add_test(oneshot_interrupt, td); + add_test(pause_resume, td); + add_test(prescaler_change, td); + add_test(periodic_no_interrupt, td); + add_test(periodic_interrupt, td); + add_test(disable_on_expiration, td); + } + } + + qtest_start("-machine npcm750-evb"); + qtest_irq_intercept_in(global_qtest, "/machine/soc/a9mpcore/gic"); + ret = g_test_run(); + qtest_end(); + + return ret; +} -- cgit v1.1 From a0c0c9f8b4093bf1564d705d8977b6ba46cd2f5a Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 15 Oct 2020 11:51:47 +0200 Subject: loads-stores.rst: add footnote that clarifies GETPC usage Current documentation is not too clear on the GETPC usage. In particular, when used outside the top level helper function it causes unexpected behavior. Signed-off-by: Emanuele Giuseppe Esposito Message-id: 20201015095147.1691-1-e.emanuelegiuseppe@gmail.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- docs/devel/loads-stores.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index 9a944ef..59c1225 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -93,7 +93,13 @@ guest CPU state in case of a guest CPU exception. This is passed to ``cpu_restore_state()``. Therefore the value should either be 0, to indicate that the guest CPU state is already synchronized, or the result of ``GETPC()`` from the top level ``HELPER(foo)`` -function, which is a return address into the generated code. +function, which is a return address into the generated code [#gpc]_. + +.. [#gpc] Note that ``GETPC()`` should be used with great care: calling + it in other functions that are *not* the top level + ``HELPER(foo)`` will cause unexpected behavior. Instead, the + value of ``GETPC()`` should be read from the helper and passed + if needed to the functions that the helper calls. Function names follow the pattern: -- cgit v1.1 From b68a92f4cb16115025f41bc59e1b2f182a610370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 17 Oct 2020 20:07:30 +0200 Subject: hw/intc/bcm2835_ic: Trace GPU/CPU IRQ handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add trace events for GPU and CPU IRQs. Reviewed-by: Luc Michel Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201017180731.1165871-2-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/intc/bcm2835_ic.c | 4 +++- hw/intc/trace-events | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/intc/bcm2835_ic.c b/hw/intc/bcm2835_ic.c index 53ab8f5..9000d99 100644 --- a/hw/intc/bcm2835_ic.c +++ b/hw/intc/bcm2835_ic.c @@ -18,6 +18,7 @@ #include "migration/vmstate.h" #include "qemu/log.h" #include "qemu/module.h" +#include "trace.h" #define GPU_IRQS 64 #define ARM_IRQS 8 @@ -51,7 +52,6 @@ static void bcm2835_ic_update(BCM2835ICState *s) set = (s->gpu_irq_level & s->gpu_irq_enable) || (s->arm_irq_level & s->arm_irq_enable); qemu_set_irq(s->irq, set); - } static void bcm2835_ic_set_gpu_irq(void *opaque, int irq, int level) @@ -59,6 +59,7 @@ static void bcm2835_ic_set_gpu_irq(void *opaque, int irq, int level) BCM2835ICState *s = opaque; assert(irq >= 0 && irq < 64); + trace_bcm2835_ic_set_gpu_irq(irq, level); s->gpu_irq_level = deposit64(s->gpu_irq_level, irq, 1, level != 0); bcm2835_ic_update(s); } @@ -68,6 +69,7 @@ static void bcm2835_ic_set_arm_irq(void *opaque, int irq, int level) BCM2835ICState *s = opaque; assert(irq >= 0 && irq < 8); + trace_bcm2835_ic_set_cpu_irq(irq, level); s->arm_irq_level = deposit32(s->arm_irq_level, irq, 1, level != 0); bcm2835_ic_update(s); } diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 527c3f7..22782b3 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -199,3 +199,7 @@ nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg wri heathrow_write(uint64_t addr, unsigned int n, uint64_t value) "0x%"PRIx64" %u: 0x%"PRIx64 heathrow_read(uint64_t addr, unsigned int n, uint64_t value) "0x%"PRIx64" %u: 0x%"PRIx64 heathrow_set_irq(int num, int level) "set_irq: num=0x%02x level=%d" + +# bcm2835_ic.c +bcm2835_ic_set_gpu_irq(int irq, int level) "GPU irq #%d level %d" +bcm2835_ic_set_cpu_irq(int irq, int level) "CPU irq #%d level %d" -- cgit v1.1 From e7534f29b1587527613fdce3e460ac4720e5d18b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 17 Oct 2020 20:07:31 +0200 Subject: hw/intc/bcm2836_control: Use IRQ definitions instead of magic numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IRQ values are defined few lines earlier, use them instead of the magic numbers. Signed-off-by: Philippe Mathieu-Daudé Message-id: 20201017180731.1165871-3-f4bug@amsat.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/intc/bcm2836_control.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/intc/bcm2836_control.c b/hw/intc/bcm2836_control.c index 53dba00..2ead76f 100644 --- a/hw/intc/bcm2836_control.c +++ b/hw/intc/bcm2836_control.c @@ -157,22 +157,22 @@ static void bcm2836_control_set_local_irq(void *opaque, int core, int local_irq, static void bcm2836_control_set_local_irq0(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 0, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTPSIRQ, level); } static void bcm2836_control_set_local_irq1(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 1, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTPNSIRQ, level); } static void bcm2836_control_set_local_irq2(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 2, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTHPIRQ, level); } static void bcm2836_control_set_local_irq3(void *opaque, int core, int level) { - bcm2836_control_set_local_irq(opaque, core, 3, level); + bcm2836_control_set_local_irq(opaque, core, IRQ_CNTVIRQ, level); } static void bcm2836_control_set_gpu_irq(void *opaque, int irq, int level) -- cgit v1.1 From 4aedfc0f633fd06dd2a5dc8ffa93f4c56117e37f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 8 Oct 2020 11:21:53 -0500 Subject: target/arm: Remove redundant mmu_idx lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have the full ARMMMUIdx as computed from the function parameter. For the purpose of regime_has_2_ranges, we can ignore any difference between AccType_Normal and AccType_Unpriv, which would be the only difference between the passed mmu_idx and arm_mmu_idx_el. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Vincenzo Frascino Tested-by: Vincenzo Frascino Message-id: 20201008162155.161886-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 5615c67..734cc5c 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -563,8 +563,7 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, case 2: /* Tag check fail causes asynchronous flag set. */ - mmu_idx = arm_mmu_idx_el(env, el); - if (regime_has_2_ranges(mmu_idx)) { + if (regime_has_2_ranges(arm_mmu_idx)) { select = extract64(dirty_ptr, 55, 1); } else { select = 0; -- cgit v1.1 From 50244cc76abcac3296cff3d84826f5ff71808c80 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 8 Oct 2020 11:21:54 -0500 Subject: target/arm: Fix reported EL for mte_check_fail The reporting in AArch64.TagCheckFail only depends on PSTATE.EL, and not the AccType of the operation. There are two guest visible problems that affect LDTR and STTR because of this: (1) Selecting TCF0 vs TCF1 to decide on reporting, (2) Report "data abort same el" not "data abort lower el". Reported-by: Vincenzo Frascino Signed-off-by: Richard Henderson Reviewed-by: Vincenzo Frascino Tested-by: Vincenzo Frascino Message-id: 20201008162155.161886-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 734cc5c..153bd1e 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -525,14 +525,10 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, reg_el = regime_el(env, arm_mmu_idx); sctlr = env->cp15.sctlr_el[reg_el]; - switch (arm_mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - el = 0; + el = arm_current_el(env); + if (el == 0) { tcf = extract64(sctlr, 38, 2); - break; - default: - el = reg_el; + } else { tcf = extract64(sctlr, 40, 2); } -- cgit v1.1 From 4301acd7d7d455792ea873ced75c0b5d653618b1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 8 Oct 2020 11:21:55 -0500 Subject: target/arm: Ignore HCR_EL2.ATA when {E2H,TGE} != 11 Unlike many other bits in HCR_EL2, the description for this bit does not contain the phrase "if ... this field behaves as 0 for all purposes other than", so do not squash the bit in arm_hcr_el2_eff. Instead, replicate the E2H+TGE test in the two places that require it. Reported-by: Vincenzo Frascino Signed-off-by: Richard Henderson Reviewed-by: Vincenzo Frascino Tested-by: Vincenzo Frascino Message-id: 20201008162155.161886-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 9 +++++---- target/arm/internals.h | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index f49b045..97bb6b8 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6906,10 +6906,11 @@ static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, { int el = arm_current_el(env); - if (el < 2 && - arm_feature(env, ARM_FEATURE_EL2) && - !(arm_hcr_el2_eff(env) & HCR_ATA)) { - return CP_ACCESS_TRAP_EL2; + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + uint64_t hcr = arm_hcr_el2_eff(env); + if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { + return CP_ACCESS_TRAP_EL2; + } } if (el < 3 && arm_feature(env, ARM_FEATURE_EL3) && diff --git a/target/arm/internals.h b/target/arm/internals.h index ae99725..5460678 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1252,10 +1252,11 @@ static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, && !(env->cp15.scr_el3 & SCR_ATA)) { return false; } - if (el < 2 - && arm_feature(env, ARM_FEATURE_EL2) - && !(arm_hcr_el2_eff(env) & HCR_ATA)) { - return false; + if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + uint64_t hcr = arm_hcr_el2_eff(env); + if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { + return false; + } } sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); return sctlr != 0; -- cgit v1.1 From 3cd27b58dd2045580689bdece677fa14e12c324d Mon Sep 17 00:00:00 2001 From: Peng Liang Date: Mon, 19 Oct 2020 17:34:01 +0800 Subject: microbit_i2c: Fix coredump when dump-vmstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMStateDescription.fields should be end with VMSTATE_END_OF_LIST(). However, microbit_i2c_vmstate doesn't follow it. Let's change it. Fixes: 9d68bf564e ("arm: Stub out NRF51 TWI magnetometer/accelerometer detection") Reported-by: Euler Robot Signed-off-by: Peng Liang Reviewed-by: Philippe Mathieu-Daudé Message-id: 20201019093401.2993833-1-liangpeng10@huawei.com Signed-off-by: Peter Maydell --- hw/i2c/microbit_i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/i2c/microbit_i2c.c b/hw/i2c/microbit_i2c.c index 8024739..e92f9f8 100644 --- a/hw/i2c/microbit_i2c.c +++ b/hw/i2c/microbit_i2c.c @@ -83,6 +83,7 @@ static const VMStateDescription microbit_i2c_vmstate = { .fields = (VMStateField[]) { VMSTATE_UINT32_ARRAY(regs, MicrobitI2CState, MICROBIT_I2C_NREGS), VMSTATE_UINT32(read_idx, MicrobitI2CState), + VMSTATE_END_OF_LIST() }, }; -- cgit v1.1 From b3267ff675dd410b4c2a569e209cb7d468cf1873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Oct 2020 11:51:48 +0200 Subject: hw/arm/nseries: Fix loading kernel image on n8x0 machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7998beb9c2e removed the ram_size initialization in the arm_boot_info structure, however it is used by arm_load_kernel(). Initialize the field to fix: $ qemu-system-arm -M n800 -append 'console=ttyS1' \ -kernel meego-arm-n8x0-1.0.80.20100712.1431-vmlinuz-2.6.35~rc4-129.1-n8x0 qemu-system-arm: kernel 'meego-arm-n8x0-1.0.80.20100712.1431-vmlinuz-2.6.35~rc4-129.1-n8x0' is too large to fit in RAM (kernel size 1964608, RAM size 0) Noticed while running the test introduced in commit 050a82f0c5b ("tests/acceptance: Add a test for the N800 and N810 arm machines"). Fixes: 7998beb9c2e ("arm/nseries: use memdev for RAM") Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Tested-by: Thomas Huth Message-id: 20201019095148.1602119-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/arm/nseries.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index e48092c..76fd7fe 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -1318,6 +1318,7 @@ static void n8x0_init(MachineState *machine, g_free(sz); exit(EXIT_FAILURE); } + binfo->ram_size = machine->ram_size; memory_region_add_subregion(get_system_memory(), OMAP2_Q2_BASE, machine->ram); -- cgit v1.1 From 514101c0b931f0a11a40d29d26af1cc40482f951 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:52 +0100 Subject: decodetree: Fix codegen for non-overlapping group inside overlapping group For nested groups like: { [ pattern 1 pattern 2 ] pattern 3 } the intended behaviour is that patterns 1 and 2 must not overlap with each other; if the insn matches neither then we fall through to pattern 3 as the next thing in the outer overlapping group. Currently we generate incorrect code for this situation, because in the code path for a failed match inside the inner non-overlapping group we generate a "return" statement, which causes decode to stop entirely rather than continuing to the next thing in the outer group. Generate a "break" instead, so that decode flow behaves as required for this nested group case. Suggested-by: Richard Henderson Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-2-peter.maydell@linaro.org --- scripts/decodetree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 60fd3b5e..c1bf3cf 100644 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -548,7 +548,7 @@ class Tree: output(ind, ' /* ', str_match_bits(innerbits, innermask), ' */\n') s.output_code(i + 4, extracted, innerbits, innermask) - output(ind, ' return false;\n') + output(ind, ' break;\n') output(ind, '}\n') # end Tree -- cgit v1.1 From 5d2555a1fe7370feeb1efbbf276a653040910017 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:53 +0100 Subject: target/arm: Implement v8.1M NOCP handling From v8.1M, disabled-coprocessor handling changes slightly: * coprocessors 8, 9, 14 and 15 are also governed by the cp10 enable bit, like cp11 * an extra range of instruction patterns is considered to be inside the coprocessor space We previously marked these up with TODO comments; implement the correct behaviour. Unfortunately there is no ID register field which indicates this behaviour. We could in theory test an unrelated ID register which indicates guaranteed-to-be-in-v8.1M behaviour like ID_ISAR0.CmpBranch >= 3 (low-overhead-loops), but it seems better to simply define a new ARM_FEATURE_V8_1M feature flag and use it for this and other new-in-v8.1M behaviour that isn't identifiable from the ID registers. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201019151301.2046-3-peter.maydell@linaro.org --- target/arm/cpu.h | 1 + target/arm/m-nocp.decode | 10 ++++++---- target/arm/translate-vfp.c.inc | 17 +++++++++++++++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index cfff1b5..74392fa 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1985,6 +1985,7 @@ enum arm_features { ARM_FEATURE_VBAR, /* has cp15 VBAR */ ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ + ARM_FEATURE_V8_1M, /* M profile extras only in v8.1M and later */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target/arm/m-nocp.decode b/target/arm/m-nocp.decode index 7182d7d..28c8ac6 100644 --- a/target/arm/m-nocp.decode +++ b/target/arm/m-nocp.decode @@ -29,14 +29,16 @@ # If the coprocessor is not present or disabled then we will generate # the NOCP exception; otherwise we let the insn through to the main decode. +&nocp cp + { # Special cases which do not take an early NOCP: VLLDM and VLSTM VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 0000 0000 # TODO: VSCCLRM (new in v8.1M) is similar: #VSCCLRM 1110 1100 1-01 1111 ---- 1011 ---- ---0 - NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- - NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- - # TODO: From v8.1M onwards we will also want this range to NOCP - #NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- cp=10 + NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- &nocp + NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- &nocp + # From v8.1M onwards this range will also NOCP: + NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- &nocp cp=10 } diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc index 9b11b81..a7ed9bc 100644 --- a/target/arm/translate-vfp.c.inc +++ b/target/arm/translate-vfp.c.inc @@ -3459,7 +3459,7 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) return true; } -static bool trans_NOCP(DisasContext *s, arg_NOCP *a) +static bool trans_NOCP(DisasContext *s, arg_nocp *a) { /* * Handle M-profile early check for disabled coprocessor: @@ -3472,7 +3472,11 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a) if (a->cp == 11) { a->cp = 10; } - /* TODO: in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */ + if (arm_dc_feature(s, ARM_FEATURE_V8_1M) && + (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) { + /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */ + a->cp = 10; + } if (a->cp != 10) { gen_exception_insn(s, s->pc_curr, EXCP_NOCP, @@ -3489,6 +3493,15 @@ static bool trans_NOCP(DisasContext *s, arg_NOCP *a) return false; } +static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a) +{ + /* This range needs a coprocessor check for v8.1M and later only */ + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + return false; + } + return trans_NOCP(s, a); +} + static bool trans_VINS(DisasContext *s, arg_VINS *a) { TCGv_i32 rd, rm; -- cgit v1.1 From cc73bbded0dfb5612b0e416f7eda13a66950542a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:54 +0100 Subject: target/arm: Implement v8.1M conditional-select insns v8.1M brings four new insns to M-profile: * CSEL : Rd = cond ? Rn : Rm * CSINC : Rd = cond ? Rn : Rm+1 * CSINV : Rd = cond ? Rn : ~Rm * CSNEG : Rd = cond ? Rn : -Rm Implement these. Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-4-peter.maydell@linaro.org --- target/arm/t32.decode | 3 +++ target/arm/translate.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/target/arm/t32.decode b/target/arm/t32.decode index 7069d82..d8454bd 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -90,6 +90,9 @@ SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi } RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi +# v8.1M CSEL and friends +CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4 + # Data-processing (register-shifted register) MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \ diff --git a/target/arm/translate.c b/target/arm/translate.c index d8729e4..9f2201c 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8262,6 +8262,66 @@ static bool trans_IT(DisasContext *s, arg_IT *a) return true; } +/* v8.1M CSEL/CSINC/CSNEG/CSINV */ +static bool trans_CSEL(DisasContext *s, arg_CSEL *a) +{ + TCGv_i32 rn, rm, zero; + DisasCompare c; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { + return false; + } + + if (a->rm == 13) { + /* SEE "Related encodings" (MVE shifts) */ + return false; + } + + if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + + /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */ + if (a->rn == 15) { + rn = tcg_const_i32(0); + } else { + rn = load_reg(s, a->rn); + } + if (a->rm == 15) { + rm = tcg_const_i32(0); + } else { + rm = load_reg(s, a->rm); + } + + switch (a->op) { + case 0: /* CSEL */ + break; + case 1: /* CSINC */ + tcg_gen_addi_i32(rm, rm, 1); + break; + case 2: /* CSINV */ + tcg_gen_not_i32(rm, rm); + break; + case 3: /* CSNEG */ + tcg_gen_neg_i32(rm, rm); + break; + default: + g_assert_not_reached(); + } + + arm_test_cc(&c, a->fcond); + zero = tcg_const_i32(0); + tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm); + arm_free_cc(&c); + tcg_temp_free_i32(zero); + + store_reg(s, a->rd, rn); + tcg_temp_free_i32(rm); + + return true; +} + /* * Legacy decoder. */ -- cgit v1.1 From 45f11876ae86128bdee27e0b089045de43cc88e4 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:55 +0100 Subject: target/arm: Make the t32 insn[25:23]=111 group non-overlapping The t32 decode has a group which represents a set of insns which overlap with B_cond_thumb because they have [25:23]=111 (which is an invalid condition code field for the branch insn). This group is currently defined using the {} overlap-OK syntax, but it is almost entirely non-overlapping patterns. Switch it over to use a non-overlapping group. For this to be valid syntactically, CPS must move into the same overlapping-group as the hint insns (CPS vs hints was the only actual use of the overlap facility for the group). The non-overlapping subgroup for CLREX/DSB/DMB/ISB/SB is no longer necessary and so we can remove it (promoting those insns to be members of the parent group). Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-5-peter.maydell@linaro.org --- target/arm/t32.decode | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/target/arm/t32.decode b/target/arm/t32.decode index d8454bd..7d5e000 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -296,8 +296,8 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm { # Group insn[25:23] = 111, which is cond=111x for the branch below, # or unconditional, which would be illegal for the branch. - { - # Hints + [ + # Hints, and CPS { YIELD 1111 0011 1010 1111 1000 0000 0000 0001 WFE 1111 0011 1010 1111 1000 0000 0000 0010 @@ -310,20 +310,18 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm # The canonical nop ends in 0000 0000, but the whole rest # of the space is "reserved hint, behaves as nop". NOP 1111 0011 1010 1111 1000 0000 ---- ---- - } - # If imod == '00' && M == '0' then SEE "Hint instructions", above. - CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \ + # If imod == '00' && M == '0' then SEE "Hint instructions", above. + CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \ &cps + } # Miscellaneous control - [ - CLREX 1111 0011 1011 1111 1000 1111 0010 1111 - DSB 1111 0011 1011 1111 1000 1111 0100 ---- - DMB 1111 0011 1011 1111 1000 1111 0101 ---- - ISB 1111 0011 1011 1111 1000 1111 0110 ---- - SB 1111 0011 1011 1111 1000 1111 0111 0000 - ] + CLREX 1111 0011 1011 1111 1000 1111 0010 1111 + DSB 1111 0011 1011 1111 1000 1111 0100 ---- + DMB 1111 0011 1011 1111 1000 1111 0101 ---- + ISB 1111 0011 1011 1111 1000 1111 0110 ---- + SB 1111 0011 1011 1111 1000 1111 0111 0000 # Note that the v7m insn overlaps both the normal and banked insn. { @@ -351,7 +349,7 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm HVC 1111 0111 1110 .... 1000 .... .... .... \ &i imm=%imm16_16_0 UDF 1111 0111 1111 ---- 1010 ---- ---- ---- - } + ] B_cond_thumb 1111 0. cond:4 ...... 10.0 ............ &ci imm=%imm21 } -- cgit v1.1 From 920f04fa3ea789f8f85a52cee5395b8887b56cf7 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:56 +0100 Subject: target/arm: Don't allow BLX imm for M-profile The BLX immediate insn in the Thumb encoding always performs a switch from Thumb to Arm state. This would be totally useless in M-profile which has no Arm decoder, and so the instruction does not exist at all there. Make the encoding UNDEF for M-profile. (This part of the encoding space is used for the branch-future and low-overhead-loop insns in v8.1M.) Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-6-peter.maydell@linaro.org --- target/arm/translate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/arm/translate.c b/target/arm/translate.c index 9f2201c..dc3a403 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7918,6 +7918,14 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a) { TCGv_i32 tmp; + /* + * BLX would be useless on M-profile; the encoding space + * is used for other insns from v8.1M onward, and UNDEFs before that. + */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + return false; + } + /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */ if (s->thumb && (a->imm & 2)) { return false; -- cgit v1.1 From 05903f036edba8e3ed940cc215b8e27fb49265b9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:57 +0100 Subject: target/arm: Implement v8.1M branch-future insns (as NOPs) v8.1M implements a new 'branch future' feature, which is a set of instructions that request the CPU to perform a branch "in the future", when it reaches a particular execution address. In hardware, the expected implementation is that the information about the branch location and destination is cached and then acted upon when execution reaches the specified address. However the architecture permits an implementation to discard this cached information at any point, and so guest code must always include a normal branch insn at the branch point as a fallback. In particular, an implementation is specifically permitted to treat all BF insns as NOPs (which is equivalent to discarding the cached information immediately). For QEMU, implementing this caching of branch information would be complicated and would not improve the speed of execution at all, so we make the IMPDEF choice to implement all BF insns as NOPs. Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-7-peter.maydell@linaro.org --- target/arm/cpu.h | 6 ++++++ target/arm/t32.decode | 13 ++++++++++++- target/arm/translate.c | 20 ++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 74392fa..a432f30 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3473,6 +3473,12 @@ static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; } +static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) +{ + /* (M-profile) low-overhead loops and branch future */ + return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; +} + static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; diff --git a/target/arm/t32.decode b/target/arm/t32.decode index 7d5e000..3015731 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -648,4 +648,15 @@ MRC 1110 1110 ... 1 .... .... .... ... 1 .... @mcr B 1111 0. .......... 10.1 ............ @branch24 BL 1111 0. .......... 11.1 ............ @branch24 -BLX_i 1111 0. .......... 11.0 ............ @branch24 +{ + # BLX_i is non-M-profile only + BLX_i 1111 0. .......... 11.0 ............ @branch24 + # M-profile only: loop and branch insns + [ + # All these BF insns have boff != 0b0000; we NOP them all + BF 1111 0 boff:4 ------- 1100 - ---------- 1 # BFL + BF 1111 0 boff:4 0 ------ 1110 - ---------- 1 # BFCSEL + BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF + BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX + ] +} diff --git a/target/arm/translate.c b/target/arm/translate.c index dc3a403..a5ebe56 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7971,6 +7971,26 @@ static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a) return true; } +static bool trans_BF(DisasContext *s, arg_BF *a) +{ + /* + * M-profile branch future insns. The architecture permits an + * implementation to implement these as NOPs (equivalent to + * discarding the LO_BRANCH_INFO cache immediately), and we + * take that IMPDEF option because for QEMU a "real" implementation + * would be complicated and wouldn't execute any faster. + */ + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->boff == 0) { + /* SEE "Related encodings" (loop insns) */ + return false; + } + /* Handle as NOP */ + return true; +} + static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) { TCGv_i32 addr, tmp; -- cgit v1.1 From b7226369721896ab9ef71544e4fe95b40710e05a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:58 +0100 Subject: target/arm: Implement v8.1M low-overhead-loop instructions v8.1M's "low-overhead-loop" extension has three instructions for looping: * DLS (start of a do-loop) * WLS (start of a while-loop) * LE (end of a loop) The loop-start instructions are both simple operations to start a loop whose iteration count (if any) is in LR. The loop-end instruction handles "decrement iteration count and jump back to loop start"; it also caches the information about the branch back to the start of the loop to improve performance of the branch on subsequent iterations. As with the branch-future instructions, the architecture permits an implementation to discard the LO_BRANCH_INFO cache at any time, and QEMU takes the IMPDEF option to never set it in the first place (equivalent to discarding it immediately), because for us a "real" implementation would be unnecessary complexity. (This implementation only provides the simple looping constructs; the vector extension MVE (Helium) adds some extra variants to handle looping across vectors. We'll add those later when we implement MVE.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201019151301.2046-8-peter.maydell@linaro.org --- target/arm/t32.decode | 8 +++++ target/arm/translate.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/target/arm/t32.decode b/target/arm/t32.decode index 3015731..8152739 100644 --- a/target/arm/t32.decode +++ b/target/arm/t32.decode @@ -659,4 +659,12 @@ BL 1111 0. .......... 11.1 ............ @branch24 BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX ] + [ + # LE and WLS immediate + %lob_imm 1:10 11:1 !function=times_2 + + DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 + WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm + LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm + ] } diff --git a/target/arm/translate.c b/target/arm/translate.c index a5ebe56..38371db 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -2490,17 +2490,23 @@ static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) s->base.is_jmp = DISAS_NORETURN; } -static inline void gen_jmp (DisasContext *s, uint32_t dest) +/* Jump, specifying which TB number to use if we gen_goto_tb() */ +static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) { if (unlikely(is_singlestepping(s))) { /* An indirect jump so that we still trigger the debug exception. */ gen_set_pc_im(s, dest); s->base.is_jmp = DISAS_JUMP; } else { - gen_goto_tb(s, 0, dest); + gen_goto_tb(s, tbno, dest); } } +static inline void gen_jmp(DisasContext *s, uint32_t dest) +{ + gen_jmp_tb(s, dest, 0); +} + static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y) { if (x) @@ -7991,6 +7997,89 @@ static bool trans_BF(DisasContext *s, arg_BF *a) return true; } +static bool trans_DLS(DisasContext *s, arg_DLS *a) +{ + /* M-profile low-overhead loop start */ + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->rn == 13 || a->rn == 15) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + + /* Not a while loop, no tail predication: just set LR to the count */ + tmp = load_reg(s, a->rn); + store_reg(s, 14, tmp); + return true; +} + +static bool trans_WLS(DisasContext *s, arg_WLS *a) +{ + /* M-profile low-overhead while-loop start */ + TCGv_i32 tmp; + TCGLabel *nextlabel; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + if (a->rn == 13 || a->rn == 15) { + /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ + return false; + } + if (s->condexec_mask) { + /* + * WLS in an IT block is CONSTRAINED UNPREDICTABLE; + * we choose to UNDEF, because otherwise our use of + * gen_goto_tb(1) would clash with the use of TB exit 1 + * in the dc->condjmp condition-failed codepath in + * arm_tr_tb_stop() and we'd get an assertion. + */ + return false; + } + nextlabel = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel); + tmp = load_reg(s, a->rn); + store_reg(s, 14, tmp); + gen_jmp_tb(s, s->base.pc_next, 1); + + gen_set_label(nextlabel); + gen_jmp(s, read_pc(s) + a->imm); + return true; +} + +static bool trans_LE(DisasContext *s, arg_LE *a) +{ + /* + * M-profile low-overhead loop end. The architecture permits an + * implementation to discard the LO_BRANCH_INFO cache at any time, + * and we take the IMPDEF option to never set it in the first place + * (equivalent to always discarding it immediately), because for QEMU + * a "real" implementation would be complicated and wouldn't execute + * any faster. + */ + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_lob, s)) { + return false; + } + + if (!a->f) { + /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */ + arm_gen_condlabel(s); + tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel); + /* Decrement LR */ + tmp = load_reg(s, 14); + tcg_gen_addi_i32(tmp, tmp, -1); + store_reg(s, 14, tmp); + } + /* Jump back to the loop start */ + gen_jmp(s, read_pc(s) - a->imm); + return true; +} + static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) { TCGv_i32 addr, tmp; -- cgit v1.1 From 532a3af5fbd348bca371b4a56b45f8f97c7c5519 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:12:59 +0100 Subject: target/arm: Fix has_vfp/has_neon ID reg squashing for M-profile In arm_cpu_realizefn(), if the CPU has VFP or Neon disabled then we squash the ID register fields so that we don't advertise it to the guest. This code was written for A-profile and needs some tweaks to work correctly on M-profile: * A-profile only fields should not be zeroed on M-profile: - MVFR0.FPSHVEC,FPTRAP - MVFR1.SIMDLS,SIMDINT,SIMDSP,SIMDHP - MVFR2.SIMDMISC * M-profile only fields should be zeroed on M-profile: - MVFR1.FP16 In particular, because MVFR1.SIMDHP on A-profile is the same field as MVFR1.FP16 on M-profile this code was incorrectly disabling FP16 support on an M-profile CPU (where has_neon is always false). This isn't a visible bug yet because we don't have any M-profile CPUs with FP16 support, but the change is necessary before we introduce any. Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20201019151301.2046-9-peter.maydell@linaro.org --- target/arm/cpu.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 0563198..186ee62 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1429,17 +1429,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, FPSP, 0); u = FIELD_DP32(u, MVFR0, FPDP, 0); - u = FIELD_DP32(u, MVFR0, FPTRAP, 0); u = FIELD_DP32(u, MVFR0, FPDIVIDE, 0); u = FIELD_DP32(u, MVFR0, FPSQRT, 0); - u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); u = FIELD_DP32(u, MVFR0, FPROUND, 0); + if (!arm_feature(env, ARM_FEATURE_M)) { + u = FIELD_DP32(u, MVFR0, FPTRAP, 0); + u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); + } cpu->isar.mvfr0 = u; u = cpu->isar.mvfr1; u = FIELD_DP32(u, MVFR1, FPFTZ, 0); u = FIELD_DP32(u, MVFR1, FPDNAN, 0); u = FIELD_DP32(u, MVFR1, FPHP, 0); + if (arm_feature(env, ARM_FEATURE_M)) { + u = FIELD_DP32(u, MVFR1, FP16, 0); + } cpu->isar.mvfr1 = u; u = cpu->isar.mvfr2; @@ -1475,16 +1480,18 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = FIELD_DP32(u, ID_ISAR6, FHM, 0); cpu->isar.id_isar6 = u; - u = cpu->isar.mvfr1; - u = FIELD_DP32(u, MVFR1, SIMDLS, 0); - u = FIELD_DP32(u, MVFR1, SIMDINT, 0); - u = FIELD_DP32(u, MVFR1, SIMDSP, 0); - u = FIELD_DP32(u, MVFR1, SIMDHP, 0); - cpu->isar.mvfr1 = u; - - u = cpu->isar.mvfr2; - u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); - cpu->isar.mvfr2 = u; + if (!arm_feature(env, ARM_FEATURE_M)) { + u = cpu->isar.mvfr1; + u = FIELD_DP32(u, MVFR1, SIMDLS, 0); + u = FIELD_DP32(u, MVFR1, SIMDINT, 0); + u = FIELD_DP32(u, MVFR1, SIMDSP, 0); + u = FIELD_DP32(u, MVFR1, SIMDHP, 0); + cpu->isar.mvfr1 = u; + + u = cpu->isar.mvfr2; + u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); + cpu->isar.mvfr2 = u; + } } if (!cpu->has_neon && !cpu->has_vfp) { -- cgit v1.1 From d31e2ce68d56f5bcc83831497e5fe4b8a7e18e85 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:13:00 +0100 Subject: target/arm: Allow M-profile CPUs with FP16 to set FPSCR.FP16 M-profile CPUs with half-precision floating point support should be able to write to FPSCR.FZ16, but an M-profile specific masking of the value at the top of vfp_set_fpscr() currently prevents that. This is not yet an active bug because we have no M-profile FP16 CPUs, but needs to be fixed before we can add any. The bits that the masking is effectively preventing from being set are the A-profile only short-vector Len and Stride fields, plus the Neon QC bit. Rearrange the order of the function so that those fields are handled earlier and only under a suitable guard; this allows us to drop the M-profile specific masking, making FZ16 writeable. This change also makes the QC bit correctly RAZ/WI for older no-Neon A-profile cores. This refactoring also paves the way for the low-overhead-branch LTPSIZE field, which uses some of the bits that are used for A-profile Stride and Len. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201019151301.2046-10-peter.maydell@linaro.org --- target/arm/vfp_helper.c | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index abfdb6a..3648564 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -194,36 +194,45 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) val &= ~FPCR_FZ16; } - if (arm_feature(env, ARM_FEATURE_M)) { + vfp_set_fpscr_to_host(env, val); + + if (!arm_feature(env, ARM_FEATURE_M)) { /* - * M profile FPSCR is RES0 for the QC, STRIDE, FZ16, LEN bits - * and also for the trapped-exception-handling bits IxE. + * Short-vector length and stride; on M-profile these bits + * are used for different purposes. + * We can't make this conditional be "if MVFR0.FPShVec != 0", + * because in v7A no-short-vector-support cores still had to + * allow Stride/Len to be written with the only effect that + * some insns are required to UNDEF if the guest sets them. + * + * TODO: if M-profile MVE implemented, set LTPSIZE. */ - val &= 0xf7c0009f; + env->vfp.vec_len = extract32(val, 16, 3); + env->vfp.vec_stride = extract32(val, 20, 2); } - vfp_set_fpscr_to_host(env, val); + if (arm_feature(env, ARM_FEATURE_NEON)) { + /* + * The bit we set within fpscr_q is arbitrary; the register as a + * whole being zero/non-zero is what counts. + * TODO: M-profile MVE also has a QC bit. + */ + env->vfp.qc[0] = val & FPCR_QC; + env->vfp.qc[1] = 0; + env->vfp.qc[2] = 0; + env->vfp.qc[3] = 0; + } /* * We don't implement trapped exception handling, so the * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) * - * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC - * (which are stored in fp_status), and the other RES0 bits - * in between, then we clear all of the low 16 bits. + * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in + * fp_status; QC, Len and Stride are stored separately earlier. + * Clear out all of those and the RES0 bits: only NZCV, AHP, DN, + * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR]. */ env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000; - env->vfp.vec_len = (val >> 16) & 7; - env->vfp.vec_stride = (val >> 20) & 3; - - /* - * The bit we set within fpscr_q is arbitrary; the register as a - * whole being zero/non-zero is what counts. - */ - env->vfp.qc[0] = val & FPCR_QC; - env->vfp.qc[1] = 0; - env->vfp.qc[2] = 0; - env->vfp.qc[3] = 0; } void vfp_set_fpscr(CPUARMState *env, uint32_t val) -- cgit v1.1 From 8128c8e8cc9489a8387c74075974f86dc0222e7f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Oct 2020 16:13:01 +0100 Subject: target/arm: Implement FPSCR.LTPSIZE for M-profile LOB extension If the M-profile low-overhead-branch extension is implemented, FPSCR bits [18:16] are a new field LTPSIZE. If MVE is not implemented (currently always true for us) then this field always reads as 4 and ignores writes. These bits used to be the vector-length field for the old short-vector extension, so we need to take care that they are not misinterpreted as setting vec_len. We do this with a rearrangement of the vfp_set_fpscr() code that deals with vec_len, vec_stride and also the QC bit; this obviates the need for the M-profile only masking step that we used to have at the start of the function. We provide a new field in CPUState for LTPSIZE, even though this will always be 4, in preparation for MVE, so we don't have to come back later and split it out of the vfp.xregs[FPSCR] value. (This state struct field will be saved and restored as part of the FPSCR value via the vmstate_fpscr in machine.c.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20201019151301.2046-11-peter.maydell@linaro.org --- target/arm/cpu.c | 9 +++++++++ target/arm/cpu.h | 1 + target/arm/vfp_helper.c | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 186ee62..07492e9 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -255,6 +255,15 @@ static void arm_cpu_reset(DeviceState *dev) uint8_t *rom; uint32_t vecbase; + if (cpu_isar_feature(aa32_lob, cpu)) { + /* + * LTPSIZE is constant 4 if MVE not implemented, and resets + * to an UNKNOWN value if MVE is implemented. We choose to + * always reset to 4. + */ + env->v7m.ltpsize = 4; + } + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { env->v7m.secure = true; } else { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a432f30..49cd5ca 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -549,6 +549,7 @@ typedef struct CPUARMState { uint32_t fpdscr[M_REG_NUM_BANKS]; uint32_t cpacr[M_REG_NUM_BANKS]; uint32_t nsacr; + int ltpsize; } v7m; /* Information associated with an exception about to be taken: diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 3648564..01b9d85 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -174,6 +174,12 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) | (env->vfp.vec_len << 16) | (env->vfp.vec_stride << 20); + /* + * M-profile LTPSIZE overlaps A-profile Stride; whichever of the + * two is not applicable to this CPU will always be zero. + */ + fpscr |= env->v7m.ltpsize << 16; + fpscr |= vfp_get_fpscr_from_host(env); i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; -- cgit v1.1