From bd16430777cc3d25930e479fdbe290d92cec0888 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Mar 2014 19:36:13 +0000 Subject: ahci: fix sysbus support Non-PCI AHCI support is broken due to assertion failures when trying to convert AHCIState to a PCIDevice pointer as AHCIState can have different container structs. Fix this by using the non-asserting object cast and checking the returned pointer is not NULL. The AddressSpace pointer is also being initialized to NULL and causing dma_memory_map call to fail. Fix this by initializing to address_space_memory for sysbus instances. Also correct AHCI_VMSTATE to use the correct container SysbusAHCIState for sysbus instances. Signed-off-by: Rob Herring Message-id: 1392073373-3295-1-git-send-email-robherring2@gmail.com [PMM: added linebreaks to fix overlong lines] Signed-off-by: Peter Maydell --- hw/ide/ahci.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index fbea9e8..bfe633f 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -118,11 +118,12 @@ static uint32_t ahci_port_read(AHCIState *s, int port, int offset) static void ahci_irq_raise(AHCIState *s, AHCIDevice *dev) { AHCIPCIState *d = container_of(s, AHCIPCIState, ahci); - PCIDevice *pci_dev = PCI_DEVICE(d); + PCIDevice *pci_dev = + (PCIDevice *)object_dynamic_cast(OBJECT(d), TYPE_PCI_DEVICE); DPRINTF(0, "raise irq\n"); - if (msi_enabled(pci_dev)) { + if (pci_dev && msi_enabled(pci_dev)) { msi_notify(pci_dev, 0); } else { qemu_irq_raise(s->irq); @@ -132,10 +133,12 @@ static void ahci_irq_raise(AHCIState *s, AHCIDevice *dev) static void ahci_irq_lower(AHCIState *s, AHCIDevice *dev) { AHCIPCIState *d = container_of(s, AHCIPCIState, ahci); + PCIDevice *pci_dev = + (PCIDevice *)object_dynamic_cast(OBJECT(d), TYPE_PCI_DEVICE); DPRINTF(0, "lower irq\n"); - if (!msi_enabled(PCI_DEVICE(d))) { + if (!pci_dev || !msi_enabled(pci_dev)) { qemu_irq_lower(s->irq); } } @@ -1311,7 +1314,7 @@ static const VMStateDescription vmstate_sysbus_ahci = { .name = "sysbus-ahci", .unmigratable = 1, /* Still buggy under I/O load */ .fields = (VMStateField []) { - VMSTATE_AHCI(ahci, AHCIPCIState), + VMSTATE_AHCI(ahci, SysbusAHCIState), VMSTATE_END_OF_LIST() }, }; @@ -1328,7 +1331,7 @@ static void sysbus_ahci_realize(DeviceState *dev, Error **errp) SysBusDevice *sbd = SYS_BUS_DEVICE(dev); SysbusAHCIState *s = SYSBUS_AHCI(dev); - ahci_init(&s->ahci, dev, NULL, s->num_ports); + ahci_init(&s->ahci, dev, &address_space_memory, s->num_ports); sysbus_init_mmio(sbd, &s->ahci.mem); sysbus_init_irq(sbd, &s->ahci.irq); -- cgit v1.1 From 22709e90a270a36418f1b1d5d3277016eec1edc2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Mar 2014 13:18:39 -0500 Subject: pl011: reset the fifo when enabled or disabled Intermittent issues have been seen where no serial input occurs. It appears the pl011 gets in a state where the rx interrupt never fires because the rx interrupt only asserts when crossing the fifo trigger level. The fifo state appears to get out of sync when the pl011 is re-configured. This combined with the rx timeout interrupt not being modeled results in no more rx interrupts. Disabling the fifo is the recommended way to clear the tx fifo in the TRM (section 3.3.8). The behavior in this case for the rx fifo is undefined in the TRM, but having fifo contents to be maintained during configuration changes is not likely expected behavior. Reseting the fifo state when the fifo size is changed is the simplest solution. Signed-off-by: Rob Herring Reviewed-by: Peter Maydell Message-id: 1395166721-15716-2-git-send-email-robherring2@gmail.com Signed-off-by: Peter Maydell --- hw/char/pl011.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/char/pl011.c b/hw/char/pl011.c index a8ae6f4..8103e2e 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -162,6 +162,11 @@ static void pl011_write(void *opaque, hwaddr offset, s->fbrd = value; break; case 11: /* UARTLCR_H */ + /* Reset the FIFO state on FIFO enable or disable */ + if ((s->lcr ^ value) & 0x10) { + s->read_count = 0; + s->read_pos = 0; + } s->lcr = value; pl011_set_read_trigger(s); break; -- cgit v1.1 From ce8f0905a59232982c8a220169e11c14c73e7dea Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Mar 2014 13:18:40 -0500 Subject: pl011: fix UARTRSR accesses corrupting the UARTCR value Offset 4 is UARTRSR/UARTECR, not the UARTCR. The UARTCR would be corrupted if the UARTRSR is ever written. Fix by implementing a correct model of the UARTRSR/UARTECR register. Reads of this register simply reflect the error bits in data register. Only breaks can be triggered in QEMU. With the pl011_can_receive function, we effectively have flow control between the host and the model. Framing and parity errors simply don't make sense in the model and will never occur. Signed-off-by: Rob Herring Reviewed-by: Peter Maydell Message-id: 1395166721-15716-3-git-send-email-robherring2@gmail.com Signed-off-by: Peter Maydell --- hw/char/pl011.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hw/char/pl011.c b/hw/char/pl011.c index 8103e2e..11c3a75 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -20,6 +20,7 @@ typedef struct PL011State { uint32_t readbuff; uint32_t flags; uint32_t lcr; + uint32_t rsr; uint32_t cr; uint32_t dmacr; uint32_t int_enabled; @@ -81,13 +82,14 @@ static uint64_t pl011_read(void *opaque, hwaddr offset, } if (s->read_count == s->read_trigger - 1) s->int_level &= ~ PL011_INT_RX; + s->rsr = c >> 8; pl011_update(s); if (s->chr) { qemu_chr_accept_input(s->chr); } return c; - case 1: /* UARTCR */ - return 0; + case 1: /* UARTRSR */ + return s->rsr; case 6: /* UARTFR */ return s->flags; case 8: /* UARTILPR */ @@ -146,8 +148,8 @@ static void pl011_write(void *opaque, hwaddr offset, s->int_level |= PL011_INT_TX; pl011_update(s); break; - case 1: /* UARTCR */ - s->cr = value; + case 1: /* UARTRSR/UARTECR */ + s->rsr = 0; break; case 6: /* UARTFR */ /* Writes to Flag register are ignored. */ @@ -247,13 +249,14 @@ static const MemoryRegionOps pl011_ops = { static const VMStateDescription vmstate_pl011 = { .name = "pl011", - .version_id = 1, - .minimum_version_id = 1, - .minimum_version_id_old = 1, + .version_id = 2, + .minimum_version_id = 2, + .minimum_version_id_old = 2, .fields = (VMStateField[]) { VMSTATE_UINT32(readbuff, PL011State), VMSTATE_UINT32(flags, PL011State), VMSTATE_UINT32(lcr, PL011State), + VMSTATE_UINT32(rsr, PL011State), VMSTATE_UINT32(cr, PL011State), VMSTATE_UINT32(dmacr, PL011State), VMSTATE_UINT32(int_enabled, PL011State), -- cgit v1.1 From f72dbf3d2629be75d50b4c98816c360d82e8a848 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Mar 2014 13:18:41 -0500 Subject: pl011: fix incorrect logic to set the RXFF flag The receive fifo full bit should be set when 1 character is received and the fifo is disabled or when 16 characters are in the fifo. Signed-off-by: Rob Herring Reviewed-by: Peter Maydell Message-id: 1395166721-15716-4-git-send-email-robherring2@gmail.com Signed-off-by: Peter Maydell --- hw/char/pl011.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/char/pl011.c b/hw/char/pl011.c index 11c3a75..644aad7 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -221,7 +221,7 @@ static void pl011_put_fifo(void *opaque, uint32_t value) s->read_fifo[slot] = value; s->read_count++; s->flags &= ~PL011_FLAG_RXFE; - if (s->cr & 0x10 || s->read_count == 16) { + if (!(s->lcr & 0x10) || s->read_count == 16) { s->flags |= PL011_FLAG_RXFF; } if (s->read_count == s->read_trigger) { -- cgit v1.1 From 0a79bc87c3acf8364abf2d47b261fa898db15885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 18 Mar 2014 23:10:06 +0000 Subject: target-arm: A64: Add saturating int ops (SQNEG/SQABS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mostly re-uses the existing NEON helpers with an additional two for the 64 bit case. I also took the opportunity to add TCG_CALL_NO_RWG options to the helpers as they don't modify globals (saturation flags are in the CPU Environment). Signed-off-by: Alex Bennée Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- target-arm/helper.h | 14 +++++++------ target-arm/neon_helper.c | 22 ++++++++++++++++++++ target-arm/translate-a64.c | 51 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/target-arm/helper.h b/target-arm/helper.h index a3d6f32..b006fd5 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -375,12 +375,14 @@ DEF_HELPER_2(neon_mull_s16, i64, i32, i32) DEF_HELPER_1(neon_negl_u16, i64, i64) DEF_HELPER_1(neon_negl_u32, i64, i64) -DEF_HELPER_2(neon_qabs_s8, i32, env, i32) -DEF_HELPER_2(neon_qabs_s16, i32, env, i32) -DEF_HELPER_2(neon_qabs_s32, i32, env, i32) -DEF_HELPER_2(neon_qneg_s8, i32, env, i32) -DEF_HELPER_2(neon_qneg_s16, i32, env, i32) -DEF_HELPER_2(neon_qneg_s32, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index 13752ba..e23f224 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -1776,6 +1776,28 @@ uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x) return x; } +uint64_t HELPER(neon_qabs_s64)(CPUARMState *env, uint64_t x) +{ + if (x == SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else if ((int64_t)x < 0) { + x = -x; + } + return x; +} + +uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) +{ + if (x == SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else { + x = -x; + } + return x; +} + /* NEON Float helpers. */ uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) { diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index befffac..18659d7 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -73,6 +73,7 @@ typedef struct AArch64DecodeTable { } AArch64DecodeTable; /* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); @@ -6942,6 +6943,13 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, */ tcg_gen_not_i64(tcg_rd, tcg_rn); break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); + } else { + gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); + } + break; case 0xa: /* CMLT */ /* 64 bit integer comparison against zero, result is * test ? (2^64 - 1) : 0. We implement via setcond(!test) and @@ -7332,6 +7340,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_ptr tcg_fpstatus; switch (opcode) { + case 0x7: /* SQABS / SQNEG */ + break; case 0xa: /* CMLT */ if (u) { unallocated_encoding(s); @@ -7441,11 +7451,25 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_rd); tcg_temp_free_i64(tcg_rn); - } else if (size == 2) { - TCGv_i32 tcg_rn = read_fp_sreg(s, rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); TCGv_i32 tcg_rd = tcg_temp_new_i32(); + read_vec_element_i32(s, tcg_rn, rn, 0, size); + switch (opcode) { + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + }; + genfn = fns[size][u]; + genfn(tcg_rd, cpu_env, tcg_rn); + break; + } case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ case 0x1c: /* FCVTAS */ @@ -7475,8 +7499,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) write_fp_sreg(s, rd, tcg_rd); tcg_temp_free_i32(tcg_rd); tcg_temp_free_i32(tcg_rn); - } else { - g_assert_not_reached(); } if (is_fcvt) { @@ -9177,8 +9199,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - unsupported_encoding(s, insn); - return; + break; case 0xc ... 0xf: case 0x16 ... 0x1d: case 0x1f: @@ -9389,6 +9410,13 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_cls32(tcg_res, tcg_op); } break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); + } else { + gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); + } + break; case 0xb: /* ABS, NEG */ if (u) { tcg_gen_neg_i32(tcg_res, tcg_op); @@ -9463,6 +9491,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_neon_cnt_u8(tcg_res, tcg_op); } break; + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + }; + genfn = fns[size][u]; + genfn(tcg_res, cpu_env, tcg_op); + break; + } case 0x8: /* CMGT, CMGE */ case 0x9: /* CMEQ, CMLE */ case 0xa: /* CMLT */ -- cgit v1.1 From 09e037354b6f940c18f417f23355cffd23f4fde5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 18 Mar 2014 23:10:06 +0000 Subject: target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the saturating accumulate operations USQADD and SUQADD to the A64 instruction set. This completes coverage of A64 Neon. These operations (which are unsigned + signed -> signed and signed + unsigned -> unsigned) don't exist in the A32/T32 instruction set, so require a complete new set of helper functions. Signed-off-by: Alex Bennée Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- target-arm/helper.h | 20 ++++-- target-arm/neon_helper.c | 165 +++++++++++++++++++++++++++++++++++++++++++++ target-arm/translate-a64.c | 109 ++++++++++++++++++++++++++++-- 3 files changed, 284 insertions(+), 10 deletions(-) diff --git a/target-arm/helper.h b/target-arm/helper.h index b006fd5..366c1b3 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -186,12 +186,20 @@ DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) /* neon_helper.c */ -DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index e23f224..8d6f9a9 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -236,6 +236,171 @@ uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) return res; } +/* Unsigned saturating accumulate of signed value + * + * Op1/Rn is treated as signed + * Op2/Rd is treated as unsigned + * + * Explicit casting is used to ensure the correct sign extension of + * inputs. The result is treated as a unsigned value and saturated as such. + * + * We use a macro for the 8/16 bit cases which expects signed integers of va, + * vb, and vr for interim calculation and an unsigned 32 bit result value r. + */ + +#define USATACC(bits, shift) \ + do { \ + va = sextract32(a, shift, bits); \ + vb = extract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > UINT##bits##_MAX) { \ + SET_QC(); \ + vr = UINT##bits##_MAX; \ + } else if (vr < 0) { \ + SET_QC(); \ + vr = 0; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + USATACC(8, 0); + USATACC(8, 8); + USATACC(8, 16); + USATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint64_t r = 0; + + USATACC(16, 0); + USATACC(16, 16); + return r; +} + +#undef USATACC + +uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t va = (int32_t)a; + int64_t vb = (uint32_t)b; + int64_t vr = va + vb; + if (vr > UINT32_MAX) { + SET_QC(); + vr = UINT32_MAX; + } else if (vr < 0) { + SET_QC(); + vr = 0; + } + return vr; +} + +uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect + * +ve/-ve saturation + */ + if (~a & b & ~res & SIGNBIT64) { + SET_QC(); + res = UINT64_MAX; + } else if (a & ~b & res & SIGNBIT64) { + SET_QC(); + res = 0; + } + return res; +} + +/* Signed saturating accumulate of unsigned value + * + * Op1/Rn is treated as unsigned + * Op2/Rd is treated as signed + * + * The result is treated as a signed value and saturated as such + * + * We use a macro for the 8/16 bit cases which expects signed integers of va, + * vb, and vr for interim calculation and an unsigned 32 bit result value r. + */ + +#define SSATACC(bits, shift) \ + do { \ + va = extract32(a, shift, bits); \ + vb = sextract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > INT##bits##_MAX) { \ + SET_QC(); \ + vr = INT##bits##_MAX; \ + } else if (vr < INT##bits##_MIN) { \ + SET_QC(); \ + vr = INT##bits##_MIN; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + SSATACC(8, 0); + SSATACC(8, 8); + SSATACC(8, 16); + SSATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint32_t r = 0; + + SSATACC(16, 0); + SSATACC(16, 16); + + return r; +} + +#undef SSATACC + +uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t res; + int64_t op1 = (uint32_t)a; + int64_t op2 = (int32_t)b; + res = op1 + op2; + if (res > INT32_MAX) { + SET_QC(); + res = INT32_MAX; + } else if (res < INT32_MIN) { + SET_QC(); + res = INT32_MIN; + } + return res; +} + +uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect an overflow */ + if (((a & res) + | (~b & res) + | (a & ~b)) & SIGNBIT64) { + SET_QC(); + res = INT64_MAX; + } + return res; +} + + #define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 18659d7..9f06450 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -7321,6 +7321,101 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } } +/* Remaining saturating accumulating ops */ +static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, + bool is_q, int size, int rn, int rd) +{ + bool is_double = (size == 3); + + if (is_double) { + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_rn, rn, pass, MO_64); + read_vec_element(s, tcg_rd, rd, pass, MO_64); + + if (is_u) { /* USQADD */ + gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } else { /* SUQADD */ + gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } + write_vec_element(s, tcg_rd, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + if (is_scalar) { + read_vec_element_i32(s, tcg_rn, rn, pass, size); + read_vec_element_i32(s, tcg_rd, rd, pass, size); + } else { + read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); + read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (is_u) { /* USQADD */ + switch (size) { + case 0: + gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } else { /* SUQADD */ + switch (size) { + case 0: + gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } + + if (is_scalar) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + write_vec_element(s, tcg_zero, rd, 0, MO_64); + tcg_temp_free_i64(tcg_zero); + } + write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } +} + /* C3.6.12 AdvSIMD scalar two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +-----+---+-----------+------+-----------+--------+-----+------+------+ @@ -7340,6 +7435,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_ptr tcg_fpstatus; switch (opcode) { + case 0x3: /* USQADD / SUQADD*/ + handle_2misc_satacc(s, true, u, false, size, rn, rd); + return; case 0x7: /* SQABS / SQNEG */ break; case 0xa: /* CMLT */ @@ -7427,10 +7525,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } break; default: - /* Other categories of encoding in this class: - * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64 - */ - unsupported_encoding(s, insn); + unallocated_encoding(s); return; } @@ -9194,6 +9289,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x3: /* SUQADD, USQADD */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + handle_2misc_satacc(s, false, u, is_q, size, rn, rd); + return; case 0x7: /* SQABS, SQNEG */ if (size == 3 && !is_q) { unallocated_encoding(s); -- cgit v1.1