From 8009519b3094ab515def1fe9bbc444d463579448 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 14:08:24 -0700 Subject: target/arm: Use set/clear_helper_retaddr in helper-a64.c Use these in helper_dc_dva and the FEAT_MOPS routines to avoid a race condition with munmap in another thread. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/arm/tcg/helper-a64.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'target') diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 0ea8668..c60d2a7 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -928,6 +928,8 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) { + uintptr_t ra = GETPC(); + /* * Implement DC ZVA, which zeroes a fixed-length block of memory. * Note that we do not implement the (architecturally mandated) @@ -948,8 +950,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) #ifndef CONFIG_USER_ONLY if (unlikely(!mem)) { - uintptr_t ra = GETPC(); - /* * Trap if accessing an invalid page. DC_ZVA requires that we supply * the original pointer for an invalid page. But watchpoints require @@ -971,7 +971,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) } #endif + set_helper_retaddr(ra); memset(mem, 0, blocklen); + clear_helper_retaddr(); } void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, @@ -1120,7 +1122,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr, } #endif /* Easy case: just memset the host memory */ + set_helper_retaddr(ra); memset(mem, data, setsize); + clear_helper_retaddr(); return setsize; } @@ -1163,7 +1167,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, } #endif /* Easy case: just memset the host memory */ + set_helper_retaddr(ra); memset(mem, data, setsize); + clear_helper_retaddr(); mte_mops_set_tags(env, toaddr, setsize, *mtedesc); return setsize; } @@ -1497,7 +1503,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, } #endif /* Easy case: just memmove the host memory */ + set_helper_retaddr(ra); memmove(wmem, rmem, copysize); + clear_helper_retaddr(); return copysize; } @@ -1572,7 +1580,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, * Easy case: just memmove the host memory. Note that wmem and * rmem here point to the *last* byte to copy. */ + set_helper_retaddr(ra); memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); + clear_helper_retaddr(); return copysize; } -- cgit v1.1 From 3b9991e35c08be7fd6b84090b2114ff1bfd44d3f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 15:02:07 -0700 Subject: target/arm: Use set/clear_helper_retaddr in SVE and SME helpers Avoid a race condition with munmap in another thread. Use around blocks that exclusively use "host_fn". Keep the blocks as small as possible, but without setting and clearing for every operation on one page. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/arm/tcg/sme_helper.c | 16 ++++++++++++++++ target/arm/tcg/sve_helper.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 9 deletions(-) (limited to 'target') diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 5a6dd76..50bb088 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -517,6 +517,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, clr_fn(za, 0, reg_off); } + set_helper_retaddr(ra); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -529,6 +531,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -543,6 +547,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(ra); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -554,6 +560,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, reg_off += esize; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -701,6 +709,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(ra); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -711,6 +721,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -725,6 +737,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(ra); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -734,6 +748,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, reg_off += 1 << esz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index dd49e67..f1ee0e0 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -5738,6 +5738,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -5752,6 +5754,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -5771,6 +5775,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -5784,6 +5790,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, mem_off += N << msz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -5934,15 +5942,11 @@ DO_LDN_2(4, dd, MO_64) /* * Load contiguous data, first-fault and no-fault. * - * For user-only, one could argue that we should hold the mmap_lock during - * the operation so that there is no race between page_check_range and the - * load operation. However, unmapping pages out from under a running thread - * is extraordinarily unlikely. This theoretical race condition also affects - * linux-user/ in its get_user/put_user macros. - * - * TODO: Construct some helpers, written in assembly, that interact with - * host_signal_handler to produce memory ops which can properly report errors - * without racing. + * For user-only, we control the race between page_check_range and + * another thread's munmap by using set/clear_helper_retaddr. Any + * SEGV that occurs between those markers is assumed to be because + * the guest page vanished. Keep that block as small as possible + * so that unrelated QEMU bugs are not blamed on the guest. */ /* Fault on byte I. All bits in FFR from I are cleared. The vector @@ -6093,6 +6097,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3)); do { @@ -6101,9 +6107,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, (cpu_watchpoint_address_matches (env_cpu(env), addr + mem_off, 1 << msz) & BP_MEM_READ)) { + clear_helper_retaddr(); goto do_fault; } if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) { + clear_helper_retaddr(); goto do_fault; } host_fn(vd, reg_off, host + mem_off); @@ -6113,6 +6121,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } while (reg_off <= reg_last); + clear_helper_retaddr(); + /* * MemSingleNF is allowed to fail for any reason. We have special * code above to handle the first element crossing a page boundary. @@ -6348,6 +6358,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, reg_last = info.reg_off_last[0]; host = info.page[0].host; + set_helper_retaddr(retaddr); + while (reg_off <= reg_last) { uint64_t pg = vg[reg_off >> 6]; do { @@ -6362,6 +6374,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, } while (reg_off <= reg_last && (reg_off & 63)); } + clear_helper_retaddr(); + /* * Use the slow path to manage the cross-page misalignment. * But we know this is RAM and cannot trap. @@ -6381,6 +6395,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, reg_last = info.reg_off_last[1]; host = info.page[1].host; + set_helper_retaddr(retaddr); + do { uint64_t pg = vg[reg_off >> 6]; do { @@ -6394,6 +6410,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, mem_off += N << msz; } while (reg_off & 63); } while (reg_off <= reg_last); + + clear_helper_retaddr(); } } @@ -6560,7 +6578,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, if (unlikely(info.flags & TLB_MMIO)) { tlb_fn(env, &scratch, reg_off, addr, retaddr); } else { + set_helper_retaddr(retaddr); host_fn(&scratch, reg_off, info.host); + clear_helper_retaddr(); } } else { /* Element crosses the page boundary. */ @@ -6782,7 +6802,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, goto fault; } + set_helper_retaddr(retaddr); host_fn(vd, reg_off, info.host); + clear_helper_retaddr(); } reg_off += esize; } while (reg_off & 63); @@ -6986,7 +7008,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, do { void *h = host[i]; if (likely(h != NULL)) { + set_helper_retaddr(retaddr); host_fn(vd, reg_off, h); + clear_helper_retaddr(); } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { target_ulong addr = base + (off_fn(vm, reg_off) << scale); tlb_fn(env, vd, reg_off, addr, retaddr); -- cgit v1.1 From 73a93ae5f4c49a166bc2a286330a1b45f58f4df7 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Sat, 22 Jun 2024 22:48:33 +0200 Subject: target/ppc/mem_helper.c: Remove a conditional from dcbz_common() Instead of passing a bool and select a value within dcbz_common() let the callers pass in the right value to avoid this conditional statement. On PPC dcbz is often used to zero memory and some code uses it a lot. This change improves the run time of a test case that copies memory with a dcbz call in every iteration from 6.23 to 5.83 seconds. Signed-off-by: BALATON Zoltan Message-Id: <20240622204833.5F7C74E6000@zero.eik.bme.hu> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson Reviewed-by: Nicholas Piggin --- target/ppc/mem_helper.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'target') diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index f88155a..361fd72 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -271,12 +271,11 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, } static void dcbz_common(CPUPPCState *env, target_ulong addr, - uint32_t opcode, bool epid, uintptr_t retaddr) + uint32_t opcode, int mmu_idx, uintptr_t retaddr) { target_ulong mask, dcbz_size = env->dcache_line_size; uint32_t i; void *haddr; - int mmu_idx = epid ? PPC_TLB_EPID_STORE : ppc_env_mmu_index(env, false); #if defined(TARGET_PPC64) /* Check for dcbz vs dcbzl on 970 */ @@ -309,12 +308,12 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - dcbz_common(env, addr, opcode, false, GETPC()); + dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC()); } void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - dcbz_common(env, addr, opcode, true, GETPC()); + dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC()); } void helper_icbi(CPUPPCState *env, target_ulong addr) -- cgit v1.1 From 521a80d895ec8ef0200dcac9b9b19e60b0cc1d1a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Jul 2024 18:17:50 -0700 Subject: target/ppc: Hoist dcbz_size out of dcbz_common The 970 logic does not apply to dcbzep, which is an e500 insn. Reviewed-by: Nicholas Piggin Reviewed-by: BALATON Zoltan Signed-off-by: Richard Henderson --- target/ppc/mem_helper.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'target') diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 361fd72..5067919 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -271,22 +271,12 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, } static void dcbz_common(CPUPPCState *env, target_ulong addr, - uint32_t opcode, int mmu_idx, uintptr_t retaddr) + int dcbz_size, int mmu_idx, uintptr_t retaddr) { - target_ulong mask, dcbz_size = env->dcache_line_size; - uint32_t i; + target_ulong mask = ~(target_ulong)(dcbz_size - 1); void *haddr; -#if defined(TARGET_PPC64) - /* Check for dcbz vs dcbzl on 970 */ - if (env->excp_model == POWERPC_EXCP_970 && - !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { - dcbz_size = 32; - } -#endif - /* Align address */ - mask = ~(dcbz_size - 1); addr &= mask; /* Check reservation */ @@ -300,7 +290,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, memset(haddr, 0, dcbz_size); } else { /* Slow path */ - for (i = 0; i < dcbz_size; i += 8) { + for (int i = 0; i < dcbz_size; i += 8) { cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr); } } @@ -308,12 +298,22 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC()); + int dcbz_size = env->dcache_line_size; + +#if defined(TARGET_PPC64) + /* Check for dcbz vs dcbzl on 970 */ + if (env->excp_model == POWERPC_EXCP_970 && + !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { + dcbz_size = 32; + } +#endif + + dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC()); } void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC()); + dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC()); } void helper_icbi(CPUPPCState *env, target_ulong addr) -- cgit v1.1 From 62fe57c6d23fe8136d281f0e37ec8a9fab08b60a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Jul 2024 19:46:15 -0700 Subject: target/ppc: Split out helper_dbczl for 970 We can determine at translation time whether the insn is or is not dbczl. We must retain a runtime check against the HID5 register, but we can move that to a separate function that never affects other ppc models. Reviewed-by: Nicholas Piggin Reviewed-by: BALATON Zoltan Signed-off-by: Richard Henderson --- target/ppc/helper.h | 7 +++++-- target/ppc/mem_helper.c | 30 +++++++++++++++++++----------- target/ppc/translate.c | 24 ++++++++++++++---------- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'target') diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 76b8f25..afc5685 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -46,8 +46,11 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32) DEF_HELPER_4(lsw, void, env, tl, i32, i32) DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32) DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32) -DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, i32) -DEF_HELPER_FLAGS_3(dcbzep, TCG_CALL_NO_WG, void, env, tl, i32) +DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl) +DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl) +#ifdef TARGET_PPC64 +DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl) +#endif DEF_HELPER_FLAGS_2(icbi, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 5067919..d4957ef 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -296,25 +296,33 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, } } -void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) +void helper_dcbz(CPUPPCState *env, target_ulong addr) +{ + dcbz_common(env, addr, env->dcache_line_size, + ppc_env_mmu_index(env, false), GETPC()); +} + +void helper_dcbzep(CPUPPCState *env, target_ulong addr) +{ + dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC()); +} + +#ifdef TARGET_PPC64 +void helper_dcbzl(CPUPPCState *env, target_ulong addr) { int dcbz_size = env->dcache_line_size; -#if defined(TARGET_PPC64) - /* Check for dcbz vs dcbzl on 970 */ - if (env->excp_model == POWERPC_EXCP_970 && - !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { + /* + * The translator checked for POWERPC_EXCP_970. + * All that's left is to check HID5. + */ + if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { dcbz_size = 32; } -#endif dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC()); } - -void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode) -{ - dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC()); -} +#endif void helper_icbi(CPUPPCState *env, target_ulong addr) { diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0bc16d7..9e472ab 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -178,6 +178,7 @@ struct DisasContext { /* Translation flags */ MemOp default_tcg_memop_mask; #if defined(TARGET_PPC64) + powerpc_excp_t excp_model; bool sf_mode; bool has_cfar; bool has_bhrb; @@ -4445,27 +4446,29 @@ static void gen_dcblc(DisasContext *ctx) /* dcbz */ static void gen_dcbz(DisasContext *ctx) { - TCGv tcgv_addr; - TCGv_i32 tcgv_op; + TCGv tcgv_addr = tcg_temp_new(); gen_set_access_type(ctx, ACCESS_CACHE); - tcgv_addr = tcg_temp_new(); - tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000); gen_addr_reg_index(ctx, tcgv_addr); - gen_helper_dcbz(tcg_env, tcgv_addr, tcgv_op); + +#ifdef TARGET_PPC64 + if (ctx->excp_model == POWERPC_EXCP_970 && !(ctx->opcode & 0x00200000)) { + gen_helper_dcbzl(tcg_env, tcgv_addr); + return; + } +#endif + + gen_helper_dcbz(tcg_env, tcgv_addr); } /* dcbzep */ static void gen_dcbzep(DisasContext *ctx) { - TCGv tcgv_addr; - TCGv_i32 tcgv_op; + TCGv tcgv_addr = tcg_temp_new(); gen_set_access_type(ctx, ACCESS_CACHE); - tcgv_addr = tcg_temp_new(); - tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000); gen_addr_reg_index(ctx, tcgv_addr); - gen_helper_dcbzep(tcg_env, tcgv_addr, tcgv_op); + gen_helper_dcbzep(tcg_env, tcgv_addr); } /* dst / dstt */ @@ -6486,6 +6489,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->default_tcg_memop_mask = ctx->le_mode ? MO_LE : MO_BE; ctx->flags = env->flags; #if defined(TARGET_PPC64) + ctx->excp_model = env->excp_model; ctx->sf_mode = (hflags >> HFLAGS_64) & 1; ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR); ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB); -- cgit v1.1 From c6d84fd7cfb46a67c5c0404e93ed024cd3a14e6e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Jul 2024 20:10:53 -0700 Subject: target/ppc: Merge helper_{dcbz,dcbzep} Merge the two and pass the mmu_idx directly from translation. Swap the argument order in dcbz_common to avoid extra swaps. Reviewed-by: Nicholas Piggin Signed-off-by: Richard Henderson --- target/ppc/helper.h | 3 +-- target/ppc/mem_helper.c | 14 ++++---------- target/ppc/translate.c | 4 ++-- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'target') diff --git a/target/ppc/helper.h b/target/ppc/helper.h index afc5685..4fa089c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -46,8 +46,7 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32) DEF_HELPER_4(lsw, void, env, tl, i32, i32) DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32) DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32) -DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl) -DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl) +DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, int) #ifdef TARGET_PPC64 DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl) #endif diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index d4957ef..24bae3b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -271,7 +271,7 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, } static void dcbz_common(CPUPPCState *env, target_ulong addr, - int dcbz_size, int mmu_idx, uintptr_t retaddr) + int mmu_idx, int dcbz_size, uintptr_t retaddr) { target_ulong mask = ~(target_ulong)(dcbz_size - 1); void *haddr; @@ -296,15 +296,9 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, } } -void helper_dcbz(CPUPPCState *env, target_ulong addr) +void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx) { - dcbz_common(env, addr, env->dcache_line_size, - ppc_env_mmu_index(env, false), GETPC()); -} - -void helper_dcbzep(CPUPPCState *env, target_ulong addr) -{ - dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC()); + dcbz_common(env, addr, mmu_idx, env->dcache_line_size, GETPC()); } #ifdef TARGET_PPC64 @@ -320,7 +314,7 @@ void helper_dcbzl(CPUPPCState *env, target_ulong addr) dcbz_size = 32; } - dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC()); + dcbz_common(env, addr, ppc_env_mmu_index(env, false), dcbz_size, GETPC()); } #endif diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 9e472ab..cba943a 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -4458,7 +4458,7 @@ static void gen_dcbz(DisasContext *ctx) } #endif - gen_helper_dcbz(tcg_env, tcgv_addr); + gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(ctx->mem_idx)); } /* dcbzep */ @@ -4468,7 +4468,7 @@ static void gen_dcbzep(DisasContext *ctx) gen_set_access_type(ctx, ACCESS_CACHE); gen_addr_reg_index(ctx, tcgv_addr); - gen_helper_dcbzep(tcg_env, tcgv_addr); + gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(PPC_TLB_EPID_STORE)); } /* dst / dstt */ -- cgit v1.1 From f6bcc5b8f91e3c1b855c3078d9133f3918080276 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 16:56:48 -0700 Subject: target/ppc: Improve helper_dcbz for user-only Mark the reserve_addr check unlikely. Use tlb_vaddr_to_host instead of probe_write, relying on the memset itself to test for page writability. Use set/clear_helper_retaddr so that we can properly unwind on segfault. With this, a trivial loop around guest memset will no longer spend nearly 25% of runtime within page_get_flags. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/ppc/mem_helper.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'target') diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index 24bae3b..953dd08 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -280,20 +280,27 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, addr &= mask; /* Check reservation */ - if ((env->reserve_addr & mask) == addr) { + if (unlikely((env->reserve_addr & mask) == addr)) { env->reserve_addr = (target_ulong)-1ULL; } /* Try fast path translate */ +#ifdef CONFIG_USER_ONLY + haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx); +#else haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr); - if (haddr) { - memset(haddr, 0, dcbz_size); - } else { + if (unlikely(!haddr)) { /* Slow path */ for (int i = 0; i < dcbz_size; i += 8) { cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr); } + return; } +#endif + + set_helper_retaddr(retaddr); + memset(haddr, 0, dcbz_size); + clear_helper_retaddr(); } void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx) -- cgit v1.1 From 814e46594da891955a6e111e2c253137fcd43f07 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 18:05:59 -0700 Subject: target/s390x: Use user_or_likely in do_access_memset Eliminate the ifdef by using a predicate that is always true with CONFIG_USER_ONLY. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/s390x/tcg/mem_helper.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'target') diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 6cdbc34..5311a15 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -225,10 +225,7 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, uint8_t byte, uint16_t size, int mmu_idx, uintptr_t ra) { -#ifdef CONFIG_USER_ONLY - memset(haddr, byte, size); -#else - if (likely(haddr)) { + if (user_or_likely(haddr)) { memset(haddr, byte, size); } else { MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); @@ -236,7 +233,6 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, cpu_stb_mmu(env, vaddr + i, byte, oi, ra); } } -#endif } static void access_memset(CPUS390XState *env, S390Access *desta, -- cgit v1.1 From 573b7783012a26ff1a88c010353ee6e85965dcdc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 18:59:13 -0700 Subject: target/s390x: Use user_or_likely in access_memmove Invert the conditional, indent the block, and use the macro that expands to true for user-only. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/s390x/tcg/mem_helper.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'target') diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 5311a15..331a35b 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -296,41 +296,39 @@ static void access_memmove(CPUS390XState *env, S390Access *desta, S390Access *srca, uintptr_t ra) { int len = desta->size1 + desta->size2; - int diff; assert(len == srca->size1 + srca->size2); /* Fallback to slow access in case we don't have access to all host pages */ - if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) || - !srca->haddr1 || (srca->size2 && !srca->haddr2))) { - int i; - - for (i = 0; i < len; i++) { - uint8_t byte = access_get_byte(env, srca, i, ra); - - access_set_byte(env, desta, i, byte, ra); - } - return; - } - - diff = desta->size1 - srca->size1; - if (likely(diff == 0)) { - memmove(desta->haddr1, srca->haddr1, srca->size1); - if (unlikely(srca->size2)) { - memmove(desta->haddr2, srca->haddr2, srca->size2); - } - } else if (diff > 0) { - memmove(desta->haddr1, srca->haddr1, srca->size1); - memmove(desta->haddr1 + srca->size1, srca->haddr2, diff); - if (likely(desta->size2)) { - memmove(desta->haddr2, srca->haddr2 + diff, desta->size2); + if (user_or_likely(desta->haddr1 && + srca->haddr1 && + (!desta->size2 || desta->haddr2) && + (!srca->size2 || srca->haddr2))) { + int diff = desta->size1 - srca->size1; + + if (likely(diff == 0)) { + memmove(desta->haddr1, srca->haddr1, srca->size1); + if (unlikely(srca->size2)) { + memmove(desta->haddr2, srca->haddr2, srca->size2); + } + } else if (diff > 0) { + memmove(desta->haddr1, srca->haddr1, srca->size1); + memmove(desta->haddr1 + srca->size1, srca->haddr2, diff); + if (likely(desta->size2)) { + memmove(desta->haddr2, srca->haddr2 + diff, desta->size2); + } + } else { + diff = -diff; + memmove(desta->haddr1, srca->haddr1, desta->size1); + memmove(desta->haddr2, srca->haddr1 + desta->size1, diff); + if (likely(srca->size2)) { + memmove(desta->haddr2 + diff, srca->haddr2, srca->size2); + } } } else { - diff = -diff; - memmove(desta->haddr1, srca->haddr1, desta->size1); - memmove(desta->haddr2, srca->haddr1 + desta->size1, diff); - if (likely(srca->size2)) { - memmove(desta->haddr2 + diff, srca->haddr2, srca->size2); + for (int i = 0; i < len; i++) { + uint8_t byte = access_get_byte(env, srca, i, ra); + access_set_byte(env, desta, i, byte, ra); } } } -- cgit v1.1 From 2730df919093d014e96e68f1f982054da3f79e5b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 18:40:58 -0700 Subject: target/s390x: Use set/clear_helper_retaddr in mem_helper.c Avoid a race condition with munmap in another thread. For access_memset and access_memmove, manage the value within the helper. For uses of access_{get,set}_byte, manage the value across the for loops. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- target/s390x/tcg/mem_helper.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) (limited to 'target') diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 331a35b..0e12dae 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -238,14 +238,14 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, static void access_memset(CPUS390XState *env, S390Access *desta, uint8_t byte, uintptr_t ra) { - + set_helper_retaddr(ra); do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1, desta->mmu_idx, ra); - if (likely(!desta->size2)) { - return; + if (unlikely(desta->size2)) { + do_access_memset(env, desta->vaddr2, desta->haddr2, byte, + desta->size2, desta->mmu_idx, ra); } - do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2, - desta->mmu_idx, ra); + clear_helper_retaddr(); } static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, @@ -366,6 +366,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + set_helper_retaddr(ra); + for (i = 0; i < l; i++) { const uint8_t x = access_get_byte(env, &srca1, i, ra) & access_get_byte(env, &srca2, i, ra); @@ -373,6 +375,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, c |= x; access_set_byte(env, &desta, i, x, ra); } + + clear_helper_retaddr(); return c != 0; } @@ -407,6 +411,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, return 0; } + set_helper_retaddr(ra); for (i = 0; i < l; i++) { const uint8_t x = access_get_byte(env, &srca1, i, ra) ^ access_get_byte(env, &srca2, i, ra); @@ -414,6 +419,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, c |= x; access_set_byte(env, &desta, i, x, ra); } + clear_helper_retaddr(); return c != 0; } @@ -441,6 +447,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest, access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + set_helper_retaddr(ra); + for (i = 0; i < l; i++) { const uint8_t x = access_get_byte(env, &srca1, i, ra) | access_get_byte(env, &srca2, i, ra); @@ -448,6 +456,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest, c |= x; access_set_byte(env, &desta, i, x, ra); } + + clear_helper_retaddr(); return c != 0; } @@ -484,11 +494,13 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, } else if (!is_destructive_overlap(env, dest, src, l)) { access_memmove(env, &desta, &srca, ra); } else { + set_helper_retaddr(ra); for (i = 0; i < l; i++) { uint8_t byte = access_get_byte(env, &srca, i, ra); access_set_byte(env, &desta, i, byte, ra); } + clear_helper_retaddr(); } return env->cc_op; @@ -514,10 +526,12 @@ void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src) access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + set_helper_retaddr(ra); for (i = l - 1; i >= 0; i--) { uint8_t byte = access_get_byte(env, &srca, i, ra); access_set_byte(env, &desta, i, byte, ra); } + clear_helper_retaddr(); } /* move inverse */ @@ -534,11 +548,13 @@ void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) src = wrap_address(env, src - l + 1); access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + set_helper_retaddr(ra); for (i = 0; i < l; i++) { const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra); - access_set_byte(env, &desta, i, x, ra); } + clear_helper_retaddr(); } /* move numerics */ @@ -555,12 +571,15 @@ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + set_helper_retaddr(ra); for (i = 0; i < l; i++) { const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) | (access_get_byte(env, &srca2, i, ra) & 0xf0); access_set_byte(env, &desta, i, x, ra); } + clear_helper_retaddr(); } /* move with offset */ @@ -580,6 +599,8 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) /* Handle rightmost byte */ byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra); + + set_helper_retaddr(ra); byte_src = access_get_byte(env, &srca, len_src - 1, ra); byte_dest = (byte_dest & 0x0f) | (byte_src << 4); access_set_byte(env, &desta, len_dest - 1, byte_dest, ra); @@ -595,6 +616,7 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) byte_dest |= byte_src << 4; access_set_byte(env, &desta, i, byte_dest, ra); } + clear_helper_retaddr(); } /* move zones */ @@ -611,12 +633,15 @@ void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + set_helper_retaddr(ra); for (i = 0; i < l; i++) { const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) | (access_get_byte(env, &srca2, i, ra) & 0x0f); access_set_byte(env, &desta, i, x, ra); } + clear_helper_retaddr(); } /* compare unsigned byte arrays */ @@ -961,15 +986,19 @@ uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2) */ access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra); access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra); + + set_helper_retaddr(ra); for (i = 0; i < len; i++) { const uint8_t v = access_get_byte(env, &srca, i, ra); access_set_byte(env, &desta, i, v, ra); if (v == c) { + clear_helper_retaddr(); set_address_zero(env, r1, d + i); return 1; } } + clear_helper_retaddr(); set_address_zero(env, r1, d + len); set_address_zero(env, r2, s + len); return 3; @@ -1060,6 +1089,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env, *dest = wrap_address(env, *dest + len); } else { access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + set_helper_retaddr(ra); /* The remaining length selects the padding byte. */ for (i = 0; i < len; (*destlen)--, i++) { @@ -1069,6 +1099,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env, access_set_byte(env, &desta, i, pad >> 8, ra); } } + clear_helper_retaddr(); *dest = wrap_address(env, *dest + len); } -- cgit v1.1 From 3f57638a7eae5b56f65224c680654a2aaaa09379 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 9 Jul 2024 20:10:55 -0700 Subject: target/riscv: Simplify probing in vext_ldff The current pairing of tlb_vaddr_to_host with extra is either inefficient (user-only, with page_check_range) or incorrect (system, with probe_pages). For proper non-fault behaviour, use probe_access_flags with its nonfault parameter set to true. Reviewed-by: Max Chou Acked-by: Alistair Francis Signed-off-by: Richard Henderson --- target/riscv/vector_helper.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'target') diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 1b4d5a8..10a52ce 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base, vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) { - void *host; uint32_t i, k, vl = 0; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); @@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base, } addr = adjust_addr(env, base + i * (nf << log2_esz)); if (i == 0) { + /* Allow fault on first element. */ probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); } else { - /* if it triggers an exception, no need to check watchpoint */ remain = nf << log2_esz; while (remain > 0) { + void *host; + int flags; + offset = -(addr | TARGET_PAGE_MASK); - host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index); - if (host) { -#ifdef CONFIG_USER_ONLY - if (!page_check_range(addr, offset, PAGE_READ)) { - vl = i; - goto ProbeSuccess; - } -#else - probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); -#endif - } else { + + /* Probe nonfault on subsequent elements. */ + flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD, + mmu_index, true, &host, 0); + + /* + * Stop if invalid (unmapped) or mmio (transaction may fail). + * Do not stop if watchpoint, as the spec says that + * first-fault should continue to access the same + * elements regardless of any watchpoint. + */ + if (flags & ~TLB_WATCHPOINT) { vl = i; goto ProbeSuccess; } - if (remain <= offset) { + if (remain <= offset) { break; } remain -= offset; -- cgit v1.1