aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2023-07-24 11:34:01 +0100
committerPeter Maydell <peter.maydell@linaro.org>2023-07-24 11:34:01 +0100
commitc5216b69545ec391c6099a0816fc537c49ad0063 (patch)
tree7cd3b0162d7574079f7a604bc15bcc8e6598be76
parentd1181d29370a4318a9f11ea92065bea6bb159f83 (diff)
parent32b120394c578bc824f1db4835b3bffbeca88fae (diff)
downloadqemu-c5216b69545ec391c6099a0816fc537c49ad0063.zip
qemu-c5216b69545ec391c6099a0816fc537c49ad0063.tar.gz
qemu-c5216b69545ec391c6099a0816fc537c49ad0063.tar.bz2
Merge tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu into staging
accel/tcg: Zero-pad vaddr in tlb debug output accel/tcg: Fix type of 'last' for pageflags_{find,next} accel/tcg: Fix sense of read-only probes in ldst_atomicity accel/tcg: Take mmap_lock in load_atomic*_or_exit tcg: Add earlyclobber to op_add2 for x86 and s390x tcg/ppc: Fix race in goto_tb implementation # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmS+O7cdHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8qrAf/VeAFnMbtantUTfM5 # zOcfBlutsDlJrNwA/ajFDrPwUDewP7s5cqxImAYqhXfhqlc2RIB3UiMCgSaQ+q6O # MBOH0bEj/zbeIlwRX07ZBWhUYVdqJVd7Nxb1W19YwgG9yieWUxa+Xo1i2fhyXMv+ # 20VOFB1dPnxYyUMrzh/bSiHE90JFZktO1WzV10FRD+IpnImY9R+YGdpGTpVzUhor # ReRHTkMKyYilY6EEUG2gFhotrY/bbSSSFyl9BcQjkZh11603nAN0mNKxtSjPJnNB # rXhCVEgmbbBvCufsO6szQ03W/7RZ/KCg/DyKqxyCP1Ril4BIOx3tiucROcapXH/K # 0y/ycA== # =hdk/ # -----END PGP SIGNATURE----- # gpg: Signature made Mon 24 Jul 2023 09:52:07 BST # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu: accel/tcg: Fix type of 'last' for pageflags_{find,next} accel/tcg: Zero-pad vaddr in tlb_debug output tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output accel/tcg: Take mmap_lock in load_atomic*_or_exit accel/tcg: Fix sense of read-only probes in ldst_atomicity include/exec: Add WITH_MMAP_LOCK_GUARD tcg/ppc: Fix race in goto_tb implementation Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--accel/tcg/cputlb.c20
-rw-r--r--accel/tcg/ldst_atomicity.c.inc32
-rw-r--r--accel/tcg/user-exec.c4
-rw-r--r--bsd-user/mmap.c1
-rw-r--r--include/exec/exec-all.h10
-rw-r--r--linux-user/mmap.c1
-rw-r--r--tcg/i386/tcg-target-con-set.h5
-rw-r--r--tcg/i386/tcg-target.c.inc2
-rw-r--r--tcg/ppc/tcg-target.c.inc9
-rw-r--r--tcg/s390x/tcg-target-con-set.h8
-rw-r--r--tcg/s390x/tcg-target.c.inc4
-rw-r--r--tcg/tcg.c8
12 files changed, 66 insertions, 38 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e0079c9..ba44501 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -497,8 +497,8 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page)
/* Check if we need to flush due to large pages. */
if ((page & lp_mask) == lp_addr) {
- tlb_debug("forcing full flush midx %d (%"
- VADDR_PRIx "/%" VADDR_PRIx ")\n",
+ tlb_debug("forcing full flush midx %d (%016"
+ VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, lp_addr, lp_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
} else {
@@ -527,7 +527,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
assert_cpu_is_self(cpu);
- tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
+ tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -591,7 +591,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
{
- tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
+ tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
@@ -625,7 +625,7 @@ void tlb_flush_page(CPUState *cpu, vaddr addr)
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
uint16_t idxmap)
{
- tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
+ tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
@@ -666,7 +666,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
uint16_t idxmap)
{
- tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
+ tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
@@ -728,7 +728,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (mask < f->mask || len > f->mask) {
tlb_debug("forcing full flush midx %d ("
- "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n",
+ "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
midx, addr, mask, len);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
@@ -741,7 +741,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
tlb_debug("forcing full flush midx %d ("
- "%" VADDR_PRIx "/%" VADDR_PRIx ")\n",
+ "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, d->large_page_addr, d->large_page_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
@@ -773,7 +773,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
assert_cpu_is_self(cpu);
- tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n",
+ tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
d.addr, d.bits, d.len, d.idxmap);
qemu_spin_lock(&env_tlb(env)->c.lock);
@@ -1165,7 +1165,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
&xlat, &sz, full->attrs, &prot);
assert(sz >= TARGET_PAGE_SIZE);
- tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
+ tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
" prot=%x idx=%d\n",
addr, full->phys_addr, prot, mmu_idx);
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index 4de0a80..e5c590a 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -159,9 +159,11 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*/
- if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
- uint64_t *p = __builtin_assume_aligned(pv, 8);
- return *p;
+ WITH_MMAP_LOCK_GUARD() {
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
+ return *p;
+ }
}
#endif
@@ -186,25 +188,27 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
return atomic16_read_ro(p);
}
-#ifdef CONFIG_USER_ONLY
/*
* We can only use cmpxchg to emulate a load if the page is writable.
* If the page is not writable, then assume the value is immutable
* and requires no locking. This ignores the case of MAP_SHARED with
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
+ *
+ * In system mode all guest pages are writable. For user mode,
+ * we must take mmap_lock so that the query remains valid until
+ * the write is complete -- tests/tcg/multiarch/munmap-pthread.c
+ * is an example that can race.
*/
- if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
- return *p;
- }
+ WITH_MMAP_LOCK_GUARD() {
+#ifdef CONFIG_USER_ONLY
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
+ return *p;
+ }
#endif
-
- /*
- * In system mode all guest pages are writable, and for user-only
- * we have just checked writability. Try cmpxchg.
- */
- if (HAVE_ATOMIC128_RW) {
- return atomic16_read_rw(p);
+ if (HAVE_ATOMIC128_RW) {
+ return atomic16_read_rw(p);
+ }
}
/* Ultimate fallback: re-execute in serial context. */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index ac38c2b..ab48cb4 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -144,7 +144,7 @@ typedef struct PageFlagsNode {
static IntervalTreeRoot pageflags_root;
-static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
+static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
{
IntervalTreeNode *n;
@@ -153,7 +153,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
}
static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
- target_long last)
+ target_ulong last)
{
IntervalTreeNode *n;
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index aca8764..74ed00b 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -32,6 +32,7 @@ void mmap_lock(void)
void mmap_unlock(void)
{
+ assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 5fa0687..d02517e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -629,6 +629,15 @@ void TSA_NO_TSA mmap_lock(void);
void TSA_NO_TSA mmap_unlock(void);
bool have_mmap_lock(void);
+static inline void mmap_unlock_guard(void *unused)
+{
+ mmap_unlock();
+}
+
+#define WITH_MMAP_LOCK_GUARD() \
+ for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \
+ = (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1)
+
/**
* adjust_signal_pc:
* @pc: raw pc from the host signal ucontext_t.
@@ -683,6 +692,7 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
#else
static inline void mmap_lock(void) {}
static inline void mmap_unlock(void) {}
+#define WITH_MMAP_LOCK_GUARD()
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_set_dirty(CPUState *cpu, vaddr addr);
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 44b53bd..a5dfb56 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -36,6 +36,7 @@ void mmap_lock(void)
void mmap_unlock(void)
{
+ assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 91ceb0e..5ea3a29 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -11,6 +11,9 @@
*
* C_N1_Im(...) defines a constraint set with 1 output and <m> inputs,
* except that the output must use a new register.
+ *
+ * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
+ * inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(L, L)
@@ -53,4 +56,4 @@ C_O2_I1(r, r, L)
C_O2_I2(a, d, a, r)
C_O2_I2(r, r, L, L)
C_O2_I3(a, d, 0, 1, r)
-C_O2_I4(r, r, 0, 1, re, re)
+C_N1_O1_I4(r, r, 0, 1, re, re)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index ab997b5..77482da 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3335,7 +3335,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
- return C_O2_I4(r, r, 0, 1, re, re);
+ return C_N1_O1_I4(r, r, 0, 1, re, re);
case INDEX_op_ctz_i32:
case INDEX_op_ctz_i64:
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index c866f2c..511e14b 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2496,11 +2496,10 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
- /* Direct branch will be patched by tb_target_set_jmp_target. */
+ /* TODO: Use direct branches when possible. */
set_jmp_insn_offset(s, which);
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
- /* When branch is out of range, fall through to indirect. */
tcg_out32(s, BCCTR | BO_ALWAYS);
/* For the unlinked case, need to reset TCG_REG_TB. */
@@ -2528,10 +2527,12 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
intptr_t diff = addr - jmp_rx;
tcg_insn_unit insn;
+ if (USE_REG_TB) {
+ return;
+ }
+
if (in_range_b(diff)) {
insn = B | (diff & 0x3fffffc);
- } else if (USE_REG_TB) {
- insn = MTSPR | RS(TCG_REG_TB) | CTR;
} else {
insn = NOP;
}
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index cbad91b..9a42037 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -8,6 +8,9 @@
* C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
* Each operand should be a sequence of constraint letters as defined by
* tcg-target-con-str.h; the constraint combination is inclusive or.
+ *
+ * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
+ * inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(r, r)
@@ -41,6 +44,5 @@ C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)
-C_O2_I4(r, r, 0, 1, rA, r)
-C_O2_I4(r, r, 0, 1, ri, r)
-C_O2_I4(r, r, 0, 1, r, r)
+C_N1_O1_I4(r, r, 0, 1, ri, r)
+C_N1_O1_I4(r, r, 0, 1, rA, r)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index a878acd..a94f790 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -3229,11 +3229,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
- return C_O2_I4(r, r, 0, 1, ri, r);
+ return C_N1_O1_I4(r, r, 0, 1, ri, r);
case INDEX_op_add2_i64:
case INDEX_op_sub2_i64:
- return C_O2_I4(r, r, 0, 1, rA, r);
+ return C_N1_O1_I4(r, r, 0, 1, rA, r);
case INDEX_op_st_vec:
return C_O0_I2(v, r);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 652e8ea..ddfe9a9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -648,6 +648,7 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
+#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
typedef enum {
#include "tcg-target-con-set.h"
@@ -668,6 +669,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
+#undef C_N1_O1_I4
/* Put all of the constraint sets into an array, indexed by the enum. */
@@ -687,6 +689,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
+#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
@@ -706,6 +709,7 @@ static const TCGTargetOpDef constraint_sets[] = {
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
+#undef C_N1_O1_I4
/* Expand the enumerator to be returned from tcg_target_op_def(). */
@@ -725,6 +729,7 @@ static const TCGTargetOpDef constraint_sets[] = {
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
+#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
#include "tcg-target.c.inc"
@@ -4703,7 +4708,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
* dead after the instruction, we must allocate a new
* register and move it.
*/
- if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
+ if (temp_readonly(ts) || !IS_DEAD_ARG(i)
+ || def->args_ct[arg_ct->alias_index].newreg) {
allocate_new_reg = true;
} else if (ts->val_type == TEMP_VAL_REG) {
/*