aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2023-08-08 14:10:06 -0700
committerRichard Henderson <richard.henderson@linaro.org>2023-08-08 14:10:07 -0700
commit32e07fddc6d989dc5fdff4f9c9e47cb1f3911904 (patch)
tree3b3133ed27998c6986fb2b740a29ddb6b8a45ae4
parentcef297a9d7506299e98e72c4f5dc89d0d1eae774 (diff)
parentdd55885516f42f718d0d121c59a5f7be5fdae3e6 (diff)
downloadqemu-32e07fddc6d989dc5fdff4f9c9e47cb1f3911904.zip
qemu-32e07fddc6d989dc5fdff4f9c9e47cb1f3911904.tar.gz
qemu-32e07fddc6d989dc5fdff4f9c9e47cb1f3911904.tar.bz2
Merge tag 'pull-lu-20230808' of https://gitlab.com/rth7680/qemu into staging
linux-user: Adjust guest image layout vs reserved_va linux-user: Do not adjust image mapping for host page size linux-user: Adjust initial brk when interpreter is close to executable util/selfmap: Rewrite using qemu/interval-tree.h linux-user: Rewrite probe_guest_base # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmTSrp4dHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9lTQf/W/Tbd6CFnZpVE8Sb # BPrhdmo+x6Jftt1Ha66b/4xnasX7DuVaI1ECDh4CQQKIOh9A4LETx6ue9/UGi4vT # Fe4UrrJcAjt/CPaZhwXniJM9CvEnw1gkl3AgKAtZOBEConuPnkTiSWjySmCt3T4r # EGQxDe0HLpWYavNtvyywak/sEbwOD4hNAunFpJB6PLZ+KEoCDZwtcQdl55kg5bIt # WBMgUSXnWhC45t+26OcSDeHovqxHoA647H10T0y0U6bNVkW0tRW51xCTvHt+iDyR # s8UOCe1Q+w8F18fN68HIWBJ6NCzUts/AmQrWwc/MWiK1z91/ht5mlKAuNYnoZ6jH # htCSEA== # =ERAI # -----END PGP SIGNATURE----- # gpg: Signature made Tue 08 Aug 2023 02:07:42 PM PDT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate] * tag 'pull-lu-20230808' of https://gitlab.com/rth7680/qemu: linux-user: Rewrite non-fixed probe_guest_base linux-user: Rewrite fixed probe_guest_base linux-user: Consolidate guest bounds check in probe_guest_base linux-user: Remove duplicate CPU_LOG_PAGE from probe_guest_base util/selfmap: Rewrite using qemu/interval-tree.h linux-user: Use zero_bss for PT_LOAD with no file contents too linux-user: Do not adjust zero_bss for host page size linux-user: Do not adjust image mapping for host page size linux-user: Adjust initial brk when interpreter is close to executable linux-user: Use elf_et_dyn_base for ET_DYN with interpreter linux-user: Use MAP_FIXED_NOREPLACE for initial image mmap linux-user: Define ELF_ET_DYN_BASE in $guest/target_mman.h linux-user: Define TASK_UNMAPPED_BASE in $guest/target_mman.h linux-user: Adjust task_unmapped_base for reserved_va Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--include/qemu/selfmap.h22
-rw-r--r--linux-user/aarch64/target_mman.h13
-rw-r--r--linux-user/alpha/target_mman.h11
-rw-r--r--linux-user/arm/target_mman.h11
-rw-r--r--linux-user/cris/target_mman.h12
-rw-r--r--linux-user/elfload.c720
-rw-r--r--linux-user/hexagon/target_mman.h13
-rw-r--r--linux-user/hppa/target_mman.h6
-rw-r--r--linux-user/i386/target_mman.h16
-rw-r--r--linux-user/loongarch64/target_mman.h11
-rw-r--r--linux-user/m68k/target_mman.h5
-rw-r--r--linux-user/main.c43
-rw-r--r--linux-user/microblaze/target_mman.h11
-rw-r--r--linux-user/mips/target_mman.h10
-rw-r--r--linux-user/mmap.c19
-rw-r--r--linux-user/nios2/target_mman.h10
-rw-r--r--linux-user/openrisc/target_mman.h10
-rw-r--r--linux-user/ppc/target_mman.h20
-rw-r--r--linux-user/qemu.h1
-rw-r--r--linux-user/riscv/target_mman.h10
-rw-r--r--linux-user/s390x/target_mman.h20
-rw-r--r--linux-user/sh4/target_mman.h7
-rw-r--r--linux-user/sparc/target_mman.h25
-rw-r--r--linux-user/syscall.c15
-rw-r--r--linux-user/user-mmap.h29
-rw-r--r--linux-user/x86_64/target_mman.h15
-rw-r--r--linux-user/xtensa/target_mman.h10
-rw-r--r--util/selfmap.c114
28 files changed, 767 insertions, 442 deletions
diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h
index 3479a2a..7d93894 100644
--- a/include/qemu/selfmap.h
+++ b/include/qemu/selfmap.h
@@ -9,9 +9,10 @@
#ifndef SELFMAP_H
#define SELFMAP_H
+#include "qemu/interval-tree.h"
+
typedef struct {
- unsigned long start;
- unsigned long end;
+ IntervalTreeNode itree;
/* flags */
bool is_read;
@@ -19,26 +20,25 @@ typedef struct {
bool is_exec;
bool is_priv;
- unsigned long offset;
- gchar *dev;
+ uint64_t offset;
uint64_t inode;
- gchar *path;
+ const char *path;
+ char dev[];
} MapInfo;
-
/**
* read_self_maps:
*
- * Read /proc/self/maps and return a list of MapInfo structures.
+ * Read /proc/self/maps and return a tree of MapInfo structures.
*/
-GSList *read_self_maps(void);
+IntervalTreeRoot *read_self_maps(void);
/**
* free_self_maps:
- * @info: a GSlist
+ * @info: an interval tree
*
- * Free a list of MapInfo structures.
+ * Free a tree of MapInfo structures.
*/
-void free_self_maps(GSList *info);
+void free_self_maps(IntervalTreeRoot *root);
#endif /* SELFMAP_H */
diff --git a/linux-user/aarch64/target_mman.h b/linux-user/aarch64/target_mman.h
index f721295..69ec5d5 100644
--- a/linux-user/aarch64/target_mman.h
+++ b/linux-user/aarch64/target_mman.h
@@ -4,6 +4,19 @@
#define TARGET_PROT_BTI 0x10
#define TARGET_PROT_MTE 0x20
+/*
+ * arch/arm64/include/asm/processor.h:
+ *
+ * TASK_UNMAPPED_BASE DEFAULT_MAP_WINDOW / 4
+ * DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64
+ * DEFAULT_MAP_WINDOW_64 UL(1) << VA_BITS_MIN
+ * VA_BITS_MIN 48 (unless explicitly configured smaller)
+ */
+#define TASK_UNMAPPED_BASE (1ull << (48 - 2))
+
+/* arch/arm64/include/asm/elf.h */
+#define ELF_ET_DYN_BASE TARGET_PAGE_ALIGN((1ull << 48) / 3 * 2)
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/alpha/target_mman.h b/linux-user/alpha/target_mman.h
index 6bb03e7..8edfe2b 100644
--- a/linux-user/alpha/target_mman.h
+++ b/linux-user/alpha/target_mman.h
@@ -20,6 +20,17 @@
#define TARGET_MS_SYNC 2
#define TARGET_MS_INVALIDATE 4
+/*
+ * arch/alpha/include/asm/processor.h:
+ *
+ * TASK_UNMAPPED_BASE TASK_SIZE / 2
+ * TASK_SIZE 0x40000000000UL
+ */
+#define TASK_UNMAPPED_BASE 0x20000000000ull
+
+/* arch/alpha/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/arm/target_mman.h b/linux-user/arm/target_mman.h
index e7ba607..51005da 100644
--- a/linux-user/arm/target_mman.h
+++ b/linux-user/arm/target_mman.h
@@ -1 +1,12 @@
+/*
+ * arch/arm/include/asm/memory.h
+ * TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M)
+ * TASK_SIZE CONFIG_PAGE_OFFSET
+ * CONFIG_PAGE_OFFSET 0xC0000000 (default in Kconfig)
+ */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/arm/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0x00400000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/cris/target_mman.h b/linux-user/cris/target_mman.h
index e7ba607..9ace8ac 100644
--- a/linux-user/cris/target_mman.h
+++ b/linux-user/cris/target_mman.h
@@ -1 +1,13 @@
+/*
+ * arch/cris/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+ *
+ * arch/cris/include/arch-v32/arch/processor.h
+ * TASK_SIZE 0xb0000000
+ */
+#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0xb0000000 / 3)
+
+/* arch/cris/include/uapi/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 36e4026..ac03beb 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1960,15 +1960,6 @@ struct exec
#define ZMAGIC 0413
#define QMAGIC 0314
-/* Necessary parameters */
-#define TARGET_ELF_EXEC_PAGESIZE \
- (((eppnt->p_align & ~qemu_host_page_mask) != 0) ? \
- TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE))
-#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE)
-#define TARGET_ELF_PAGESTART(_v) ((_v) & \
- ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1))
-#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
-
#define DLINFO_ITEMS 16
static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
@@ -2220,47 +2211,37 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
}
}
-/* Map and zero the bss. We need to explicitly zero any fractional pages
- after the data section (i.e. bss). */
-static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
+/**
+ * zero_bss:
+ *
+ * Map and zero the bss. We need to explicitly zero any fractional pages
+ * after the data section (i.e. bss). Return false on mapping failure.
+ */
+static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, int prot)
{
- uintptr_t host_start, host_map_start, host_end;
-
- last_bss = TARGET_PAGE_ALIGN(last_bss);
-
- /* ??? There is confusion between qemu_real_host_page_size and
- qemu_host_page_size here and elsewhere in target_mmap, which
- may lead to the end of the data section mapping from the file
- not being mapped. At least there was an explicit test and
- comment for that here, suggesting that "the file size must
- be known". The comment probably pre-dates the introduction
- of the fstat system call in target_mmap which does in fact
- find out the size. What isn't clear is if the workaround
- here is still actually needed. For now, continue with it,
- but merge it with the "normal" mmap that would allocate the bss. */
-
- host_start = (uintptr_t) g2h_untagged(elf_bss);
- host_end = (uintptr_t) g2h_untagged(last_bss);
- host_map_start = REAL_HOST_PAGE_ALIGN(host_start);
-
- if (host_map_start < host_end) {
- void *p = mmap((void *)host_map_start, host_end - host_map_start,
- prot, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (p == MAP_FAILED) {
- perror("cannot mmap brk");
- exit(-1);
- }
- }
+ abi_ulong align_bss;
- /* Ensure that the bss page(s) are valid */
- if ((page_get_flags(last_bss-1) & prot) != prot) {
- page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss - 1,
- prot | PAGE_VALID);
- }
+ align_bss = TARGET_PAGE_ALIGN(start_bss);
+ end_bss = TARGET_PAGE_ALIGN(end_bss);
- if (host_start < host_map_start) {
- memset((void *)host_start, 0, host_map_start - host_start);
+ if (start_bss < align_bss) {
+ int flags = page_get_flags(start_bss);
+
+ if (!(flags & PAGE_VALID)) {
+ /* Map the start of the bss. */
+ align_bss -= TARGET_PAGE_SIZE;
+ } else if (flags & PAGE_WRITE) {
+ /* The page is already mapped writable. */
+ memset(g2h_untagged(start_bss), 0, align_bss - start_bss);
+ } else {
+ /* Read-only zeros? */
+ g_assert_not_reached();
+ }
}
+
+ return align_bss >= end_bss ||
+ target_mmap(align_bss, end_bss - align_bss, prot,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) != -1;
}
#if defined(TARGET_ARM)
@@ -2523,308 +2504,322 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
#endif
#endif
-static void pgb_fail_in_use(const char *image_name)
+/**
+ * pgb_try_mmap:
+ * @addr: host start address
+ * @addr_last: host last address
+ * @keep: do not unmap the probe region
+ *
+ * Return 1 if [@addr, @addr_last] is not mapped in the host,
+ * return 0 if it is not available to map, and -1 on mmap error.
+ * If @keep, the region is left mapped on success, otherwise unmapped.
+ */
+static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep)
{
- error_report("%s: requires virtual address space that is in use "
- "(omit the -B option or choose a different value)",
- image_name);
- exit(EXIT_FAILURE);
+ size_t size = addr_last - addr + 1;
+ void *p = mmap((void *)addr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
+ int ret;
+
+ if (p == MAP_FAILED) {
+ return errno == EEXIST ? 0 : -1;
+ }
+ ret = p == (void *)addr;
+ if (!keep || !ret) {
+ munmap(p, size);
+ }
+ return ret;
}
-static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
- abi_ulong guest_hiaddr, long align)
+/**
+ * pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t size, uintptr_t brk)
+ * @addr: host address
+ * @addr_last: host last address
+ * @brk: host brk
+ *
+ * Like pgb_try_mmap, but additionally reserve some memory following brk.
+ */
+static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last,
+ uintptr_t brk, bool keep)
{
- const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
- void *addr, *test;
+ uintptr_t brk_last = brk + 16 * MiB - 1;
- if (!QEMU_IS_ALIGNED(guest_base, align)) {
- fprintf(stderr, "Requested guest base %p does not satisfy "
- "host minimum alignment (0x%lx)\n",
- (void *)guest_base, align);
- exit(EXIT_FAILURE);
+ /* Do not map anything close to the host brk. */
+ if (addr <= brk_last && brk <= addr_last) {
+ return 0;
}
+ return pgb_try_mmap(addr, addr_last, keep);
+}
- /* Sanity check the guest binary. */
- if (reserved_va) {
- if (guest_hiaddr > reserved_va) {
- error_report("%s: requires more than reserved virtual "
- "address space (0x%" PRIx64 " > 0x%lx)",
- image_name, (uint64_t)guest_hiaddr, reserved_va);
- exit(EXIT_FAILURE);
+/**
+ * pgb_try_mmap_set:
+ * @ga: set of guest addrs
+ * @base: guest_base
+ * @brk: host brk
+ *
+ * Return true if all @ga can be mapped by the host at @base.
+ * On success, retain the mapping at index 0 for reserved_va.
+ */
+
+typedef struct PGBAddrs {
+ uintptr_t bounds[3][2]; /* start/last pairs */
+ int nbounds;
+} PGBAddrs;
+
+static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
+{
+ for (int i = ga->nbounds - 1; i >= 0; --i) {
+ if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
+ ga->bounds[i][1] + base,
+ brk, i == 0 && reserved_va) <= 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * pgb_addr_set:
+ * @ga: output set of guest addrs
+ * @guest_loaddr: guest image low address
+ * @guest_loaddr: guest image high address
+ * @identity: create for identity mapping
+ *
+ * Fill in @ga with the image, COMMPAGE and NULL page.
+ */
+static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
+ abi_ulong guest_hiaddr, bool try_identity)
+{
+ int n;
+
+ /*
+ * With a low commpage, or a guest mapped very low,
+ * we may not be able to use the identity map.
+ */
+ if (try_identity) {
+ if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
+ return false;
+ }
+ if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
+ return false;
}
+ }
+
+ memset(ga, 0, sizeof(*ga));
+ n = 0;
+
+ if (reserved_va) {
+ ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
+ ga->bounds[n][1] = reserved_va;
+ n++;
+ /* LO_COMMPAGE and NULL handled by reserving from 0. */
} else {
-#if HOST_LONG_BITS < TARGET_ABI_BITS
- if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) {
- error_report("%s: requires more virtual address space "
- "than the host can provide (0x%" PRIx64 ")",
- image_name, (uint64_t)guest_hiaddr + 1 - guest_base);
- exit(EXIT_FAILURE);
+ /* Add any LO_COMMPAGE or NULL page. */
+ if (LO_COMMPAGE != -1) {
+ ga->bounds[n][0] = 0;
+ ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1;
+ n++;
+ } else if (!try_identity) {
+ ga->bounds[n][0] = 0;
+ ga->bounds[n][1] = TARGET_PAGE_SIZE - 1;
+ n++;
+ }
+
+ /* Add the guest image for ET_EXEC. */
+ if (guest_loaddr) {
+ ga->bounds[n][0] = guest_loaddr;
+ ga->bounds[n][1] = guest_hiaddr;
+ n++;
}
-#endif
}
/*
- * Expand the allocation to the entire reserved_va.
- * Exclude the mmap_min_addr hole.
+ * Temporarily disable
+ * "comparison is always false due to limited range of data type"
+ * due to comparison between unsigned and (possible) 0.
*/
- if (reserved_va) {
- guest_loaddr = (guest_base >= mmap_min_addr ? 0
- : mmap_min_addr - guest_base);
- guest_hiaddr = reserved_va;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+
+ /* Add any HI_COMMPAGE not covered by reserved_va. */
+ if (reserved_va < HI_COMMPAGE) {
+ ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask;
+ ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1;
+ n++;
}
- /* Reserve the address space for the binary, or reserved_va. */
- test = g2h_untagged(guest_loaddr);
- addr = mmap(test, guest_hiaddr - guest_loaddr + 1, PROT_NONE, flags, -1, 0);
- if (test != addr) {
+#pragma GCC diagnostic pop
+
+ ga->nbounds = n;
+ return true;
+}
+
+static void pgb_fail_in_use(const char *image_name)
+{
+ error_report("%s: requires virtual address space that is in use "
+ "(omit the -B option or choose a different value)",
+ image_name);
+ exit(EXIT_FAILURE);
+}
+
+static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr,
+ uintptr_t guest_hiaddr, uintptr_t align)
+{
+ PGBAddrs ga;
+ uintptr_t brk = (uintptr_t)sbrk(0);
+
+ if (!QEMU_IS_ALIGNED(guest_base, align)) {
+ fprintf(stderr, "Requested guest base %p does not satisfy "
+ "host minimum alignment (0x%" PRIxPTR ")\n",
+ (void *)guest_base, align);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base)
+ || !pgb_try_mmap_set(&ga, guest_base, brk)) {
pgb_fail_in_use(image_name);
}
- qemu_log_mask(CPU_LOG_PAGE,
- "%s: base @ %p for %" PRIu64 " bytes\n",
- __func__, addr, (uint64_t)guest_hiaddr - guest_loaddr + 1);
}
/**
- * pgd_find_hole_fallback: potential mmap address
- * @guest_size: size of available space
- * @brk: location of break
- * @align: memory alignment
+ * pgb_find_fallback:
*
- * This is a fallback method for finding a hole in the host address
- * space if we don't have the benefit of being able to access
- * /proc/self/map. It can potentially take a very long time as we can
- * only dumbly iterate up the host address space seeing if the
- * allocation would work.
+ * This is a fallback method for finding holes in the host address space
+ * if we don't have the benefit of being able to access /proc/self/map.
+ * It can potentially take a very long time as we can only dumbly iterate
+ * up the host address space seeing if the allocation would work.
*/
-static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk,
- long align, uintptr_t offset)
+static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align,
+ uintptr_t brk)
{
- uintptr_t base;
-
- /* Start (aligned) at the bottom and work our way up */
- base = ROUND_UP(mmap_min_addr, align);
+ /* TODO: come up with a better estimate of how much to skip. */
+ uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB;
- while (true) {
- uintptr_t align_start, end;
- align_start = ROUND_UP(base, align);
- end = align_start + guest_size + offset;
-
- /* if brk is anywhere in the range give ourselves some room to grow. */
- if (align_start <= brk && brk < end) {
- base = brk + (16 * MiB);
- continue;
- } else if (align_start + guest_size < align_start) {
- /* we have run out of space */
+ for (uintptr_t base = skip; ; base += skip) {
+ base = ROUND_UP(base, align);
+ if (pgb_try_mmap_set(ga, base, brk)) {
+ return base;
+ }
+ if (base >= -skip) {
return -1;
- } else {
- int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE |
- MAP_FIXED_NOREPLACE;
- void * mmap_start = mmap((void *) align_start, guest_size,
- PROT_NONE, flags, -1, 0);
- if (mmap_start != MAP_FAILED) {
- munmap(mmap_start, guest_size);
- if (mmap_start == (void *) align_start) {
- qemu_log_mask(CPU_LOG_PAGE,
- "%s: base @ %p for %" PRIdPTR" bytes\n",
- __func__, mmap_start + offset, guest_size);
- return (uintptr_t) mmap_start + offset;
- }
- }
- base += qemu_host_page_size;
}
}
}
-/* Return value for guest_base, or -1 if no hole found. */
-static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size,
- long align, uintptr_t offset)
+static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base,
+ IntervalTreeRoot *root)
{
- GSList *maps, *iter;
- uintptr_t this_start, this_end, next_start, brk;
- intptr_t ret = -1;
-
- assert(QEMU_IS_ALIGNED(guest_loaddr, align));
-
- maps = read_self_maps();
-
- /* Read brk after we've read the maps, which will malloc. */
- brk = (uintptr_t)sbrk(0);
+ for (int i = ga->nbounds - 1; i >= 0; --i) {
+ uintptr_t s = base + ga->bounds[i][0];
+ uintptr_t l = base + ga->bounds[i][1];
+ IntervalTreeNode *n;
+
+ if (l < s) {
+ /* Wraparound. Skip to advance S to mmap_min_addr. */
+ return mmap_min_addr - s;
+ }
- if (!maps) {
- return pgd_find_hole_fallback(guest_size, brk, align, offset);
+ n = interval_tree_iter_first(root, s, l);
+ if (n != NULL) {
+ /* Conflict. Skip to advance S to LAST + 1. */
+ return n->last - s + 1;
+ }
}
+ return 0; /* success */
+}
- /* The first hole is before the first map entry. */
- this_start = mmap_min_addr;
-
- for (iter = maps; iter;
- this_start = next_start, iter = g_slist_next(iter)) {
- uintptr_t align_start, hole_size;
-
- this_end = ((MapInfo *)iter->data)->start;
- next_start = ((MapInfo *)iter->data)->end;
- align_start = ROUND_UP(this_start + offset, align);
-
- /* Skip holes that are too small. */
- if (align_start >= this_end) {
- continue;
- }
- hole_size = this_end - align_start;
- if (hole_size < guest_size) {
- continue;
- }
+static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root,
+ uintptr_t align, uintptr_t brk)
+{
+ uintptr_t last = mmap_min_addr;
+ uintptr_t base, skip;
- /* If this hole contains brk, give ourselves some room to grow. */
- if (this_start <= brk && brk < this_end) {
- hole_size -= guest_size;
- if (sizeof(uintptr_t) == 8 && hole_size >= 1 * GiB) {
- align_start += 1 * GiB;
- } else if (hole_size >= 16 * MiB) {
- align_start += 16 * MiB;
- } else {
- align_start = (this_end - guest_size) & -align;
- if (align_start < this_start) {
- continue;
- }
- }
+ while (true) {
+ base = ROUND_UP(last, align);
+ if (base < last) {
+ return -1;
}
- /* Record the lowest successful match. */
- if (ret < 0) {
- ret = align_start;
- }
- /* If this hole contains the identity map, select it. */
- if (align_start <= guest_loaddr &&
- guest_loaddr + guest_size <= this_end) {
- ret = 0;
- }
- /* If this hole ends above the identity map, stop looking. */
- if (this_end >= guest_loaddr) {
+ skip = pgb_try_itree(ga, base, root);
+ if (skip == 0) {
break;
}
- }
- free_self_maps(maps);
- if (ret != -1) {
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %" PRIxPTR
- " for %" PRIuPTR " bytes\n",
- __func__, ret, guest_size);
+ last = base + skip;
+ if (last < base) {
+ return -1;
+ }
}
- return ret;
+ /*
+ * We've chosen 'base' based on holes in the interval tree,
+ * but we don't yet know if it is a valid host address.
+ * Because it is the first matching hole, if the host addresses
+ * are invalid we know there are no further matches.
+ */
+ return pgb_try_mmap_set(ga, base, brk) ? base : -1;
}
-static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
- abi_ulong orig_hiaddr, long align)
+static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr,
+ uintptr_t guest_hiaddr, uintptr_t align)
{
- uintptr_t loaddr = orig_loaddr;
- uintptr_t hiaddr = orig_hiaddr;
- uintptr_t offset = 0;
- uintptr_t addr;
-
- if (hiaddr != orig_hiaddr) {
- error_report("%s: requires virtual address space that the "
- "host cannot provide (0x%" PRIx64 ")",
- image_name, (uint64_t)orig_hiaddr + 1);
- exit(EXIT_FAILURE);
- }
+ IntervalTreeRoot *root;
+ uintptr_t brk, ret;
+ PGBAddrs ga;
- loaddr &= -align;
- if (HI_COMMPAGE) {
- /*
- * Extend the allocation to include the commpage.
- * For a 64-bit host, this is just 4GiB; for a 32-bit host we
- * need to ensure there is space bellow the guest_base so we
- * can map the commpage in the place needed when the address
- * arithmetic wraps around.
- */
- if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) {
- hiaddr = UINT32_MAX;
- } else {
- offset = -(HI_COMMPAGE & -align);
- }
- } else if (LO_COMMPAGE != -1) {
- loaddr = MIN(loaddr, LO_COMMPAGE & -align);
- }
+ assert(QEMU_IS_ALIGNED(guest_loaddr, align));
- addr = pgb_find_hole(loaddr, hiaddr - loaddr + 1, align, offset);
- if (addr == -1) {
- /*
- * If HI_COMMPAGE, there *might* be a non-consecutive allocation
- * that can satisfy both. But as the normal arm32 link base address
- * is ~32k, and we extend down to include the commpage, making the
- * overhead only ~96k, this is unlikely.
- */
- error_report("%s: Unable to allocate %#zx bytes of "
- "virtual address space", image_name,
- (size_t)(hiaddr - loaddr));
- exit(EXIT_FAILURE);
+ /* Try the identity map first. */
+ if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) {
+ brk = (uintptr_t)sbrk(0);
+ if (pgb_try_mmap_set(&ga, 0, brk)) {
+ guest_base = 0;
+ return;
+ }
}
- guest_base = addr;
-
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %"PRIxPTR" for %" PRIuPTR" bytes\n",
- __func__, addr, hiaddr - loaddr);
-}
-
-static void pgb_dynamic(const char *image_name, long align)
-{
/*
- * The executable is dynamic and does not require a fixed address.
- * All we need is a commpage that satisfies align.
- * If we do not need a commpage, leave guest_base == 0.
+ * Rebuild the address set for non-identity map.
+ * This differs in the mapping of the guest NULL page.
*/
- if (HI_COMMPAGE) {
- uintptr_t addr, commpage;
+ pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false);
- /* 64-bit hosts should have used reserved_va. */
- assert(sizeof(uintptr_t) == 4);
+ root = read_self_maps();
+
+ /* Read brk after we've read the maps, which will malloc. */
+ brk = (uintptr_t)sbrk(0);
+ if (!root) {
+ ret = pgb_find_fallback(&ga, align, brk);
+ } else {
/*
- * By putting the commpage at the first hole, that puts guest_base
- * just above that, and maximises the positive guest addresses.
+ * Reserve the area close to the host brk.
+ * This will be freed with the rest of the tree.
*/
- commpage = HI_COMMPAGE & -align;
- addr = pgb_find_hole(commpage, -commpage, align, 0);
- assert(addr != -1);
- guest_base = addr;
- }
-}
+ IntervalTreeNode *b = g_new0(IntervalTreeNode, 1);
+ b->start = brk;
+ b->last = brk + 16 * MiB - 1;
+ interval_tree_insert(b, root);
-static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
- abi_ulong guest_hiaddr, long align)
-{
- int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
- void *addr, *test;
-
- if (guest_hiaddr > reserved_va) {
- error_report("%s: requires more than reserved virtual "
- "address space (0x%" PRIx64 " > 0x%lx)",
- image_name, (uint64_t)guest_hiaddr, reserved_va);
- exit(EXIT_FAILURE);
+ ret = pgb_find_itree(&ga, root, align, brk);
+ free_self_maps(root);
}
- /* Widen the "image" to the entire reserved address space. */
- pgb_static(image_name, 0, reserved_va, align);
+ if (ret == -1) {
+ int w = TARGET_LONG_BITS / 4;
- /* osdep.h defines this as 0 if it's missing */
- flags |= MAP_FIXED_NOREPLACE;
+ error_report("%s: Unable to find a guest_base to satisfy all "
+ "guest address mapping requirements", image_name);
- /* Reserve the memory on the host. */
- assert(guest_base != 0);
- test = g2h_untagged(0);
- addr = mmap(test, reserved_va + 1, PROT_NONE, flags, -1, 0);
- if (addr == MAP_FAILED || addr != test) {
- error_report("Unable to reserve 0x%lx bytes of virtual address "
- "space at %p (%s) for use as guest address space (check your "
- "virtual memory ulimit setting, mmap_min_addr or reserve less "
- "using qemu-user's -R option)",
- reserved_va + 1, test, strerror(errno));
+ for (int i = 0; i < ga.nbounds; ++i) {
+ error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n",
+ w, (uint64_t)ga.bounds[i][0],
+ w, (uint64_t)ga.bounds[i][1]);
+ }
exit(EXIT_FAILURE);
}
-
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
- __func__, addr, reserved_va + 1);
+ guest_base = ret;
}
void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
@@ -2833,25 +2828,33 @@ void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
/* In order to use host shmat, we must be able to honor SHMLBA. */
uintptr_t align = MAX(SHMLBA, qemu_host_page_size);
+ /* Sanity check the guest binary. */
+ if (reserved_va) {
+ if (guest_hiaddr > reserved_va) {
+ error_report("%s: requires more than reserved virtual "
+ "address space (0x%" PRIx64 " > 0x%lx)",
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (guest_hiaddr != (uintptr_t)guest_hiaddr) {
+ error_report("%s: requires more virtual address space "
+ "than the host can provide (0x%" PRIx64 ")",
+ image_name, (uint64_t)guest_hiaddr + 1);
+ exit(EXIT_FAILURE);
+ }
+ }
+
if (have_guest_base) {
- pgb_have_guest_base(image_name, guest_loaddr, guest_hiaddr, align);
- } else if (reserved_va) {
- pgb_reserved_va(image_name, guest_loaddr, guest_hiaddr, align);
- } else if (guest_loaddr) {
- pgb_static(image_name, guest_loaddr, guest_hiaddr, align);
+ pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align);
} else {
- pgb_dynamic(image_name, align);
+ pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align);
}
/* Reserve and initialize the commpage. */
if (!init_guest_commpage()) {
- /*
- * With have_guest_base, the user has selected the address and
- * we are trying to work with that. Otherwise, we have selected
- * free space and init_guest_commpage must succeeded.
- */
- assert(have_guest_base);
- pgb_fail_in_use(image_name);
+ /* We have already probed for the commpage being free. */
+ g_assert_not_reached();
}
assert(QEMU_IS_ALIGNED(guest_base, align));
@@ -3107,28 +3110,9 @@ static void load_elf_image(const char *image_name, int image_fd,
}
}
- if (pinterp_name != NULL) {
- /*
- * This is the main executable.
- *
- * Reserve extra space for brk.
- * We hold on to this space while placing the interpreter
- * and the stack, lest they be placed immediately after
- * the data segment and block allocation from the brk.
- *
- * 16MB is chosen as "large enough" without being so large as
- * to allow the result to not fit with a 32-bit guest on a
- * 32-bit host. However some 64 bit guests (e.g. s390x)
- * attempt to place their heap further ahead and currently
- * nothing stops them smashing into QEMUs address space.
- */
-#if TARGET_LONG_BITS == 64
- info->reserve_brk = 32 * MiB;
-#else
- info->reserve_brk = 16 * MiB;
-#endif
- hiaddr += info->reserve_brk;
+ load_addr = loaddr;
+ if (pinterp_name != NULL) {
if (ehdr->e_type == ET_EXEC) {
/*
* Make sure that the low address does not conflict with
@@ -3136,31 +3120,55 @@ static void load_elf_image(const char *image_name, int image_fd,
*/
probe_guest_base(image_name, loaddr, hiaddr);
} else {
+ abi_ulong align;
+
/*
* The binary is dynamic, but we still need to
* select guest_base. In this case we pass a size.
*/
probe_guest_base(image_name, 0, hiaddr - loaddr);
+
+ /*
+ * Avoid collision with the loader by providing a different
+ * default load address.
+ */
+ load_addr += elf_et_dyn_base;
+
+ /*
+ * TODO: Better support for mmap alignment is desirable.
+ * Since we do not have complete control over the guest
+ * address space, we prefer the kernel to choose some address
+ * rather than force the use of LOAD_ADDR via MAP_FIXED.
+ * But without MAP_FIXED we cannot guarantee alignment,
+ * only suggest it.
+ */
+ align = pow2ceil(info->alignment);
+ if (align) {
+ load_addr &= -align;
+ }
}
}
/*
* Reserve address space for all of this.
*
- * In the case of ET_EXEC, we supply MAP_FIXED so that we get
- * exactly the address range that is required.
+ * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get
+ * exactly the address range that is required. Without reserved_va,
+ * the guest address space is not isolated. We have attempted to avoid
+ * conflict with the host program itself via probe_guest_base, but using
+ * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check.
*
* Otherwise this is ET_DYN, and we are searching for a location
* that can hold the memory space required. If the image is
- * pre-linked, LOADDR will be non-zero, and the kernel should
+ * pre-linked, LOAD_ADDR will be non-zero, and the kernel should
* honor that address if it happens to be free.
*
* In both cases, we will overwrite pages in this range with mappings
* from the executable.
*/
- load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
+ load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
- (ehdr->e_type == ET_EXEC ? MAP_FIXED : 0),
+ (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0),
-1, 0);
if (load_addr == -1) {
goto exit_mmap;
@@ -3195,7 +3203,8 @@ static void load_elf_image(const char *image_name, int image_fd,
info->end_code = 0;
info->start_data = -1;
info->end_data = 0;
- info->brk = 0;
+ /* Usual start for brk is after all sections of the main executable. */
+ info->brk = TARGET_PAGE_ALIGN(hiaddr);
info->elf_flags = ehdr->e_flags;
prot_exec = PROT_EXEC;
@@ -3221,7 +3230,7 @@ static void load_elf_image(const char *image_name, int image_fd,
for (i = 0; i < ehdr->e_phnum; i++) {
struct elf_phdr *eppnt = phdr + i;
if (eppnt->p_type == PT_LOAD) {
- abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len;
+ abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
int elf_prot = 0;
if (eppnt->p_flags & PF_R) {
@@ -3235,8 +3244,8 @@ static void load_elf_image(const char *image_name, int image_fd,
}
vaddr = load_bias + eppnt->p_vaddr;
- vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr);
- vaddr_ps = TARGET_ELF_PAGESTART(vaddr);
+ vaddr_po = vaddr & ~TARGET_PAGE_MASK;
+ vaddr_ps = vaddr & TARGET_PAGE_MASK;
vaddr_ef = vaddr + eppnt->p_filesz;
vaddr_em = vaddr + eppnt->p_memsz;
@@ -3246,30 +3255,18 @@ static void load_elf_image(const char *image_name, int image_fd,
* but no backing file segment.
*/
if (eppnt->p_filesz != 0) {
- vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po);
- error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
- MAP_PRIVATE | MAP_FIXED,
+ error = target_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po,
+ elf_prot, MAP_PRIVATE | MAP_FIXED,
image_fd, eppnt->p_offset - vaddr_po);
-
if (error == -1) {
goto exit_mmap;
}
+ }
- /*
- * If the load segment requests extra zeros (e.g. bss), map it.
- */
- if (eppnt->p_filesz < eppnt->p_memsz) {
- zero_bss(vaddr_ef, vaddr_em, elf_prot);
- }
- } else if (eppnt->p_memsz != 0) {
- vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_memsz + vaddr_po);
- error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
- MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS,
- -1, 0);
-
- if (error == -1) {
- goto exit_mmap;
- }
+ /* If the load segment requests extra zeros (e.g. bss), map it. */
+ if (vaddr_ef < vaddr_em &&
+ !zero_bss(vaddr_ef, vaddr_em, elf_prot)) {
+ goto exit_mmap;
}
/* Find the full program boundaries. */
@@ -3289,9 +3286,6 @@ static void load_elf_image(const char *image_name, int image_fd,
info->end_data = vaddr_ef;
}
}
- if (vaddr_em > info->brk) {
- info->brk = vaddr_em;
- }
#ifdef TARGET_MIPS
} else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
Mips_elf_abiflags_v0 abiflags;
@@ -3620,6 +3614,19 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
if (elf_interpreter) {
load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
+ /*
+ * While unusual because of ELF_ET_DYN_BASE, if we are unlucky
+ * with the mappings the interpreter can be loaded above but
+ * near the main executable, which can leave very little room
+ * for the heap.
+ * If the current brk has less than 16MB, use the end of the
+ * interpreter.
+ */
+ if (interp_info.brk > info->brk &&
+ interp_info.load_bias - info->brk < 16 * MiB) {
+ info->brk = interp_info.brk;
+ }
+
/* If the program interpreter is one of these two, then assume
an iBCS2 image. Otherwise assume a native linux image. */
@@ -3673,17 +3680,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
bprm->core_dump = &elf_core_dump;
#endif
- /*
- * If we reserved extra space for brk, release it now.
- * The implementation of do_brk in syscalls.c expects to be able
- * to mmap pages in this space.
- */
- if (info->reserve_brk) {
- abi_ulong start_brk = TARGET_PAGE_ALIGN(info->brk);
- abi_ulong end_brk = TARGET_PAGE_ALIGN(info->brk + info->reserve_brk);
- target_munmap(start_brk, end_brk - start_brk);
- }
-
return 0;
}
diff --git a/linux-user/hexagon/target_mman.h b/linux-user/hexagon/target_mman.h
index e7ba607..e6b5e2c 100644
--- a/linux-user/hexagon/target_mman.h
+++ b/linux-user/hexagon/target_mman.h
@@ -1 +1,14 @@
+/*
+ * arch/hexgon/include/asm/processor.h
+ * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
+ *
+ * arch/hexagon/include/asm/mem-layout.h
+ * TASK_SIZE PAGE_OFFSET
+ * PAGE_OFFSET 0xc0000000
+ */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/hexagon/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0x08000000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/hppa/target_mman.h b/linux-user/hppa/target_mman.h
index 97f87d0..ccda46e 100644
--- a/linux-user/hppa/target_mman.h
+++ b/linux-user/hppa/target_mman.h
@@ -24,6 +24,12 @@
#define TARGET_MS_ASYNC 2
#define TARGET_MS_INVALIDATE 4
+/* arch/parisc/include/asm/processor.h: DEFAULT_MAP_BASE32 */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/parisc/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000)
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/i386/target_mman.h b/linux-user/i386/target_mman.h
index e7ba607..e3b8e1e 100644
--- a/linux-user/i386/target_mman.h
+++ b/linux-user/i386/target_mman.h
@@ -1 +1,17 @@
+/*
+ * arch/x86/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
+ * __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3)
+ *
+ * arch/x86/include/asm/page_32_types.h:
+ * TASK_SIZE_LOW TASK_SIZE
+ * TASK_SIZE __PAGE_OFFSET
+ * __PAGE_OFFSET CONFIG_PAGE_OFFSET
+ * CONFIG_PAGE_OFFSET 0xc0000000 (default in Kconfig)
+ */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/x86/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0x00400000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/loongarch64/target_mman.h b/linux-user/loongarch64/target_mman.h
index e7ba607..8c2a3d5 100644
--- a/linux-user/loongarch64/target_mman.h
+++ b/linux-user/loongarch64/target_mman.h
@@ -1 +1,12 @@
+/*
+ * arch/loongarch/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
+ * TASK_SIZE64 0x1UL << (... ? VA_BITS : ...)
+ */
+#define TASK_UNMAPPED_BASE \
+ TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
+/* arch/loongarch/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/m68k/target_mman.h b/linux-user/m68k/target_mman.h
index e7ba607..20cfe75 100644
--- a/linux-user/m68k/target_mman.h
+++ b/linux-user/m68k/target_mman.h
@@ -1 +1,6 @@
+/* arch/m68k/include/asm/processor.h */
+#define TASK_UNMAPPED_BASE 0xC0000000
+/* arch/m68k/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0xD0000000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/main.c b/linux-user/main.c
index 556956c..96be354 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -821,6 +821,49 @@ int main(int argc, char **argv, char **envp)
reserved_va = max_reserved_va;
}
+ /*
+ * Temporarily disable
+ * "comparison is always false due to limited range of data type"
+ * due to comparison between (possible) uint64_t and uintptr_t.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+
+ /*
+ * Select an initial value for task_unmapped_base that is in range.
+ */
+ if (reserved_va) {
+ if (TASK_UNMAPPED_BASE < reserved_va) {
+ task_unmapped_base = TASK_UNMAPPED_BASE;
+ } else {
+ /* The most common default formula is TASK_SIZE / 3. */
+ task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3);
+ }
+ } else if (TASK_UNMAPPED_BASE < UINTPTR_MAX) {
+ task_unmapped_base = TASK_UNMAPPED_BASE;
+ } else {
+ /* 32-bit host: pick something medium size. */
+ task_unmapped_base = 0x10000000;
+ }
+ mmap_next_start = task_unmapped_base;
+
+ /* Similarly for elf_et_dyn_base. */
+ if (reserved_va) {
+ if (ELF_ET_DYN_BASE < reserved_va) {
+ elf_et_dyn_base = ELF_ET_DYN_BASE;
+ } else {
+ /* The most common default formula is TASK_SIZE / 3 * 2. */
+ elf_et_dyn_base = TARGET_PAGE_ALIGN(reserved_va / 3) * 2;
+ }
+ } else if (ELF_ET_DYN_BASE < UINTPTR_MAX) {
+ elf_et_dyn_base = ELF_ET_DYN_BASE;
+ } else {
+ /* 32-bit host: pick something medium size. */
+ elf_et_dyn_base = 0x18000000;
+ }
+
+#pragma GCC diagnostic pop
+
{
Error *err = NULL;
if (seed_optarg != NULL) {
diff --git a/linux-user/microblaze/target_mman.h b/linux-user/microblaze/target_mman.h
index e7ba607..6b3dd54 100644
--- a/linux-user/microblaze/target_mman.h
+++ b/linux-user/microblaze/target_mman.h
@@ -1 +1,12 @@
+/*
+ * arch/microblaze/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
+ * TASK_SIZE CONFIG_KERNEL_START
+ * CONFIG_KERNEL_START 0xc0000000 (default in Kconfig)
+ */
+#define TASK_UNMAPPED_BASE 0x48000000
+
+/* arch/microblaze/include/uapi/asm/elf.h */
+#define ELF_ET_DYN_BASE 0x08000000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/mips/target_mman.h b/linux-user/mips/target_mman.h
index e97694a..b84fe1e 100644
--- a/linux-user/mips/target_mman.h
+++ b/linux-user/mips/target_mman.h
@@ -14,6 +14,16 @@
#define TARGET_MAP_STACK 0x40000
#define TARGET_MAP_HUGETLB 0x80000
+/*
+ * arch/mips/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
+ */
+#define TASK_UNMAPPED_BASE \
+ TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
+/* arch/mips/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index eb04fab..9aab48d 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -299,20 +299,9 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
return true;
}
-#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
-#ifdef TARGET_AARCH64
-# define TASK_UNMAPPED_BASE 0x5500000000
-#else
-# define TASK_UNMAPPED_BASE (1ul << 38)
-#endif
-#else
-#ifdef TARGET_HPPA
-# define TASK_UNMAPPED_BASE 0xfa000000
-#else
-# define TASK_UNMAPPED_BASE 0x40000000
-#endif
-#endif
-abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
+abi_ulong task_unmapped_base;
+abi_ulong elf_et_dyn_base;
+abi_ulong mmap_next_start;
/*
* Subroutine of mmap_find_vma, used when we have pre-allocated
@@ -391,7 +380,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
if ((addr & (align - 1)) == 0) {
/* Success. */
- if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+ if (start == mmap_next_start && addr >= task_unmapped_base) {
mmap_next_start = addr + size;
}
return addr;
diff --git a/linux-user/nios2/target_mman.h b/linux-user/nios2/target_mman.h
index e7ba607..ab16ad4 100644
--- a/linux-user/nios2/target_mman.h
+++ b/linux-user/nios2/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/nios2/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
+ * TASK_SIZE 0x7FFF0000UL
+ */
+#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0x7FFF0000 / 3)
+
+/* arch/nios2/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0xD0000000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/openrisc/target_mman.h b/linux-user/openrisc/target_mman.h
index e7ba607..243c1d5 100644
--- a/linux-user/openrisc/target_mman.h
+++ b/linux-user/openrisc/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/openrisc/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
+ * TASK_SIZE (0x80000000UL)
+ */
+#define TASK_UNMAPPED_BASE 0x30000000
+
+/* arch/openrisc/include/asm/elf.h */
+#define ELF_ET_DYN_BASE 0x08000000
+
#include "../generic/target_mman.h"
diff --git a/linux-user/ppc/target_mman.h b/linux-user/ppc/target_mman.h
index 67cc218..646d1cc 100644
--- a/linux-user/ppc/target_mman.h
+++ b/linux-user/ppc/target_mman.h
@@ -4,6 +4,26 @@
#define TARGET_MAP_NORESERVE 0x40
#define TARGET_MAP_LOCKED 0x80
+/*
+ * arch/powerpc/include/asm/task_size_64.h
+ * TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+ * TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+ * TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
+ * DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB (with 4k pages)
+ */
+#ifdef TARGET_PPC64
+#define TASK_UNMAPPED_BASE 0x0000100000000000ull
+#else
+#define TASK_UNMAPPED_BASE 0x40000000
+#endif
+
+/* arch/powerpc/include/asm/elf.h */
+#ifdef TARGET_PPC64
+#define ELF_ET_DYN_BASE 0x100000000ull
+#else
+#define ELF_ET_DYN_BASE 0x000400000
+#endif
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 2046a23..4f8b55e 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -30,7 +30,6 @@ struct image_info {
abi_ulong start_data;
abi_ulong end_data;
abi_ulong brk;
- abi_ulong reserve_brk;
abi_ulong start_mmap;
abi_ulong start_stack;
abi_ulong stack_limit;
diff --git a/linux-user/riscv/target_mman.h b/linux-user/riscv/target_mman.h
index e7ba607..3049bcc 100644
--- a/linux-user/riscv/target_mman.h
+++ b/linux-user/riscv/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/loongarch/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
+ */
+#define TASK_UNMAPPED_BASE \
+ TARGET_PAGE_ALIGN((1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 1)) / 3)
+
+/* arch/riscv/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/s390x/target_mman.h b/linux-user/s390x/target_mman.h
index e7ba607..c82435e 100644
--- a/linux-user/s390x/target_mman.h
+++ b/linux-user/s390x/target_mman.h
@@ -1 +1,21 @@
+/*
+ * arch/s390/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE (... : (_REGION2_SIZE >> 1))
+ *
+ * arch/s390/include/asm/pgtable.h:
+ * _REGION2_SIZE (1UL << _REGION2_SHIFT)
+ * _REGION2_SHIFT 42
+ */
+#define TASK_UNMAPPED_BASE (1ull << 41)
+
+/*
+ * arch/s390/include/asm/elf.h:
+ * ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)
+ *
+ * arch/s390/include/asm/processor.h:
+ * STACK_TOP VDSO_LIMIT - VDSO_SIZE - PAGE_SIZE
+ * VDSO_LIMIT _REGION2_SIZE
+ */
+#define ELF_ET_DYN_BASE (((1ull << 42) / 3 * 2) & ~0xffffffffull)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/sh4/target_mman.h b/linux-user/sh4/target_mman.h
index e7ba607..dd90160 100644
--- a/linux-user/sh4/target_mman.h
+++ b/linux-user/sh4/target_mman.h
@@ -1 +1,8 @@
+/* arch/sh/include/asm/processor_32.h */
+#define TASK_UNMAPPED_BASE \
+ TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
+/* arch/sh/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/sparc/target_mman.h b/linux-user/sparc/target_mman.h
index 9bad99c..696ca73 100644
--- a/linux-user/sparc/target_mman.h
+++ b/linux-user/sparc/target_mman.h
@@ -5,6 +5,31 @@
#define TARGET_MAP_LOCKED 0x100
#define TARGET_MAP_GROWSDOWN 0x0200
+/*
+ * arch/sparc/include/asm/page_64.h:
+ * TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
+ * _AC(0x0000000070000000,UL) : \
+ * VA_EXCLUDE_END)
+ * But VA_EXCLUDE_END is > 0xffff800000000000UL which doesn't work
+ * in userland emulation.
+ */
+#ifdef TARGET_ABI32
+#define TASK_UNMAPPED_BASE 0x70000000
+#else
+#define TASK_UNMAPPED_BASE (1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 2))
+#endif
+
+/*
+ * arch/sparc/include/asm/elf_64.h
+ * Except that COMPAT_ELF_ET_DYN_BASE exactly matches TASK_UNMAPPED_BASE,
+ * so move it up a bit.
+ */
+#ifdef TARGET_ABI32
+#define ELF_ET_DYN_BASE 0x78000000
+#else
+#define ELF_ET_DYN_BASE 0x0000010000000000ull
+#endif
+
#include "../generic/target_mman.h"
#endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 7c2c2f6..a15bce2 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8070,16 +8070,17 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
{
CPUState *cpu = env_cpu(cpu_env);
TaskState *ts = cpu->opaque;
- GSList *map_info = read_self_maps();
- GSList *s;
+ IntervalTreeRoot *map_info = read_self_maps();
+ IntervalTreeNode *s;
int count;
- for (s = map_info; s; s = g_slist_next(s)) {
- MapInfo *e = (MapInfo *) s->data;
+ for (s = interval_tree_iter_first(map_info, 0, -1); s;
+ s = interval_tree_iter_next(s, 0, -1)) {
+ MapInfo *e = container_of(s, MapInfo, itree);
- if (h2g_valid(e->start)) {
- unsigned long min = e->start;
- unsigned long max = e->end;
+ if (h2g_valid(e->itree.start)) {
+ unsigned long min = e->itree.start;
+ unsigned long max = e->itree.last + 1;
int flags = page_get_flags(h2g(min));
const char *path;
diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
index 7265c2c..0f4883e 100644
--- a/linux-user/user-mmap.h
+++ b/linux-user/user-mmap.h
@@ -18,6 +18,34 @@
#ifndef LINUX_USER_USER_MMAP_H
#define LINUX_USER_USER_MMAP_H
+/*
+ * Guest parameters for the ADDR_COMPAT_LAYOUT personality
+ * (at present this is the only layout supported by QEMU).
+ *
+ * TASK_UNMAPPED_BASE: For mmap without hint (addr != 0), the search
+ * for unused virtual memory begins at TASK_UNMAPPED_BASE.
+ *
+ * ELF_ET_DYN_BASE: When the executable is ET_DYN (i.e. PIE), and requires
+ * an interpreter (i.e. not -static-pie), use ELF_ET_DYN_BASE instead of
+ * TASK_UNMAPPED_BASE for selecting the address of the executable.
+ * This provides some distance between the executable and the interpreter,
+ * which allows the initial brk to be placed immediately after the
+ * executable and also have room to grow.
+ *
+ * task_unmapped_base, elf_et_dyn_base: When the guest address space is
+ * limited via -R, the values of TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE
+ * must be adjusted to fit.
+ */
+extern abi_ulong task_unmapped_base;
+extern abi_ulong elf_et_dyn_base;
+
+/*
+ * mmap_next_start: The base address for the next mmap without hint,
+ * increased after each successful map, starting at task_unmapped_base.
+ * This is an optimization within QEMU and not part of ADDR_COMPAT_LAYOUT.
+ */
+extern abi_ulong mmap_next_start;
+
int target_mprotect(abi_ulong start, abi_ulong len, int prot);
abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
int flags, int fd, off_t offset);
@@ -26,7 +54,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_size, unsigned long flags,
abi_ulong new_addr);
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice);
-extern abi_ulong mmap_next_start;
abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
void mmap_fork_start(void);
void mmap_fork_end(int child);
diff --git a/linux-user/x86_64/target_mman.h b/linux-user/x86_64/target_mman.h
index e7ba607..48fbf20 100644
--- a/linux-user/x86_64/target_mman.h
+++ b/linux-user/x86_64/target_mman.h
@@ -1 +1,16 @@
+/*
+ * arch/x86/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
+ * __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3)
+ *
+ * arch/x86/include/asm/page_64_types.h:
+ * TASK_SIZE_LOW DEFAULT_MAP_WINDOW
+ * DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+ */
+#define TASK_UNMAPPED_BASE \
+ TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
+/* arch/x86/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
#include "../generic/target_mman.h"
diff --git a/linux-user/xtensa/target_mman.h b/linux-user/xtensa/target_mman.h
index 3933771..8fa6337 100644
--- a/linux-user/xtensa/target_mman.h
+++ b/linux-user/xtensa/target_mman.h
@@ -14,6 +14,16 @@
#define TARGET_MAP_STACK 0x40000
#define TARGET_MAP_HUGETLB 0x80000
+/*
+ * arch/xtensa/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE (TASK_SIZE / 2)
+ */
+#define TASK_UNMAPPED_BASE (1u << (TARGET_VIRT_ADDR_SPACE_BITS - 1))
+
+/* arch/xtensa/include/asm/elf.h */
+#define ELF_ET_DYN_BASE \
+ TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
#include "../generic/target_mman.h"
#endif
diff --git a/util/selfmap.c b/util/selfmap.c
index 2c14f01..4db5b42 100644
--- a/util/selfmap.c
+++ b/util/selfmap.c
@@ -10,74 +10,98 @@
#include "qemu/cutils.h"
#include "qemu/selfmap.h"
-GSList *read_self_maps(void)
+IntervalTreeRoot *read_self_maps(void)
{
- gchar *maps;
- GSList *map_info = NULL;
+ IntervalTreeRoot *root;
+ gchar *maps, **lines;
+ guint i, nlines;
- if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
- gchar **lines = g_strsplit(maps, "\n", 0);
- int i, entries = g_strv_length(lines);
+ if (!g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
+ return NULL;
+ }
+
+ root = g_new0(IntervalTreeRoot, 1);
+ lines = g_strsplit(maps, "\n", 0);
+ nlines = g_strv_length(lines);
+
+ for (i = 0; i < nlines; i++) {
+ gchar **fields = g_strsplit(lines[i], " ", 6);
+ guint nfields = g_strv_length(fields);
+
+ if (nfields > 4) {
+ uint64_t start, end, offset, inode;
+ int errors = 0;
+ const char *p;
- for (i = 0; i < entries; i++) {
- gchar **fields = g_strsplit(lines[i], " ", 6);
- if (g_strv_length(fields) > 4) {
- MapInfo *e = g_new0(MapInfo, 1);
- int errors = 0;
- const char *end;
+ errors |= qemu_strtou64(fields[0], &p, 16, &start);
+ errors |= qemu_strtou64(p + 1, NULL, 16, &end);
+ errors |= qemu_strtou64(fields[2], NULL, 16, &offset);
+ errors |= qemu_strtou64(fields[4], NULL, 10, &inode);
+
+ if (!errors) {
+ size_t dev_len, path_len;
+ MapInfo *e;
+
+ dev_len = strlen(fields[3]) + 1;
+ if (nfields == 6) {
+ p = fields[5];
+ p += strspn(p, " ");
+ path_len = strlen(p) + 1;
+ } else {
+ p = NULL;
+ path_len = 0;
+ }
- errors |= qemu_strtoul(fields[0], &end, 16, &e->start);
- errors |= qemu_strtoul(end + 1, NULL, 16, &e->end);
+ e = g_malloc0(sizeof(*e) + dev_len + path_len);
+
+ e->itree.start = start;
+ e->itree.last = end - 1;
+ e->offset = offset;
+ e->inode = inode;
e->is_read = fields[1][0] == 'r';
e->is_write = fields[1][1] == 'w';
e->is_exec = fields[1][2] == 'x';
e->is_priv = fields[1][3] == 'p';
- errors |= qemu_strtoul(fields[2], NULL, 16, &e->offset);
- e->dev = g_strdup(fields[3]);
- errors |= qemu_strtou64(fields[4], NULL, 10, &e->inode);
-
- if (!errors) {
- /*
- * The last field may have leading spaces which we
- * need to strip.
- */
- if (g_strv_length(fields) == 6) {
- e->path = g_strdup(g_strchug(fields[5]));
- }
- map_info = g_slist_prepend(map_info, e);
- } else {
- g_free(e->dev);
- g_free(e);
+ memcpy(e->dev, fields[3], dev_len);
+ if (path_len) {
+ e->path = memcpy(e->dev + dev_len, p, path_len);
}
- }
- g_strfreev(fields);
+ interval_tree_insert(&e->itree, root);
+ }
}
- g_strfreev(lines);
- g_free(maps);
+ g_strfreev(fields);
}
+ g_strfreev(lines);
+ g_free(maps);
- /* ensure the map data is in the same order we collected it */
- return g_slist_reverse(map_info);
+ return root;
}
/**
* free_self_maps:
- * @info: a GSlist
+ * @root: an interval tree
*
- * Free a list of MapInfo structures.
+ * Free a tree of MapInfo structures.
+ * Since we allocated each MapInfo in one chunk, we need not consider the
+ * contents and can simply free each RBNode.
*/
-static void free_info(gpointer data)
+
+static void free_rbnode(RBNode *n)
{
- MapInfo *e = (MapInfo *) data;
- g_free(e->dev);
- g_free(e->path);
- g_free(e);
+ if (n) {
+ free_rbnode(n->rb_left);
+ free_rbnode(n->rb_right);
+ g_free(n);
+ }
}
-void free_self_maps(GSList *info)
+void free_self_maps(IntervalTreeRoot *root)
{
- g_slist_free_full(info, &free_info);
+ if (root) {
+ free_rbnode(root->rb_root.rb_node);
+ g_free(root);
+ }
}