aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2022-10-05 10:17:02 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2022-10-05 10:17:02 -0400
commit4a9c04672a875ed00ea807ea4d552c01f6440bc7 (patch)
tree5ae0c7c85d114c608cf6018b8dc50f4feaaa83b8 /include
parentfafd35a6dab8e70a7c395aaa8e1273267cf9f3c8 (diff)
parentab419fd8a035a65942de4e63effcd55ccbf1a9fe (diff)
downloadqemu-4a9c04672a875ed00ea807ea4d552c01f6440bc7.zip
qemu-4a9c04672a875ed00ea807ea4d552c01f6440bc7.tar.gz
qemu-4a9c04672a875ed00ea807ea4d552c01f6440bc7.tar.bz2
Merge tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu into staging
Cache CPUClass for use in hot code paths. Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full. Add generic support for TARGET_TB_PCREL. tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07 target/sh4: Fix TB_FLAG_UNALIGN # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmM8jXEdHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/oEggArAHK8FtydfQ4ZwnF # SjXfpdP50OC0SZn3uBN93FZOrxz9UYG9t1oDHs39J/+b/u2nwJYch//EH2k+NtOW # hc3iIgS9bWgs/UWZESkViKQccw7gpYlc21Br38WWwFNEFyecX0p+e9pJgld5rSv1 # mRGvCs5J2svH2tcXl/Sb/JWgcumOJoG7qy2aLyJGolR6UOfwcfFMzQXzq8qjpRKH # Jh84qusE/rLbzBsdN6snJY4+dyvUo03lT5IJ4d+FQg2tUip+Qqt7pnMbsqq6qF6H # R6fWU1JTbsh7GxXJwQJ83jLBnUsi8cy6FKrZ3jyiBq76+DIpR0PqoEe+PN/weInU # TN0z4g== # =RfXJ # -----END PGP SIGNATURE----- # gpg: Signature made Tue 04 Oct 2022 15:45:53 EDT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu: target/sh4: Fix TB_FLAG_UNALIGN tcg/ppc: Optimize 26-bit jumps accel/tcg: Introduce TARGET_TB_PCREL accel/tcg: Introduce tb_pc and log_pc hw/core: Add CPUClass.get_pc include/hw/core: Create struct CPUJumpCache accel/tcg: Inline tb_flush_jmp_cache accel/tcg: Do not align tb->page_addr[0] accel/tcg: Use DisasContextBase in plugin_gen_tb_start accel/tcg: Use bool for page_find_alloc accel/tcg: Remove PageDesc code_bitmap include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA accel/tcg: Introduce tlb_set_page_full accel/tcg: Introduce probe_access_full accel/tcg: Suppress auto-invalidate in probe_access_internal accel/tcg: Drop addr member from SavedIOTLB accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull cputlb: used cached CPUClass in our hot-paths hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs cpu: cache CPUClass in CPUState for hot code paths Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/exec/cpu-common.h1
-rw-r--r--include/exec/cpu-defs.h48
-rw-r--r--include/exec/exec-all.h75
-rw-r--r--include/exec/plugin-gen.h7
-rw-r--r--include/hw/core/cpu.h28
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/tcg/tcg.h2
7 files changed, 131 insertions, 32 deletions
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index d909429..c493510 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -38,6 +38,7 @@ void cpu_list_unlock(void);
unsigned int cpu_list_generation_id_get(void);
void tcg_flush_softmmu_tlb(CPUState *cs);
+void tcg_flush_jmp_cache(CPUState *cs);
void tcg_iommu_init_notifier_list(CPUState *cpu);
void tcg_iommu_free_notifier_list(CPUState *cpu);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index ba3cd32..21309cf 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -54,6 +54,9 @@
# error TARGET_PAGE_BITS must be defined in cpu-param.h
# endif
#endif
+#ifndef TARGET_TB_PCREL
+# define TARGET_TB_PCREL 0
+#endif
#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
@@ -108,6 +111,7 @@ typedef uint64_t target_ulong;
# endif
# endif
+/* Minimalized TLB entry for use by TCG fast path. */
typedef struct CPUTLBEntry {
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not
@@ -131,14 +135,14 @@ typedef struct CPUTLBEntry {
QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
-/* The IOTLB is not accessed directly inline by generated TCG code,
- * so the CPUIOTLBEntry layout is not as critical as that of the
- * CPUTLBEntry. (This is also why we don't want to combine the two
- * structs into one.)
+/*
+ * The full TLB entry, which is not accessed by generated TCG code,
+ * so the layout is not as critical as that of CPUTLBEntry. This is
+ * also why we don't want to combine the two structs.
*/
-typedef struct CPUIOTLBEntry {
+typedef struct CPUTLBEntryFull {
/*
- * @addr contains:
+ * @xlat_section contains:
* - in the lower TARGET_PAGE_BITS, a physical section number
* - with the lower TARGET_PAGE_BITS masked off, an offset which
* must be added to the virtual address to obtain:
@@ -146,9 +150,32 @@ typedef struct CPUIOTLBEntry {
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
* + the offset within the target MemoryRegion (otherwise)
*/
- hwaddr addr;
+ hwaddr xlat_section;
+
+ /*
+ * @phys_addr contains the physical address in the address space
+ * given by cpu_asidx_from_attrs(cpu, @attrs).
+ */
+ hwaddr phys_addr;
+
+ /* @attrs contains the memory transaction attributes for the page. */
MemTxAttrs attrs;
-} CPUIOTLBEntry;
+
+ /* @prot contains the complete protections for the page. */
+ uint8_t prot;
+
+ /* @lg_page_size contains the log2 of the page size. */
+ uint8_t lg_page_size;
+
+ /*
+ * Allow target-specific additions to this structure.
+ * This may be used to cache items from the guest cpu
+ * page tables for later use by the implementation.
+ */
+#ifdef TARGET_PAGE_ENTRY_EXTRA
+ TARGET_PAGE_ENTRY_EXTRA
+#endif
+} CPUTLBEntryFull;
/*
* Data elements that are per MMU mode, minus the bits accessed by
@@ -172,9 +199,8 @@ typedef struct CPUTLBDesc {
size_t vindex;
/* The tlb victim table, in two parts. */
CPUTLBEntry vtable[CPU_VTLB_SIZE];
- CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
- /* The iotlb. */
- CPUIOTLBEntry *iotlb;
+ CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
+ CPUTLBEntryFull *fulltlb;
} CPUTLBDesc;
/*
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index bcad607..e5f8b22 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -258,6 +258,28 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
unsigned bits);
/**
+ * tlb_set_page_full:
+ * @cpu: CPU context
+ * @mmu_idx: mmu index of the tlb to modify
+ * @vaddr: virtual address of the entry to add
+ * @full: the details of the tlb entry
+ *
+ * Add an entry to @cpu tlb index @mmu_idx. All of the fields of
+ * @full must be filled, except for xlat_section, and constitute
+ * the complete description of the translated page.
+ *
+ * This is generally called by the target tlb_fill function after
+ * having performed a successful page table walk to find the physical
+ * address and attributes for the translation.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
+ CPUTLBEntryFull *full);
+
+/**
* tlb_set_page_with_attrs:
* @cpu: CPU to add this TLB entry for
* @vaddr: virtual address of page to add entry for
@@ -434,6 +456,21 @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr);
+#ifndef CONFIG_USER_ONLY
+/**
+ * probe_access_full:
+ * Like probe_access_flags, except also return into @pfull.
+ *
+ * The CPUTLBEntryFull structure returned via @pfull is transient
+ * and must be consumed or copied immediately, before any further
+ * access or changes to TLB @mmu_idx.
+ */
+int probe_access_full(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost,
+ CPUTLBEntryFull **pfull, uintptr_t retaddr);
+#endif
+
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
/* Estimated block size for TB allocation. */
@@ -459,8 +496,32 @@ struct tb_tc {
};
struct TranslationBlock {
- target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
- target_ulong cs_base; /* CS base for this block */
+#if !TARGET_TB_PCREL
+ /*
+ * Guest PC corresponding to this block. This must be the true
+ * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and
+ * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
+ * privilege, must store those bits elsewhere.
+ *
+ * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
+ * written such that the TB is associated only with the physical
+ * page and may be run in any virtual address context. In this case,
+ * PC must always be taken from ENV in a target-specific manner.
+ * Unwind information is taken as offsets from the page, to be
+ * deposited into the "current" PC.
+ */
+ target_ulong pc;
+#endif
+
+ /*
+ * Target-specific data associated with the TranslationBlock, e.g.:
+ * x86: the original user, the Code Segment virtual base,
+ * arm: an extension of tb->flags,
+ * s390x: instruction data for EXECUTE,
+ * sparc: the next pc of the instruction queue (for delay slots).
+ */
+ target_ulong cs_base;
+
uint32_t flags; /* flags defining in which context the code was generated */
uint32_t cflags; /* compile flags */
@@ -533,6 +594,16 @@ struct TranslationBlock {
uintptr_t jmp_dest[2];
};
+/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
+static inline target_ulong tb_pc(const TranslationBlock *tb)
+{
+#if TARGET_TB_PCREL
+ qemu_build_not_reached();
+#else
+ return tb->pc;
+#endif
+}
+
/* Hide the qatomic_read to make code a little easier on the eyes */
static inline uint32_t tb_cflags(const TranslationBlock *tb)
{
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
index f92f169..5004728 100644
--- a/include/exec/plugin-gen.h
+++ b/include/exec/plugin-gen.h
@@ -19,7 +19,8 @@ struct DisasContextBase;
#ifdef CONFIG_PLUGIN
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
+bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
+ bool supress);
void plugin_gen_tb_end(CPUState *cpu);
void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
void plugin_gen_insn_end(void);
@@ -48,8 +49,8 @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
#else /* !CONFIG_PLUGIN */
-static inline
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
+static inline bool
+plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
{
return false;
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 500503d..f9b5877 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -51,6 +51,13 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
*/
#define CPU(obj) ((CPUState *)(obj))
+/*
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
+ * expensive given the eventual call to
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
+ * has a cached value for the class in cs->cc which is set up in
+ * cpu_exec_realizefn() for use in hot code paths.
+ */
typedef struct CPUClass CPUClass;
DECLARE_CLASS_CHECKERS(CPUClass, CPU,
TYPE_CPU)
@@ -108,6 +115,8 @@ struct SysemuCPUOps;
* If the target behaviour here is anything other than "set
* the PC register to the value passed in" then the target must
* also implement the synchronize_from_tb hook.
+ * @get_pc: Callback for getting the Program Counter register.
+ * As above, with the semantics of the target architecture.
* @gdb_read_register: Callback for letting GDB read a register.
* @gdb_write_register: Callback for letting GDB write a register.
* @gdb_adjust_breakpoint: Callback for adjusting the address of a
@@ -144,6 +153,7 @@ struct CPUClass {
void (*dump_state)(CPUState *cpu, FILE *, int flags);
int64_t (*get_arch_id)(CPUState *cpu);
void (*set_pc)(CPUState *cpu, vaddr value);
+ vaddr (*get_pc)(CPUState *cpu);
int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
@@ -218,7 +228,6 @@ struct CPUWatchpoint {
* the memory regions get moved around by io_writex.
*/
typedef struct SavedIOTLB {
- hwaddr addr;
MemoryRegionSection *section;
hwaddr mr_offset;
} SavedIOTLB;
@@ -230,9 +239,6 @@ struct kvm_run;
struct hax_vcpu_state;
struct hvf_vcpu_state;
-#define TB_JMP_CACHE_BITS 12
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
-
/* work queue */
/* The union type allows passing of 64 bit target pointers on 32 bit
@@ -317,6 +323,8 @@ struct qemu_work_item;
struct CPUState {
/*< private >*/
DeviceState parent_obj;
+ /* cache to avoid expensive CPU_GET_CLASS */
+ CPUClass *cc;
/*< public >*/
int nr_cores;
@@ -361,8 +369,7 @@ struct CPUState {
CPUArchState *env_ptr;
IcountDecr *icount_decr_ptr;
- /* Accessed in parallel; all accesses must be atomic */
- TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+ CPUJumpCache *tb_jmp_cache;
struct GDBRegisterState *gdb_regs;
int gdb_num_regs;
@@ -448,15 +455,6 @@ extern CPUTailQ cpus;
extern __thread CPUState *current_cpu;
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
-{
- unsigned int i;
-
- for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
- qatomic_set(&cpu->tb_jmp_cache[i], NULL);
- }
-}
-
/**
* qemu_tcg_mttcg_enabled:
* Check whether we are running MultiThread TCG or not.
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 42f4ceb..5f95169 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -41,7 +41,9 @@ typedef struct CoMutex CoMutex;
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
typedef struct CPUAddressSpace CPUAddressSpace;
typedef struct CPUArchState CPUArchState;
+typedef struct CPUJumpCache CPUJumpCache;
typedef struct CPUState CPUState;
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
typedef struct DeviceListener DeviceListener;
typedef struct DeviceState DeviceState;
typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 26a7052..d84bae6 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -840,7 +840,7 @@ void tcg_register_thread(void);
void tcg_prologue_init(TCGContext *s);
void tcg_func_start(TCGContext *s);
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);