diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-05-09 17:21:39 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-05-09 17:21:39 +0100 |
commit | 577e648bdb524d1984659baf1bd6165de2edae83 (patch) | |
tree | 7c5e59e610be8da6f3ffca922824687caebd0592 | |
parent | 271477b59e723250f17a7e20f139262057921b6a (diff) | |
parent | ef709860ea12ec59c4cd7373bd2fd7a4e50143ee (diff) | |
download | qemu-577e648bdb524d1984659baf1bd6165de2edae83.zip qemu-577e648bdb524d1984659baf1bd6165de2edae83.tar.gz qemu-577e648bdb524d1984659baf1bd6165de2edae83.tar.bz2 |
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* target/i386: improved EPYC models
* more removal of mb_read/mb_set
* bump _WIN32_WINNT to the Windows 8 API
* fix for modular builds with --disable-system
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmRZK7wUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroObngf8D6A5l1QQAnImRrZAny6HZV/9xseD
# 9QhkUW3fxXlUhb8tXomv2BlT8h9GzLIN6aWvcCotT+xK3kAX7mRcYKgPMr9CYL7y
# vev/hh+B6RY1CJ/xPT09/BMVjkj50AL0O/OuWMhcQ5nCO7F2sdMjMrsYqqeZcjYf
# zx9RTX7gVGt+wWFHxgCgdfL0kfgzexK55YuZU0vLzcA+pYsZWoEfW+fKBIf4rzDV
# r9M6mDBUkHBQ0rIVC3QFloAXnYb1JrpeqqL2i2qwhAkLz8LyGqk3lZF20hE/04im
# XZcZjWO5pxAxIEPeTken+2x1n8tn2BLkMtvwJdV5TpvICCFRtPZlbH79qw==
# =rXLN
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 08 May 2023 06:05:00 PM BST
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
meson: leave unnecessary modules out of the build
docs: clarify --without-default-devices
target/i386: Add EPYC-Genoa model to support Zen 4 processor series
target/i386: Add VNMI and automatic IBRS feature bits
target/i386: Add missing feature bits in EPYC-Milan model
target/i386: Add feature bits for CPUID_Fn80000021_EAX
target/i386: Add a couple of feature bits in 8000_0008_EBX
target/i386: Add new EPYC CPU versions with updated cache_info
target/i386: allow versioned CPUs to specify new cache_info
include/qemu/osdep.h: Bump _WIN32_WINNT to the Windows 8 API
MAINTAINERS: add stanza for Kconfig files
tb-maint: do not use mb_read/mb_set
call_rcu: stop using mb_set/mb_read
test-aio-multithread: simplify test_multi_co_schedule
test-aio-multithread: do not use mb_read/mb_set for simple flags
rcu: remove qatomic_mb_set, expand comments
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | MAINTAINERS | 10 | ||||
-rw-r--r-- | accel/tcg/tb-maint.c | 4 | ||||
-rw-r--r-- | docs/devel/kconfig.rst | 16 | ||||
-rw-r--r-- | include/qemu/osdep.h | 2 | ||||
-rw-r--r-- | include/qemu/rcu.h | 5 | ||||
-rw-r--r-- | meson.build | 4 | ||||
-rw-r--r-- | target/i386/cpu.c | 375 | ||||
-rw-r--r-- | target/i386/cpu.h | 15 | ||||
-rw-r--r-- | tests/unit/test-aio-multithread.c | 30 | ||||
-rw-r--r-- | util/rcu.c | 69 |
10 files changed, 479 insertions, 51 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 55102f4..f757369 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3910,6 +3910,16 @@ F: configure F: scripts/mtest2make.py F: tests/Makefile.include +Kconfig +M: Paolo Bonzini <pbonzini@redhat.com> +S: Maintained +F: scripts/minikconf.py +F: docs/devel/kconfig.rst +F: Kconfig* +F: */Kconfig* +F: hw/*/Kconfig* +F: target/*/Kconfig* + GIT submodules M: Daniel P. Berrange <berrange@redhat.com> S: Odd Fixes diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index cb1f806..0dd173f 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -746,7 +746,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) tcg_region_reset_all(); /* XXX: flush processor icache at this point if cache flush is expensive */ - qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1); + qatomic_inc(&tb_ctx.tb_flush_count); done: mmap_unlock(); @@ -758,7 +758,7 @@ done: void tb_flush(CPUState *cpu) { if (tcg_enabled()) { - unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count); + unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count); if (cpu_in_exclusive_context(cpu)) { do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index ac9453e..e3a544e 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -282,9 +282,19 @@ want to change some lines in the first group, for example like this:: CONFIG_PCI_DEVICES=y #CONFIG_TEST_DEVICES=n -and/or pick a subset of the devices in those device groups. Right now -there is no single place that lists all the optional devices for -``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future, +and/or pick a subset of the devices in those device groups. Without +further modifications to ``configs/devices/``, a system emulator built +without default devices might not do much more than start an empty +machine, and even then only if ``--nodefaults`` is specified on the +command line. 
Starting a VM *without* ``--nodefaults`` is allowed to +fail, but should never abort. Failures in ``make check`` with +``--without-default-devices`` are considered bugs in the test code: +the tests should either use ``--nodefaults``, and should be skipped +if a necessary device is not present in the build. Such failures +should not be worked around with ``select`` directives. + +Right now there is no single place that lists all the optional devices +for ``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future, we expect that ``.mak`` files will be automatically generated, so that they will include all these symbols and some help text on what they do. diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9eff0be..cc61b00 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int); #ifdef _WIN32 /* as defined in sdkddkver.h */ #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */ +#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be >= the one from glib) */ #endif /* reduces the number of implicitly included headers */ #ifndef WIN32_LEAN_AND_MEAN diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 313fc41..661c1a1 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -87,7 +87,10 @@ static inline void rcu_read_lock(void) ctr = qatomic_read(&rcu_gp_ctr); qatomic_set(&p_rcu_reader->ctr, ctr); - /* Write p_rcu_reader->ctr before reading RCU-protected pointers. */ + /* + * Read rcu_gp_ctr and write p_rcu_reader->ctr before reading + * RCU-protected pointers. + */ smp_mb_placeholder(); } diff --git a/meson.build b/meson.build index 229eb58..27782f8 100644 --- a/meson.build +++ b/meson.build @@ -3217,6 +3217,10 @@ modinfo_files = [] block_mods = [] softmmu_mods = [] foreach d, list : modules + if not (d == 'block' ? 
have_block : have_system) + continue + endif + foreach m, module_ss : list if enable_modules and targetos != 'windows' module_ss = module_ss.apply(config_all, strict: false) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 823320f..4187759 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -809,7 +809,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "pfthreshold", "avic", NULL, "v-vmsave-vmload", "vgif", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "vnmi", NULL, NULL, "svme-addr-chk", NULL, NULL, NULL, }, .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, @@ -933,15 +933,31 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, "wbnoinvd", NULL, NULL, "ibpb", NULL, "ibrs", "amd-stibp", - NULL, NULL, NULL, NULL, + NULL, "stibp-always-on", NULL, NULL, NULL, NULL, NULL, NULL, "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, - NULL, NULL, NULL, NULL, + "amd-psfd", NULL, NULL, NULL, }, .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + NULL, NULL, "null-sel-clr-base", NULL, + "auto-ibrs", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_XSAVE] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1620,6 +1636,7 @@ typedef struct X86CPUVersionDefinition { const char *alias; const char *note; PropValue *props; + const CPUCaches *const cache_info; } X86CPUVersionDefinition; /* Base definition for a CPU model */ @@ -1728,6 +1745,56 @@ static const CPUCaches epyc_cache_info = { }, }; +static CPUCaches epyc_v4_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, 
+ .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -1778,6 +1845,56 @@ static const CPUCaches epyc_rome_cache_info = { }, }; +static const CPUCaches epyc_rome_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + 
.complex_indexing = false, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -1828,6 +1945,106 @@ static const CPUCaches epyc_milan_cache_info = { }, }; +static const CPUCaches epyc_milan_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + +static const CPUCaches epyc_genoa_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + 
.level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -4112,6 +4329,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-v4 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v4_cache_info + }, { /* end of list */ } } }, @@ -4231,6 +4457,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Rome-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v3_cache_info + }, { /* end of list */ } } }, @@ -4288,6 +4523,98 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000001E, .model_id = "AMD EPYC-Milan Processor", .cache_info = &epyc_milan_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Milan-v2 Processor" }, + { "vaes", "on" }, + { "vpclmulqdq", "on" }, + { "stibp-always-on", "on" }, + { "amd-psfd", "on" }, + { "no-nested-data-bp", "on" }, + { "lfence-always-serializing", "on" }, + { "null-sel-clr-base", "on" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v2_cache_info + }, + { /* end of list */ } + } + }, + { + .name = "EPYC-Genoa", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 25, + .model = 17, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + 
.features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_No_NESTED_DATA_BP | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG 
| + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | + CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Genoa Processor", + .cache_info = &epyc_genoa_cache_info, }, }; @@ -5225,6 +5552,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) assert(vdef->version == version); } +static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, + X86CPUModel *model) +{ + const X86CPUVersionDefinition *vdef; + X86CPUVersion version = x86_cpu_model_resolve_version(model); + const CPUCaches *cache_info = model->cpudef->cache_info; + + if (version == CPU_VERSION_LEGACY) { + return cache_info; + } + + for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { + if (vdef->cache_info) { + cache_info = vdef->cache_info; + } + + if (vdef->version == version) { + break; + } + } + + assert(vdef->version == version); + return cache_info; +} + /* * Load data from X86CPUDefinition into a X86CPU object. * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
@@ -5257,7 +5609,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } /* legacy-cache defaults to 'off' if CPU model provides cache info */ - cpu->legacy_cache = !def->cache_info; + cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; @@ -6024,6 +6376,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } break; + case 0x80000021: + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = *ecx = *edx = 0; + break; default: /* reserved values: zero */ *eax = 0; @@ -6453,6 +6809,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); } + if (env->features[FEAT_8000_0021_EAX]) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); + } + /* SGX requires CPUID[0x12] for EPC enumeration */ if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); @@ -6736,14 +7096,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) /* Cache information initialization */ if (!cpu->legacy_cache) { - if (!xcc->model || !xcc->model->cpudef->cache_info) { + const CPUCaches *cache_info = + x86_cpu_get_versioned_cache_info(cpu, xcc->model); + + if (!xcc->model || !cache_info) { g_autofree char *name = x86_cpu_class_get_model_name(xcc); error_setg(errp, "CPU model '%s' doesn't support legacy-cache=off", name); return; } env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *xcc->model->cpudef->cache_info; + *cache_info; } else { /* Build legacy cache information */ env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8504aaa..8ade71a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -600,6 +600,7 @@ typedef enum FeatureWord { FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* 
CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ + FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ @@ -773,6 +774,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_SVM_AVIC (1U << 13) #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) #define CPUID_SVM_VGIF (1U << 16) +#define CPUID_SVM_VNMI (1U << 25) #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ @@ -946,8 +948,21 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_8000_0008_EBX_IBRS (1U << 14) /* Single Thread Indirect Branch Predictors */ #define CPUID_8000_0008_EBX_STIBP (1U << 15) +/* STIBP mode has enhanced performance and may be left always on */ +#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) /* Speculative Store Bypass Disable */ #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Predictive Store Forwarding Disable */ +#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + +/* Processor ignores nested data breakpoints */ +#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) +/* LFENCE is always serializing */ +#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Null Selector Clears Base */ +#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) +/* Automatic IBRS */ +#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c index a555cc8..80c5d4e 100644 --- a/tests/unit/test-aio-multithread.c +++ b/tests/unit/test-aio-multithread.c @@ -107,8 +107,7 @@ static void test_lifecycle(void) /* aio_co_schedule test. 
*/ static Coroutine *to_schedule[NUM_CONTEXTS]; - -static bool now_stopping; +static bool stop[NUM_CONTEXTS]; static int count_retry; static int count_here; @@ -136,6 +135,7 @@ static bool schedule_next(int n) static void finish_cb(void *opaque) { + stop[id] = true; schedule_next(id); } @@ -143,13 +143,19 @@ static coroutine_fn void test_multi_co_schedule_entry(void *opaque) { g_assert(to_schedule[id] == NULL); - while (!qatomic_mb_read(&now_stopping)) { + /* + * The next iteration will set to_schedule[id] again, but once finish_cb + * is scheduled there is no guarantee that it will actually be woken up, + * so at that point it must not go to sleep. + */ + while (!stop[id]) { int n; n = g_test_rand_int_range(0, NUM_CONTEXTS); schedule_next(n); qatomic_mb_set(&to_schedule[id], qemu_coroutine_self()); + /* finish_cb can run here. */ qemu_coroutine_yield(); g_assert(to_schedule[id] == NULL); } @@ -161,7 +167,6 @@ static void test_multi_co_schedule(int seconds) int i; count_here = count_other = count_retry = 0; - now_stopping = false; create_aio_contexts(); for (i = 0; i < NUM_CONTEXTS; i++) { @@ -171,10 +176,10 @@ static void test_multi_co_schedule(int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + /* Guarantee that each AioContext is woken up from its last wait. 
*/ for (i = 0; i < NUM_CONTEXTS; i++) { ctx_run(i, finish_cb, NULL); - to_schedule[i] = NULL; + g_assert(to_schedule[i] == NULL); } join_aio_contexts(); @@ -199,10 +204,11 @@ static uint32_t atomic_counter; static uint32_t running; static uint32_t counter; static CoMutex comutex; +static bool now_stopping; static void coroutine_fn test_multi_co_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { qemu_co_mutex_lock(&comutex); counter++; qemu_co_mutex_unlock(&comutex); @@ -236,7 +242,7 @@ static void test_multi_co_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } @@ -327,7 +333,7 @@ static void mcs_mutex_unlock(void) static void test_multi_fair_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { mcs_mutex_lock(); counter++; mcs_mutex_unlock(); @@ -355,7 +361,7 @@ static void test_multi_fair_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } @@ -383,7 +389,7 @@ static QemuMutex mutex; static void test_multi_mutex_entry(void *opaque) { - while (!qatomic_mb_read(&now_stopping)) { + while (!qatomic_read(&now_stopping)) { qemu_mutex_lock(&mutex); counter++; qemu_mutex_unlock(&mutex); @@ -411,7 +417,7 @@ static void test_multi_mutex(int threads, int seconds) g_usleep(seconds * 1000000); - qatomic_mb_set(&now_stopping, true); + qatomic_set(&now_stopping, true); while (running > 0) { g_usleep(100000); } @@ -83,12 +83,6 @@ static void wait_for_readers(void) */ qemu_event_reset(&rcu_gp_event); - /* Instead of using qatomic_mb_set for index->waiting, and - * qatomic_mb_read for index->ctr, memory barriers are placed - * manually since writes to different threads are independent. 
- * qemu_event_reset has acquire semantics, so no memory barrier - * is needed here. - */ QLIST_FOREACH(index, &registry, node) { qatomic_set(&index->waiting, true); } @@ -96,6 +90,10 @@ static void wait_for_readers(void) /* Here, order the stores to index->waiting before the loads of * index->ctr. Pairs with smp_mb_placeholder() in rcu_read_unlock(), * ensuring that the loads of index->ctr are sequentially consistent. + * + * If this is the last iteration, this barrier also prevents + * frees from seeping upwards, and orders the two wait phases + * on architectures with 32-bit longs; see synchronize_rcu(). */ smp_mb_global(); @@ -104,7 +102,7 @@ static void wait_for_readers(void) QLIST_REMOVE(index, node); QLIST_INSERT_HEAD(&qsreaders, index, node); - /* No need for mb_set here, worst of all we + /* No need for memory barriers here, worst of all we * get some extra futex wakeups. */ qatomic_set(&index->waiting, false); @@ -149,26 +147,26 @@ void synchronize_rcu(void) /* Write RCU-protected pointers before reading p_rcu_reader->ctr. * Pairs with smp_mb_placeholder() in rcu_read_lock(). + * + * Also orders write to RCU-protected pointers before + * write to rcu_gp_ctr. */ smp_mb_global(); QEMU_LOCK_GUARD(&rcu_registry_lock); if (!QLIST_EMPTY(&registry)) { - /* In either case, the qatomic_mb_set below blocks stores that free - * old RCU-protected pointers. - */ if (sizeof(rcu_gp_ctr) < 8) { /* For architectures with 32-bit longs, a two-subphases algorithm * ensures we do not encounter overflow bugs. * * Switch parity: 0 -> 1, 1 -> 0. */ - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); wait_for_readers(); - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); } else { /* Increment current grace period. 
*/ - qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); + qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); } wait_for_readers(); @@ -191,8 +189,22 @@ static void enqueue(struct rcu_head *node) struct rcu_head **old_tail; node->next = NULL; + + /* + * Make this node the tail of the list. The node will be + * used by further enqueue operations, but it will not + * be dequeued yet... + */ old_tail = qatomic_xchg(&tail, &node->next); - qatomic_mb_set(old_tail, node); + + /* + * ... until it is pointed to from another item in the list. + * In the meantime, try_dequeue() will find a NULL next pointer + * and loop. + * + * Synchronizes with qatomic_load_acquire() in try_dequeue(). + */ + qatomic_store_release(old_tail, node); } static struct rcu_head *try_dequeue(void) @@ -200,26 +212,31 @@ static struct rcu_head *try_dequeue(void) struct rcu_head *node, *next; retry: - /* Test for an empty list, which we do not expect. Note that for + /* Head is only written by this thread, so no need for barriers. */ + node = head; + + /* + * If the head node has NULL in its next pointer, the value is + * wrong and we need to wait until its enqueuer finishes the update. + */ + next = qatomic_load_acquire(&node->next); + if (!next) { + return NULL; + } + + /* + * Test for an empty list, which we do not expect. Note that for * the consumer head and tail are always consistent. The head * is consistent because only the consumer reads/writes it. * The tail, because it is the first step in the enqueuing. * It is only the next pointers that might be inconsistent. */ - if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) { + if (head == &dummy && qatomic_read(&tail) == &dummy.next) { abort(); } - /* If the head node has NULL in its next pointer, the value is - * wrong and we need to wait until its enqueuer finishes the update. 
- */ - node = head; - next = qatomic_mb_read(&head->next); - if (!next) { - return NULL; - } - - /* Since we are the sole consumer, and we excluded the empty case + /* + * Since we are the sole consumer, and we excluded the empty case * above, the queue will always have at least two nodes: the * dummy node, and the one being removed. So we do not need to update * the tail pointer. |