diff options
author | ling xu <ling1.xu@intel.com> | 2022-11-16 23:29:22 +0800 |
---|---|---|
committer | Juan Quintela <quintela@redhat.com> | 2023-02-11 16:51:09 +0100 |
commit | 04ffce137b6d85ab4e7687e54e4dffcef0a9ab99 (patch) | |
tree | 857d2986fa10dc683585f56c742038b413f9ae5e /migration/ram.c | |
parent | e26470501271adf22e4f37d218c2164884ae96fb (diff) | |
download | qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.zip qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.gz qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.bz2 |
AVX512 support for xbzrle_encode_buffer
This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer
function to accelerate xbzrle encoding speed. Runtime check of avx512
support and benchmark for this feature are added. Compared with C
version of xbzrle_encode_buffer function, avx512 version can achieve
50%-70% performance improvement on benchmarking. In addition, if dirty
data is randomly located in 4K page, the avx512 version can achieve
almost 140% performance gain.
Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'migration/ram.c')
-rw-r--r-- | migration/ram.c | 34 |
1 files changed, 31 insertions, 3 deletions
diff --git a/migration/ram.c b/migration/ram.c index 0890816..18ac68b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -83,6 +83,34 @@ /* 0x80 is reserved in migration.h start with 0x100 next */ #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 +int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, + uint8_t *, int) = xbzrle_encode_buffer; +#if defined(CONFIG_AVX512BW_OPT) +#include "qemu/cpuid.h" +static void __attribute__((constructor)) init_cpu_flag(void) +{ + unsigned max = __get_cpuid_max(0, NULL); + int a, b, c, d; + if (max >= 1) { + __cpuid(1, a, b, c, d); + /* We must check that AVX is not just available, but usable. */ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + /* 0xe6: + * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 + * and ZMM16-ZMM31 state are enabled by OS) + * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS) + */ + if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) { + xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512; + } + } + } +} +#endif + XBZRLECacheStats xbzrle_counters; /* used by the search for pages to send */ @@ -806,9 +834,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); /* XBZRLE encoding (if there is no overflow) */ - encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, - TARGET_PAGE_SIZE, XBZRLE.encoded_buf, - TARGET_PAGE_SIZE); + encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); /* * Update the cache contents, so that it corresponds to the data |