aboutsummaryrefslogtreecommitdiff
path: root/migration/ram.c
diff options
context:
space:
mode:
authorling xu <ling1.xu@intel.com>2022-11-16 23:29:22 +0800
committerJuan Quintela <quintela@redhat.com>2023-02-11 16:51:09 +0100
commit04ffce137b6d85ab4e7687e54e4dffcef0a9ab99 (patch)
tree857d2986fa10dc683585f56c742038b413f9ae5e /migration/ram.c
parente26470501271adf22e4f37d218c2164884ae96fb (diff)
downloadqemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.zip
qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.gz
qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.bz2
AVX512 support for xbzrle_encode_buffer
This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer function to accelerate xbzrle encoding speed. Runtime check of avx512 support and benchmark for this feature are added. Compared with C version of xbzrle_encode_buffer function, avx512 version can achieve 50%-70% performance improvement on benchmarking. In addition, if dirty data is randomly located in 4K page, the avx512 version can achieve almost 140% performance gain. Signed-off-by: ling xu <ling1.xu@intel.com> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com> Co-authored-by: Jun Jin <jun.i.jin@intel.com> Reviewed-by: Juan Quintela <quintela@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'migration/ram.c')
-rw-r--r--migration/ram.c34
1 files changed, 31 insertions, 3 deletions
diff --git a/migration/ram.c b/migration/ram.c
index 0890816..18ac68b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -83,6 +83,34 @@
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+ }
+ }
+ }
+}
+#endif
+
XBZRLECacheStats xbzrle_counters;
/* used by the search for pages to send */
@@ -806,9 +834,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
/* XBZRLE encoding (if there is no overflow) */
- encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
- TARGET_PAGE_SIZE);
+ encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+ TARGET_PAGE_SIZE);
/*
* Update the cache contents, so that it corresponds to the data