AVX512 support for xbzrle_encode_buffer

This commit is the same as [PATCH v6 1/2], and provides AVX512 support for the xbzrle_encode_buffer function to accelerate XBZRLE encoding. A runtime check for AVX512 support and a benchmark for this feature are added. Compared with the C version of the xbzrle_encode_buffer function, the AVX512 version achieves a 50%-70% performance improvement in benchmarking. In addition, if dirty data is randomly located within a 4K page, the AVX512 version can achieve an almost 140% performance gain.

Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
author: ling xu <ling1.xu@intel.com> 2022-11-16 23:29:22 +0800
committer: Juan Quintela <quintela@redhat.com> 2023-02-11 16:51:09 +0100
commit: 04ffce137b6d85ab4e7687e54e4dffcef0a9ab99 (patch)
tree: 857d2986fa10dc683585f56c742038b413f9ae5e /migration/ram.c
parent: e26470501271adf22e4f37d218c2164884ae96fb (diff)
download: qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.zip
qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.gz
qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.bz2
1 files changed, 31 insertions, 3 deletions
diff --git a/migration/ram.c b/migration/ram.c
index 0890816..18ac68b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -83,6 +83,34 @@
 /* 0x80 is reserved in migration.h start with 0x100 next */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+}
+#endif
+
 XBZRLECacheStats xbzrle_counters;
 
 /* used by the search for pages to send */
@@ -806,9 +834,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
     /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+                                            TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                            TARGET_PAGE_SIZE);
 
     /*
      * Update the cache contents, so that it corresponds to the data
author	ling xu <ling1.xu@intel.com>	2022-11-16 23:29:22 +0800
committer	Juan Quintela <quintela@redhat.com>	2023-02-11 16:51:09 +0100
commit	04ffce137b6d85ab4e7687e54e4dffcef0a9ab99 (patch)
tree	857d2986fa10dc683585f56c742038b413f9ae5e /migration/ram.c
parent	e26470501271adf22e4f37d218c2164884ae96fb (diff)
download	qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.zip qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.gz qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.bz2