aboutsummaryrefslogtreecommitdiff
path: root/include/qemu
diff options
context:
space:
mode:
authorAlexander Monakov <amonakov@ispras.ru>2024-02-06 23:48:05 +0300
committerRichard Henderson <richard.henderson@linaro.org>2024-05-03 08:03:05 -0700
commitcbe3d5264631aa193fd2705820cbde6c5a602abb (patch)
tree8d7c64e91e3b1e7d595c0987a4453419e922ff70 /include/qemu
parentd018425c324704949c7f65230def9586e71f07f5 (diff)
downloadqemu-cbe3d5264631aa193fd2705820cbde6c5a602abb.zip
qemu-cbe3d5264631aa193fd2705820cbde6c5a602abb.tar.gz
qemu-cbe3d5264631aa193fd2705820cbde6c5a602abb.tar.bz2
util/bufferiszero: Reorganize for early test for acceleration
Test for length >= 256 inline, where it is often a constant. Before calling into the accelerated routine, sample three bytes from the buffer, which handles most non-zero buffers. Signed-off-by: Alexander Monakov <amonakov@ispras.ru> Signed-off-by: Mikhail Romanov <mmromanov@ispras.ru> Message-Id: <20240206204809.9859-3-amonakov@ispras.ru> [rth: Use __builtin_constant_p; move the indirect call out of line.] Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'include/qemu')
-rw-r--r--include/qemu/cutils.h32
1 files changed, 31 insertions, 1 deletions
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c927a..741dade 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -187,9 +187,39 @@ char *freq_to_str(uint64_t freq_hz);
/* used to print char* safely */
#define STR_OR_NULL(str) ((str) ? (str) : "null")
-bool buffer_is_zero(const void *buf, size_t len);
+/*
+ * Check if a buffer is all zeroes.
+ *
+ * buffer_is_zero() is the entry point callers use; it resolves to one
+ * of the two routines declared here (their definitions are not in this
+ * part of the file):
+ *  - buffer_is_zero_ool: the fallback for every call that does not take
+ *    the fast path, and the only path when __OPTIMIZE__ is not defined.
+ *  - buffer_is_zero_ge256: called only when len is a compile-time
+ *    constant >= 256 and the three-byte sample found no non-zero byte
+ *    (presumably it requires len >= 256; confirm in the definition).
+ */
+
+bool buffer_is_zero_ool(const void *vbuf, size_t len);
+bool buffer_is_zero_ge256(const void *vbuf, size_t len);
bool test_buffer_is_zero_next_accel(void);
+static inline bool buffer_is_zero_sample3(const char *buf, size_t len)
+{
+ /*
+ * Return true when the first byte, the last byte and the byte at
+ * buf[len / 2] are all zero. len must be at least 1: with len == 0
+ * the index len - 1 wraps around and the read is out of bounds.
+ *
+ * For any reasonably sized buffer, these three samples come from
+ * three different cachelines. In qemu-img usage, we find that
+ * each byte eliminates more than half of all buffer testing.
+ * It is therefore critical to performance that the byte tests
+ * short-circuit, so that we do not pull in additional cache lines.
+ * Do not "optimize" this to !(a | b | c).
+ */
+ return !buf[0] && !buf[len - 1] && !buf[len / 2];
+}
+
+#ifdef __OPTIMIZE__ /* inline fast path only in optimizing builds */
+static inline bool buffer_is_zero(const void *buf, size_t len)
+{
+ return (__builtin_constant_p(len) && len >= 256 /* len known, large */
+ ? buffer_is_zero_sample3(buf, len) && /* cheap 3-byte reject first */
+ buffer_is_zero_ge256(buf, len)
+ : buffer_is_zero_ool(buf, len)); /* non-constant or short len */
+}
+#else /* !__OPTIMIZE__: every call goes to buffer_is_zero_ool */
+#define buffer_is_zero buffer_is_zero_ool
+#endif /* __OPTIMIZE__ */
+
/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers