aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Richard Henderson <richard.henderson@linaro.org> 2024-05-03 08:13:51 -0700
committer: Richard Henderson <richard.henderson@linaro.org> 2024-05-03 08:13:51 -0700
commit: 909aff7eaf6335aeeb4962fb0ac2a6c571c96af2 (patch)
tree: 6da740ce80908710c400399e25b784660d59386c /include
parent: 4977ce198d2390bff8c71ad5cb1a5f6aa24b56fb (diff)
parent: a06d9eddb015a9f5895161b0a3958a2e4be21579 (diff)
download: qemu-909aff7eaf6335aeeb4962fb0ac2a6c571c96af2.zip
qemu-909aff7eaf6335aeeb4962fb0ac2a6c571c96af2.tar.gz
qemu-909aff7eaf6335aeeb4962fb0ac2a6c571c96af2.tar.bz2
Merge tag 'pull-misc-20240503' of https://gitlab.com/rth7680/qemu into staging
util/bufferiszero: - Remove sse4.1 and avx512 variants - Reorganize for early test for acceleration - Remove useless prefetches - Optimize sse2, avx2 and integer variants - Add simd acceleration for aarch64 - Add bufferiszero-bench # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY0/qMdHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+ULQf/T2JSdvG6/EjDCf4N # cnSGiUV2MIeByw8tkrc/fWCNdlulHhk9gbg9l+f2muwK8H/k2BdynbrQnt1Ymmtk # xzM6+PNOcByaovSAkvNweZVbrQX36Yih9S7f3n+xcxfVuvvYhKSLHXLkeqO96LMd # rN+WRpxhReaU3n8/FO7o3S26SRpk7X9kRfShaT7U7ytHGjGsXUvMKIRs30hbsJTB # yjed0a0u54FoSlN6AEqjWdgzaWP8nT65+8Yxe3dzB9hx09UiolZo60eHqYy7Mkno # N6aMOB6gUUbCiKZ3Qk+1zEX97vl26NH3zt5tIIJTWDoIkC3f9qbg1x5hwWLQ3rra # rM8h8w== # =DnZO # -----END PGP SIGNATURE----- # gpg: Signature made Fri 03 May 2024 08:11:31 AM PDT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate] * tag 'pull-misc-20240503' of https://gitlab.com/rth7680/qemu: tests/bench: Add bufferiszero-bench util/bufferiszero: Add simd acceleration for aarch64 util/bufferiszero: Simplify test_buffer_is_zero_next_accel util/bufferiszero: Introduce biz_accel_fn typedef util/bufferiszero: Improve scalar variant util/bufferiszero: Optimize SSE2 and AVX2 variants util/bufferiszero: Remove useless prefetches util/bufferiszero: Reorganize for early test for acceleration util/bufferiszero: Remove AVX512 variant util/bufferiszero: Remove SSE4.1 variant Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'include')
-rw-r--r-- include/qemu/cutils.h 32
1 files changed, 31 insertions, 1 deletions
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c927a..741dade 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -187,9 +187,39 @@ char *freq_to_str(uint64_t freq_hz);
/*
 * Used to print char* safely: yields str itself, or the string literal
 * "null" when str is NULL. Note that str may be evaluated twice.
 */
#define STR_OR_NULL(str) ((str) ? (str) : "null")
-bool buffer_is_zero(const void *buf, size_t len);
+/*
+ * Check if a buffer is all zeroes.
+ *
+ * Callers use buffer_is_zero(), defined below. In optimizing builds
+ * it calls buffer_is_zero_ge256() only when len is a compile-time
+ * constant >= 256 (after a three-byte probe), and calls
+ * buffer_is_zero_ool() for every other len. In non-optimizing
+ * builds it is simply an alias for buffer_is_zero_ool().
+ */
+
+bool buffer_is_zero_ool(const void *vbuf, size_t len);
+bool buffer_is_zero_ge256(const void *vbuf, size_t len);
bool test_buffer_is_zero_next_accel(void);
+static inline bool buffer_is_zero_sample3(const char *buf, size_t len)
+{
+ /*
+ * Quick probe: returns true only if the first, last and middle
+ * bytes of buf are all zero. A false result proves the buffer is
+ * not all zeroes; a true result proves nothing about the other bytes.
+ * len must be nonzero, because buf[len - 1] is read.
+ *
+ * For any reasonably sized buffer, these three samples come from
+ * three different cache lines. In qemu-img usage, we find that
+ * each byte eliminates more than half of all buffer testing.
+ * It is therefore critical to performance that the byte tests
+ * short-circuit, so that we do not pull in additional cache lines.
+ * Do not "optimize" this to !(a | b | c).
+ */
+ return !buf[0] && !buf[len - 1] && !buf[len / 2];
+}
+
+#ifdef __OPTIMIZE__ /* inline dispatch only when the compiler optimizes */
+static inline bool buffer_is_zero(const void *buf, size_t len)
+{ /* Check whether the len bytes at buf are all zero. */
+ return (__builtin_constant_p(len) && len >= 256 /* constant, large len */
+ ? buffer_is_zero_sample3(buf, len) && /* 3-byte probe first... */
+ buffer_is_zero_ge256(buf, len) /* ...then the >= 256 variant */
+ : buffer_is_zero_ool(buf, len)); /* any other len */
+}
+#else /* !__OPTIMIZE__: plain alias for the non-inline function */
+#define buffer_is_zero buffer_is_zero_ool
+#endif /* __OPTIMIZE__ */
+
/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers