aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Brook <paul@nowt.org>2022-04-24 23:01:39 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2022-09-01 20:16:33 +0200
commit6567ffb4f259d9937ff74f21e96cdac905440620 (patch)
treea60ddfdb920044798a5ceb51b9eb0255d4cb0b61
parent6f218d6e994bd8b229d6522899b6ac6cd98bdb47 (diff)
downloadqemu-6567ffb4f259d9937ff74f21e96cdac905440620.zip
qemu-6567ffb4f259d9937ff74f21e96cdac905440620.tar.gz
qemu-6567ffb4f259d9937ff74f21e96cdac905440620.tar.bz2
target/i386: Destructive FP helpers for AVX
Perpare the horizontal atithmetic vector helpers for AVX These currently use a dummy Reg typed variable to store the result then assign the whole register. This will cause 128 bit operations to corrupt the upper half of the register, so replace it with explicit temporaries and element assignments. Signed-off-by: Paul Brook <paul@nowt.org> Message-Id: <20220424220204.2493824-18-paul@nowt.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--target/i386/ops_sse.h101
1 files changed, 43 insertions, 58 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index c9737e1..c6dba95 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -22,7 +22,6 @@
#if SHIFT == 0
#define Reg MMXReg
-#define SIZE 8
#define XMM_ONLY(...)
#define B(n) MMX_B(n)
#define W(n) MMX_W(n)
@@ -31,7 +30,6 @@
#define SUFFIX _mmx
#else
#define Reg ZMMReg
-#define SIZE 16
#define XMM_ONLY(...) __VA_ARGS__
#define B(n) ZMM_B(n)
#define W(n) ZMM_W(n)
@@ -43,22 +41,6 @@
#define LANE_WIDTH (SHIFT ? 16 : 8)
#define PACK_WIDTH (LANE_WIDTH / 2)
-/*
- * Copy the relevant parts of a Reg value around. In the case where
- * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
- * a 64 byte ZMMReg, so we must copy only those and keep the top bytes
- * untouched in the guest-visible destination destination register.
- * Note that the "lower bytes" are placed last in memory on big-endian
- * hosts, which store the vector backwards in memory. In that case the
- * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of
- * the little-endian case.
- */
-#if HOST_BIG_ENDIAN
-#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE)
-#else
-#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
-#endif
-
#if SHIFT == 0
#define FPSRL(x, c) ((x) >> shift)
#define FPSRAW(x, c) ((int16_t)(x) >> shift)
@@ -945,45 +927,49 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length)
d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
}
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
-}
+#define SSE_HELPER_HPS(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float32 r[2 << SHIFT]; \
+ int i, j, k; \
+ for (k = 0; k < 2 << SHIFT; k += LANE_WIDTH / 4) { \
+ for (i = j = 0; j < 4; i++, j += 2) { \
+ r[i + k] = F(v->ZMM_S(j + k), v->ZMM_S(j + k + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 4; i++, j += 2) { \
+ r[i + k] = F(s->ZMM_S(j + k), s->ZMM_S(j + k + 1), &env->sse_status); \
+ } \
+ } \
+ for (i = 0; i < 2 << SHIFT; i++) { \
+ d->ZMM_S(i) = r[i]; \
+ } \
+}
+
+SSE_HELPER_HPS(haddps, float32_add)
+SSE_HELPER_HPS(hsubps, float32_sub)
+
+#define SSE_HELPER_HPD(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float64 r[1 << SHIFT]; \
+ int i, j, k; \
+ for (k = 0; k < 1 << SHIFT; k += LANE_WIDTH / 8) { \
+ for (i = j = 0; j < 2; i++, j += 2) { \
+ r[i + k] = F(v->ZMM_D(j + k), v->ZMM_D(j + k + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 2; i++, j += 2) { \
+ r[i + k] = F(s->ZMM_D(j + k), s->ZMM_D(j + k + 1), &env->sse_status); \
+ } \
+ } \
+ for (i = 0; i < 1 << SHIFT; i++) { \
+ d->ZMM_D(i) = r[i]; \
+ } \
+}
+
+SSE_HELPER_HPD(haddpd, float64_add)
+SSE_HELPER_HPD(hsubpd, float64_sub)
void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
@@ -2331,4 +2317,3 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
#undef L
#undef Q
#undef SUFFIX
-#undef SIZE