aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Brook <paul@nowt.org> 2022-04-24 23:01:32 +0100
committer Paolo Bonzini <pbonzini@redhat.com> 2022-09-01 20:16:33 +0200
commit 18592d2ec2dddff3b08568d29aa82d96e0369b88 (patch)
tree 7336078f27fb2c78a84dad4139c5c9ba400c833f
parent 25bdec79c629b49fbcf134f9eca063aaba6d4094 (diff)
download qemu-18592d2ec2dddff3b08568d29aa82d96e0369b88.zip
qemu-18592d2ec2dddff3b08568d29aa82d96e0369b88.tar.gz
qemu-18592d2ec2dddff3b08568d29aa82d96e0369b88.tar.bz2
target/i386: Rewrite vector shift helper
Rewrite the vector shift helpers in preparation for AVX support (3-operand form and 256-bit vectors). For now keep the existing two-operand interface.

No functional changes to existing helpers.

Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-11-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- target/i386/ops_sse.h 239
1 files changed, 108 insertions, 131 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 2c0090a..a4a0922 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -40,6 +40,8 @@
#define SUFFIX _xmm
#endif
+#define LANE_WIDTH (SHIFT ? 16 : 8)
+
/*
* Copy the relevant parts of a Reg value around. In the case where
* sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
@@ -56,198 +58,173 @@
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT == 0
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
+
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) >>= shift;
- d->W(1) >>= shift;
- d->W(2) >>= shift;
- d->W(3) >>= shift;
-#if SHIFT == 1
- d->W(4) >>= shift;
- d->W(5) >>= shift;
- d->W(6) >>= shift;
- d->W(7) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRL(s->W(i), shift);
+ }
}
}
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- shift = 15;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSLL(s->W(i), shift);
+ }
}
- d->W(0) = (int16_t)d->W(0) >> shift;
- d->W(1) = (int16_t)d->W(1) >> shift;
- d->W(2) = (int16_t)d->W(2) >> shift;
- d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
- d->W(4) = (int16_t)d->W(4) >> shift;
- d->W(5) = (int16_t)d->W(5) >> shift;
- d->W(6) = (int16_t)d->W(6) >> shift;
- d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
}
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 15) {
+ shift = 15;
} else {
- shift = s->B(0);
- d->W(0) <<= shift;
- d->W(1) <<= shift;
- d->W(2) <<= shift;
- d->W(3) <<= shift;
-#if SHIFT == 1
- d->W(4) <<= shift;
- d->W(5) <<= shift;
- d->W(6) <<= shift;
- d->W(7) <<= shift;
-#endif
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRAW(s->W(i), shift);
}
}
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) >>= shift;
- d->L(1) >>= shift;
-#if SHIFT == 1
- d->L(2) >>= shift;
- d->L(3) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- shift = 31;
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSLL(s->L(i), shift);
+ }
}
- d->L(0) = (int32_t)d->L(0) >> shift;
- d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
- d->L(2) = (int32_t)d->L(2) >> shift;
- d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
}
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ shift = 31;
} else {
- shift = s->B(0);
- d->L(0) <<= shift;
- d->L(1) <<= shift;
-#if SHIFT == 1
- d->L(2) <<= shift;
- d->L(3) <<= shift;
-#endif
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRAL(s->L(i), shift);
}
}
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) >>= shift;
-#if SHIFT == 1
- d->Q(1) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSRL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) <<= shift;
-#if SHIFT == 1
- d->Q(1) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSLL(s->Q(i), shift);
+ }
}
}
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT >= 1
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift, i;
+ Reg *s = d;
+ int shift, i, j;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 0; i < 16 - shift; i++) {
- d->B(i) = d->B(i + shift);
- }
- for (i = 16 - shift; i < 16; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 0; i < 16 - shift; i++) {
+ d->B(j + i) = s->B(j + i + shift);
+ }
+ for (i = 16 - shift; i < 16; i++) {
+ d->B(j + i) = 0;
+ }
}
}
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift, i;
+ Reg *s = d;
+ int shift, i, j;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 15; i >= shift; i--) {
- d->B(i) = d->B(i - shift);
- }
- for (i = 0; i < shift; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 15; i >= shift; i--) {
+ d->B(j + i) = s->B(j + i - shift);
+ }
+ for (i = 0; i < shift; i++) {
+ d->B(j + i) = 0;
+ }
}
}
#endif