aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--target/i386/ops_sse.h239
1 files changed, 108 insertions, 131 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 2c0090a..a4a0922 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -40,6 +40,8 @@
#define SUFFIX _xmm
#endif
+#define LANE_WIDTH (SHIFT ? 16 : 8)
+
/*
* Copy the relevant parts of a Reg value around. In the case where
* sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
@@ -56,198 +58,173 @@
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT == 0
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
+
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) >>= shift;
- d->W(1) >>= shift;
- d->W(2) >>= shift;
- d->W(3) >>= shift;
-#if SHIFT == 1
- d->W(4) >>= shift;
- d->W(5) >>= shift;
- d->W(6) >>= shift;
- d->W(7) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRL(s->W(i), shift);
+ }
}
}
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- shift = 15;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSLL(s->W(i), shift);
+ }
}
- d->W(0) = (int16_t)d->W(0) >> shift;
- d->W(1) = (int16_t)d->W(1) >> shift;
- d->W(2) = (int16_t)d->W(2) >> shift;
- d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
- d->W(4) = (int16_t)d->W(4) >> shift;
- d->W(5) = (int16_t)d->W(5) >> shift;
- d->W(6) = (int16_t)d->W(6) >> shift;
- d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
}
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 15) {
+ shift = 15;
} else {
- shift = s->B(0);
- d->W(0) <<= shift;
- d->W(1) <<= shift;
- d->W(2) <<= shift;
- d->W(3) <<= shift;
-#if SHIFT == 1
- d->W(4) <<= shift;
- d->W(5) <<= shift;
- d->W(6) <<= shift;
- d->W(7) <<= shift;
-#endif
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRAW(s->W(i), shift);
}
}
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) >>= shift;
- d->L(1) >>= shift;
-#if SHIFT == 1
- d->L(2) >>= shift;
- d->L(3) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- shift = 31;
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSLL(s->L(i), shift);
+ }
}
- d->L(0) = (int32_t)d->L(0) >> shift;
- d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
- d->L(2) = (int32_t)d->L(2) >> shift;
- d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
}
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ shift = 31;
} else {
- shift = s->B(0);
- d->L(0) <<= shift;
- d->L(1) <<= shift;
-#if SHIFT == 1
- d->L(2) <<= shift;
- d->L(3) <<= shift;
-#endif
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRAL(s->L(i), shift);
}
}
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) >>= shift;
-#if SHIFT == 1
- d->Q(1) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSRL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) <<= shift;
-#if SHIFT == 1
- d->Q(1) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSLL(s->Q(i), shift);
+ }
}
}
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT >= 1
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift, i;
+ Reg *s = d;
+ int shift, i, j;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 0; i < 16 - shift; i++) {
- d->B(i) = d->B(i + shift);
- }
- for (i = 16 - shift; i < 16; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 0; i < 16 - shift; i++) {
+ d->B(j + i) = s->B(j + i + shift);
+ }
+ for (i = 16 - shift; i < 16; i++) {
+ d->B(j + i) = 0;
+ }
}
}
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
- int shift, i;
+ Reg *s = d;
+ int shift, i, j;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
- for (i = 15; i >= shift; i--) {
- d->B(i) = d->B(i - shift);
- }
- for (i = 0; i < shift; i++) {
- d->B(i) = 0;
+ for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
+ for (i = 15; i >= shift; i--) {
+ d->B(j + i) = s->B(j + i - shift);
+ }
+ for (i = 0; i < shift; i++) {
+ d->B(j + i) = 0;
+ }
}
}
#endif