about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joseph Myers <joseph@codesourcery.com> 2017-08-08 20:21:01 +0000
committer Paolo Bonzini <pbonzini@redhat.com> 2017-09-19 14:09:10 +0200
commit c6a56c8e990b213a1638af2d34352771d5fa4d9c (patch)
tree 0ed40fe117bff541286343fb2f96097e8fdb866c
parent 11e06ce1ed28fd0ffcbc1e2436b72f3412b4ecc8 (diff)
download qemu-c6a56c8e990b213a1638af2d34352771d5fa4d9c.zip
qemu-c6a56c8e990b213a1638af2d34352771d5fa4d9c.tar.gz
qemu-c6a56c8e990b213a1638af2d34352771d5fa4d9c.tar.bz2
target/i386: fix pmovsx/pmovzx in-place operations
The SSE4.1 pmovsx* and pmovzx* instructions take packed 1-byte, 2-byte or 4-byte inputs and sign-extend or zero-extend them to a wider vector output. The associated helpers for these instructions do the extension on each element in turn, starting with the lowest. If the input and output are the same register, this means that all the input elements after the first have been overwritten before they are read. This patch makes the helpers extend starting with the highest element, not the lowest, to avoid such overwriting. This fixes many GCC test failures (161 in the gcc testsuite in my GCC 6-based testing) when testing with a default CPU setting enabling those instructions.

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Message-Id: <alpine.DEB.2.20.1708082018390.23380@digraph.polyomino.org.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- target/i386/ops_sse.h 14
1 files changed, 7 insertions, 7 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 16509d0..d578216 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1617,18 +1617,18 @@ void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
#define SSE_HELPER_F(name, elem, num, F) \
void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
{ \
- d->elem(0) = F(0); \
- d->elem(1) = F(1); \
if (num > 2) { \
- d->elem(2) = F(2); \
- d->elem(3) = F(3); \
if (num > 4) { \
- d->elem(4) = F(4); \
- d->elem(5) = F(5); \
- d->elem(6) = F(6); \
d->elem(7) = F(7); \
+ d->elem(6) = F(6); \
+ d->elem(5) = F(5); \
+ d->elem(4) = F(4); \
} \
+ d->elem(3) = F(3); \
+ d->elem(2) = F(2); \
} \
+ d->elem(1) = F(1); \
+ d->elem(0) = F(0); \
}
SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)