aboutsummaryrefslogtreecommitdiff
path: root/tests/tcg
diff options
context:
space:
mode:
author: Paolo Bonzini <pbonzini@redhat.com> 2022-10-19 13:22:06 +0200
committer: Paolo Bonzini <pbonzini@redhat.com> 2022-10-20 15:16:18 +0200
commit: cf5ec6641ed456e2748b211b7bbf5103bfc93098 (patch)
tree: 84e5a3c059e7de484c1c7d66714c68e8d60ca9e1 /tests/tcg
parent: 314d3eff66f41f39191aaca2e5f6e3dc81480c1b (diff)
download: qemu-cf5ec6641ed456e2748b211b7bbf5103bfc93098.zip
qemu-cf5ec6641ed456e2748b211b7bbf5103bfc93098.tar.gz
qemu-cf5ec6641ed456e2748b211b7bbf5103bfc93098.tar.bz2
target/i386: implement F16C instructions
F16C only consists of two instructions, which are a bit peculiar nevertheless.

First, they access only the low half of an YMM or XMM register for the packed-half operand; the exact size still depends on the VEX.L flag. This is similar to the existing avx_movx flag, but not exactly because avx_movx is hardcoded to affect operand 2. To this end I added a "ph" format name; it's possible to reuse this approach for the VPMOVSX and VPMOVZX instructions, though that would also require adding two more formats for the low-quarter and low-eighth of an operand.

Second, VCVTPS2PH is somewhat weird because it *stores* the result of the instruction into memory rather than loading it.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'tests/tcg')
-rw-r--r-- tests/tcg/i386/test-avx.c | 17
-rwxr-xr-x tests/tcg/i386/test-avx.py | 8
2 files changed, 23 insertions, 2 deletions
diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c
index 953e290..c39c0e5 100644
--- a/tests/tcg/i386/test-avx.c
+++ b/tests/tcg/i386/test-avx.c
@@ -28,6 +28,7 @@ typedef struct {
} TestDef;
reg_state initI;
+reg_state initF16;
reg_state initF32;
reg_state initF64;
@@ -221,6 +222,7 @@ static void run_all(void)
#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
+uint16_t val_f16[] = { 0x4000, 0xbc00, 0x44cd, 0x3a66, 0x4200, 0x7a1a, 0x4780, 0x4826 };
float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
v4di val_i64[] = {
@@ -241,6 +243,12 @@ v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull,
v4di gather_mem[0x20];
+void init_f16reg(v4di *r)
+{
+ memset(r, 0, sizeof(*r));
+ memcpy(r, val_f16, sizeof(val_f16));
+}
+
void init_f32reg(v4di *r)
{
static int n;
@@ -315,6 +323,15 @@ int main(int argc, char *argv[])
printf("Int:\n");
dump_regs(&initI);
+ init_all(&initF16);
+ init_f16reg(&initF16.ymm[10]);
+ init_f16reg(&initF16.ymm[11]);
+ init_f16reg(&initF16.ymm[12]);
+ init_f16reg(&initF16.mem0[1]);
+ initF16.ff = 16;
+ printf("F16:\n");
+ dump_regs(&initF16);
+
init_all(&initF32);
init_f32reg(&initF32.ymm[10]);
init_f32reg(&initF32.ymm[11]);
diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py
index 0298232..ebb1d99 100755
--- a/tests/tcg/i386/test-avx.py
+++ b/tests/tcg/i386/test-avx.py
@@ -9,6 +9,7 @@ from fnmatch import fnmatch
archs = [
"SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2",
"AES", "AVX", "AVX2", "AES+AVX", "VAES+AVX",
+ "F16C",
]
ignore = set(["FISTTP",
@@ -19,6 +20,7 @@ imask = {
'vBLENDPS': 0x0f,
'CMP[PS][SD]': 0x07,
'VCMP[PS][SD]': 0x1f,
+ 'vCVTPS2PH': 0x7,
'vDPPD': 0x33,
'vDPPS': 0xff,
'vEXTRACTPS': 0x03,
@@ -221,8 +223,10 @@ def ArgGenerator(arg, op):
class InsnGenerator:
def __init__(self, op, args):
self.op = op
- if op[-2:] in ["PS", "PD", "SS", "SD"]:
- if op[-1] == 'S':
+ if op[-2:] in ["PH", "PS", "PD", "SS", "SD"]:
+ if op[-1] == 'H':
+ self.optype = 'F16'
+ elif op[-1] == 'S':
self.optype = 'F32'
else:
self.optype = 'F64'