From 2872b0f390c3fbd8f19f6b82da3dca15fa820118 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 19 Oct 2022 13:22:06 +0200 Subject: target/i386: implement FMA instructions The only issue with FMA instructions is that there are _a lot_ of them (30 opcodes, each of which comes in up to 4 versions depending on VEX.W and VEX.L; a total of 96 possibilities). However, they can be implement with only 6 helpers, two for scalar operations and four for packed operations. (Scalar versions do not do any merging; they only affect the bottom 32 or 64 bits of the output operand. Therefore, there is no separate XMM and YMM of the scalar helpers). First, we can reduce the number of helpers to one third by passing four operands (one output and three inputs); the reordering of which operands go to the multiply and which go to the add is done in emit.c. Second, the different instructions also dispatch to the same softfloat function, so the flags for float32_muladd and float64_muladd are passed in the helper as int arguments, with a little extra complication to handle FMADDSUB and FMSUBADD. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- tests/tcg/i386/test-avx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/tcg/i386') diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py index ebb1d99..d9ca00a 100755 --- a/tests/tcg/i386/test-avx.py +++ b/tests/tcg/i386/test-avx.py @@ -9,7 +9,7 @@ from fnmatch import fnmatch archs = [ "SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2", "AES", "AVX", "AVX2", "AES+AVX", "VAES+AVX", - "F16C", + "F16C", "FMA", ] ignore = set(["FISTTP", -- cgit v1.1