about summary refs log tree commit diff
diff options
context:
space:
mode:
author Blue Swirl <blauwirbel@gmail.com> 2011-08-01 07:37:45 +0000
committer Blue Swirl <blauwirbel@gmail.com> 2011-10-23 15:08:56 +0000
commit 1bccec25e10e9073e21c2f71cd16a0b3ffe06c39 (patch)
tree fb704ab9b4fe7ec64f233babf800ebc1807c63b9
parent 99ca02195cad858043444107acc3ab1df874edea (diff)
download qemu-1bccec25e10e9073e21c2f71cd16a0b3ffe06c39.zip
qemu-1bccec25e10e9073e21c2f71cd16a0b3ffe06c39.tar.gz
qemu-1bccec25e10e9073e21c2f71cd16a0b3ffe06c39.tar.bz2
Sparc: split FPU and VIS op helpers
Move FPU op helpers to fop_helper.c. Move VIS op helpers to vis_helper.c,
compile it only for Sparc64.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
-rw-r--r-- Makefile.target 5
-rw-r--r-- target-sparc/fop_helper.c 394
-rw-r--r-- target-sparc/op_helper.c 743
-rw-r--r-- target-sparc/vis_helper.c 403
4 files changed, 800 insertions, 745 deletions
diff --git a/Makefile.target b/Makefile.target
index 26c99ca..fc277b3 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -74,10 +74,11 @@ libobj-y += op_helper.o helper.o
ifeq ($(TARGET_BASE_ARCH), i386)
libobj-y += cpuid.o
endif
+libobj-$(TARGET_SPARC64) += vis_helper.o
libobj-$(CONFIG_NEED_MMU) += mmu.o
libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
ifeq ($(TARGET_BASE_ARCH), sparc)
-libobj-y += cpu_init.o
+libobj-y += fop_helper.o cpu_init.o
endif
libobj-$(TARGET_SPARC) += int32_helper.o
libobj-$(TARGET_SPARC64) += int64_helper.o
@@ -96,7 +97,7 @@ tcg/tcg.o: cpu.h
# HELPER_CFLAGS is used for all the code compiled with static register
# variables
-op_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
+op_helper.o fop_helper.o vis_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
# Note: this is a workaround. The real fix is to avoid compiling
# cpu_signal_handler() in user-exec.c.
diff --git a/target-sparc/fop_helper.c b/target-sparc/fop_helper.c
new file mode 100644
index 0000000..ddd0af9
--- /dev/null
+++ b/target-sparc/fop_helper.c
@@ -0,0 +1,394 @@
+/*
+ * FPU op helpers
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+#define F_HELPER(name, p) void helper_f##name##p(void)
+
+#define F_BINOP(name) \
+ float32 helper_f ## name ## s (float32 src1, float32 src2) \
+ { \
+ return float32_ ## name (src1, src2, &env->fp_status); \
+ } \
+ F_HELPER(name, d) \
+ { \
+ DT0 = float64_ ## name (DT0, DT1, &env->fp_status); \
+ } \
+ F_HELPER(name, q) \
+ { \
+ QT0 = float128_ ## name (QT0, QT1, &env->fp_status); \
+ }
+
+F_BINOP(add);
+F_BINOP(sub);
+F_BINOP(mul);
+F_BINOP(div);
+#undef F_BINOP
+
+void helper_fsmuld(float32 src1, float32 src2)
+{
+ DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+ float32_to_float64(src2, &env->fp_status),
+ &env->fp_status);
+}
+
+void helper_fdmulq(void)
+{
+ QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
+ float64_to_float128(DT1, &env->fp_status),
+ &env->fp_status);
+}
+
+float32 helper_fnegs(float32 src)
+{
+ return float32_chs(src);
+}
+
+#ifdef TARGET_SPARC64
+F_HELPER(neg, d)
+{
+ DT0 = float64_chs(DT1);
+}
+
+F_HELPER(neg, q)
+{
+ QT0 = float128_chs(QT1);
+}
+#endif
+
+/* Integer to float conversion. */
+float32 helper_fitos(int32_t src)
+{
+ return int32_to_float32(src, &env->fp_status);
+}
+
+void helper_fitod(int32_t src)
+{
+ DT0 = int32_to_float64(src, &env->fp_status);
+}
+
+void helper_fitoq(int32_t src)
+{
+ QT0 = int32_to_float128(src, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+float32 helper_fxtos(void)
+{
+ return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, d)
+{
+ DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, q)
+{
+ QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+}
+#endif
+#undef F_HELPER
+
+/* floating point conversion */
+float32 helper_fdtos(void)
+{
+ return float64_to_float32(DT1, &env->fp_status);
+}
+
+void helper_fstod(float32 src)
+{
+ DT0 = float32_to_float64(src, &env->fp_status);
+}
+
+float32 helper_fqtos(void)
+{
+ return float128_to_float32(QT1, &env->fp_status);
+}
+
+void helper_fstoq(float32 src)
+{
+ QT0 = float32_to_float128(src, &env->fp_status);
+}
+
+void helper_fqtod(void)
+{
+ DT0 = float128_to_float64(QT1, &env->fp_status);
+}
+
+void helper_fdtoq(void)
+{
+ QT0 = float64_to_float128(DT1, &env->fp_status);
+}
+
+/* Float to integer conversion. */
+int32_t helper_fstoi(float32 src)
+{
+ return float32_to_int32_round_to_zero(src, &env->fp_status);
+}
+
+int32_t helper_fdtoi(void)
+{
+ return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+}
+
+int32_t helper_fqtoi(void)
+{
+ return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fstox(float32 src)
+{
+ *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+}
+
+void helper_fdtox(void)
+{
+ *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+}
+
+void helper_fqtox(void)
+{
+ *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+}
+#endif
+
+float32 helper_fabss(float32 src)
+{
+ return float32_abs(src);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fabsd(void)
+{
+ DT0 = float64_abs(DT1);
+}
+
+void helper_fabsq(void)
+{
+ QT0 = float128_abs(QT1);
+}
+#endif
+
+float32 helper_fsqrts(float32 src)
+{
+ return float32_sqrt(src, &env->fp_status);
+}
+
+void helper_fsqrtd(void)
+{
+ DT0 = float64_sqrt(DT1, &env->fp_status);
+}
+
+void helper_fsqrtq(void)
+{
+ QT0 = float128_sqrt(QT1, &env->fp_status);
+}
+
+#define GEN_FCMP(name, size, reg1, reg2, FS, E) \
+ void glue(helper_, name) (void) \
+ { \
+ env->fsr &= FSR_FTT_NMASK; \
+ if (E && (glue(size, _is_any_nan)(reg1) || \
+ glue(size, _is_any_nan)(reg2)) && \
+ (env->fsr & FSR_NVM)) { \
+ env->fsr |= FSR_NVC; \
+ env->fsr |= FSR_FTT_IEEE_EXCP; \
+ helper_raise_exception(env, TT_FP_EXCP); \
+ } \
+ switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) { \
+ case float_relation_unordered: \
+ if ((env->fsr & FSR_NVM)) { \
+ env->fsr |= FSR_NVC; \
+ env->fsr |= FSR_FTT_IEEE_EXCP; \
+ helper_raise_exception(env, TT_FP_EXCP); \
+ } else { \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS; \
+ env->fsr |= FSR_NVA; \
+ } \
+ break; \
+ case float_relation_less: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= FSR_FCC0 << FS; \
+ break; \
+ case float_relation_greater: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= FSR_FCC1 << FS; \
+ break; \
+ default: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ break; \
+ } \
+ }
+#define GEN_FCMPS(name, size, FS, E) \
+ void glue(helper_, name)(float32 src1, float32 src2) \
+ { \
+ env->fsr &= FSR_FTT_NMASK; \
+ if (E && (glue(size, _is_any_nan)(src1) || \
+ glue(size, _is_any_nan)(src2)) && \
+ (env->fsr & FSR_NVM)) { \
+ env->fsr |= FSR_NVC; \
+ env->fsr |= FSR_FTT_IEEE_EXCP; \
+ helper_raise_exception(env, TT_FP_EXCP); \
+ } \
+ switch (glue(size, _compare) (src1, src2, &env->fp_status)) { \
+ case float_relation_unordered: \
+ if ((env->fsr & FSR_NVM)) { \
+ env->fsr |= FSR_NVC; \
+ env->fsr |= FSR_FTT_IEEE_EXCP; \
+ helper_raise_exception(env, TT_FP_EXCP); \
+ } else { \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS; \
+ env->fsr |= FSR_NVA; \
+ } \
+ break; \
+ case float_relation_less: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= FSR_FCC0 << FS; \
+ break; \
+ case float_relation_greater: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ env->fsr |= FSR_FCC1 << FS; \
+ break; \
+ default: \
+ env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
+ break; \
+ } \
+ }
+
+GEN_FCMPS(fcmps, float32, 0, 0);
+GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
+
+GEN_FCMPS(fcmpes, float32, 0, 1);
+GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
+
+GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
+GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
+
+#ifdef TARGET_SPARC64
+GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
+GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
+
+GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
+GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
+
+GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
+GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
+
+GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
+GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
+
+GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
+GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
+
+GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
+GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
+GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
+#endif
+#undef GEN_FCMPS
+
+void helper_check_ieee_exceptions(void)
+{
+ target_ulong status;
+
+ status = get_float_exception_flags(&env->fp_status);
+ if (status) {
+ /* Copy IEEE 754 flags into FSR */
+ if (status & float_flag_invalid) {
+ env->fsr |= FSR_NVC;
+ }
+ if (status & float_flag_overflow) {
+ env->fsr |= FSR_OFC;
+ }
+ if (status & float_flag_underflow) {
+ env->fsr |= FSR_UFC;
+ }
+ if (status & float_flag_divbyzero) {
+ env->fsr |= FSR_DZC;
+ }
+ if (status & float_flag_inexact) {
+ env->fsr |= FSR_NXC;
+ }
+
+ if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
+ /* Unmasked exception, generate a trap */
+ env->fsr |= FSR_FTT_IEEE_EXCP;
+ helper_raise_exception(env, TT_FP_EXCP);
+ } else {
+ /* Accumulate exceptions */
+ env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
+ }
+ }
+}
+
+void helper_clear_float_exceptions(void)
+{
+ set_float_exception_flags(0, &env->fp_status);
+}
+
+static inline void set_fsr(void)
+{
+ int rnd_mode;
+
+ switch (env->fsr & FSR_RD_MASK) {
+ case FSR_RD_NEAREST:
+ rnd_mode = float_round_nearest_even;
+ break;
+ default:
+ case FSR_RD_ZERO:
+ rnd_mode = float_round_to_zero;
+ break;
+ case FSR_RD_POS:
+ rnd_mode = float_round_up;
+ break;
+ case FSR_RD_NEG:
+ rnd_mode = float_round_down;
+ break;
+ }
+ set_float_rounding_mode(rnd_mode, &env->fp_status);
+}
+
+void helper_ldfsr(uint32_t new_fsr)
+{
+ env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
+ set_fsr();
+}
+
+#ifdef TARGET_SPARC64
+void helper_ldxfsr(uint64_t new_fsr)
+{
+ env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
+ set_fsr();
+}
+#endif
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index f2bca77..fd20366 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -333,655 +333,6 @@ void helper_check_align(target_ulong addr, uint32_t align)
}
}
-#define F_HELPER(name, p) void helper_f##name##p(void)
-
-#define F_BINOP(name) \
- float32 helper_f ## name ## s (float32 src1, float32 src2) \
- { \
- return float32_ ## name (src1, src2, &env->fp_status); \
- } \
- F_HELPER(name, d) \
- { \
- DT0 = float64_ ## name (DT0, DT1, &env->fp_status); \
- } \
- F_HELPER(name, q) \
- { \
- QT0 = float128_ ## name (QT0, QT1, &env->fp_status); \
- }
-
-F_BINOP(add);
-F_BINOP(sub);
-F_BINOP(mul);
-F_BINOP(div);
-#undef F_BINOP
-
-void helper_fsmuld(float32 src1, float32 src2)
-{
- DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
- float32_to_float64(src2, &env->fp_status),
- &env->fp_status);
-}
-
-void helper_fdmulq(void)
-{
- QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
- float64_to_float128(DT1, &env->fp_status),
- &env->fp_status);
-}
-
-float32 helper_fnegs(float32 src)
-{
- return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-F_HELPER(neg, d)
-{
- DT0 = float64_chs(DT1);
-}
-
-F_HELPER(neg, q)
-{
- QT0 = float128_chs(QT1);
-}
-#endif
-
-/* Integer to float conversion. */
-float32 helper_fitos(int32_t src)
-{
- return int32_to_float32(src, &env->fp_status);
-}
-
-void helper_fitod(int32_t src)
-{
- DT0 = int32_to_float64(src, &env->fp_status);
-}
-
-void helper_fitoq(int32_t src)
-{
- QT0 = int32_to_float128(src, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-float32 helper_fxtos(void)
-{
- return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, d)
-{
- DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, q)
-{
- QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
-}
-#endif
-#undef F_HELPER
-
-/* floating point conversion */
-float32 helper_fdtos(void)
-{
- return float64_to_float32(DT1, &env->fp_status);
-}
-
-void helper_fstod(float32 src)
-{
- DT0 = float32_to_float64(src, &env->fp_status);
-}
-
-float32 helper_fqtos(void)
-{
- return float128_to_float32(QT1, &env->fp_status);
-}
-
-void helper_fstoq(float32 src)
-{
- QT0 = float32_to_float128(src, &env->fp_status);
-}
-
-void helper_fqtod(void)
-{
- DT0 = float128_to_float64(QT1, &env->fp_status);
-}
-
-void helper_fdtoq(void)
-{
- QT0 = float64_to_float128(DT1, &env->fp_status);
-}
-
-/* Float to integer conversion. */
-int32_t helper_fstoi(float32 src)
-{
- return float32_to_int32_round_to_zero(src, &env->fp_status);
-}
-
-int32_t helper_fdtoi(void)
-{
- return float64_to_int32_round_to_zero(DT1, &env->fp_status);
-}
-
-int32_t helper_fqtoi(void)
-{
- return float128_to_int32_round_to_zero(QT1, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fstox(float32 src)
-{
- *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
-}
-
-void helper_fdtox(void)
-{
- *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
-}
-
-void helper_fqtox(void)
-{
- *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
-}
-
-void helper_faligndata(void)
-{
- uint64_t tmp;
-
- tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
- /* on many architectures a shift of 64 does nothing */
- if ((env->gsr & 7) != 0) {
- tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
- }
- *((uint64_t *)&DT0) = tmp;
-}
-
-#ifdef HOST_WORDS_BIGENDIAN
-#define VIS_B64(n) b[7 - (n)]
-#define VIS_W64(n) w[3 - (n)]
-#define VIS_SW64(n) sw[3 - (n)]
-#define VIS_L64(n) l[1 - (n)]
-#define VIS_B32(n) b[3 - (n)]
-#define VIS_W32(n) w[1 - (n)]
-#else
-#define VIS_B64(n) b[n]
-#define VIS_W64(n) w[n]
-#define VIS_SW64(n) sw[n]
-#define VIS_L64(n) l[n]
-#define VIS_B32(n) b[n]
-#define VIS_W32(n) w[n]
-#endif
-
-typedef union {
- uint8_t b[8];
- uint16_t w[4];
- int16_t sw[4];
- uint32_t l[2];
- uint64_t ll;
- float64 d;
-} VIS64;
-
-typedef union {
- uint8_t b[4];
- uint16_t w[2];
- uint32_t l;
- float32 f;
-} VIS32;
-
-void helper_fpmerge(void)
-{
- VIS64 s, d;
-
- s.d = DT0;
- d.d = DT1;
-
- /* Reverse calculation order to handle overlap */
- d.VIS_B64(7) = s.VIS_B64(3);
- d.VIS_B64(6) = d.VIS_B64(3);
- d.VIS_B64(5) = s.VIS_B64(2);
- d.VIS_B64(4) = d.VIS_B64(2);
- d.VIS_B64(3) = s.VIS_B64(1);
- d.VIS_B64(2) = d.VIS_B64(1);
- d.VIS_B64(1) = s.VIS_B64(0);
- /* d.VIS_B64(0) = d.VIS_B64(0); */
-
- DT0 = d.d;
-}
-
-void helper_fmul8x16(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmul8x16al(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmul8x16au(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmul8sux16(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmul8ulx16(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmuld8sux16(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_L64(r) = tmp;
-
- /* Reverse calculation order to handle overlap */
- PMUL(1);
- PMUL(0);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fmuld8ulx16(void)
-{
- VIS64 s, d;
- uint32_t tmp;
-
- s.d = DT0;
- d.d = DT1;
-
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_L64(r) = tmp;
-
- /* Reverse calculation order to handle overlap */
- PMUL(1);
- PMUL(0);
-#undef PMUL
-
- DT0 = d.d;
-}
-
-void helper_fexpand(void)
-{
- VIS32 s;
- VIS64 d;
-
- s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
- d.d = DT1;
- d.VIS_W64(0) = s.VIS_B32(0) << 4;
- d.VIS_W64(1) = s.VIS_B32(1) << 4;
- d.VIS_W64(2) = s.VIS_B32(2) << 4;
- d.VIS_W64(3) = s.VIS_B32(3) << 4;
-
- DT0 = d.d;
-}
-
-#define VIS_HELPER(name, F) \
- void name##16(void) \
- { \
- VIS64 s, d; \
- \
- s.d = DT0; \
- d.d = DT1; \
- \
- d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \
- d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \
- d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \
- d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \
- \
- DT0 = d.d; \
- } \
- \
- uint32_t name##16s(uint32_t src1, uint32_t src2) \
- { \
- VIS32 s, d; \
- \
- s.l = src1; \
- d.l = src2; \
- \
- d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \
- d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \
- \
- return d.l; \
- } \
- \
- void name##32(void) \
- { \
- VIS64 s, d; \
- \
- s.d = DT0; \
- d.d = DT1; \
- \
- d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \
- d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \
- \
- DT0 = d.d; \
- } \
- \
- uint32_t name##32s(uint32_t src1, uint32_t src2) \
- { \
- VIS32 s, d; \
- \
- s.l = src1; \
- d.l = src2; \
- \
- d.l = F(d.l, s.l); \
- \
- return d.l; \
- }
-
-#define FADD(a, b) ((a) + (b))
-#define FSUB(a, b) ((a) - (b))
-VIS_HELPER(helper_fpadd, FADD)
-VIS_HELPER(helper_fpsub, FSUB)
-
-#define VIS_CMPHELPER(name, F) \
- uint64_t name##16(void) \
- { \
- VIS64 s, d; \
- \
- s.d = DT0; \
- d.d = DT1; \
- \
- d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \
- d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \
- \
- return d.ll; \
- } \
- \
- uint64_t name##32(void) \
- { \
- VIS64 s, d; \
- \
- s.d = DT0; \
- d.d = DT1; \
- \
- d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \
- d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \
- d.VIS_L64(1) = 0; \
- \
- return d.ll; \
- }
-
-#define FCMPGT(a, b) ((a) > (b))
-#define FCMPEQ(a, b) ((a) == (b))
-#define FCMPLE(a, b) ((a) <= (b))
-#define FCMPNE(a, b) ((a) != (b))
-
-VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
-VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
-VIS_CMPHELPER(helper_fcmple, FCMPLE)
-VIS_CMPHELPER(helper_fcmpne, FCMPNE)
-#endif
-
-void helper_check_ieee_exceptions(void)
-{
- target_ulong status;
-
- status = get_float_exception_flags(&env->fp_status);
- if (status) {
- /* Copy IEEE 754 flags into FSR */
- if (status & float_flag_invalid) {
- env->fsr |= FSR_NVC;
- }
- if (status & float_flag_overflow) {
- env->fsr |= FSR_OFC;
- }
- if (status & float_flag_underflow) {
- env->fsr |= FSR_UFC;
- }
- if (status & float_flag_divbyzero) {
- env->fsr |= FSR_DZC;
- }
- if (status & float_flag_inexact) {
- env->fsr |= FSR_NXC;
- }
-
- if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
- /* Unmasked exception, generate a trap */
- env->fsr |= FSR_FTT_IEEE_EXCP;
- helper_raise_exception(env, TT_FP_EXCP);
- } else {
- /* Accumulate exceptions */
- env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
- }
- }
-}
-
-void helper_clear_float_exceptions(void)
-{
- set_float_exception_flags(0, &env->fp_status);
-}
-
-float32 helper_fabss(float32 src)
-{
- return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fabsd(void)
-{
- DT0 = float64_abs(DT1);
-}
-
-void helper_fabsq(void)
-{
- QT0 = float128_abs(QT1);
-}
-#endif
-
-float32 helper_fsqrts(float32 src)
-{
- return float32_sqrt(src, &env->fp_status);
-}
-
-void helper_fsqrtd(void)
-{
- DT0 = float64_sqrt(DT1, &env->fp_status);
-}
-
-void helper_fsqrtq(void)
-{
- QT0 = float128_sqrt(QT1, &env->fp_status);
-}
-
-#define GEN_FCMP(name, size, reg1, reg2, FS, E) \
- void glue(helper_, name) (void) \
- { \
- env->fsr &= FSR_FTT_NMASK; \
- if (E && (glue(size, _is_any_nan)(reg1) || \
- glue(size, _is_any_nan)(reg2)) && \
- (env->fsr & FSR_NVM)) { \
- env->fsr |= FSR_NVC; \
- env->fsr |= FSR_FTT_IEEE_EXCP; \
- helper_raise_exception(env, TT_FP_EXCP); \
- } \
- switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) { \
- case float_relation_unordered: \
- if ((env->fsr & FSR_NVM)) { \
- env->fsr |= FSR_NVC; \
- env->fsr |= FSR_FTT_IEEE_EXCP; \
- helper_raise_exception(env, TT_FP_EXCP); \
- } else { \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS; \
- env->fsr |= FSR_NVA; \
- } \
- break; \
- case float_relation_less: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= FSR_FCC0 << FS; \
- break; \
- case float_relation_greater: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= FSR_FCC1 << FS; \
- break; \
- default: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- break; \
- } \
- }
-#define GEN_FCMPS(name, size, FS, E) \
- void glue(helper_, name)(float32 src1, float32 src2) \
- { \
- env->fsr &= FSR_FTT_NMASK; \
- if (E && (glue(size, _is_any_nan)(src1) || \
- glue(size, _is_any_nan)(src2)) && \
- (env->fsr & FSR_NVM)) { \
- env->fsr |= FSR_NVC; \
- env->fsr |= FSR_FTT_IEEE_EXCP; \
- helper_raise_exception(env, TT_FP_EXCP); \
- } \
- switch (glue(size, _compare) (src1, src2, &env->fp_status)) { \
- case float_relation_unordered: \
- if ((env->fsr & FSR_NVM)) { \
- env->fsr |= FSR_NVC; \
- env->fsr |= FSR_FTT_IEEE_EXCP; \
- helper_raise_exception(env, TT_FP_EXCP); \
- } else { \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS; \
- env->fsr |= FSR_NVA; \
- } \
- break; \
- case float_relation_less: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= FSR_FCC0 << FS; \
- break; \
- case float_relation_greater: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- env->fsr |= FSR_FCC1 << FS; \
- break; \
- default: \
- env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS); \
- break; \
- } \
- }
-
-GEN_FCMPS(fcmps, float32, 0, 0);
-GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
-
-GEN_FCMPS(fcmpes, float32, 0, 1);
-GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
-
-GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
-GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
-
static uint32_t compute_all_flags(void)
{
return env->psr & PSR_ICC;
@@ -1580,33 +931,6 @@ int cpu_cwp_dec(CPUState *env1, int cwp)
return ret;
}
-#ifdef TARGET_SPARC64
-GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
-GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
-
-GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
-GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
-
-GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
-GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
-
-GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
-GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
-
-GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
-GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
-
-GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
-GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
-#endif
-#undef GEN_FCMPS
-
#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) && \
defined(DEBUG_MXCC)
static void dump_mxcc(CPUState *env)
@@ -3748,42 +3072,6 @@ void helper_stqf(target_ulong addr, int mem_idx)
#endif
}
-static inline void set_fsr(void)
-{
- int rnd_mode;
-
- switch (env->fsr & FSR_RD_MASK) {
- case FSR_RD_NEAREST:
- rnd_mode = float_round_nearest_even;
- break;
- default:
- case FSR_RD_ZERO:
- rnd_mode = float_round_to_zero;
- break;
- case FSR_RD_POS:
- rnd_mode = float_round_up;
- break;
- case FSR_RD_NEG:
- rnd_mode = float_round_down;
- break;
- }
- set_float_rounding_mode(rnd_mode, &env->fp_status);
-}
-
-void helper_ldfsr(uint32_t new_fsr)
-{
- env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
- set_fsr();
-}
-
-#ifdef TARGET_SPARC64
-void helper_ldxfsr(uint64_t new_fsr)
-{
- env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
- set_fsr();
-}
-#endif
-
#ifndef TARGET_SPARC64
/* XXX: use another pointer for %iN registers to avoid slow wrapping
handling ? */
@@ -3993,37 +3281,6 @@ void helper_wrcwp(target_ulong new_cwp)
put_cwp64(new_cwp);
}
-/* This function uses non-native bit order */
-#define GET_FIELD(X, FROM, TO) \
- ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
-
-/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
-#define GET_FIELD_SP(X, FROM, TO) \
- GET_FIELD(X, 63 - (TO), 63 - (FROM))
-
-target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
-{
- return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
- (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
- (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
- (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
- (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
- (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
- (((pixel_addr >> 55) & 1) << 4) |
- (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
- GET_FIELD_SP(pixel_addr, 11, 12);
-}
-
-target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
-{
- uint64_t tmp;
-
- tmp = addr + offset;
- env->gsr &= ~7ULL;
- env->gsr |= tmp & 7ULL;
- return tmp & ~7ULL;
-}
-
static inline uint64_t *get_gregset(uint32_t pstate)
{
switch (pstate) {
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
new file mode 100644
index 0000000..87a86ef
--- /dev/null
+++ b/target-sparc/vis_helper.c
@@ -0,0 +1,403 @@
+/*
+ * VIS op helpers
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+/* This function uses non-native bit order */
+#define GET_FIELD(X, FROM, TO) \
+ ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
+
+/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
+#define GET_FIELD_SP(X, FROM, TO) \
+ GET_FIELD(X, 63 - (TO), 63 - (FROM))
+
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+{
+ return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
+ (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
+ (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
+ (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
+ (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
+ (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
+ (((pixel_addr >> 55) & 1) << 4) |
+ (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
+ GET_FIELD_SP(pixel_addr, 11, 12);
+}
+
+target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
+{
+ uint64_t tmp;
+
+ tmp = addr + offset;
+ env->gsr &= ~7ULL;
+ env->gsr |= tmp & 7ULL;
+ return tmp & ~7ULL;
+}
+
+void helper_faligndata(void)
+{
+ uint64_t tmp;
+
+ tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
+ /* on many architectures a shift of 64 does nothing */
+ if ((env->gsr & 7) != 0) {
+ tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
+ }
+ *((uint64_t *)&DT0) = tmp;
+}
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define VIS_B64(n) b[7 - (n)]
+#define VIS_W64(n) w[3 - (n)]
+#define VIS_SW64(n) sw[3 - (n)]
+#define VIS_L64(n) l[1 - (n)]
+#define VIS_B32(n) b[3 - (n)]
+#define VIS_W32(n) w[1 - (n)]
+#else
+#define VIS_B64(n) b[n]
+#define VIS_W64(n) w[n]
+#define VIS_SW64(n) sw[n]
+#define VIS_L64(n) l[n]
+#define VIS_B32(n) b[n]
+#define VIS_W32(n) w[n]
+#endif
+
+typedef union {
+ uint8_t b[8];
+ uint16_t w[4];
+ int16_t sw[4];
+ uint32_t l[2];
+ uint64_t ll;
+ float64 d;
+} VIS64;
+
+typedef union {
+ uint8_t b[4];
+ uint16_t w[2];
+ uint32_t l;
+ float32 f;
+} VIS32;
+
+void helper_fpmerge(void)
+{
+ VIS64 s, d;
+
+ s.d = DT0;
+ d.d = DT1;
+
+ /* Reverse calculation order to handle overlap */
+ d.VIS_B64(7) = s.VIS_B64(3);
+ d.VIS_B64(6) = d.VIS_B64(3);
+ d.VIS_B64(5) = s.VIS_B64(2);
+ d.VIS_B64(4) = d.VIS_B64(2);
+ d.VIS_B64(3) = s.VIS_B64(1);
+ d.VIS_B64(2) = d.VIS_B64(1);
+ d.VIS_B64(1) = s.VIS_B64(0);
+ /* d.VIS_B64(0) = d.VIS_B64(0); */
+
+ DT0 = d.d;
+}
+
+void helper_fmul8x16(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_W64(r) = tmp >> 8;
+
+ PMUL(0);
+ PMUL(1);
+ PMUL(2);
+ PMUL(3);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmul8x16al(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_W64(r) = tmp >> 8;
+
+ PMUL(0);
+ PMUL(1);
+ PMUL(2);
+ PMUL(3);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmul8x16au(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_W64(r) = tmp >> 8;
+
+ PMUL(0);
+ PMUL(1);
+ PMUL(2);
+ PMUL(3);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmul8sux16(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_W64(r) = tmp >> 8;
+
+ PMUL(0);
+ PMUL(1);
+ PMUL(2);
+ PMUL(3);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmul8ulx16(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_W64(r) = tmp >> 8;
+
+ PMUL(0);
+ PMUL(1);
+ PMUL(2);
+ PMUL(3);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmuld8sux16(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_L64(r) = tmp;
+
+ /* Reverse calculation order to handle overlap */
+ PMUL(1);
+ PMUL(0);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fmuld8ulx16(void)
+{
+ VIS64 s, d;
+ uint32_t tmp;
+
+ s.d = DT0;
+ d.d = DT1;
+
+#define PMUL(r) \
+ tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
+ if ((tmp & 0xff) > 0x7f) { \
+ tmp += 0x100; \
+ } \
+ d.VIS_L64(r) = tmp;
+
+ /* Reverse calculation order to handle overlap */
+ PMUL(1);
+ PMUL(0);
+#undef PMUL
+
+ DT0 = d.d;
+}
+
+void helper_fexpand(void)
+{
+ VIS32 s;
+ VIS64 d;
+
+ s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
+ d.d = DT1;
+ d.VIS_W64(0) = s.VIS_B32(0) << 4;
+ d.VIS_W64(1) = s.VIS_B32(1) << 4;
+ d.VIS_W64(2) = s.VIS_B32(2) << 4;
+ d.VIS_W64(3) = s.VIS_B32(3) << 4;
+
+ DT0 = d.d;
+}
+
+#define VIS_HELPER(name, F) \
+ void name##16(void) \
+ { \
+ VIS64 s, d; \
+ \
+ s.d = DT0; \
+ d.d = DT1; \
+ \
+ d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \
+ d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \
+ d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \
+ d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \
+ \
+ DT0 = d.d; \
+ } \
+ \
+ uint32_t name##16s(uint32_t src1, uint32_t src2) \
+ { \
+ VIS32 s, d; \
+ \
+ s.l = src1; \
+ d.l = src2; \
+ \
+ d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \
+ d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \
+ \
+ return d.l; \
+ } \
+ \
+ void name##32(void) \
+ { \
+ VIS64 s, d; \
+ \
+ s.d = DT0; \
+ d.d = DT1; \
+ \
+ d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \
+ d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \
+ \
+ DT0 = d.d; \
+ } \
+ \
+ uint32_t name##32s(uint32_t src1, uint32_t src2) \
+ { \
+ VIS32 s, d; \
+ \
+ s.l = src1; \
+ d.l = src2; \
+ \
+ d.l = F(d.l, s.l); \
+ \
+ return d.l; \
+ }
+
+#define FADD(a, b) ((a) + (b))
+#define FSUB(a, b) ((a) - (b))
+VIS_HELPER(helper_fpadd, FADD)
+VIS_HELPER(helper_fpsub, FSUB)
+
+#define VIS_CMPHELPER(name, F) \
+ uint64_t name##16(void) \
+ { \
+ VIS64 s, d; \
+ \
+ s.d = DT0; \
+ d.d = DT1; \
+ \
+ d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \
+ d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \
+ d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \
+ d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \
+ d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \
+ \
+ return d.ll; \
+ } \
+ \
+ uint64_t name##32(void) \
+ { \
+ VIS64 s, d; \
+ \
+ s.d = DT0; \
+ d.d = DT1; \
+ \
+ d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \
+ d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \
+ d.VIS_L64(1) = 0; \
+ \
+ return d.ll; \
+ }
+
+#define FCMPGT(a, b) ((a) > (b))
+#define FCMPEQ(a, b) ((a) == (b))
+#define FCMPLE(a, b) ((a) <= (b))
+#define FCMPNE(a, b) ((a) != (b))
+
+VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
+VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
+VIS_CMPHELPER(helper_fcmple, FCMPLE)
+VIS_CMPHELPER(helper_fcmpne, FCMPNE)