Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20150521' into staging

Rewrite fp exceptions

# gpg: Signature made Thu May 21 18:35:52 2015 BST using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg: aka "Richard Henderson <rth@redhat.com>"
# gpg: aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-axp-20150521:
  target-alpha: Add vector implementation for CMPBGE
  target-alpha: Rewrite helper_zapnot
  target-alpha: Raise IOV from CVTQL
  target-alpha: Suppress underflow from CVTTQ if DNZ
  target-alpha: Raise EXC_M_INV properly for fp inputs
  target-alpha: Disallow literal operand to 1C.30 to 1C.37
  target-alpha: Implement WH64EN
  target-alpha: Fix integer overflow checking insns
  target-alpha: Fix cvttq vs inf
  target-alpha: Fix cvttq vs large integers
  target-alpha: Raise IOV from CVTTQ
  target-alpha: Set EXC_M_SWC for exceptions from /S insns
  target-alpha: Set fpcr_exc_status even for disabled exceptions
  target-alpha: Tidy FPCR representation
  target-alpha: Set PC correctly for floating-point exceptions
  target-alpha: Forget installed round mode after MT_FPCR
  target-alpha: Rename floating-point subroutines
  target-alpha: Move VAX helpers to a new file

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2015-05-22 10:06:33 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2015-05-22 10:06:33 +0100
commit: 27e1259a69c49ee2dd53385f4ca4ca14b822191d (patch)
tree: 3b6d8b7c48c3d1afa7e6ce6ae29bd0c5b01cdf9c /target-alpha
parent: 9e549d36e989b14423279fb991b71728a2a4ae7c (diff)
parent: 32ad48abd74a997220b841e4e913edeb267aa362 (diff)
download: qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.zip
qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.tar.gz
qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.tar.bz2
9 files changed, 752 insertions, 774 deletions
diff --git a/target-alpha/Makefile.objs b/target-alpha/Makefile.objs
index b96c5da..6366462 100644
--- a/target-alpha/Makefile.objs
+++ b/target-alpha/Makefile.objs
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-y += translate.o helper.o cpu.o
-obj-y += int_helper.o fpu_helper.o sys_helper.o mem_helper.o
+obj-y += int_helper.o fpu_helper.o vax_helper.o sys_helper.o mem_helper.o
 obj-y += gdbstub.o
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 9538f19..2a4d5cb 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -150,54 +150,54 @@ enum {
     FP_ROUND_DYNAMIC = 0x3,
 };
 
-/* FPCR bits */
-#define FPCR_SUM		(1ULL << 63)
-#define FPCR_INED		(1ULL << 62)
-#define FPCR_UNFD		(1ULL << 61)
-#define FPCR_UNDZ		(1ULL << 60)
-#define FPCR_DYN_SHIFT		58
-#define FPCR_DYN_CHOPPED	(0ULL << FPCR_DYN_SHIFT)
-#define FPCR_DYN_MINUS		(1ULL << FPCR_DYN_SHIFT)
-#define FPCR_DYN_NORMAL		(2ULL << FPCR_DYN_SHIFT)
-#define FPCR_DYN_PLUS		(3ULL << FPCR_DYN_SHIFT)
-#define FPCR_DYN_MASK		(3ULL << FPCR_DYN_SHIFT)
-#define FPCR_IOV		(1ULL << 57)
-#define FPCR_INE		(1ULL << 56)
-#define FPCR_UNF		(1ULL << 55)
-#define FPCR_OVF		(1ULL << 54)
-#define FPCR_DZE		(1ULL << 53)
-#define FPCR_INV		(1ULL << 52)
-#define FPCR_OVFD		(1ULL << 51)
-#define FPCR_DZED		(1ULL << 50)
-#define FPCR_INVD		(1ULL << 49)
-#define FPCR_DNZ		(1ULL << 48)
-#define FPCR_DNOD		(1ULL << 47)
-#define FPCR_STATUS_MASK	(FPCR_IOV | FPCR_INE | FPCR_UNF \
-				 | FPCR_OVF | FPCR_DZE | FPCR_INV)
+/* FPCR bits -- right-shifted 32 so we can use a uint32_t.  */
+#define FPCR_SUM                (1U << (63 - 32))
+#define FPCR_INED               (1U << (62 - 32))
+#define FPCR_UNFD               (1U << (61 - 32))
+#define FPCR_UNDZ               (1U << (60 - 32))
+#define FPCR_DYN_SHIFT          (58 - 32)
+#define FPCR_DYN_CHOPPED        (0U << FPCR_DYN_SHIFT)
+#define FPCR_DYN_MINUS          (1U << FPCR_DYN_SHIFT)
+#define FPCR_DYN_NORMAL         (2U << FPCR_DYN_SHIFT)
+#define FPCR_DYN_PLUS           (3U << FPCR_DYN_SHIFT)
+#define FPCR_DYN_MASK           (3U << FPCR_DYN_SHIFT)
+#define FPCR_IOV                (1U << (57 - 32))
+#define FPCR_INE                (1U << (56 - 32))
+#define FPCR_UNF                (1U << (55 - 32))
+#define FPCR_OVF                (1U << (54 - 32))
+#define FPCR_DZE                (1U << (53 - 32))
+#define FPCR_INV                (1U << (52 - 32))
+#define FPCR_OVFD               (1U << (51 - 32))
+#define FPCR_DZED               (1U << (50 - 32))
+#define FPCR_INVD               (1U << (49 - 32))
+#define FPCR_DNZ                (1U << (48 - 32))
+#define FPCR_DNOD               (1U << (47 - 32))
+#define FPCR_STATUS_MASK        (FPCR_IOV | FPCR_INE | FPCR_UNF \
+                                 | FPCR_OVF | FPCR_DZE | FPCR_INV)
 
 /* The silly software trap enables implemented by the kernel emulation.
    These are more or less architecturally required, since the real hardware
    has read-as-zero bits in the FPCR when the features aren't implemented.
    For the purposes of QEMU, we pretend the FPCR can hold everything.  */
-#define SWCR_TRAP_ENABLE_INV	(1ULL << 1)
-#define SWCR_TRAP_ENABLE_DZE	(1ULL << 2)
-#define SWCR_TRAP_ENABLE_OVF	(1ULL << 3)
-#define SWCR_TRAP_ENABLE_UNF	(1ULL << 4)
-#define SWCR_TRAP_ENABLE_INE	(1ULL << 5)
-#define SWCR_TRAP_ENABLE_DNO	(1ULL << 6)
-#define SWCR_TRAP_ENABLE_MASK	((1ULL << 7) - (1ULL << 1))
-
-#define SWCR_MAP_DMZ		(1ULL << 12)
-#define SWCR_MAP_UMZ		(1ULL << 13)
-#define SWCR_MAP_MASK		(SWCR_MAP_DMZ | SWCR_MAP_UMZ)
-
-#define SWCR_STATUS_INV		(1ULL << 17)
-#define SWCR_STATUS_DZE		(1ULL << 18)
-#define SWCR_STATUS_OVF		(1ULL << 19)
-#define SWCR_STATUS_UNF		(1ULL << 20)
-#define SWCR_STATUS_INE		(1ULL << 21)
-#define SWCR_STATUS_DNO		(1ULL << 22)
-#define SWCR_STATUS_MASK	((1ULL << 23) - (1ULL << 17))
+#define SWCR_TRAP_ENABLE_INV    (1U << 1)
+#define SWCR_TRAP_ENABLE_DZE    (1U << 2)
+#define SWCR_TRAP_ENABLE_OVF    (1U << 3)
+#define SWCR_TRAP_ENABLE_UNF    (1U << 4)
+#define SWCR_TRAP_ENABLE_INE    (1U << 5)
+#define SWCR_TRAP_ENABLE_DNO    (1U << 6)
+#define SWCR_TRAP_ENABLE_MASK   ((1U << 7) - (1U << 1))
+
+#define SWCR_MAP_DMZ            (1U << 12)
+#define SWCR_MAP_UMZ            (1U << 13)
+#define SWCR_MAP_MASK           (SWCR_MAP_DMZ | SWCR_MAP_UMZ)
+
+#define SWCR_STATUS_INV         (1U << 17)
+#define SWCR_STATUS_DZE         (1U << 18)
+#define SWCR_STATUS_OVF         (1U << 19)
+#define SWCR_STATUS_UNF         (1U << 20)
+#define SWCR_STATUS_INE         (1U << 21)
+#define SWCR_STATUS_DNO         (1U << 22)
+#define SWCR_STATUS_MASK        ((1U << 23) - (1U << 17))
 
 #define SWCR_MASK  (SWCR_TRAP_ENABLE_MASK | SWCR_MAP_MASK | SWCR_STATUS_MASK)
 
@@ -238,14 +238,13 @@ struct CPUAlphaState {
     uint64_t lock_addr;
     uint64_t lock_st_addr;
     uint64_t lock_value;
+
+    /* The FPCR, and disassembled portions thereof.  */
+    uint32_t fpcr;
+    uint32_t fpcr_exc_enable;
     float_status fp_status;
-    /* The following fields make up the FPCR, but in FP_STATUS format.  */
-    uint8_t fpcr_exc_status;
-    uint8_t fpcr_exc_mask;
     uint8_t fpcr_dyn_round;
     uint8_t fpcr_flush_to_zero;
-    uint8_t fpcr_dnod;
-    uint8_t fpcr_undz;
 
     /* The Internal Processor Registers.  Some of these we assume always
        exist for use in user-mode.  */
diff --git a/target-alpha/fpu_helper.c b/target-alpha/fpu_helper.c
index d2d776c..b091aa8 100644
--- a/target-alpha/fpu_helper.c
+++ b/target-alpha/fpu_helper.c
@@ -34,57 +34,65 @@ void helper_setflushzero(CPUAlphaState *env, uint32_t val)
     set_flush_to_zero(val, &FP_STATUS);
 }
 
-void helper_fp_exc_clear(CPUAlphaState *env)
-{
-    set_float_exception_flags(0, &FP_STATUS);
-}
+#define CONVERT_BIT(X, SRC, DST) \
+    (SRC > DST ? (X) / (SRC / DST) & (DST) : ((X) & SRC) * (DST / SRC))
 
-uint32_t helper_fp_exc_get(CPUAlphaState *env)
+static uint32_t soft_to_fpcr_exc(CPUAlphaState *env)
 {
-    return get_float_exception_flags(&FP_STATUS);
+    uint8_t exc = get_float_exception_flags(&FP_STATUS);
+    uint32_t ret = 0;
+
+    if (unlikely(exc)) {
+        set_float_exception_flags(0, &FP_STATUS);
+        ret |= CONVERT_BIT(exc, float_flag_invalid, FPCR_INV);
+        ret |= CONVERT_BIT(exc, float_flag_divbyzero, FPCR_DZE);
+        ret |= CONVERT_BIT(exc, float_flag_overflow, FPCR_OVF);
+        ret |= CONVERT_BIT(exc, float_flag_underflow, FPCR_UNF);
+        ret |= CONVERT_BIT(exc, float_flag_inexact, FPCR_INE);
+    }
+
+    return ret;
 }
 
-static inline void inline_fp_exc_raise(CPUAlphaState *env, uintptr_t retaddr,
-                                       uint32_t exc, uint32_t regno)
+static void fp_exc_raise1(CPUAlphaState *env, uintptr_t retaddr,
+                          uint32_t exc, uint32_t regno, uint32_t hw_exc)
 {
-    if (exc) {
-        uint32_t hw_exc = 0;
-
-        if (exc & float_flag_invalid) {
-            hw_exc |= EXC_M_INV;
-        }
-        if (exc & float_flag_divbyzero) {
-            hw_exc |= EXC_M_DZE;
-        }
-        if (exc & float_flag_overflow) {
-            hw_exc |= EXC_M_FOV;
-        }
-        if (exc & float_flag_underflow) {
-            hw_exc |= EXC_M_UNF;
-        }
-        if (exc & float_flag_inexact) {
-            hw_exc |= EXC_M_INE;
-        }
+    hw_exc |= CONVERT_BIT(exc, FPCR_INV, EXC_M_INV);
+    hw_exc |= CONVERT_BIT(exc, FPCR_DZE, EXC_M_DZE);
+    hw_exc |= CONVERT_BIT(exc, FPCR_OVF, EXC_M_FOV);
+    hw_exc |= CONVERT_BIT(exc, FPCR_UNF, EXC_M_UNF);
+    hw_exc |= CONVERT_BIT(exc, FPCR_INE, EXC_M_INE);
+    hw_exc |= CONVERT_BIT(exc, FPCR_IOV, EXC_M_IOV);
 
-        arith_excp(env, retaddr, hw_exc, 1ull << regno);
-    }
+    arith_excp(env, retaddr, hw_exc, 1ull << regno);
 }
 
 /* Raise exceptions for ieee fp insns without software completion.
    In that case there are no exceptions that don't trap; the mask
    doesn't apply.  */
-void helper_fp_exc_raise(CPUAlphaState *env, uint32_t exc, uint32_t regno)
+void helper_fp_exc_raise(CPUAlphaState *env, uint32_t ignore, uint32_t regno)
 {
-    inline_fp_exc_raise(env, GETPC(), exc, regno);
+    uint32_t exc = env->error_code;
+    if (exc) {
+        env->fpcr |= exc;
+        exc &= ~ignore;
+        if (exc) {
+            fp_exc_raise1(env, GETPC(), exc, regno, 0);
+        }
+    }
 }
 
 /* Raise exceptions for ieee fp insns with software completion.  */
-void helper_fp_exc_raise_s(CPUAlphaState *env, uint32_t exc, uint32_t regno)
+void helper_fp_exc_raise_s(CPUAlphaState *env, uint32_t ignore, uint32_t regno)
 {
+    uint32_t exc = env->error_code & ~ignore;
     if (exc) {
-        env->fpcr_exc_status |= exc;
-        exc &= ~env->fpcr_exc_mask;
-        inline_fp_exc_raise(env, GETPC(), exc, regno);
+        env->fpcr |= exc;
+        exc &= ~ignore;
+        if (exc) {
+            exc &= env->fpcr_exc_enable;
+            fp_exc_raise1(env, GETPC(), exc, regno, EXC_M_SWC);
+        }
     }
 }
 
@@ -96,16 +104,14 @@ void helper_ieee_input(CPUAlphaState *env, uint64_t val)
     uint64_t frac = val & 0xfffffffffffffull;
 
     if (exp == 0) {
-        /* Denormals without DNZ set raise an exception.  */
-        if (frac != 0 && !env->fp_status.flush_inputs_to_zero) {
-            arith_excp(env, GETPC(), EXC_M_UNF, 0);
+        /* Denormals without /S raise an exception.  */
+        if (frac != 0) {
+            arith_excp(env, GETPC(), EXC_M_INV, 0);
         }
     } else if (exp == 0x7ff) {
         /* Infinity or NaN.  */
-        /* ??? I'm not sure these exception bit flags are correct.  I do
-           know that the Linux kernel, at least, doesn't rely on them and
-           just emulates the insn to figure out what exception to use.  */
-        arith_excp(env, GETPC(), frac ? EXC_M_INV : EXC_M_FOV, 0);
+        env->fpcr |= FPCR_INV;
+        arith_excp(env, GETPC(), EXC_M_INV, 0);
     }
 }
 
@@ -116,274 +122,31 @@ void helper_ieee_input_cmp(CPUAlphaState *env, uint64_t val)
     uint64_t frac = val & 0xfffffffffffffull;
 
     if (exp == 0) {
-        /* Denormals without DNZ set raise an exception.  */
-        if (frac != 0 && !env->fp_status.flush_inputs_to_zero) {
-            arith_excp(env, GETPC(), EXC_M_UNF, 0);
+        /* Denormals without /S raise an exception.  */
+        if (frac != 0) {
+            arith_excp(env, GETPC(), EXC_M_INV, 0);
         }
     } else if (exp == 0x7ff && frac) {
         /* NaN.  */
+        env->fpcr |= FPCR_INV;
         arith_excp(env, GETPC(), EXC_M_INV, 0);
     }
 }
 
-/* F floating (VAX) */
-static uint64_t float32_to_f(float32 fa)
+/* Input handling with software completion.  Trap for denorms, unless DNZ
+   is set.  If we try to support DNOD (which none of the produced hardware
+   did, AFAICS), we'll need to suppress the trap when FPCR.DNOD is set;
+   then the code downstream of that will need to cope with denorms sans
+   flush_input_to_zero.  Most of it should work sanely, but there's
+   nothing to compare with.  */
+void helper_ieee_input_s(CPUAlphaState *env, uint64_t val)
 {
-    uint64_t r, exp, mant, sig;
-    CPU_FloatU a;
-
-    a.f = fa;
-    sig = ((uint64_t)a.l & 0x80000000) << 32;
-    exp = (a.l >> 23) & 0xff;
-    mant = ((uint64_t)a.l & 0x007fffff) << 29;
-
-    if (exp == 255) {
-        /* NaN or infinity */
-        r = 1; /* VAX dirty zero */
-    } else if (exp == 0) {
-        if (mant == 0) {
-            /* Zero */
-            r = 0;
-        } else {
-            /* Denormalized */
-            r = sig | ((exp + 1) << 52) | mant;
-        }
-    } else {
-        if (exp >= 253) {
-            /* Overflow */
-            r = 1; /* VAX dirty zero */
-        } else {
-            r = sig | ((exp + 2) << 52);
-        }
+    if (unlikely(2 * val - 1 < 0x1fffffffffffffull)
+        && !env->fp_status.flush_inputs_to_zero) {
+        arith_excp(env, GETPC(), EXC_M_INV | EXC_M_SWC, 0);
     }
-
-    return r;
-}
-
-static float32 f_to_float32(CPUAlphaState *env, uintptr_t retaddr, uint64_t a)
-{
-    uint32_t exp, mant_sig;
-    CPU_FloatU r;
-
-    exp = ((a >> 55) & 0x80) | ((a >> 52) & 0x7f);
-    mant_sig = ((a >> 32) & 0x80000000) | ((a >> 29) & 0x007fffff);
-
-    if (unlikely(!exp && mant_sig)) {
-        /* Reserved operands / Dirty zero */
-        dynamic_excp(env, retaddr, EXCP_OPCDEC, 0);
-    }
-
-    if (exp < 3) {
-        /* Underflow */
-        r.l = 0;
-    } else {
-        r.l = ((exp - 2) << 23) | mant_sig;
-    }
-
-    return r.f;
-}
-
-uint32_t helper_f_to_memory(uint64_t a)
-{
-    uint32_t r;
-    r =  (a & 0x00001fffe0000000ull) >> 13;
-    r |= (a & 0x07ffe00000000000ull) >> 45;
-    r |= (a & 0xc000000000000000ull) >> 48;
-    return r;
-}
-
-uint64_t helper_memory_to_f(uint32_t a)
-{
-    uint64_t r;
-    r =  ((uint64_t)(a & 0x0000c000)) << 48;
-    r |= ((uint64_t)(a & 0x003fffff)) << 45;
-    r |= ((uint64_t)(a & 0xffff0000)) << 13;
-    if (!(a & 0x00004000)) {
-        r |= 0x7ll << 59;
-    }
-    return r;
-}
-
-/* ??? Emulating VAX arithmetic with IEEE arithmetic is wrong.  We should
-   either implement VAX arithmetic properly or just signal invalid opcode.  */
-
-uint64_t helper_addf(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float32 fa, fb, fr;
-
-    fa = f_to_float32(env, GETPC(), a);
-    fb = f_to_float32(env, GETPC(), b);
-    fr = float32_add(fa, fb, &FP_STATUS);
-    return float32_to_f(fr);
 }
 
-uint64_t helper_subf(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float32 fa, fb, fr;
-
-    fa = f_to_float32(env, GETPC(), a);
-    fb = f_to_float32(env, GETPC(), b);
-    fr = float32_sub(fa, fb, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-uint64_t helper_mulf(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float32 fa, fb, fr;
-
-    fa = f_to_float32(env, GETPC(), a);
-    fb = f_to_float32(env, GETPC(), b);
-    fr = float32_mul(fa, fb, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-uint64_t helper_divf(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float32 fa, fb, fr;
-
-    fa = f_to_float32(env, GETPC(), a);
-    fb = f_to_float32(env, GETPC(), b);
-    fr = float32_div(fa, fb, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-uint64_t helper_sqrtf(CPUAlphaState *env, uint64_t t)
-{
-    float32 ft, fr;
-
-    ft = f_to_float32(env, GETPC(), t);
-    fr = float32_sqrt(ft, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-
-/* G floating (VAX) */
-static uint64_t float64_to_g(float64 fa)
-{
-    uint64_t r, exp, mant, sig;
-    CPU_DoubleU a;
-
-    a.d = fa;
-    sig = a.ll & 0x8000000000000000ull;
-    exp = (a.ll >> 52) & 0x7ff;
-    mant = a.ll & 0x000fffffffffffffull;
-
-    if (exp == 2047) {
-        /* NaN or infinity */
-        r = 1; /* VAX dirty zero */
-    } else if (exp == 0) {
-        if (mant == 0) {
-            /* Zero */
-            r = 0;
-        } else {
-            /* Denormalized */
-            r = sig | ((exp + 1) << 52) | mant;
-        }
-    } else {
-        if (exp >= 2045) {
-            /* Overflow */
-            r = 1; /* VAX dirty zero */
-        } else {
-            r = sig | ((exp + 2) << 52);
-        }
-    }
-
-    return r;
-}
-
-static float64 g_to_float64(CPUAlphaState *env, uintptr_t retaddr, uint64_t a)
-{
-    uint64_t exp, mant_sig;
-    CPU_DoubleU r;
-
-    exp = (a >> 52) & 0x7ff;
-    mant_sig = a & 0x800fffffffffffffull;
-
-    if (!exp && mant_sig) {
-        /* Reserved operands / Dirty zero */
-        dynamic_excp(env, retaddr, EXCP_OPCDEC, 0);
-    }
-
-    if (exp < 3) {
-        /* Underflow */
-        r.ll = 0;
-    } else {
-        r.ll = ((exp - 2) << 52) | mant_sig;
-    }
-
-    return r.d;
-}
-
-uint64_t helper_g_to_memory(uint64_t a)
-{
-    uint64_t r;
-    r =  (a & 0x000000000000ffffull) << 48;
-    r |= (a & 0x00000000ffff0000ull) << 16;
-    r |= (a & 0x0000ffff00000000ull) >> 16;
-    r |= (a & 0xffff000000000000ull) >> 48;
-    return r;
-}
-
-uint64_t helper_memory_to_g(uint64_t a)
-{
-    uint64_t r;
-    r =  (a & 0x000000000000ffffull) << 48;
-    r |= (a & 0x00000000ffff0000ull) << 16;
-    r |= (a & 0x0000ffff00000000ull) >> 16;
-    r |= (a & 0xffff000000000000ull) >> 48;
-    return r;
-}
-
-uint64_t helper_addg(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb, fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-    fr = float64_add(fa, fb, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-uint64_t helper_subg(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb, fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-    fr = float64_sub(fa, fb, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-uint64_t helper_mulg(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb, fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-    fr = float64_mul(fa, fb, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-uint64_t helper_divg(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb, fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-    fr = float64_div(fa, fb, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-uint64_t helper_sqrtg(CPUAlphaState *env, uint64_t a)
-{
-    float64 fa, fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fr = float64_sqrt(fa, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-
 /* S floating (single) */
 
 /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
@@ -447,6 +210,8 @@ uint64_t helper_adds(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = s_to_float32(a);
     fb = s_to_float32(b);
     fr = float32_add(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -457,6 +222,8 @@ uint64_t helper_subs(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = s_to_float32(a);
     fb = s_to_float32(b);
     fr = float32_sub(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -467,6 +234,8 @@ uint64_t helper_muls(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = s_to_float32(a);
     fb = s_to_float32(b);
     fr = float32_mul(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -477,6 +246,8 @@ uint64_t helper_divs(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = s_to_float32(a);
     fb = s_to_float32(b);
     fr = float32_div(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -486,6 +257,8 @@ uint64_t helper_sqrts(CPUAlphaState *env, uint64_t a)
 
     fa = s_to_float32(a);
     fr = float32_sqrt(fa, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -514,6 +287,8 @@ uint64_t helper_addt(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = t_to_float64(a);
     fb = t_to_float64(b);
     fr = float64_add(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
@@ -524,6 +299,8 @@ uint64_t helper_subt(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = t_to_float64(a);
     fb = t_to_float64(b);
     fr = float64_sub(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
@@ -534,6 +311,8 @@ uint64_t helper_mult(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = t_to_float64(a);
     fb = t_to_float64(b);
     fr = float64_mul(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
@@ -544,6 +323,8 @@ uint64_t helper_divt(CPUAlphaState *env, uint64_t a, uint64_t b)
     fa = t_to_float64(a);
     fb = t_to_float64(b);
     fr = float64_div(fa, fb, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
@@ -553,6 +334,8 @@ uint64_t helper_sqrtt(CPUAlphaState *env, uint64_t a)
 
     fa = t_to_float64(a);
     fr = float64_sqrt(fa, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
@@ -560,99 +343,65 @@ uint64_t helper_sqrtt(CPUAlphaState *env, uint64_t a)
 uint64_t helper_cmptun(CPUAlphaState *env, uint64_t a, uint64_t b)
 {
     float64 fa, fb;
+    uint64_t ret = 0;
 
     fa = t_to_float64(a);
     fb = t_to_float64(b);
 
     if (float64_unordered_quiet(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
+        ret = 0x4000000000000000ULL;
     }
+    env->error_code = soft_to_fpcr_exc(env);
+
+    return ret;
 }
 
 uint64_t helper_cmpteq(CPUAlphaState *env, uint64_t a, uint64_t b)
 {
     float64 fa, fb;
+    uint64_t ret = 0;
 
     fa = t_to_float64(a);
     fb = t_to_float64(b);
 
     if (float64_eq_quiet(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
+        ret = 0x4000000000000000ULL;
     }
+    env->error_code = soft_to_fpcr_exc(env);
+
+    return ret;
 }
 
 uint64_t helper_cmptle(CPUAlphaState *env, uint64_t a, uint64_t b)
 {
     float64 fa, fb;
+    uint64_t ret = 0;
 
     fa = t_to_float64(a);
     fb = t_to_float64(b);
 
     if (float64_le(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
+        ret = 0x4000000000000000ULL;
     }
+    env->error_code = soft_to_fpcr_exc(env);
+
+    return ret;
 }
 
 uint64_t helper_cmptlt(CPUAlphaState *env, uint64_t a, uint64_t b)
 {
     float64 fa, fb;
+    uint64_t ret = 0;
 
     fa = t_to_float64(a);
     fb = t_to_float64(b);
 
     if (float64_lt(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
-    }
-}
-
-uint64_t helper_cmpgeq(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-
-    if (float64_eq_quiet(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
+        ret = 0x4000000000000000ULL;
     }
-}
-
-uint64_t helper_cmpgle(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb;
+    env->error_code = soft_to_fpcr_exc(env);
 
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-
-    if (float64_le(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
-    }
-}
-
-uint64_t helper_cmpglt(CPUAlphaState *env, uint64_t a, uint64_t b)
-{
-    float64 fa, fb;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fb = g_to_float64(env, GETPC(), b);
-
-    if (float64_lt(fa, fb, &FP_STATUS)) {
-        return 0x4000000000000000ULL;
-    } else {
-        return 0;
-    }
+    return ret;
 }
 
 /* Floating point format conversion */
@@ -663,6 +412,8 @@ uint64_t helper_cvtts(CPUAlphaState *env, uint64_t a)
 
     fa = t_to_float64(a);
     fr = float64_to_float32(fa, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
@@ -673,23 +424,24 @@ uint64_t helper_cvtst(CPUAlphaState *env, uint64_t a)
 
     fa = s_to_float32(a);
     fr = float32_to_float64(fa, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float64_to_t(fr);
 }
 
 uint64_t helper_cvtqs(CPUAlphaState *env, uint64_t a)
 {
     float32 fr = int64_to_float32(a, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
+
     return float32_to_s(fr);
 }
 
 /* Implement float64 to uint64 conversion without saturation -- we must
    supply the truncated result.  This behaviour is used by the compiler
-   to get unsigned conversion for free with the same instruction.
-
-   The VI flag is set when overflow or inexact exceptions should be raised.  */
+   to get unsigned conversion for free with the same instruction.  */
 
-static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
-                                    int roundmode, int VI)
+static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode)
 {
     uint64_t frac, ret = 0;
     uint32_t exp, sign, exc = 0;
@@ -700,11 +452,11 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
     frac = a & 0xfffffffffffffull;
 
     if (exp == 0) {
-        if (unlikely(frac != 0)) {
+        if (unlikely(frac != 0) && !env->fp_status.flush_inputs_to_zero) {
             goto do_underflow;
         }
     } else if (exp == 0x7ff) {
-        exc = (frac ? float_flag_invalid : VI ? float_flag_overflow : 0);
+        exc = FPCR_INV;
     } else {
         /* Restore implicit bit.  */
         frac |= 0x10000000000000ull;
@@ -713,11 +465,12 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
         if (shift >= 0) {
             /* In this case the number is so large that we must shift
                the fraction left.  There is no rounding to do.  */
-            if (shift < 63) {
+            if (shift < 64) {
                 ret = frac << shift;
-                if (VI && (ret >> shift) != frac) {
-                    exc = float_flag_overflow;
-                }
+            }
+            /* Check for overflow.  Note the special case of -0x1p63.  */
+            if (shift >= 11 && a != 0xC3E0000000000000ull) {
+                exc = FPCR_IOV | FPCR_INE;
             }
         } else {
             uint64_t round;
@@ -739,7 +492,7 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
             }
 
             if (round) {
-                exc = (VI ? float_flag_inexact : 0);
+                exc = FPCR_INE;
                 switch (roundmode) {
                 case float_round_nearest_even:
                     if (round == (1ull << 63)) {
@@ -764,66 +517,35 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
             ret = -ret;
         }
     }
-    if (unlikely(exc)) {
-        float_raise(exc, &FP_STATUS);
-    }
+    env->error_code = exc;
 
     return ret;
 }
 
 uint64_t helper_cvttq(CPUAlphaState *env, uint64_t a)
 {
-    return inline_cvttq(env, a, FP_STATUS.float_rounding_mode, 1);
+    return do_cvttq(env, a, FP_STATUS.float_rounding_mode);
 }
 
 uint64_t helper_cvttq_c(CPUAlphaState *env, uint64_t a)
 {
-    return inline_cvttq(env, a, float_round_to_zero, 0);
-}
-
-uint64_t helper_cvttq_svic(CPUAlphaState *env, uint64_t a)
-{
-    return inline_cvttq(env, a, float_round_to_zero, 1);
+    return do_cvttq(env, a, float_round_to_zero);
 }
 
 uint64_t helper_cvtqt(CPUAlphaState *env, uint64_t a)
 {
     float64 fr = int64_to_float64(a, &FP_STATUS);
+    env->error_code = soft_to_fpcr_exc(env);
     return float64_to_t(fr);
 }
 
-uint64_t helper_cvtqf(CPUAlphaState *env, uint64_t a)
-{
-    float32 fr = int64_to_float32(a, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-uint64_t helper_cvtgf(CPUAlphaState *env, uint64_t a)
-{
-    float64 fa;
-    float32 fr;
-
-    fa = g_to_float64(env, GETPC(), a);
-    fr = float64_to_float32(fa, &FP_STATUS);
-    return float32_to_f(fr);
-}
-
-uint64_t helper_cvtgq(CPUAlphaState *env, uint64_t a)
-{
-    float64 fa = g_to_float64(env, GETPC(), a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
-}
-
-uint64_t helper_cvtqg(CPUAlphaState *env, uint64_t a)
-{
-    float64 fr;
-    fr = int64_to_float64(a, &FP_STATUS);
-    return float64_to_g(fr);
-}
-
-void helper_fcvtql_v_input(CPUAlphaState *env, uint64_t val)
+uint64_t helper_cvtql(CPUAlphaState *env, uint64_t val)
 {
+    uint32_t exc = 0;
     if (val != (int32_t)val) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
+        exc = FPCR_IOV | FPCR_INE;
     }
+    env->error_code = exc;
+
+    return ((val & 0xc0000000) << 32) | ((val & 0x3fffffff) << 29);
 }
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index a8aa782..46b8ef9 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -25,136 +25,48 @@
 #include "fpu/softfloat.h"
 #include "exec/helper-proto.h"
 
-uint64_t cpu_alpha_load_fpcr (CPUAlphaState *env)
-{
-    uint64_t r = 0;
-    uint8_t t;
-
-    t = env->fpcr_exc_status;
-    if (t) {
-        r = FPCR_SUM;
-        if (t & float_flag_invalid) {
-            r |= FPCR_INV;
-        }
-        if (t & float_flag_divbyzero) {
-            r |= FPCR_DZE;
-        }
-        if (t & float_flag_overflow) {
-            r |= FPCR_OVF;
-        }
-        if (t & float_flag_underflow) {
-            r |= FPCR_UNF;
-        }
-        if (t & float_flag_inexact) {
-            r |= FPCR_INE;
-        }
-    }
 
-    t = env->fpcr_exc_mask;
-    if (t & float_flag_invalid) {
-        r |= FPCR_INVD;
-    }
-    if (t & float_flag_divbyzero) {
-        r |= FPCR_DZED;
-    }
-    if (t & float_flag_overflow) {
-        r |= FPCR_OVFD;
-    }
-    if (t & float_flag_underflow) {
-        r |= FPCR_UNFD;
-    }
-    if (t & float_flag_inexact) {
-        r |= FPCR_INED;
-    }
-
-    switch (env->fpcr_dyn_round) {
-    case float_round_nearest_even:
-        r |= FPCR_DYN_NORMAL;
-        break;
-    case float_round_down:
-        r |= FPCR_DYN_MINUS;
-        break;
-    case float_round_up:
-        r |= FPCR_DYN_PLUS;
-        break;
-    case float_round_to_zero:
-        r |= FPCR_DYN_CHOPPED;
-        break;
-    }
-
-    if (env->fp_status.flush_inputs_to_zero) {
-        r |= FPCR_DNZ;
-    }
-    if (env->fpcr_dnod) {
-        r |= FPCR_DNOD;
-    }
-    if (env->fpcr_undz) {
-        r |= FPCR_UNDZ;
-    }
+#define CONVERT_BIT(X, SRC, DST) \
+    (SRC > DST ? (X) / (SRC / DST) & (DST) : ((X) & SRC) * (DST / SRC))
 
-    return r;
+uint64_t cpu_alpha_load_fpcr (CPUAlphaState *env)
+{
+    return (uint64_t)env->fpcr << 32;
 }
 
 void cpu_alpha_store_fpcr (CPUAlphaState *env, uint64_t val)
 {
-    uint8_t t;
+    uint32_t fpcr = val >> 32;
+    uint32_t t = 0;
 
-    t = 0;
-    if (val & FPCR_INV) {
-        t |= float_flag_invalid;
-    }
-    if (val & FPCR_DZE) {
-        t |= float_flag_divbyzero;
-    }
-    if (val & FPCR_OVF) {
-        t |= float_flag_overflow;
-    }
-    if (val & FPCR_UNF) {
-        t |= float_flag_underflow;
-    }
-    if (val & FPCR_INE) {
-        t |= float_flag_inexact;
-    }
-    env->fpcr_exc_status = t;
+    t |= CONVERT_BIT(fpcr, FPCR_INED, FPCR_INE);
+    t |= CONVERT_BIT(fpcr, FPCR_UNFD, FPCR_UNF);
+    t |= CONVERT_BIT(fpcr, FPCR_OVFD, FPCR_OVF);
+    t |= CONVERT_BIT(fpcr, FPCR_DZED, FPCR_DZE);
+    t |= CONVERT_BIT(fpcr, FPCR_INVD, FPCR_INV);
 
-    t = 0;
-    if (val & FPCR_INVD) {
-        t |= float_flag_invalid;
-    }
-    if (val & FPCR_DZED) {
-        t |= float_flag_divbyzero;
-    }
-    if (val & FPCR_OVFD) {
-        t |= float_flag_overflow;
-    }
-    if (val & FPCR_UNFD) {
-        t |= float_flag_underflow;
-    }
-    if (val & FPCR_INED) {
-        t |= float_flag_inexact;
-    }
-    env->fpcr_exc_mask = t;
+    env->fpcr = fpcr;
+    env->fpcr_exc_enable = ~t & FPCR_STATUS_MASK;
 
-    switch (val & FPCR_DYN_MASK) {
+    switch (fpcr & FPCR_DYN_MASK) {
+    case FPCR_DYN_NORMAL:
+    default:
+        t = float_round_nearest_even;
+        break;
     case FPCR_DYN_CHOPPED:
         t = float_round_to_zero;
         break;
     case FPCR_DYN_MINUS:
         t = float_round_down;
         break;
-    case FPCR_DYN_NORMAL:
-        t = float_round_nearest_even;
-        break;
     case FPCR_DYN_PLUS:
         t = float_round_up;
         break;
     }
     env->fpcr_dyn_round = t;
 
-    env->fpcr_dnod = (val & FPCR_DNOD) != 0;
-    env->fpcr_undz = (val & FPCR_UNDZ) != 0;
-    env->fpcr_flush_to_zero = env->fpcr_dnod & env->fpcr_undz;
-    env->fp_status.flush_inputs_to_zero = (val & FPCR_DNZ) != 0;
+    env->fpcr_flush_to_zero = (fpcr & FPCR_UNFD) && (fpcr & FPCR_UNDZ);
+    env->fp_status.flush_inputs_to_zero = (fpcr & FPCR_DNZ) != 0;
 }
 
 uint64_t helper_load_fpcr(CPUAlphaState *env)
@@ -571,6 +483,8 @@ void QEMU_NORETURN dynamic_excp(CPUAlphaState *env, uintptr_t retaddr,
     env->error_code = error;
     if (retaddr) {
         cpu_restore_state(cs, retaddr);
+        /* Floating-point exceptions (our only users) point to the next PC.  */
+        env->pc += 4;
     }
     cpu_loop_exit(cs);
 }
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index a451cfe..d221f0d 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -1,12 +1,7 @@
 DEF_HELPER_3(excp, noreturn, env, int, int)
 DEF_HELPER_FLAGS_1(load_pcc, TCG_CALL_NO_RWG_SE, i64, env)
 
-DEF_HELPER_FLAGS_3(addqv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(addlv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(subqv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(sublv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(mullv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(mulqv, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(check_overflow, TCG_CALL_NO_WG, void, env, i64, i64)
 
 DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_NO_RWG_SE, i64, i64)
@@ -83,18 +78,17 @@ DEF_HELPER_FLAGS_2(cvtqg, TCG_CALL_NO_RWG, i64, env, i64)
 
 DEF_HELPER_FLAGS_2(cvttq, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_FLAGS_2(cvttq_c, TCG_CALL_NO_RWG, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvttq_svic, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_2(cvtql, TCG_CALL_NO_RWG, i64, env, i64)
 
 DEF_HELPER_FLAGS_2(setroundmode, TCG_CALL_NO_RWG, void, env, i32)
 DEF_HELPER_FLAGS_2(setflushzero, TCG_CALL_NO_RWG, void, env, i32)
-DEF_HELPER_FLAGS_1(fp_exc_clear, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_1(fp_exc_get, TCG_CALL_NO_RWG_SE, i32, env)
 DEF_HELPER_FLAGS_3(fp_exc_raise, TCG_CALL_NO_WG, void, env, i32, i32)
 DEF_HELPER_FLAGS_3(fp_exc_raise_s, TCG_CALL_NO_WG, void, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(ieee_input, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64)
-DEF_HELPER_FLAGS_2(fcvtql_v_input, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64)
 
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_2(hw_ret, void, env, i64)
diff --git a/target-alpha/int_helper.c b/target-alpha/int_helper.c
index 7a205eb..29e927f 100644
--- a/target-alpha/int_helper.c
+++ b/target-alpha/int_helper.c
@@ -37,35 +37,65 @@ uint64_t helper_cttz(uint64_t arg)
     return ctz64(arg);
 }
 
-static inline uint64_t byte_zap(uint64_t op, uint8_t mskb)
+uint64_t helper_zapnot(uint64_t val, uint64_t mskb)
 {
     uint64_t mask;
 
-    mask = 0;
-    mask |= ((mskb >> 0) & 1) * 0x00000000000000FFULL;
-    mask |= ((mskb >> 1) & 1) * 0x000000000000FF00ULL;
-    mask |= ((mskb >> 2) & 1) * 0x0000000000FF0000ULL;
-    mask |= ((mskb >> 3) & 1) * 0x00000000FF000000ULL;
-    mask |= ((mskb >> 4) & 1) * 0x000000FF00000000ULL;
-    mask |= ((mskb >> 5) & 1) * 0x0000FF0000000000ULL;
-    mask |= ((mskb >> 6) & 1) * 0x00FF000000000000ULL;
-    mask |= ((mskb >> 7) & 1) * 0xFF00000000000000ULL;
-
-    return op & ~mask;
-}
+    mask  = -(mskb & 0x01) & 0x00000000000000ffull;
+    mask |= -(mskb & 0x02) & 0x000000000000ff00ull;
+    mask |= -(mskb & 0x04) & 0x0000000000ff0000ull;
+    mask |= -(mskb & 0x08) & 0x00000000ff000000ull;
+    mask |= -(mskb & 0x10) & 0x000000ff00000000ull;
+    mask |= -(mskb & 0x20) & 0x0000ff0000000000ull;
+    mask |= -(mskb & 0x40) & 0x00ff000000000000ull;
+    mask |= -(mskb & 0x80) & 0xff00000000000000ull;
 
-uint64_t helper_zap(uint64_t val, uint64_t mask)
-{
-    return byte_zap(val, mask);
+    return val & mask;
 }
 
-uint64_t helper_zapnot(uint64_t val, uint64_t mask)
+uint64_t helper_zap(uint64_t val, uint64_t mask)
 {
-    return byte_zap(val, ~mask);
+    return helper_zapnot(val, ~mask);
 }
 
 uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
 {
+#if defined(__SSE2__)
+    uint64_t r;
+
+    /* The cmpbge instruction is heavily used in the implementation of
+       every string function on Alpha.  We can do much better than either
+       the default loop below, or even an unrolled version by using the
+       native vector support.  */
+    {
+        typedef uint64_t Q __attribute__((vector_size(16)));
+        typedef uint8_t B __attribute__((vector_size(16)));
+
+        Q q1 = (Q){ op1, 0 };
+        Q q2 = (Q){ op2, 0 };
+
+        q1 = (Q)((B)q1 >= (B)q2);
+
+        r = q1[0];
+    }
+
+    /* Select only one bit from each byte.  */
+    r &= 0x0101010101010101;
+
+    /* Collect the bits into the bottom byte.  */
+    /* .......A.......B.......C.......D.......E.......F.......G.......H */
+    r |= r >> (8 - 1);
+
+    /* .......A......AB......BC......CD......DE......EF......FG......GH */
+    r |= r >> (16 - 2);
+
+    /* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
+    r |= r >> (32 - 4);
+
+    /* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
+    /* Return only the low 8 bits.  */
+    return r & 0xff;
+#else
     uint8_t opa, opb, res;
     int i;
 
@@ -78,6 +108,7 @@ uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
         }
     }
     return res;
+#endif
 }
 
 uint64_t helper_minub8(uint64_t op1, uint64_t op2)
@@ -249,64 +280,9 @@ uint64_t helper_unpkbw(uint64_t op1)
             | ((op1 & 0xff000000) << 24));
 }
 
-uint64_t helper_addqv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
+void helper_check_overflow(CPUAlphaState *env, uint64_t op1, uint64_t op2)
 {
-    uint64_t tmp = op1;
-    op1 += op2;
-    if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
-    }
-    return op1;
-}
-
-uint64_t helper_addlv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
-{
-    uint64_t tmp = op1;
-    op1 = (uint32_t)(op1 + op2);
-    if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
-    }
-    return op1;
-}
-
-uint64_t helper_subqv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
-{
-    uint64_t res;
-    res = op1 - op2;
-    if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
-    }
-    return res;
-}
-
-uint64_t helper_sublv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
-{
-    uint32_t res;
-    res = op1 - op2;
-    if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
-    }
-    return res;
-}
-
-uint64_t helper_mullv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
-{
-    int64_t res = (int64_t)op1 * (int64_t)op2;
-
-    if (unlikely((int32_t)res != res)) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
-    }
-    return (int64_t)((int32_t)res);
-}
-
-uint64_t helper_mulqv(CPUAlphaState *env, uint64_t op1, uint64_t op2)
-{
-    uint64_t tl, th;
-
-    muls64(&tl, &th, op1, op2);
-    /* If th != 0 && th != -1, then we had an overflow */
-    if (unlikely((th + 1) > 1)) {
+    if (unlikely(op1 != op2)) {
         arith_excp(env, GETPC(), EXC_M_IOV, 0);
     }
-    return tl;
 }
diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c
index fc4f57a..7b5e30d 100644
--- a/target-alpha/mem_helper.c
+++ b/target-alpha/mem_helper.c
@@ -128,7 +128,14 @@ void alpha_cpu_unassigned_access(CPUState *cs, hwaddr addr,
 
     env->trap_arg0 = addr;
     env->trap_arg1 = is_write ? 1 : 0;
-    dynamic_excp(env, 0, EXCP_MCHK, 0);
+    cs->exception_index = EXCP_MCHK;
+    env->error_code = 0;
+
+    /* ??? We should cpu_restore_state to the faulting insn, but this hook
+       does not have access to the retaddr value from the original helper.
+       It's all moot until the QEMU PALcode grows an MCHK handler.  */
+
+    cpu_loop_exit(cs);
 }
 
 /* try to fill the TLB and return an exception if error. If retaddr is
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index efeeb05..e9927b5 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -658,43 +658,36 @@ static TCGv gen_ieee_input(DisasContext *ctx, int reg, int fn11, int is_cmp)
             } else {
                 gen_helper_ieee_input(cpu_env, val);
             }
+        } else {
+#ifndef CONFIG_USER_ONLY
+            /* In system mode, raise exceptions for denormals like real
+               hardware.  In user mode, proceed as if the OS completion
+               handler is handling the denormal as per spec.  */
+            gen_helper_ieee_input_s(cpu_env, val);
+#endif
         }
     }
     return val;
 }
 
-static void gen_fp_exc_clear(void)
-{
-#if defined(CONFIG_SOFTFLOAT_INLINE)
-    TCGv_i32 zero = tcg_const_i32(0);
-    tcg_gen_st8_i32(zero, cpu_env,
-                    offsetof(CPUAlphaState, fp_status.float_exception_flags));
-    tcg_temp_free_i32(zero);
-#else
-    gen_helper_fp_exc_clear(cpu_env);
-#endif
-}
-
-static void gen_fp_exc_raise_ignore(int rc, int fn11, int ignore)
+static void gen_fp_exc_raise(int rc, int fn11)
 {
     /* ??? We ought to be able to do something with imprecise exceptions.
        E.g. notice we're still in the trap shadow of something within the
        TB and do not generate the code to signal the exception; end the TB
        when an exception is forced to arrive, either by consumption of a
        register value or TRAPB or EXCB.  */
-    TCGv_i32 exc = tcg_temp_new_i32();
-    TCGv_i32 reg;
+    TCGv_i32 reg, ign;
+    uint32_t ignore = 0;
 
-#if defined(CONFIG_SOFTFLOAT_INLINE)
-    tcg_gen_ld8u_i32(exc, cpu_env,
-                     offsetof(CPUAlphaState, fp_status.float_exception_flags));
-#else
-    gen_helper_fp_exc_get(exc, cpu_env);
-#endif
-
-    if (ignore) {
-        tcg_gen_andi_i32(exc, exc, ~ignore);
+    if (!(fn11 & QUAL_U)) {
+        /* Note that QUAL_U == QUAL_V, so ignore either.  */
+        ignore |= FPCR_UNF | FPCR_IOV;
     }
+    if (!(fn11 & QUAL_I)) {
+        ignore |= FPCR_INE;
+    }
+    ign = tcg_const_i32(ignore);
 
     /* ??? Pass in the regno of the destination so that the helper can
        set EXC_MASK, which contains a bitmask of destination registers
@@ -702,23 +695,17 @@ static void gen_fp_exc_raise_ignore(int rc, int fn11, int ignore)
        does not require this.  We do need it for a guest kernel's entArith,
        or if we were to do something clever with imprecise exceptions.  */
     reg = tcg_const_i32(rc + 32);
-
     if (fn11 & QUAL_S) {
-        gen_helper_fp_exc_raise_s(cpu_env, exc, reg);
+        gen_helper_fp_exc_raise_s(cpu_env, ign, reg);
     } else {
-        gen_helper_fp_exc_raise(cpu_env, exc, reg);
+        gen_helper_fp_exc_raise(cpu_env, ign, reg);
     }
 
     tcg_temp_free_i32(reg);
-    tcg_temp_free_i32(exc);
+    tcg_temp_free_i32(ign);
 }
 
-static inline void gen_fp_exc_raise(int rc, int fn11)
-{
-    gen_fp_exc_raise_ignore(rc, fn11, fn11 & QUAL_I ? 0 : float_flag_inexact);
-}
-
-static void gen_fcvtlq(TCGv vc, TCGv vb)
+static void gen_cvtlq(TCGv vc, TCGv vb)
 {
     TCGv tmp = tcg_temp_new();
 
@@ -733,19 +720,6 @@ static void gen_fcvtlq(TCGv vc, TCGv vb)
     tcg_temp_free(tmp);
 }
 
-static void gen_fcvtql(TCGv vc, TCGv vb)
-{
-    TCGv tmp = tcg_temp_new();
-
-    tcg_gen_andi_i64(tmp, vb, (int32_t)0xc0000000);
-    tcg_gen_andi_i64(vc, vb, 0x3FFFFFFF);
-    tcg_gen_shli_i64(tmp, tmp, 32);
-    tcg_gen_shli_i64(vc, vc, 29);
-    tcg_gen_or_i64(vc, vc, tmp);
-
-    tcg_temp_free(tmp);
-}
-
 static void gen_ieee_arith2(DisasContext *ctx,
                             void (*helper)(TCGv, TCGv_ptr, TCGv),
                             int rb, int rc, int fn11)
@@ -754,7 +728,6 @@ static void gen_ieee_arith2(DisasContext *ctx,
 
     gen_qual_roundmode(ctx, fn11);
     gen_qual_flushzero(ctx, fn11);
-    gen_fp_exc_clear();
 
     vb = gen_ieee_input(ctx, rb, fn11, 0);
     helper(dest_fpr(ctx, rc), cpu_env, vb);
@@ -763,8 +736,8 @@ static void gen_ieee_arith2(DisasContext *ctx,
 }
 
 #define IEEE_ARITH2(name)                                       \
-static inline void glue(gen_f, name)(DisasContext *ctx,         \
-                                     int rb, int rc, int fn11)  \
+static inline void glue(gen_, name)(DisasContext *ctx,          \
+                                    int rb, int rc, int fn11)   \
 {                                                               \
     gen_ieee_arith2(ctx, gen_helper_##name, rb, rc, fn11);      \
 }
@@ -773,38 +746,23 @@ IEEE_ARITH2(sqrtt)
 IEEE_ARITH2(cvtst)
 IEEE_ARITH2(cvtts)
 
-static void gen_fcvttq(DisasContext *ctx, int rb, int rc, int fn11)
+static void gen_cvttq(DisasContext *ctx, int rb, int rc, int fn11)
 {
     TCGv vb, vc;
-    int ignore = 0;
 
     /* No need to set flushzero, since we have an integer output.  */
-    gen_fp_exc_clear();
     vb = gen_ieee_input(ctx, rb, fn11, 0);
     vc = dest_fpr(ctx, rc);
 
-    /* Almost all integer conversions use cropped rounding, and most
-       also do not have integer overflow enabled.  Special case that.  */
-    switch (fn11) {
-    case QUAL_RM_C:
+    /* Almost all integer conversions use chopped rounding;
+       special case that.  */
+    if ((fn11 & QUAL_RM_MASK) == QUAL_RM_C) {
         gen_helper_cvttq_c(vc, cpu_env, vb);
-        break;
-    case QUAL_V | QUAL_RM_C:
-    case QUAL_S | QUAL_V | QUAL_RM_C:
-        ignore = float_flag_inexact;
-        /* FALLTHRU */
-    case QUAL_S | QUAL_V | QUAL_I | QUAL_RM_C:
-        gen_helper_cvttq_svic(vc, cpu_env, vb);
-        break;
-    default:
+    } else {
         gen_qual_roundmode(ctx, fn11);
         gen_helper_cvttq(vc, cpu_env, vb);
-        ignore |= (fn11 & QUAL_V ? 0 : float_flag_overflow);
-        ignore |= (fn11 & QUAL_I ? 0 : float_flag_inexact);
-        break;
     }
-
-    gen_fp_exc_raise_ignore(rc, fn11, ignore);
+    gen_fp_exc_raise(rc, fn11);
 }
 
 static void gen_ieee_intcvt(DisasContext *ctx,
@@ -821,7 +779,6 @@ static void gen_ieee_intcvt(DisasContext *ctx,
        is inexact.  Thus we only need to worry about exceptions when
        inexact handling is requested.  */
     if (fn11 & QUAL_I) {
-        gen_fp_exc_clear();
         helper(vc, cpu_env, vb);
         gen_fp_exc_raise(rc, fn11);
     } else {
@@ -830,8 +787,8 @@ static void gen_ieee_intcvt(DisasContext *ctx,
 }
 
 #define IEEE_INTCVT(name)                                       \
-static inline void glue(gen_f, name)(DisasContext *ctx,         \
-                                     int rb, int rc, int fn11)  \
+static inline void glue(gen_, name)(DisasContext *ctx,          \
+                                    int rb, int rc, int fn11)   \
 {                                                               \
     gen_ieee_intcvt(ctx, gen_helper_##name, rb, rc, fn11);      \
 }
@@ -864,7 +821,6 @@ static void gen_ieee_arith3(DisasContext *ctx,
 
     gen_qual_roundmode(ctx, fn11);
     gen_qual_flushzero(ctx, fn11);
-    gen_fp_exc_clear();
 
     va = gen_ieee_input(ctx, ra, fn11, 0);
     vb = gen_ieee_input(ctx, rb, fn11, 0);
@@ -875,8 +831,8 @@ static void gen_ieee_arith3(DisasContext *ctx,
 }
 
 #define IEEE_ARITH3(name)                                               \
-static inline void glue(gen_f, name)(DisasContext *ctx,                 \
-                                     int ra, int rb, int rc, int fn11)  \
+static inline void glue(gen_, name)(DisasContext *ctx,                  \
+                                    int ra, int rb, int rc, int fn11)   \
 {                                                                       \
     gen_ieee_arith3(ctx, gen_helper_##name, ra, rb, rc, fn11);          \
 }
@@ -895,8 +851,6 @@ static void gen_ieee_compare(DisasContext *ctx,
 {
     TCGv va, vb, vc;
 
-    gen_fp_exc_clear();
-
     va = gen_ieee_input(ctx, ra, fn11, 1);
     vb = gen_ieee_input(ctx, rb, fn11, 1);
     vc = dest_fpr(ctx, rc);
@@ -906,8 +860,8 @@ static void gen_ieee_compare(DisasContext *ctx,
 }
 
 #define IEEE_CMP3(name)                                                 \
-static inline void glue(gen_f, name)(DisasContext *ctx,                 \
-                                     int ra, int rb, int rc, int fn11)  \
+static inline void glue(gen_, name)(DisasContext *ctx,                  \
+                                    int ra, int rb, int rc, int fn11)   \
 {                                                                       \
     gen_ieee_compare(ctx, gen_helper_##name, ra, rb, rc, fn11);         \
 }
@@ -1382,6 +1336,13 @@ static ExitStatus gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
 }
 #endif /* !USER_ONLY*/
 
+#define REQUIRE_NO_LIT                          \
+    do {                                        \
+        if (real_islit) {                       \
+            goto invalid_opc;                   \
+        }                                       \
+    } while (0)
+
 #define REQUIRE_TB_FLAG(FLAG)                   \
     do {                                        \
         if ((ctx->tb->flags & (FLAG)) == 0) {   \
@@ -1401,8 +1362,8 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
     int32_t disp21, disp16, disp12 __attribute__((unused));
     uint16_t fn11;
     uint8_t opc, ra, rb, rc, fpfn, fn7, lit;
-    bool islit;
-    TCGv va, vb, vc, tmp;
+    bool islit, real_islit;
+    TCGv va, vb, vc, tmp, tmp2;
     TCGv_i32 t32;
     ExitStatus ret;
 
@@ -1411,7 +1372,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
     ra = extract32(insn, 21, 5);
     rb = extract32(insn, 16, 5);
     rc = extract32(insn, 0, 5);
-    islit = extract32(insn, 12, 1);
+    real_islit = islit = extract32(insn, 12, 1);
     lit = extract32(insn, 13, 8);
 
     disp21 = sextract32(insn, 0, 21);
@@ -1614,11 +1575,23 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x40:
             /* ADDL/V */
-            gen_helper_addlv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tcg_gen_ext32s_i64(tmp, va);
+            tcg_gen_ext32s_i64(vc, vb);
+            tcg_gen_add_i64(tmp, tmp, vc);
+            tcg_gen_ext32s_i64(vc, tmp);
+            gen_helper_check_overflow(cpu_env, vc, tmp);
+            tcg_temp_free(tmp);
             break;
         case 0x49:
             /* SUBL/V */
-            gen_helper_sublv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tcg_gen_ext32s_i64(tmp, va);
+            tcg_gen_ext32s_i64(vc, vb);
+            tcg_gen_sub_i64(tmp, tmp, vc);
+            tcg_gen_ext32s_i64(vc, tmp);
+            gen_helper_check_overflow(cpu_env, vc, tmp);
+            tcg_temp_free(tmp);
             break;
         case 0x4D:
             /* CMPLT */
@@ -1626,11 +1599,33 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x60:
             /* ADDQ/V */
-            gen_helper_addqv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tmp2 = tcg_temp_new();
+            tcg_gen_eqv_i64(tmp, va, vb);
+            tcg_gen_mov_i64(tmp2, va);
+            tcg_gen_add_i64(vc, va, vb);
+            tcg_gen_xor_i64(tmp2, tmp2, vc);
+            tcg_gen_and_i64(tmp, tmp, tmp2);
+            tcg_gen_shri_i64(tmp, tmp, 63);
+            tcg_gen_movi_i64(tmp2, 0);
+            gen_helper_check_overflow(cpu_env, tmp, tmp2);
+            tcg_temp_free(tmp);
+            tcg_temp_free(tmp2);
             break;
         case 0x69:
             /* SUBQ/V */
-            gen_helper_subqv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tmp2 = tcg_temp_new();
+            tcg_gen_xor_i64(tmp, va, vb);
+            tcg_gen_mov_i64(tmp2, va);
+            tcg_gen_sub_i64(vc, va, vb);
+            tcg_gen_xor_i64(tmp2, tmp2, vc);
+            tcg_gen_and_i64(tmp, tmp, tmp2);
+            tcg_gen_shri_i64(tmp, tmp, 63);
+            tcg_gen_movi_i64(tmp2, 0);
+            gen_helper_check_overflow(cpu_env, tmp, tmp2);
+            tcg_temp_free(tmp);
+            tcg_temp_free(tmp2);
             break;
         case 0x6D:
             /* CMPLE */
@@ -1925,11 +1920,23 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x40:
             /* MULL/V */
-            gen_helper_mullv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tcg_gen_ext32s_i64(tmp, va);
+            tcg_gen_ext32s_i64(vc, vb);
+            tcg_gen_mul_i64(tmp, tmp, vc);
+            tcg_gen_ext32s_i64(vc, tmp);
+            gen_helper_check_overflow(cpu_env, vc, tmp);
+            tcg_temp_free(tmp);
             break;
         case 0x60:
             /* MULQ/V */
-            gen_helper_mulqv(vc, cpu_env, va, vb);
+            tmp = tcg_temp_new();
+            tmp2 = tcg_temp_new();
+            tcg_gen_muls2_i64(vc, tmp, va, vb);
+            tcg_gen_sari_i64(tmp2, vc, 63);
+            gen_helper_check_overflow(cpu_env, tmp, tmp2);
+            tcg_temp_free(tmp);
+            tcg_temp_free(tmp2);
             break;
         default:
             goto invalid_opc;
@@ -1958,7 +1965,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         case 0x0B:
             /* SQRTS */
             REQUIRE_REG_31(ra);
-            gen_fsqrts(ctx, rb, rc, fn11);
+            gen_sqrts(ctx, rb, rc, fn11);
             break;
         case 0x14:
             /* ITOFF */
@@ -1984,7 +1991,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         case 0x02B:
             /* SQRTT */
             REQUIRE_REG_31(ra);
-            gen_fsqrtt(ctx, rb, rc, fn11);
+            gen_sqrtt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -2080,76 +2087,76 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDS */
-            gen_fadds(ctx, ra, rb, rc, fn11);
+            gen_adds(ctx, ra, rb, rc, fn11);
             break;
         case 0x01:
             /* SUBS */
-            gen_fsubs(ctx, ra, rb, rc, fn11);
+            gen_subs(ctx, ra, rb, rc, fn11);
             break;
         case 0x02:
             /* MULS */
-            gen_fmuls(ctx, ra, rb, rc, fn11);
+            gen_muls(ctx, ra, rb, rc, fn11);
             break;
         case 0x03:
             /* DIVS */
-            gen_fdivs(ctx, ra, rb, rc, fn11);
+            gen_divs(ctx, ra, rb, rc, fn11);
             break;
         case 0x20:
             /* ADDT */
-            gen_faddt(ctx, ra, rb, rc, fn11);
+            gen_addt(ctx, ra, rb, rc, fn11);
             break;
         case 0x21:
             /* SUBT */
-            gen_fsubt(ctx, ra, rb, rc, fn11);
+            gen_subt(ctx, ra, rb, rc, fn11);
             break;
         case 0x22:
             /* MULT */
-            gen_fmult(ctx, ra, rb, rc, fn11);
+            gen_mult(ctx, ra, rb, rc, fn11);
             break;
         case 0x23:
             /* DIVT */
-            gen_fdivt(ctx, ra, rb, rc, fn11);
+            gen_divt(ctx, ra, rb, rc, fn11);
             break;
         case 0x24:
             /* CMPTUN */
-            gen_fcmptun(ctx, ra, rb, rc, fn11);
+            gen_cmptun(ctx, ra, rb, rc, fn11);
             break;
         case 0x25:
             /* CMPTEQ */
-            gen_fcmpteq(ctx, ra, rb, rc, fn11);
+            gen_cmpteq(ctx, ra, rb, rc, fn11);
             break;
         case 0x26:
             /* CMPTLT */
-            gen_fcmptlt(ctx, ra, rb, rc, fn11);
+            gen_cmptlt(ctx, ra, rb, rc, fn11);
             break;
         case 0x27:
             /* CMPTLE */
-            gen_fcmptle(ctx, ra, rb, rc, fn11);
+            gen_cmptle(ctx, ra, rb, rc, fn11);
             break;
         case 0x2C:
             REQUIRE_REG_31(ra);
             if (fn11 == 0x2AC || fn11 == 0x6AC) {
                 /* CVTST */
-                gen_fcvtst(ctx, rb, rc, fn11);
+                gen_cvtst(ctx, rb, rc, fn11);
             } else {
                 /* CVTTS */
-                gen_fcvtts(ctx, rb, rc, fn11);
+                gen_cvtts(ctx, rb, rc, fn11);
             }
             break;
         case 0x2F:
             /* CVTTQ */
             REQUIRE_REG_31(ra);
-            gen_fcvttq(ctx, rb, rc, fn11);
+            gen_cvttq(ctx, rb, rc, fn11);
             break;
         case 0x3C:
             /* CVTQS */
             REQUIRE_REG_31(ra);
-            gen_fcvtqs(ctx, rb, rc, fn11);
+            gen_cvtqs(ctx, rb, rc, fn11);
             break;
         case 0x3E:
             /* CVTQT */
             REQUIRE_REG_31(ra);
-            gen_fcvtqt(ctx, rb, rc, fn11);
+            gen_cvtqt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -2163,7 +2170,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             REQUIRE_REG_31(ra);
             vc = dest_fpr(ctx, rc);
             vb = load_fpr(ctx, rb);
-            gen_fcvtlq(vc, vb);
+            gen_cvtlq(vc, vb);
             break;
         case 0x020:
             /* CPYS */
@@ -2199,6 +2206,11 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             /* MT_FPCR */
             va = load_fpr(ctx, ra);
             gen_helper_store_fpcr(cpu_env, va);
+            if (ctx->tb_rm == QUAL_RM_D) {
+                /* Re-do the copy of the rounding mode to fp_status
+                   the next time we use dynamic rounding.  */
+                ctx->tb_rm = -1;
+            }
             break;
         case 0x025:
             /* MF_FPCR */
@@ -2229,25 +2241,14 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             /* FCMOVGT */
             gen_fcmov(ctx, TCG_COND_GT, ra, rb, rc);
             break;
-        case 0x030:
-            /* CVTQL */
-            REQUIRE_REG_31(ra);
-            vc = dest_fpr(ctx, rc);
-            vb = load_fpr(ctx, rb);
-            gen_fcvtql(vc, vb);
-            break;
-        case 0x130:
-            /* CVTQL/V */
-        case 0x530:
-            /* CVTQL/SV */
+        case 0x030: /* CVTQL */
+        case 0x130: /* CVTQL/V */
+        case 0x530: /* CVTQL/SV */
             REQUIRE_REG_31(ra);
-            /* ??? I'm pretty sure there's nothing that /sv needs to do that
-               /v doesn't do.  The only thing I can think is that /sv is a
-               valid instruction merely for completeness in the ISA.  */
             vc = dest_fpr(ctx, rc);
             vb = load_fpr(ctx, rb);
-            gen_helper_fcvtql_v_input(cpu_env, vb);
-            gen_fcvtql(vc, vb);
+            gen_helper_cvtql(vc, cpu_env, vb);
+            gen_fp_exc_raise(rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -2307,6 +2308,10 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             /* WH64 */
             /* No-op */
             break;
+        case 0xFC00:
+            /* WH64EN */
+            /* No-op */
+            break;
         default:
             goto invalid_opc;
         }
@@ -2451,11 +2456,13 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             /* CTPOP */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_ctpop(vc, vb);
             break;
         case 0x31:
             /* PERR */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_MVI);
+            REQUIRE_NO_LIT;
             va = load_gpr(ctx, ra);
             gen_helper_perr(vc, va, vb);
             break;
@@ -2463,36 +2470,42 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             /* CTLZ */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_ctlz(vc, vb);
             break;
         case 0x33:
             /* CTTZ */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_CIX);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_cttz(vc, vb);
             break;
         case 0x34:
             /* UNPKBW */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_MVI);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_unpkbw(vc, vb);
             break;
         case 0x35:
             /* UNPKBL */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_MVI);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_unpkbl(vc, vb);
             break;
         case 0x36:
             /* PKWB */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_MVI);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_pkwb(vc, vb);
             break;
         case 0x37:
             /* PKLB */
             REQUIRE_TB_FLAG(TB_FLAGS_AMASK_MVI);
             REQUIRE_REG_31(ra);
+            REQUIRE_NO_LIT;
             gen_helper_pklb(vc, vb);
             break;
         case 0x38:
diff --git a/target-alpha/vax_helper.c b/target-alpha/vax_helper.c
new file mode 100644
index 0000000..2e2f499
--- /dev/null
+++ b/target-alpha/vax_helper.c
@@ -0,0 +1,353 @@
+/*
+ *  Helpers for vax floating point instructions.
+ *
+ *  Copyright (c) 2007 Jocelyn Mayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#define FP_STATUS (env->fp_status)
+
+
+/* F floating (VAX) */
+/*
+ * Convert an IEEE single to the 64-bit register image of a VAX F value:
+ * sign in bit 63, 11-bit exponent in 62:52, fraction in 51:29 -- the
+ * layout f_to_float32() and helper_memory_to_f() use.  NaN, infinity and
+ * biased exponents of 253 or more yield 1, a "dirty zero".
+ */
+static uint64_t float32_to_f(float32 fa)
+{
+    uint64_t exp, mant, sig;
+    CPU_FloatU a;
+
+    a.f = fa;
+    sig = ((uint64_t)a.l & 0x80000000) << 32;
+    exp = (a.l >> 23) & 0xff;
+    mant = ((uint64_t)a.l & 0x007fffff) << 29;
+
+    if (exp >= 253) {
+        /* NaN, infinity or overflow: VAX dirty zero */
+        return 1;
+    }
+    if (exp == 0) {
+        /* Zero, or a denormal kept with exponent field 1.
+           NOTE(review): the denormal is not renormalised -- confirm.  */
+        return mant ? sig | ((uint64_t)1 << 52) | mant : 0;
+    }
+    /*
+     * Rebias by 2, then widen the 8-bit exponent to 11 bits: its MSB
+     * goes to bit 62 and 61:59 get the complement, i.e. add 0x380.
+     * The fraction is OR-ed in so the value keeps its precision.
+     */
+    return sig | ((exp + 2 + 0x380) << 52) | mant;
+}
+
+/*
+ * Convert the register image of a VAX F value to an IEEE single.
+ * A zero exponent with a nonzero sign or fraction calls dynamic_excp()
+ * with EXCP_OPCDEC; exponents below 3 produce an all-zero result.
+ */
+static float32 f_to_float32(CPUAlphaState *env, uintptr_t retaddr, uint64_t a)
+{
+    /* Exponent MSB is read from bit 62, its low seven bits from 58:52.  */
+    uint32_t vax_exp = ((a >> 55) & 0x80) | ((a >> 52) & 0x7f);
+    /* Sign (bit 63) and the 23 fraction bits (51:29) in IEEE position.  */
+    uint32_t sign_frac = ((a >> 32) & 0x80000000) | ((a >> 29) & 0x007fffff);
+    CPU_FloatU out;
+
+    if (unlikely(vax_exp == 0 && sign_frac != 0)) {
+        /* Reserved operands / Dirty zero */
+        dynamic_excp(env, retaddr, EXCP_OPCDEC, 0);
+    }
+
+    /* Underflow yields zero; otherwise rebias the exponent by 2.  */
+    out.l = vax_exp < 3 ? 0 : ((vax_exp - 2) << 23) | sign_frac;
+    return out.f;
+}
+
+/* Pack the register image of an F value into its 32-bit memory form.  */
+uint32_t helper_f_to_memory(uint64_t a)
+{
+    uint64_t frac_lo = (a >> 13) & 0xffff0000u;    /* 44:29 -> 31:16 */
+    uint64_t exp_frac = (a >> 45) & 0x00003fffu;   /* 58:45 -> 13:0 */
+    uint64_t sign_msb = (a >> 48) & 0x0000c000u;   /* 63:62 -> 15:14 */
+    return frac_lo | exp_frac | sign_msb;
+}
+
+/* Expand a 32-bit VAX F memory image to the register layout: sign in bit 63,
+   exponent in 62:52, fraction in 51:29.  A zero exponent field stays zero.  */
+uint64_t helper_memory_to_f(uint32_t a)
+{
+    uint64_t r = (uint64_t)(a & 0x0000c000) << 48;  /* sign, exponent MSB */
+    r |= (uint64_t)(a & 0x00003fff) << 45;    /* exp<6:0>, fraction high */
+    r |= (uint64_t)(a & 0xffff0000) << 13;    /* fraction low */
+    /* Widen the exponent: 61:59 are ones iff it is nonzero with MSB clear.  */
+    r |= !(a & 0x4000) && (a & 0x3f80) ? 0x7ull << 59 : 0;
+    return r;
+}
+
+/* ??? Emulating VAX arithmetic with IEEE arithmetic is wrong.  We should
+   either implement VAX arithmetic properly or just signal invalid opcode.  */
+
+/* Add two F operands using IEEE single arithmetic (float32_add).  */
+uint64_t helper_addf(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float32 lhs = f_to_float32(env, pc, a);
+    float32 rhs = f_to_float32(env, pc, b);
+
+    return float32_to_f(float32_add(lhs, rhs, &FP_STATUS));
+}
+
+/* Compute a - b on F operands using IEEE single arithmetic (float32_sub).  */
+uint64_t helper_subf(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float32 lhs = f_to_float32(env, pc, a);
+    float32 rhs = f_to_float32(env, pc, b);
+
+    return float32_to_f(float32_sub(lhs, rhs, &FP_STATUS));
+}
+
+/* Multiply two F operands using IEEE single arithmetic (float32_mul).  */
+uint64_t helper_mulf(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float32 lhs = f_to_float32(env, pc, a);
+    float32 rhs = f_to_float32(env, pc, b);
+
+    return float32_to_f(float32_mul(lhs, rhs, &FP_STATUS));
+}
+
+/* Compute a / b on F operands using IEEE single arithmetic (float32_div).  */
+uint64_t helper_divf(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float32 lhs = f_to_float32(env, pc, a);
+    float32 rhs = f_to_float32(env, pc, b);
+
+    return float32_to_f(float32_div(lhs, rhs, &FP_STATUS));
+}
+
+/* Square root of an F operand, computed by float32_sqrt().  */
+uint64_t helper_sqrtf(CPUAlphaState *env, uint64_t t)
+{
+    float32 arg = f_to_float32(env, GETPC(), t);
+    float32 root = float32_sqrt(arg, &FP_STATUS);
+
+    return float32_to_f(root);
+}
+
+
+/* G floating (VAX) */
+/*
+ * Convert an IEEE double to the 64-bit image of a VAX G value (sign in
+ * bit 63, exponent in 62:52, fraction in 51:0), as g_to_float64() reads
+ * it.  NaN, infinity and biased exponents of 2045 or more yield 1, a
+ * "dirty zero".
+ */
+static uint64_t float64_to_g(float64 fa)
+{
+    uint64_t exp, mant, sig;
+    CPU_DoubleU a;
+
+    a.d = fa;
+    sig = a.ll & 0x8000000000000000ull;
+    exp = (a.ll >> 52) & 0x7ff;
+    mant = a.ll & 0x000fffffffffffffull;
+
+    if (exp >= 2045) {
+        /* NaN, infinity or overflow: VAX dirty zero */
+        return 1;
+    }
+    if (exp == 0) {
+        /* Zero, or a denormal kept with exponent field 1.
+           NOTE(review): the denormal is not renormalised -- confirm.  */
+        return mant ? sig | ((uint64_t)1 << 52) | mant : 0;
+    }
+    /*
+     * Normal number: rebias the exponent by 2 and keep the fraction
+     * bits, so that g_to_float64() recovers the value that was
+     * converted.
+     */
+    return sig | ((exp + 2) << 52) | mant;
+}
+
+/*
+ * Convert the register image of a VAX G value to an IEEE double.
+ * A zero exponent with a nonzero sign or fraction calls dynamic_excp()
+ * with EXCP_OPCDEC; exponents below 3 produce an all-zero result.
+ */
+static float64 g_to_float64(CPUAlphaState *env, uintptr_t retaddr, uint64_t a)
+{
+    /* 11-bit exponent taken from bits 62:52.  */
+    uint64_t vax_exp = (a >> 52) & 0x7ff;
+    /* Sign (bit 63) together with the 52 fraction bits.  */
+    uint64_t sign_frac = a & 0x800fffffffffffffull;
+    CPU_DoubleU out;
+
+    if (vax_exp == 0 && sign_frac != 0) {
+        /* Reserved operands / Dirty zero */
+        dynamic_excp(env, retaddr, EXCP_OPCDEC, 0);
+    }
+
+    /* Underflow yields zero; otherwise rebias the exponent by 2.  */
+    out.ll = vax_exp < 3 ? 0 : ((vax_exp - 2) << 52) | sign_frac;
+    return out.d;
+}
+
+/* Reverse the order of the four 16-bit words of a G register image.  */
+uint64_t helper_g_to_memory(uint64_t a)
+{
+    uint64_t w0 = a & 0xffff;
+    uint64_t w1 = (a >> 16) & 0xffff;
+    uint64_t w2 = (a >> 32) & 0xffff;
+    uint64_t w3 = a >> 48;
+    return (w0 << 48) | (w1 << 32) | (w2 << 16) | w3;
+}
+
+/* Reverse the order of the four 16-bit words of a G memory image.  */
+uint64_t helper_memory_to_g(uint64_t a)
+{
+    /* Swap the 32-bit halves, then the 16-bit words inside each half.  */
+    uint64_t halves = (a << 32) | (a >> 32);
+    uint64_t hi = (halves & 0x0000ffff0000ffffull) << 16;
+    uint64_t lo = (halves >> 16) & 0x0000ffff0000ffffull;
+    return hi | lo;
+}
+
+/* Add two G operands using IEEE double arithmetic (float64_add).  */
+uint64_t helper_addg(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    return float64_to_g(float64_add(lhs, rhs, &FP_STATUS));
+}
+
+/* Compute a - b on G operands using IEEE double arithmetic (float64_sub).  */
+uint64_t helper_subg(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    return float64_to_g(float64_sub(lhs, rhs, &FP_STATUS));
+}
+
+/* Multiply two G operands using IEEE double arithmetic (float64_mul).  */
+uint64_t helper_mulg(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    return float64_to_g(float64_mul(lhs, rhs, &FP_STATUS));
+}
+
+/* Compute a / b on G operands using IEEE double arithmetic (float64_div).  */
+uint64_t helper_divg(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    return float64_to_g(float64_div(lhs, rhs, &FP_STATUS));
+}
+
+/* Square root of a G operand, computed by float64_sqrt().  */
+uint64_t helper_sqrtg(CPUAlphaState *env, uint64_t a)
+{
+    float64 arg = g_to_float64(env, GETPC(), a);
+    float64 root = float64_sqrt(arg, &FP_STATUS);
+
+    return float64_to_g(root);
+}
+
+/* G compare-equal: returns 0x4000000000000000 when float64_eq_quiet()
+   reports the converted operands equal, and 0 otherwise.  */
+uint64_t helper_cmpgeq(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    if (!float64_eq_quiet(lhs, rhs, &FP_STATUS)) {
+        return 0;
+    }
+    return 0x4000000000000000ULL;
+}
+
+/* G compare less-or-equal: returns 0x4000000000000000 when float64_le()
+   holds for the converted operands (a <= b), and 0 otherwise.  */
+uint64_t helper_cmpgle(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    if (!float64_le(lhs, rhs, &FP_STATUS)) {
+        return 0;
+    }
+    return 0x4000000000000000ULL;
+}
+
+/* G compare less-than: returns 0x4000000000000000 when float64_lt()
+   holds for the converted operands (a < b), and 0 otherwise.  */
+uint64_t helper_cmpglt(CPUAlphaState *env, uint64_t a, uint64_t b)
+{
+    uintptr_t pc = GETPC();
+    float64 lhs = g_to_float64(env, pc, a);
+    float64 rhs = g_to_float64(env, pc, b);
+
+    if (!float64_lt(lhs, rhs, &FP_STATUS)) {
+        return 0;
+    }
+    return 0x4000000000000000ULL;
+}
+
+/* Convert the integer a to F format by way of int64_to_float32().  */
+uint64_t helper_cvtqf(CPUAlphaState *env, uint64_t a)
+{
+    return float32_to_f(int64_to_float32(a, &FP_STATUS));
+}
+
+/* Narrow a G operand to F format: it is converted to IEEE double,
+   then to IEEE single by float64_to_float32(), then re-encoded.  */
+uint64_t helper_cvtgf(CPUAlphaState *env, uint64_t a)
+{
+    float64 wide = g_to_float64(env, GETPC(), a);
+    float32 narrow = float64_to_float32(wide, &FP_STATUS);
+
+    return float32_to_f(narrow);
+}
+
+uint64_t helper_cvtgq(CPUAlphaState *env, uint64_t a)
+{
+    float64 src = g_to_float64(env, GETPC(), a);    /* G -> IEEE double */
+    return float64_to_int64_round_to_zero(src, &FP_STATUS); /* to integer */
+}
+
+/* Convert the integer a to G format by way of int64_to_float64().  */
+uint64_t helper_cvtqg(CPUAlphaState *env, uint64_t a)
+{
+    float64 as_double = int64_to_float64(a, &FP_STATUS);
+    return float64_to_g(as_double);
+}
author	Peter Maydell <peter.maydell@linaro.org>	2015-05-22 10:06:33 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2015-05-22 10:06:33 +0100
commit	27e1259a69c49ee2dd53385f4ca4ca14b822191d (patch)
tree	3b6d8b7c48c3d1afa7e6ce6ae29bd0c5b01cdf9c /target-alpha
parent	9e549d36e989b14423279fb991b71728a2a4ae7c (diff)
parent	32ad48abd74a997220b841e4e913edeb267aa362 (diff)
download	qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.zip qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.tar.gz qemu-27e1259a69c49ee2dd53385f4ca4ca14b822191d.tar.bz2