softfloat: Support halving the result of muladd operation

The ARMv8 instruction set includes a fused floating point reciprocal square root step instruction which demands an "(x * y + z) / 2" fused operation. Support this by adding a flag to the softfloat muladd operations which requests that the result is halved before rounding. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net>
author: Peter Maydell <peter.maydell@linaro.org> 2014-02-20 10:35:50 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2014-02-20 10:35:50 +0000
commit: 67d43538aee10b6cfe8f3606c69187a3e142a2ba (patch)
tree: 4eb0637464a3011cc1ecc91b4e4ae9e6867d8445 /fpu
parent: bc242f9bb6324a50e7572c0997904b66b630f73a (diff)
download: qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.zip
qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.tar.gz
qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.tar.bz2
1 files changed, 38 insertions, 0 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index e0ea599..fc0b179 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2372,6 +2372,17 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+            }
+            /* Subtract one to halve, and one again because roundAndPackFloat32
+             * wants one less than the true exponent.
+             */
+            cExp -= 2;
+            cSig = (cSig | 0x00800000) << 7;
+            return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR);
+        }
         return packFloat32(cSign ^ signflip, cExp, cSig);
     }
 
@@ -2408,6 +2419,9 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             /* Throw out the special case of c being an exact zero now */
             shift64RightJamming(pSig64, 32, &pSig64);
             pSig = pSig64;
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat32(zSign, pExp - 1,
                                        pSig STATUS_VAR);
         }
@@ -2472,6 +2486,10 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
         zSig64 <<= shiftcount;
         zExp -= shiftcount;
     }
+    if (flags & float_muladd_halve_result) {
+        zExp--;
+    }
+
     shift64RightJamming(zSig64, 32, &zSig64);
     return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
 }
@@ -4088,6 +4106,17 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+            }
+            /* Subtract one to halve, and one again because roundAndPackFloat64
+             * wants one less than the true exponent.
+             */
+            cExp -= 2;
+            cSig = (cSig | 0x0010000000000000ULL) << 10;
+            return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR);
+        }
         return packFloat64(cSign ^ signflip, cExp, cSig);
     }
 
@@ -4123,6 +4152,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
         if (!cSig) {
             /* Throw out the special case of c being an exact zero now */
             shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat64(zSign, pExp - 1,
                                        pSig1 STATUS_VAR);
         }
@@ -4159,6 +4191,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             zExp--;
         }
         shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
     } else {
         /* Subtraction */
@@ -4209,6 +4244,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
                 zExp -= (shiftcount + 64);
             }
         }
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
     }
 }
author	Peter Maydell <peter.maydell@linaro.org>	2014-02-20 10:35:50 +0000
committer	Peter Maydell <peter.maydell@linaro.org>	2014-02-20 10:35:50 +0000
commit	67d43538aee10b6cfe8f3606c69187a3e142a2ba (patch)
tree	4eb0637464a3011cc1ecc91b4e4ae9e6867d8445 /fpu
parent	bc242f9bb6324a50e7572c0997904b66b630f73a (diff)
download	qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.zip qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.tar.gz qemu-67d43538aee10b6cfe8f3606c69187a3e142a2ba.tar.bz2