about | summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Peter Maydell <peter.maydell@linaro.org> 2021-05-17 20:02:55 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2021-05-17 20:02:55 +0100
commit: 1acbc0fdf238d5c6c51ee3ef502f6a0ce589304a (patch)
tree: 61deea5c5d34d8d8ab4f50534f6aa26c0371d38c /include
parent: 367196caa07ac31443bc360145cc10fbef4fdf92 (diff)
parent: 463b3f0d7fa11054daeb5ca22346f77d566795bf (diff)
download: qemu-1acbc0fdf238d5c6c51ee3ef502f6a0ce589304a.zip
qemu-1acbc0fdf238d5c6c51ee3ef502f6a0ce589304a.tar.gz
qemu-1acbc0fdf238d5c6c51ee3ef502f6a0ce589304a.tar.bz2
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-fp-20210516' into staging
Reorg FloatParts to use QEMU_GENERIC.
Begin replacing the Berkeley float128 routines with FloatParts128.
  - includes a new implementation of float128_muladd
  - includes the snan silencing that was missing from
    float{32,64}_to_float128 and float128_to_float{32,64}.
  - does not include float128_min/max* (written but not yet reviewed).

# gpg: Signature made Sun 16 May 2021 13:27:10 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-fp-20210516: (46 commits)
  softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc
  softfloat: Move round_to_int to softfloat-parts.c.inc
  softfloat: Convert float-to-float conversions with float128
  softfloat: Split float_to_float
  softfloat: Move div_floats to softfloat-parts.c.inc
  softfloat: Introduce sh[lr]_double primitives
  softfloat: Tidy mul128By64To192
  softfloat: Use add192 in mul128To256
  softfloat: Use mulu64 for mul64To128
  softfloat: Move muladd_floats to softfloat-parts.c.inc
  softfloat: Move mul_floats to softfloat-parts.c.inc
  softfloat: Implement float128_add/sub via parts
  softfloat: Move addsub_floats to softfloat-parts.c.inc
  softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h
  softfloat: Move round_canonical to softfloat-parts.c.inc
  softfloat: Move sf_canonicalize to softfloat-parts.c.inc
  softfloat: Move pick_nan_muladd to softfloat-parts.c.inc
  softfloat: Move pick_nan to softfloat-parts.c.inc
  softfloat: Move return_nan to softfloat-parts.c.inc
  softfloat: Convert float128_default_nan to parts
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include')
-rw-r--r-- include/fpu/softfloat-macros.h | 215
-rw-r--r-- include/fpu/softfloat.h | 7
-rw-r--r-- include/qemu/host-utils.h | 291
3 files changed, 380 insertions, 133 deletions
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index a35ec28..ec4e27a 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -83,6 +83,43 @@ this code that are retained.
#define FPU_SOFTFLOAT_MACROS_H
#include "fpu/softfloat-types.h"
+#include "qemu/host-utils.h"
+
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+ return l;
+#else
+ return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+ asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+ return r;
+#else
+ return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
@@ -403,16 +440,12 @@ static inline void
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- add128(
- uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr)
{
- uint64_t z1;
-
- z1 = a1 + b1;
- *z1Ptr = z1;
- *z0Ptr = a0 + b0 + ( z1 < a1 );
-
+ bool c = 0;
+ *z1Ptr = uadd64_carry(a1, b1, &c);
+ *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -423,34 +456,14 @@ static inline void
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- add192(
- uint64_t a0,
- uint64_t a1,
- uint64_t a2,
- uint64_t b0,
- uint64_t b1,
- uint64_t b2,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t b0, uint64_t b1, uint64_t b2,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2;
- int8_t carry0, carry1;
-
- z2 = a2 + b2;
- carry1 = ( z2 < a2 );
- z1 = a1 + b1;
- carry0 = ( z1 < a1 );
- z0 = a0 + b0;
- z1 += carry1;
- z0 += ( z1 < carry1 );
- z0 += carry0;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ bool c = 0;
+ *z2Ptr = uadd64_carry(a2, b2, &c);
+ *z1Ptr = uadd64_carry(a1, b1, &c);
+ *z0Ptr = uadd64_carry(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -461,14 +474,12 @@ static inline void
| `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- sub128(
- uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr)
{
-
- *z1Ptr = a1 - b1;
- *z0Ptr = a0 - b0 - ( a1 < b1 );
-
+ bool c = 0;
+ *z1Ptr = usub64_borrow(a1, b1, &c);
+ *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -479,34 +490,14 @@ static inline void
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- sub192(
- uint64_t a0,
- uint64_t a1,
- uint64_t a2,
- uint64_t b0,
- uint64_t b1,
- uint64_t b2,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t b0, uint64_t b1, uint64_t b2,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2;
- int8_t borrow0, borrow1;
-
- z2 = a2 - b2;
- borrow1 = ( a2 < b2 );
- z1 = a1 - b1;
- borrow0 = ( a1 < b1 );
- z0 = a0 - b0;
- z0 -= ( z1 < borrow1 );
- z1 -= borrow1;
- z0 -= borrow0;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ bool c = 0;
+ *z2Ptr = usub64_borrow(a2, b2, &c);
+ *z1Ptr = usub64_borrow(a1, b1, &c);
+ *z0Ptr = usub64_borrow(a0, b0, &c);
}
/*----------------------------------------------------------------------------
@@ -515,27 +506,10 @@ static inline void
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void
+mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
- uint32_t aHigh, aLow, bHigh, bLow;
- uint64_t z0, zMiddleA, zMiddleB, z1;
-
- aLow = a;
- aHigh = a>>32;
- bLow = b;
- bHigh = b>>32;
- z1 = ( (uint64_t) aLow ) * bLow;
- zMiddleA = ( (uint64_t) aLow ) * bHigh;
- zMiddleB = ( (uint64_t) aHigh ) * bLow;
- z0 = ( (uint64_t) aHigh ) * bHigh;
- zMiddleA += zMiddleB;
- z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
- zMiddleA <<= 32;
- z1 += zMiddleA;
- z0 += ( z1 < zMiddleA );
- *z1Ptr = z1;
- *z0Ptr = z0;
-
+ mulu64(z1Ptr, z0Ptr, a, b);
}
/*----------------------------------------------------------------------------
@@ -546,24 +520,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t
*----------------------------------------------------------------------------*/
static inline void
- mul128By64To192(
- uint64_t a0,
- uint64_t a1,
- uint64_t b,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr
- )
+mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
+ uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
- uint64_t z0, z1, z2, more1;
-
- mul64To128( a1, b, &z1, &z2 );
- mul64To128( a0, b, &z0, &more1 );
- add128( z0, more1, 0, z1, &z0, &z1 );
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
+ uint64_t z0, z1, m1;
+ mul64To128(a1, b, &m1, z2Ptr);
+ mul64To128(a0, b, &z0, &z1);
+ add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
}
/*----------------------------------------------------------------------------
@@ -573,34 +537,21 @@ static inline void
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/
-static inline void
- mul128To256(
- uint64_t a0,
- uint64_t a1,
- uint64_t b0,
- uint64_t b1,
- uint64_t *z0Ptr,
- uint64_t *z1Ptr,
- uint64_t *z2Ptr,
- uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+ uint64_t b0, uint64_t b1,
+ uint64_t *z0Ptr, uint64_t *z1Ptr,
+ uint64_t *z2Ptr, uint64_t *z3Ptr)
{
- uint64_t z0, z1, z2, z3;
- uint64_t more1, more2;
-
- mul64To128( a1, b1, &z2, &z3 );
- mul64To128( a1, b0, &z1, &more2 );
- add128( z1, more2, 0, z2, &z1, &z2 );
- mul64To128( a0, b0, &z0, &more1 );
- add128( z0, more1, 0, z1, &z0, &z1 );
- mul64To128( a0, b1, &more1, &more2 );
- add128( more1, more2, 0, z2, &more1, &z2 );
- add128( z0, z1, 0, more1, &z0, &z1 );
- *z3Ptr = z3;
- *z2Ptr = z2;
- *z1Ptr = z1;
- *z0Ptr = z0;
+ uint64_t z0, z1, z2;
+ uint64_t m0, m1, m2, n1, n2;
+
+ mul64To128(a1, b0, &m1, &m2);
+ mul64To128(a0, b1, &n1, &n2);
+ mul64To128(a1, b1, &z2, z3Ptr);
+ mul64To128(a0, b0, &z0, &z1);
+ add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
+ add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
}
/*----------------------------------------------------------------------------
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 78ad5ca..53f2c2e 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -100,7 +100,10 @@ typedef enum {
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
*----------------------------------------------------------------------------*/
-void float_raise(uint8_t flags, float_status *status);
+static inline void float_raise(uint8_t flags, float_status *status)
+{
+ status->float_exception_flags |= flags;
+}
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
@@ -1194,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
float128 float128_add(float128, float128, float_status *status);
float128 float128_sub(float128, float128, float_status *status);
float128 float128_mul(float128, float128, float_status *status);
+float128 float128_muladd(float128, float128, float128, int,
+ float_status *status);
float128 float128_div(float128, float128, float_status *status);
float128 float128_rem(float128, float128, float_status *status);
float128 float128_sqrt(float128, float_status *status);
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index cdca299..711b221 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@
#ifndef HOST_UTILS_H
#define HOST_UTILS_H
+#include "qemu/compiler.h"
#include "qemu/bswap.h"
#ifdef CONFIG_INT128
@@ -272,6 +273,9 @@ static inline int ctpop64(uint64_t val)
*/
static inline uint8_t revbit8(uint8_t x)
{
+#if __has_builtin(__builtin_bitreverse8)
+ return __builtin_bitreverse8(x);
+#else
/* Assign the correct nibble position. */
x = ((x & 0xf0) >> 4)
| ((x & 0x0f) << 4);
@@ -281,6 +285,7 @@ static inline uint8_t revbit8(uint8_t x)
| ((x & 0x22) << 1)
| ((x & 0x11) << 3);
return x;
+#endif
}
/**
@@ -289,6 +294,9 @@ static inline uint8_t revbit8(uint8_t x)
*/
static inline uint16_t revbit16(uint16_t x)
{
+#if __has_builtin(__builtin_bitreverse16)
+ return __builtin_bitreverse16(x);
+#else
/* Assign the correct byte position. */
x = bswap16(x);
/* Assign the correct nibble position. */
@@ -300,6 +308,7 @@ static inline uint16_t revbit16(uint16_t x)
| ((x & 0x2222) << 1)
| ((x & 0x1111) << 3);
return x;
+#endif
}
/**
@@ -308,6 +317,9 @@ static inline uint16_t revbit16(uint16_t x)
*/
static inline uint32_t revbit32(uint32_t x)
{
+#if __has_builtin(__builtin_bitreverse32)
+ return __builtin_bitreverse32(x);
+#else
/* Assign the correct byte position. */
x = bswap32(x);
/* Assign the correct nibble position. */
@@ -319,6 +331,7 @@ static inline uint32_t revbit32(uint32_t x)
| ((x & 0x22222222u) << 1)
| ((x & 0x11111111u) << 3);
return x;
+#endif
}
/**
@@ -327,6 +340,9 @@ static inline uint32_t revbit32(uint32_t x)
*/
static inline uint64_t revbit64(uint64_t x)
{
+#if __has_builtin(__builtin_bitreverse64)
+ return __builtin_bitreverse64(x);
+#else
/* Assign the correct byte position. */
x = bswap64(x);
/* Assign the correct nibble position. */
@@ -338,6 +354,281 @@ static inline uint64_t revbit64(uint64_t x)
| ((x & 0x2222222222222222ull) << 1)
| ((x & 0x1111111111111111ull) << 3);
return x;
+#endif
+}
+
+/**
+ * sadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * sadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * uadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return *ret < x;
+#endif
+}
+
+/**
+ * uadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+ return __builtin_add_overflow(x, y, ret);
+#else
+ *ret = x + y;
+ return *ret < x;
+#endif
+}
+
+/**
+ * ssub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * ssub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * usub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return x < y;
+#endif
+}
+
+/**
+ * usub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+ return __builtin_sub_overflow(x, y, ret);
+#else
+ *ret = x - y;
+ return x < y;
+#endif
+}
+
+/**
+ * smul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ int64_t z = (int64_t)x * y;
+ *ret = z;
+ return *ret != z;
+#endif
+}
+
+/**
+ * smul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t hi, lo;
+ muls64(&lo, &hi, x, y);
+ *ret = lo;
+ return hi != ((int64_t)lo >> 63);
+#endif
+}
+
+/**
+ * umul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t z = (uint64_t)x * y;
+ *ret = z;
+ return z > UINT32_MAX;
+#endif
+}
+
+/**
+ * umul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+ return __builtin_mul_overflow(x, y, ret);
+#else
+ uint64_t hi;
+ mulu64(ret, &hi, x, y);
+ return hi != 0;
+#endif
+}
+
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+ unsigned long long c = *pcarry;
+ x = __builtin_addcll(x, y, c, &c);
+ *pcarry = c & 1;
+ return x;
+#else
+ bool c = *pcarry;
+ /* This is clang's internal expansion of __builtin_addc. */
+ c = uadd64_overflow(x, c, &x);
+ c |= uadd64_overflow(x, y, &x);
+ *pcarry = c;
+ return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x, @y: minuend and subtrahend
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit difference.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll)
+ unsigned long long b = *pborrow;
+ x = __builtin_subcll(x, y, b, &b);
+ *pborrow = b & 1;
+ return x;
+#else
+ bool b = *pborrow;
+ b = usub64_overflow(x, b, &x);
+ b |= usub64_overflow(x, y, &x);
+ *pborrow = b;
+ return x;
+#endif
}
/* Host type specific sizes of these routines. */