path: root/target/arm/tcg/vec_internal.h
Diffstat (limited to 'target/arm/tcg/vec_internal.h')
-rw-r--r--  target/arm/tcg/vec_internal.h  150
1 file changed, 150 insertions(+), 0 deletions(-)
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index 6b93b5a..cf41b03 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -22,6 +22,8 @@
#include "fpu/softfloat.h"
+typedef struct CPUArchState CPUARMState;
+
 /*
  * Note that vector data is stored in host-endian 64-bit chunks,
  * so addressing units smaller than that needs a host-endian fixup.
@@ -221,6 +223,34 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
 int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
 int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
+#define do_ssat_b(val) MIN(MAX(val, INT8_MIN), INT8_MAX)
+#define do_ssat_h(val) MIN(MAX(val, INT16_MIN), INT16_MAX)
+#define do_ssat_s(val) MIN(MAX(val, INT32_MIN), INT32_MAX)
+#define do_usat_b(val) MIN(MAX(val, 0), UINT8_MAX)
+#define do_usat_h(val) MIN(MAX(val, 0), UINT16_MAX)
+#define do_usat_s(val) MIN(MAX(val, 0), UINT32_MAX)
+
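These expand to nested MIN/MAX (QEMU's generic macros from "qemu/osdep.h"), so val is evaluated more than once and should be a plain variable. A minimal usage sketch, assuming a hypothetical caller that widens before clamping:

    /* Hypothetical example, not part of the patch: saturating
     * signed 16-bit addition built on do_ssat_h. */
    static inline int16_t example_sqadd_h(int16_t a, int16_t b)
    {
        int32_t sum = (int32_t)a + b;   /* widen so the add cannot wrap */
        return do_ssat_h(sum);          /* clamp to [INT16_MIN, INT16_MAX] */
    }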
+static inline uint64_t do_urshr(uint64_t x, unsigned sh)
+{
+    if (likely(sh < 64)) {
+        return (x >> sh) + ((x >> (sh - 1)) & 1);
+    } else if (sh == 64) {
+        return x >> 63;
+    } else {
+        return 0;
+    }
+}
+
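do_urshr computes x / 2^sh rounded to nearest, with halves rounding up: the (x >> (sh - 1)) & 1 term is the last bit shifted out, i.e. the rounding bit. Note the fast path assumes sh >= 1; sh == 0 would underflow the rounding-bit shift. A few spot checks (illustrative values, assuming <assert.h>):

    assert(do_urshr(6, 1) == 3);            /* exact: no rounding needed */
    assert(do_urshr(7, 1) == 4);            /* 3.5 rounds up */
    assert(do_urshr(1ull << 63, 64) == 1);  /* only the rounding bit survives */
    assert(do_urshr(123, 65) == 0);         /* everything shifted out */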
+static inline int64_t do_srshr(int64_t x, unsigned sh)
+{
+    if (likely(sh < 64)) {
+        return (x >> sh) + ((x >> (sh - 1)) & 1);
+    } else {
+        /* Rounding the sign bit always produces 0. */
+        return 0;
+    }
+}
+
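The signed variant rounds halfway cases toward positive infinity, since the rounding bit is always added (QEMU assumes arithmetic right shift of negative values); for sh >= 64 only the sign bit could be rounded, which yields 0. Illustrative spot checks, assuming <stdint.h>:

    assert(do_srshr(7, 1) == 4);           /* 3.5 rounds up */
    assert(do_srshr(-7, 1) == -3);         /* -3.5 also rounds toward +inf */
    assert(do_srshr(INT64_MIN, 64) == 0);  /* rounding the sign bit */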
 /**
  * bfdotadd:
  * @sum: addend
@@ -270,6 +300,11 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
 /*
  * Negate as for FPCR.AH=1 -- do not negate NaNs.
  */
+static inline float16 bfloat16_ah_chs(float16 a)
+{
+    return bfloat16_is_any_nan(a) ? a : bfloat16_chs(a);
+}
+
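Under FPCR.AH=1 a NaN operand must come back bit-for-bit unchanged, while any other value simply has its sign bit flipped. A sketch with assumed bfloat16 bit patterns:

    assert(bfloat16_ah_chs(0x3f80) == 0xbf80);  /* 1.0 -> -1.0 */
    assert(bfloat16_ah_chs(0x7fc0) == 0x7fc0);  /* quiet NaN passes through */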
 static inline float16 float16_ah_chs(float16 a)
 {
     return float16_is_any_nan(a) ? a : float16_chs(a);
@@ -300,4 +335,119 @@ static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
     return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
 }
+/* Not actually called directly as a helper, but uses similar machinery. */
+bfloat16 helper_sme2_ah_fmax_b16(bfloat16 a, bfloat16 b, float_status *fpst);
+bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst);
+
+float32 sve_f16_to_f32(float16 f, float_status *fpst);
+float16 sve_f32_to_f16(float32 f, float_status *fpst);
+
+/*
+ * Decode helper functions for predicate as counter.
+ */
+
+typedef struct {
+    unsigned count;
+    unsigned lg2_stride;
+    bool invert;
+} DecodeCounter;
+
+static inline DecodeCounter
+decode_counter(unsigned png, unsigned vl, unsigned v_esz)
+{
+    DecodeCounter ret = { };
+
+    /* C.f. Arm pseudocode CounterToPredicate. */
+    if (likely(png & 0xf)) {
+        unsigned p_esz = ctz32(png);
+
+        /*
+         * maxbit = log2(pl(bits) * 4)
+         *        = log2(vl(bytes) * 4)
+         *        = log2(vl) + 2
+         * maxbit_mask = ones<maxbit:0>
+         *             = (1 << (maxbit + 1)) - 1
+         *             = (1 << (log2(vl) + 2 + 1)) - 1
+         *             = (1 << (log2(vl) + 3)) - 1
+         *             = (pow2ceil(vl) << 3) - 1
+         */
+        ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1);
+        ret.count >>= p_esz + 1;
+
+        ret.invert = (png >> 15) & 1;
+
+        /*
+         * The Arm pseudocode for CounterToPredicate expands the count to
+         * a set of bits, and then the operation proceeds as for the original
+         * interpretation of predicates as a set of bits.
+         *
+         * We can avoid the expansion by adjusting the count and supplying
+         * an element stride.
+         */
+        if (unlikely(p_esz != v_esz)) {
+            if (p_esz < v_esz) {
+                /*
+                 * For predicate esz < vector esz, the expanded predicate
+                 * will have more bits set than will be consumed.
+                 * Adjust the count down, rounding up.
+                 * Consider p_esz = MO_8, v_esz = MO_64, count 14:
+                 * The expanded predicate would be
+                 *     0011 1111 1111 1111
+                 * The significant bits are
+                 *     ...1 ...1 ...1 ...1
+                 */
+                unsigned shift = v_esz - p_esz;
+                unsigned trunc = ret.count >> shift;
+                ret.count = trunc + (ret.count != (trunc << shift));
+            } else {
+                /*
+                 * For predicate esz > vector esz, the expanded predicate
+                 * will have bits set only at power-of-two multiples of
+                 * the vector esz. Bits at other multiples will all be
+                 * false. Adjust the count up, and supply the caller
+                 * with a stride of elements to skip.
+                 */
+                unsigned shift = p_esz - v_esz;
+                ret.count <<= shift;
+                ret.lg2_stride = shift;
+            }
+        }
+    }
+    return ret;
+}
+
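A worked example under assumed encodings, mirroring the MO_8 vs MO_64 case in the comment above: png = (14 << 1) | 1 puts the lowest set bit at bit 0 (p_esz = MO_8) and encodes a count of 14 byte predicates; against a 16-byte vector of 64-bit elements, those 14 bytes span two elements:

    DecodeCounter dc = decode_counter((14 << 1) | 1, 16, 3 /* MO_64 */);
    assert(dc.count == 2);        /* ceil(14 / 8) doubleword elements */
    assert(dc.lg2_stride == 0);   /* no elements to skip */
    assert(!dc.invert);           /* bit 15 of png is clear */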
+/* Extract @len bits from an array of uint64_t at offset @pos bits. */
+static inline uint64_t extractn(uint64_t *p, unsigned pos, unsigned len)
+{
+    uint64_t x;
+
+    p += pos / 64;
+    pos = pos % 64;
+
+    x = p[0];
+    if (pos + len > 64) {
+        x = (x >> pos) | (p[1] << (-pos & 63));
+        pos = 0;
+    }
+    return extract64(x, pos, len);
+}
+
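For a field that straddles the first 64-bit boundary, the low bits come from the top of p[0] and the rest from the bottom of p[1]. Illustrative values:

    uint64_t words[2] = { 0xab00000000000000ull, 0x00000000000000cdull };
    assert(extractn(words, 56, 16) == 0xcdab);  /* 8 bits from each word */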
+/* Deposit @len bits into an array of uint64_t at offset @pos bits. */
+static inline void depositn(uint64_t *p, unsigned pos,
+                            unsigned len, uint64_t val)
+{
+    p += pos / 64;
+    pos = pos % 64;
+
+    if (pos + len <= 64) {
+        p[0] = deposit64(p[0], pos, len, val);
+    } else {
+        unsigned len0 = 64 - pos;
+        unsigned len1 = len - len0;
+
+        p[0] = deposit64(p[0], pos, len0, val);
+        p[1] = deposit64(p[1], 0, len1, val >> len0);
+    }
+}
+
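The matching round trip for depositn splits val the same way: the low len0 bits land in the top of p[0] and the remainder at the bottom of p[1], so extractn recovers the field. Same assumed values:

    uint64_t buf[2] = { 0, 0 };
    depositn(buf, 56, 16, 0xcdab);
    assert(buf[0] == 0xab00000000000000ull);  /* low 8 bits of val */
    assert(buf[1] == 0x00000000000000cdull);  /* high 8 bits of val */
    assert(extractn(buf, 56, 16) == 0xcdab);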
 #endif /* TARGET_ARM_VEC_INTERNAL_H */