Diffstat (limited to 'target/arm/tcg/sve_helper.c')
-rw-r--r--  target/arm/tcg/sve_helper.c | 520 +++++++++++++++++++++----------
1 file changed, 375 insertions(+), 145 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index dd49e67..a2c363a 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -20,15 +20,22 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "internals.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
#include "tcg/tcg-gvec-desc.h"
#include "fpu/softfloat.h"
#include "tcg/tcg.h"
#include "vec_internal.h"
#include "sve_ldst_internal.h"
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/helper-retaddr.h"
+#include "accel/tcg/cpu-ops.h"
+#include "accel/tcg/probe.h"
+#ifdef CONFIG_USER_ONLY
+#include "user/page-protection.h"
+#endif
/* Return a value for NZCV as per the ARM PredTest pseudofunction.
@@ -730,7 +737,7 @@ DO_ZPZZ_PAIR_D(sve2_sminp_zpzz_d, int64_t, DO_MIN)
#define DO_ZPZZ_PAIR_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i, opr_sz = simd_oprsz(desc); \
for (i = 0; i < opr_sz; ) { \
@@ -876,12 +883,28 @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
+#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N))
+#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N))
+#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N))
+
+DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H)
+DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S)
+DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D)
+
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
+#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N))
+#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N))
+#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N))
+
+DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H)
+DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S)
+DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D)
+
#define DO_NOT(N) (~N)
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
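
To make the bit trick above concrete: DO_FNEG forms the sign-bit mask as ~((__typeof(N))-1 >> 1) and XORs it in, while the new DO_AH_FABS_*/DO_AH_FNEG_* wrappers skip the flip for NaN inputs, which is the FPCR.AH = 1 behaviour. A standalone, runnable illustration at half precision (the NaN test is reimplemented here for the sketch):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define DO_FNEG(N) ((N) ^ ~((__typeof(N))-1 >> 1))   /* flip the sign bit */

/* IEEE binary16 NaN test: exponent all ones, fraction non-zero. */
static bool f16_is_any_nan(uint16_t a)
{
    return (a & 0x7fff) > 0x7c00;
}

int main(void)
{
    uint16_t one = 0x3c00, qnan = 0x7e00;

    printf("fneg(1.0)     = 0x%04x\n", (uint16_t)DO_FNEG(one));  /* 0xbc00 */
    printf("fneg(qNaN)    = 0x%04x\n", (uint16_t)DO_FNEG(qnan)); /* 0xfe00 */
    /* DO_AH_FNEG_H behaviour: a NaN passes through unchanged. */
    uint16_t r = f16_is_any_nan(qnan) ? qnan : (uint16_t)DO_FNEG(qnan);
    printf("ah_fneg(qNaN) = 0x%04x\n", r);                       /* 0x7e00 */
    return 0;
}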
@@ -2536,6 +2559,7 @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint16_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint16_t nn = n[i];
@@ -2543,13 +2567,17 @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float16_one;
}
- d[i] = nn ^ (mm & 2) << 14;
+ if (mm & 2) {
+ nn = float16_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint32_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint32_t nn = n[i];
@@ -2557,13 +2585,17 @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float32_one;
}
- d[i] = nn ^ (mm & 2) << 30;
+ if (mm & 2) {
+ nn = float32_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
uint64_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz; i += 1) {
uint64_t nn = n[i];
@@ -2571,7 +2603,10 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float64_one;
}
- d[i] = nn ^ (mm & 2) << 62;
+ if (mm & 2) {
+ nn = float64_maybe_ah_chs(nn, fpcr_ah);
+ }
+ d[i] = nn;
}
}
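
The ftssel rewrite above stops XOR-ing the sign bit unconditionally and instead routes the negation through floatN_maybe_ah_chs, keyed off an FPCR.AH bit that the translator now packs into the descriptor. A minimal standalone sketch of the two pieces this assumes — extract32 and the AH-aware sign flip — with the shift value and NaN test reimplemented for illustration:

#include <stdint.h>
#include <stdbool.h>

#define SIMD_DATA_SHIFT 10   /* illustrative; the real value comes from tcg-gvec-desc.h */

/* Bitfield extraction as in QEMU's bitops.h. */
static inline uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0U >> (32 - length));
}

/* Assumed semantics of float16_maybe_ah_chs: with FPCR.AH set, a NaN
 * input is returned unchanged instead of having its sign flipped. */
static inline uint16_t f16_maybe_ah_chs(uint16_t a, bool fpcr_ah)
{
    bool is_nan = (a & 0x7fff) > 0x7c00;
    return (fpcr_ah && is_nan) ? a : (uint16_t)(a ^ 0x8000);
}

The translator sets the descriptor bit when FPCR.AH is one, and bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1); recovers it in the helpers above.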
@@ -4187,10 +4222,10 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
uintptr_t half = n / 2; \
TYPE lo = NAME##_reduce(data, status, half); \
TYPE hi = NAME##_reduce(data + half, status, half); \
- return TYPE##_##FUNC(lo, hi, status); \
+ return FUNC(lo, hi, status); \
} \
} \
-uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
+uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
{ \
uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
@@ -4205,34 +4240,45 @@ uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
for (; i < maxsz; i += sizeof(TYPE)) { \
*(TYPE *)((void *)data + i) = IDENT; \
} \
- return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \
+ return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
}
-DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
-DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
-DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
+DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero)
+DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero)
+DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero)
/* Identity is floatN_default_nan, without the function call. */
-DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
-DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
-DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
+DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00)
+DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000)
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
+
+DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00)
+DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000)
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
-DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
-DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
-DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
+DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity)
+DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity)
+DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity)
-DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
-DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
-DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
+DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity))
+DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity))
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity))
-DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
-DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
-DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
+DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity)
+DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity)
+DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity)
+
+DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh,
+ float16_chs(float16_infinity))
+DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs,
+ float32_chs(float32_infinity))
+DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd,
+ float64_chs(float64_infinity))
#undef DO_REDUCE
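
The DO_REDUCE expansion copies the active elements into a power-of-two-sized scratch buffer, pads the tail with the operation's identity (zero for FADDV, the default NaN for FMINNMV/FMAXNMV, ±infinity for FMINV/FMAXV), and then reduces pairwise. A standalone sketch of that tree reduction, with plain float standing in for the softfloat types and status argument:

#include <stddef.h>

/* Pairwise tree reduction over a power-of-two element count, mirroring
 * NAME##_reduce above: recurse on each half, then combine, so the
 * combination order matches the SVE pairwise definition. */
static float reduce_add(const float *data, size_t n)
{
    if (n == 1) {
        return data[0];
    } else {
        size_t half = n / 2;
        float lo = reduce_add(data, half);
        float hi = reduce_add(data + half, half);
        return lo + hi;   /* FUNC(lo, hi, status) in the macro */
    }
}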
uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc);
float16 result = nn;
@@ -4252,7 +4298,7 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
}
uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc);
float32 result = nn;
@@ -4272,7 +4318,7 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
}
uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8;
uint64_t *m = vm;
@@ -4292,7 +4338,7 @@ uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
*/
#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4333,6 +4379,14 @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
+DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
+DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
+DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
+
+DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
+DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
+DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
+
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
@@ -4356,9 +4410,31 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
return float64_abs(float64_sub(a, b, s));
}
+/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
+static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat)
+{
+ float16 r = float16_sub(op1, op2, stat);
+ return float16_is_any_nan(r) ? r : float16_abs(r);
+}
+
+static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat)
+{
+ float32 r = float32_sub(op1, op2, stat);
+ return float32_is_any_nan(r) ? r : float32_abs(r);
+}
+
+static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat)
+{
+ float64 r = float64_sub(op1, op2, stat);
+ return float64_is_any_nan(r) ? r : float64_abs(r);
+}
+
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
+DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h)
+DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s)
+DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d)
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
{
@@ -4381,7 +4457,7 @@ DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd)
*/
#define DO_ZPZS_FP(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4445,11 +4521,20 @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
+DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh)
+DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs)
+DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd)
+
+DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh)
+DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins)
+DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind)
+
/* Fully general two-operand expander, controlled by a predicate,
* With the extra float_status parameter.
*/
#define DO_ZPZ_FP(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -4654,7 +4739,7 @@ static int16_t do_float16_logb_as_int(float16 a, float_status *s)
return -15 - clz32(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0x1f)) {
if (frac == 0) {
@@ -4682,7 +4767,7 @@ static int32_t do_float32_logb_as_int(float32 a, float_status *s)
return -127 - clz32(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0xff)) {
if (frac == 0) {
@@ -4710,7 +4795,7 @@ static int64_t do_float64_logb_as_int(float64 a, float_status *s)
return -1023 - clz64(frac);
}
/* flush to zero */
- float_raise(float_flag_input_denormal, s);
+ float_raise(float_flag_input_denormal_flushed, s);
}
} else if (unlikely(exp == 0x7ff)) {
if (frac == 0) {
@@ -4733,7 +4818,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4748,7 +4833,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
- r = float16_muladd(e1, e2, e3, 0, status);
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4756,32 +4841,53 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
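
A note on the new flags argument threaded through do_fmla_zpzzz_*: with FPCR.AH = 0 the helpers keep negating operands by XOR-ing the sign bit (neg1/neg3), while the new sve_ah_* entry points leave the operands alone and push the negation into the fused operation. A hedged sketch of the two FMLS strategies, assuming QEMU's softfloat API as used above:

#include "fpu/softfloat.h"

/* FMLS with FPCR.AH = 0: flip the multiplicand's sign bit up front.
 * This also flips the sign of a NaN input, per the legacy behaviour. */
static float16 fmls_ah0(float16 n, float16 m, float16 a, float_status *st)
{
    return float16_muladd(n ^ 0x8000, m, a, 0, st);
}

/* FMLS with FPCR.AH = 1: fold the negation into the fused op, so a NaN
 * operand propagates with its sign bit untouched. */
static float16 fmls_ah1(float16 n, float16 m, float16 a, float_status *st)
{
    return float16_muladd(n, m, a, float_muladd_negate_product, st);
}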
@@ -4796,7 +4902,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
- r = float32_muladd(e1, e2, e3, 0, status);
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4804,32 +4910,53 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4844,7 +4971,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
e3 = *(uint64_t *)(va + i) ^ neg3;
- r = float64_muladd(e1, e2, e3, 0, status);
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4852,27 +4979,48 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
}
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
@@ -4882,7 +5030,7 @@ void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
*/
#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
uint64_t *d = vd, *g = vg; \
@@ -4944,7 +5092,7 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT)
*/
#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \
void HELPER(NAME)(void *vd, void *vn, void *vg, \
- void *status, uint32_t desc) \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
uint64_t *d = vd, *g = vg; \
@@ -4982,27 +5130,37 @@ DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE)
/* FP Trig Multiply-Add. */
-void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float16 coeff[16] = {
0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float16 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float16 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float16_is_neg(mm)) {
- mm = float16_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float16_abs(mm);
+ }
xx += 8;
}
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
}
}
-void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float32 coeff[16] = {
0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
@@ -5011,20 +5169,29 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float32 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float32 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float32_is_neg(mm)) {
- mm = float32_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float32_abs(mm);
+ }
xx += 8;
}
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
}
}
-void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
+ float_status *s, uint32_t desc)
{
static const float64 coeff[16] = {
0x3ff0000000000000ull, 0xbfc5555555555543ull,
@@ -5037,16 +5204,24 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
};
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
- intptr_t x = simd_data(desc);
+ intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float64 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float64 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float64_is_neg(mm)) {
- mm = float64_abs(mm);
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
+ mm = float64_abs(mm);
+ }
xx += 8;
}
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs);
+ d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
}
}
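
The FTMAD rewrite decodes the immediate (three bits) and the FPCR.AH bit separately from the descriptor; for a negative multiplicand it either clears the sign bit (AH = 0) or negates the product inside the fused op (AH = 1), in both cases selecting the second bank of eight coefficients. A standalone toy model of the per-element step, with float/fmaf standing in for the softfloat types (negating one factor is an arithmetic stand-in for float_muladd_negate_product; the real flag additionally preserves NaN signs):

#include <math.h>
#include <stdbool.h>

static float ftmad_elem(float n, float m, const float coeff[16],
                        int x /* imm3 */, bool fpcr_ah)
{
    int xx = x;
    bool negate_product = false;

    if (signbit(m)) {
        if (fpcr_ah) {
            negate_product = true;   /* keep m's sign; negate the product */
        } else {
            m = fabsf(m);            /* legacy: clear the sign bit */
        }
        xx += 8;                     /* second coefficient bank */
    }
    /* d = n * m + coeff[xx], with the product optionally negated. */
    return fmaf(negate_product ? -n : n, m, coeff[xx]);
}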
@@ -5055,12 +5230,12 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
*/
void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
- float16 neg_real = float16_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5072,27 +5247,33 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float16);
e0 = *(float16 *)(vn + H1_2(i));
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float16 *)(vm + H1_2(j));
e2 = *(float16 *)(vn + H1_2(j));
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float16 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, vs);
+ *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, vs);
+ *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
}
void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
- float32 neg_real = float32_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5104,27 +5285,33 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float32);
e0 = *(float32 *)(vn + H1_2(i));
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float32 *)(vm + H1_2(j));
e2 = *(float32 *)(vn + H1_2(j));
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float32 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, vs);
+ *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, vs);
+ *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
}
void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
+ float_status *s, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
- float64 neg_real = float64_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5136,15 +5323,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
i -= 2 * sizeof(float64);
e0 = *(float64 *)(vn + H1_2(i));
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float64 *)(vm + H1_2(j));
e2 = *(float64 *)(vn + H1_2(j));
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float64 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
- *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, vs);
+ *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
}
if (likely((pg >> (j & 63)) & 1)) {
- *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, vs);
+ *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, s);
}
} while (i & 63);
} while (i != 0);
@@ -5155,16 +5348,21 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
*/
void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float16 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float16 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5181,18 +5379,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
mi = *(float16 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float16 *)(va + H1_2(i));
- d = float16_muladd(e2, e1, d, 0, status);
+ d = float16_muladd(e2, e1, d, negf_real, status);
*(float16 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float16 *)(va + H1_2(j));
- d = float16_muladd(e4, e3, d, 0, status);
+ d = float16_muladd(e4, e3, d, negf_imag, status);
*(float16 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
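
The negx/negf computation above is dense: each required negation becomes either an XOR mask applied to the operand bits (AH = 0) or a muladd flag (AH = 1). A sketch of the same derivation pulled out into one function, assuming the softfloat flag as used above:

#include <stdint.h>
#include "fpu/softfloat.h"

/* Derive the per-part negation controls of sve_fcmla_zpzzz_h: descriptor
 * bit 0 is flip, bit 1 the imaginary-part negation, bit 2 FPCR.AH; the
 * real-part negation is flip ^ negf_imag, as in the original rot decode. */
static void fcmla_h_negation(uint32_t bits, uint16_t *negx_real,
                             uint16_t *negx_imag, int *negf_real,
                             int *negf_imag)
{
    uint32_t flip = bits & 1;
    uint32_t imag = (bits >> 1) & 1;
    uint32_t ah   = (bits >> 2) & 1;
    uint32_t real = flip ^ imag;

    /* AH = 0: negate by XOR-ing the float16 sign bit. */
    *negx_real = (uint16_t)((real & ~ah) << 15);
    *negx_imag = (uint16_t)((imag & ~ah) << 15);
    /* AH = 1: negate inside the fused op instead, preserving NaN signs. */
    *negf_real = (real & ah) ? float_muladd_negate_product : 0;
    *negf_imag = (imag & ah) ? float_muladd_negate_product : 0;
}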
@@ -5200,16 +5398,21 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float32 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float32 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5226,18 +5429,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
mi = *(float32 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float32 *)(va + H1_2(i));
- d = float32_muladd(e2, e1, d, 0, status);
+ d = float32_muladd(e2, e1, d, negf_real, status);
*(float32 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float32 *)(va + H1_2(j));
- d = float32_muladd(e4, e3, d, 0, status);
+ d = float32_muladd(e4, e3, d, negf_imag, status);
*(float32 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5245,16 +5448,21 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
- void *vg, void *status, uint32_t desc)
+ void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float64 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float64 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5271,18 +5479,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
mi = *(float64 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float64 *)(va + H1_2(i));
- d = float64_muladd(e2, e1, d, 0, status);
+ d = float64_muladd(e2, e1, d, negf_real, status);
*(float64 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float64 *)(va + H1_2(j));
- d = float64_muladd(e4, e3, d, 0, status);
+ d = float64_muladd(e4, e3, d, negf_imag, status);
*(float64 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5738,6 +5946,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5752,6 +5962,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -5771,6 +5983,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5784,6 +5998,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -5934,15 +6150,11 @@ DO_LDN_2(4, dd, MO_64)
/*
* Load contiguous data, first-fault and no-fault.
*
- * For user-only, one could argue that we should hold the mmap_lock during
- * the operation so that there is no race between page_check_range and the
- * load operation. However, unmapping pages out from under a running thread
- * is extraordinarily unlikely. This theoretical race condition also affects
- * linux-user/ in its get_user/put_user macros.
- *
- * TODO: Construct some helpers, written in assembly, that interact with
- * host_signal_handler to produce memory ops which can properly report errors
- * without racing.
+ * For user-only, we control the race between page_check_range and
+ * another thread's munmap by using set/clear_helper_retaddr. Any
+ * SEGV that occurs between those markers is assumed to be because
+ * the guest page vanished. Keep that block as small as possible
+ * so that unrelated QEMU bugs are not blamed on the guest.
*/
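
A hedged sketch of the bracketing pattern this comment describes, using the set/clear_helper_retaddr API from the includes added above; host_fn and the other parameters stand in for the enclosing helper's locals (void-pointer arithmetic as per QEMU/GCC convention):

#include <stdint.h>
#include "accel/tcg/helper-retaddr.h"

/* Keep the window between the markers as narrow as possible: a host
 * SEGV raised inside it is reported as a guest memory fault, while one
 * outside it is treated as a QEMU bug. */
static void sketch_host_access(void *vd, intptr_t reg_off, void *host,
                               intptr_t mem_off, uintptr_t retaddr,
                               void (*host_fn)(void *, intptr_t, void *))
{
    set_helper_retaddr(retaddr);          /* open: SEGV => guest fault */
    host_fn(vd, reg_off, host + mem_off); /* direct host-memory access */
    clear_helper_retaddr();               /* close immediately after */
}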
/* Fault on byte I. All bits in FFR from I are cleared. The vector
@@ -6093,6 +6305,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
do {
@@ -6101,9 +6315,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
(cpu_watchpoint_address_matches
(env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
+ clear_helper_retaddr();
goto do_fault;
}
if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ clear_helper_retaddr();
goto do_fault;
}
host_fn(vd, reg_off, host + mem_off);
@@ -6113,6 +6329,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
} while (reg_off <= reg_last);
+ clear_helper_retaddr();
+
/*
* MemSingleNF is allowed to fail for any reason. We have special
* code above to handle the first element crossing a page boundary.
@@ -6307,9 +6525,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
flags = info.page[0].flags | info.page[1].flags;
if (unlikely(flags != 0)) {
-#ifdef CONFIG_USER_ONLY
- g_assert_not_reached();
-#else
/*
* At least one page includes MMIO.
* Any bus operation can fail with cpu_transaction_failed,
@@ -6340,7 +6555,6 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off & 63);
} while (reg_off <= reg_last);
return;
-#endif
}
mem_off = info.mem_off_first[0];
@@ -6348,6 +6562,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6362,6 +6578,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -6381,6 +6599,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6394,6 +6614,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -6560,7 +6782,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
if (unlikely(info.flags & TLB_MMIO)) {
tlb_fn(env, &scratch, reg_off, addr, retaddr);
} else {
+ set_helper_retaddr(retaddr);
host_fn(&scratch, reg_off, info.host);
+ clear_helper_retaddr();
}
} else {
/* Element crosses the page boundary. */
@@ -6782,7 +7006,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
goto fault;
}
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, info.host);
+ clear_helper_retaddr();
}
reg_off += esize;
} while (reg_off & 63);
@@ -6986,7 +7212,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
do {
void *h = host[i];
if (likely(h != NULL)) {
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, h);
+ clear_helper_retaddr();
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
@@ -7369,7 +7597,7 @@ void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc)
}
void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4);
@@ -7407,7 +7635,7 @@ void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
}
void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
- void *status, uint32_t desc)
+ float_status *status, uint32_t desc)
{
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4);
@@ -7443,7 +7671,8 @@ void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
}
#define DO_FCVTNT(NAME, TYPEW, TYPEN, HW, HN, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \
@@ -7464,7 +7693,8 @@ DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16)
DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32)
#define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ float_status *status, uint32_t desc) \
{ \
intptr_t i = simd_oprsz(desc); \
uint64_t *g = vg; \