Diffstat (limited to 'target/arm/tcg/helper-a64.c')
-rw-r--r-- | target/arm/tcg/helper-a64.c | 463
1 file changed, 184 insertions, 279 deletions
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 0ea8668..4f618ae 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -28,12 +28,20 @@
 #include "qemu/bitops.h"
 #include "internals.h"
 #include "qemu/crc32c.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "exec/cpu-common.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/helper-retaddr.h"
+#include "accel/tcg/probe.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
 #include "qemu/int128.h"
 #include "qemu/atomic128.h"
 #include "fpu/softfloat.h"
-#include <zlib.h> /* For crc32 */
+#include <zlib.h> /* for crc32 */
+#ifdef CONFIG_USER_ONLY
+#include "user/page-protection.h"
+#endif
+#include "vec_internal.h"
 
 /* C2.4.7 Multiply and divide */
 /* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -130,40 +138,38 @@ static inline uint32_t float_rel_to_flags(int res)
     return flags;
 }
 
-uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
+uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, float_status *fp_status)
 {
     return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
+uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, float_status *fp_status)
 {
     return float_rel_to_flags(float16_compare(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
+uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, float_status *fp_status)
 {
     return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
+uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, float_status *fp_status)
 {
     return float_rel_to_flags(float32_compare(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
+uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, float_status *fp_status)
 {
     return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
 }
 
-uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
+uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, float_status *fp_status)
 {
     return float_rel_to_flags(float64_compare(x, y, fp_status));
 }
 
-float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
-
     a = float32_squash_input_denormal(a, fpst);
     b = float32_squash_input_denormal(b, fpst);
 
@@ -176,10 +182,8 @@ float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
     return float32_mul(a, b, fpst);
 }
 
-float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
-
     a = float64_squash_input_denormal(a, fpst);
     b = float64_squash_input_denormal(b, fpst);
 
@@ -193,184 +197,71 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
 }
 
 /* 64bit/double versions of the neon float compare functions */
-uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     return -float64_eq_quiet(a, b, fpst);
 }
 
-uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     return -float64_le(b, a, fpst);
 }
 
-uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     return -float64_lt(b, a, fpst);
 }
 
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+/*
+ * Reciprocal step and sqrt step. Note that unlike the A32/T32
  * versions, these do a fully fused multiply-add or
  * multiply-add-and-halve.
+ * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
  */
-
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float16_squash_input_denormal(a, fpst);
-    b = float16_squash_input_denormal(b, fpst);
-
-    a = float16_chs(a);
-    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
-        (float16_is_infinity(b) && float16_is_zero(a))) {
-        return float16_two;
-    }
-    return float16_muladd(a, b, float16_two, 0, fpst);
-}
-
-float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float32_squash_input_denormal(a, fpst);
-    b = float32_squash_input_denormal(b, fpst);
-
-    a = float32_chs(a);
-    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
-        (float32_is_infinity(b) && float32_is_zero(a))) {
-        return float32_two;
-    }
-    return float32_muladd(a, b, float32_two, 0, fpst);
-}
-
-float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float64_squash_input_denormal(a, fpst);
-    b = float64_squash_input_denormal(b, fpst);
-
-    a = float64_chs(a);
-    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
-        (float64_is_infinity(b) && float64_is_zero(a))) {
-        return float64_two;
-    }
-    return float64_muladd(a, b, float64_two, 0, fpst);
-}
-
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float16_squash_input_denormal(a, fpst);
-    b = float16_squash_input_denormal(b, fpst);
-
-    a = float16_chs(a);
-    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
-        (float16_is_infinity(b) && float16_is_zero(a))) {
-        return float16_one_point_five;
-    }
-    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
-}
-
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float32_squash_input_denormal(a, fpst);
-    b = float32_squash_input_denormal(b, fpst);
-
-    a = float32_chs(a);
-    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
-        (float32_is_infinity(b) && float32_is_zero(a))) {
-        return float32_one_point_five;
-    }
-    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
-}
-
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    a = float64_squash_input_denormal(a, fpst);
-    b = float64_squash_input_denormal(b, fpst);
-
-    a = float64_chs(a);
-    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
-        (float64_is_infinity(b) && float64_is_zero(a))) {
-        return float64_one_point_five;
-    }
-    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
-}
-
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
-    uint64_t nsignmask = 0x0080008000800080ULL;
-    uint64_t wsignmask = 0x8000800080008000ULL;
-    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
-    uint64_t tmp1, tmp2;
-    uint64_t res, signres;
-
-    /* Extract odd elements, sign extend each to a 16 bit field */
-    tmp1 = a & elementmask;
-    tmp1 ^= nsignmask;
-    tmp1 |= wsignmask;
-    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
-    /* Ditto for the even elements */
-    tmp2 = (a >> 8) & elementmask;
-    tmp2 ^= nsignmask;
-    tmp2 |= wsignmask;
-    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
-    /* calculate the result by summing bits 0..14, 16..22, etc,
-     * and then adjusting the sign bits 15, 23, etc manually.
-     * This ensures the addition can't overflow the 16 bit field.
-     */
-    signres = (tmp1 ^ tmp2) & wsignmask;
-    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
-    res ^= signres;
-
-    return res;
-}
-
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
-    uint64_t tmp;
-
-    tmp = a & 0x00ff00ff00ff00ffULL;
-    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
-    return tmp;
-}
-
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
-    int32_t reslo, reshi;
-
-    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
-    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
-    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
-    uint64_t tmp;
-
-    tmp = a & 0x0000ffff0000ffffULL;
-    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
-    return tmp;
-}
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN)                           \
+    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
+    {                                                                     \
+        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
+        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
+        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
+        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+            return FLOATTYPE ## _two;                                     \
+        }                                                                 \
+        return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst);    \
+    }
+
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
+DO_RECPS(recpsf_f32, float32, float32, chs)
+DO_RECPS(recpsf_f64, float64, float64, chs)
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
+
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN)                         \
+    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
+    {                                                                     \
+        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
+        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
+        a = FLOATTYPE ## _ ## CHSFN(a);                                   \
+        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+            return FLOATTYPE ## _one_point_five;                          \
+        }                                                                 \
+        return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three,     \
+                                           -1, 0, fpst);                  \
+    }                                                                     \
+
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
 
 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
-uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
+uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     uint16_t val16, sbit;
     int16_t exp;
 
@@ -401,9 +292,8 @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
     }
 }
 
-float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
+float32 HELPER(frecpx_f32)(float32 a, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     uint32_t val32, sbit;
     int32_t exp;
 
@@ -434,9 +324,8 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
     }
 }
 
-float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
+float64 HELPER(frecpx_f64)(float64 a, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     uint64_t val64, sbit;
     int64_t exp;
 
@@ -467,28 +356,53 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
     }
 }
 
-float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
+float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
 {
-    /* Von Neumann rounding is implemented by using round-to-zero
-     * and then setting the LSB of the result if Inexact was raised.
-     */
     float32 r;
-    float_status *fpst = &env->vfp.fp_status;
-    float_status tstat = *fpst;
-    int exflags;
-
-    set_float_rounding_mode(float_round_to_zero, &tstat);
-    set_float_exception_flags(0, &tstat);
-    r = float64_to_float32(a, &tstat);
-    exflags = get_float_exception_flags(&tstat);
-    if (exflags & float_flag_inexact) {
-        r = make_float32(float32_val(r) | 1);
-    }
-    exflags |= get_float_exception_flags(fpst);
-    set_float_exception_flags(exflags, fpst);
+    int old = get_float_rounding_mode(fpst);
+
+    set_float_rounding_mode(float_round_to_odd, fpst);
+    r = float64_to_float32(a, fpst);
+    set_float_rounding_mode(old, fpst);
     return r;
 }
 
+/*
+ * AH=1 min/max have some odd special cases:
+ * comparing two zeroes (regardless of sign), (NaN, anything),
+ * or (anything, NaN) should return the second argument (possibly
+ * squashed to zero).
+ * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
+ */
+#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX)                  \
+    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)              \
+    {                                                                     \
+        bool save;                                                        \
+        CTYPE r;                                                          \
+        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                 \
+        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                 \
+        if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) {       \
+            return b;                                                     \
+        }                                                                 \
+        if (FLOATTYPE ## _is_any_nan(a) ||                                \
+            FLOATTYPE ## _is_any_nan(b)) {                                \
+            float_raise(float_flag_invalid, fpst);                        \
+            return b;                                                     \
+        }                                                                 \
+        save = get_flush_to_zero(fpst);                                   \
+        set_flush_to_zero(false, fpst);                                   \
+        r = FLOATTYPE ## _ ## MINMAX(a, b, fpst);                         \
+        set_flush_to_zero(save, fpst);                                    \
+        return r;                                                         \
+    }
+
+AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
+AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
+AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
+AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
+AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
+AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
+
 /* 64-bit versions of the CRC helpers. Note that although the operation
  * (and the prototypes of crc32c() and crc32() mean that only the bottom
  * 32 bits of the accumulator and result are used, we pass and return
@@ -524,27 +438,17 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
 #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
 
 #define ADVSIMD_HALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
+uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, float_status *fpst) \
 { \
-    float_status *fpst = fpstp; \
     return float16_ ## name(a, b, fpst); \
 }
 
-ADVSIMD_HALFOP(add)
-ADVSIMD_HALFOP(sub)
-ADVSIMD_HALFOP(mul)
-ADVSIMD_HALFOP(div)
-ADVSIMD_HALFOP(min)
-ADVSIMD_HALFOP(max)
-ADVSIMD_HALFOP(minnum)
-ADVSIMD_HALFOP(maxnum)
-
 #define ADVSIMD_TWOHALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, \
+                                  float_status *fpst) \
 { \
     float16 a1, a2, b1, b2; \
     uint32_t r1, r2; \
-    float_status *fpst = fpstp; \
     a1 = extract32(two_a, 0, 16); \
     a2 = extract32(two_a, 16, 16); \
     b1 = extract32(two_b, 0, 16); \
@@ -564,10 +468,8 @@ ADVSIMD_TWOHALFOP(minnum)
 ADVSIMD_TWOHALFOP(maxnum)
 
 /* Data processing - scalar floating-point and advanced SIMD */
-static float16 float16_mulx(float16 a, float16 b, void *fpstp)
+static float16 float16_mulx(float16 a, float16 b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
-
     a = float16_squash_input_denormal(a, fpst);
     b = float16_squash_input_denormal(b, fpst);
 
@@ -585,16 +487,14 @@ ADVSIMD_TWOHALFOP(mulx)
 
 /* fused multiply-accumulate */
 uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
-                                 void *fpstp)
+                                 float_status *fpst)
 {
-    float_status *fpst = fpstp;
     return float16_muladd(a, b, c, 0, fpst);
 }
 
 uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
-                                  uint32_t two_c, void *fpstp)
+                                  uint32_t two_c, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     float16 a1, a2, b1, b2, c1, c2;
     uint32_t r1, r2;
     a1 = extract32(two_a, 0, 16);
@@ -616,31 +516,27 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
 
 #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
 
-uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     int compare = float16_compare_quiet(a, b, fpst);
     return ADVSIMD_CMPRES(compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     int compare = float16_compare(a, b, fpst);
     return ADVSIMD_CMPRES(compare == float_relation_greater ||
                           compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     int compare = float16_compare(a, b, fpst);
     return ADVSIMD_CMPRES(compare == float_relation_greater);
 }
 
-uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     float16 f0 = float16_abs(a);
     float16 f1 = float16_abs(b);
     int compare = float16_compare(f0, f1, fpst);
@@ -648,9 +544,8 @@ uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
                           compare == float_relation_equal);
 }
 
-uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, float_status *fpst)
 {
-    float_status *fpst = fpstp;
     float16 f0 = float16_abs(a);
     float16 f1 = float16_abs(b);
     int compare = float16_compare(f0, f1, fpst);
@@ -658,12 +553,12 @@ uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
 }
 
 /* round to integral */
-uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
+uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, float_status *fp_status)
 {
     return float16_round_to_int(x, fp_status);
 }
 
-uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
+uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status)
 {
     int old_flags = get_float_exception_flags(fp_status), new_flags;
     float16 ret;
@@ -679,38 +574,6 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
     return ret;
 }
 
-/*
- * Half-precision floating point conversion functions
- *
- * There are a multitude of conversion functions with various
- * different rounding modes. This is dealt with by the calling code
- * setting the mode appropriately before calling the helper.
- */
-
-uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    /* Invalid if we are passed a NaN */
-    if (float16_is_any_nan(a)) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_int16(a, fpst);
-}
-
-uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
-{
-    float_status *fpst = fpstp;
-
-    /* Invalid if we are passed a NaN */
-    if (float16_is_any_nan(a)) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_uint16(a, fpst);
-}
-
 static int el_from_spsr(uint32_t spsr)
 {
     /* Return the exception level that this SPSR is requesting a return to,
@@ -771,6 +634,7 @@ static void cpsr_write_from_spsr_elx(CPUARMState *env,
 
 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
 {
+    ARMCPU *cpu = env_archcpu(env);
     int cur_el = arm_current_el(env);
     unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
     uint32_t spsr = env->banked_spsr[spsr_idx];
@@ -817,12 +681,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
         goto illegal_return;
     }
 
+    if (!return_to_aa64 && !cpu_isar_feature(aa64_aa32, cpu)) {
+        /* Return to AArch32 when CPU is AArch64-only */
+        goto illegal_return;
+    }
+
     if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
         goto illegal_return;
    }
 
     bql_lock();
-    arm_call_pre_el_change_hook(env_archcpu(env));
+    arm_call_pre_el_change_hook(cpu);
     bql_unlock();
 
     if (!return_to_aa64) {
@@ -850,7 +719,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
         int tbii;
 
         env->aarch64 = true;
-        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
+        spsr &= aarch64_pstate_valid_mask(&cpu->isar);
         pstate_write(env, spsr);
         if (!arm_singlestep_active(env)) {
             env->pstate &= ~PSTATE_SS;
@@ -889,7 +758,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
     aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
 
     bql_lock();
-    arm_call_el_change_hook(env_archcpu(env));
+    arm_call_el_change_hook(cpu);
     bql_unlock();
 
     return;
@@ -915,19 +784,10 @@ illegal_return:
                   "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
 }
 
-/*
- * Square Root and Reciprocal square root
- */
-
-uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
-{
-    float_status *s = fpstp;
-
-    return float16_sqrt(a, s);
-}
-
 void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
 {
+    uintptr_t ra = GETPC();
+
     /*
      * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
@@ -948,8 +808,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
 
 #ifndef CONFIG_USER_ONLY
     if (unlikely(!mem)) {
-        uintptr_t ra = GETPC();
-
         /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
@@ -971,7 +829,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
     }
 #endif
 
+    set_helper_retaddr(ra);
     memset(mem, 0, blocklen);
+    clear_helper_retaddr();
 }
 
 void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
@@ -1120,7 +980,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
     }
 #endif
     /* Easy case: just memset the host memory */
+    set_helper_retaddr(ra);
     memset(mem, data, setsize);
+    clear_helper_retaddr();
     return setsize;
 }
 
@@ -1163,7 +1025,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
     }
 #endif
     /* Easy case: just memset the host memory */
+    set_helper_retaddr(ra);
     memset(mem, data, setsize);
+    clear_helper_retaddr();
     mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
     return setsize;
 }
@@ -1286,7 +1150,6 @@ static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
     env->ZF = 1; /* our env->ZF encoding is inverted */
     env->CF = 0;
     env->VF = 0;
-    return;
 }
 
 void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
@@ -1342,7 +1205,7 @@ static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
     /* Do the actual memset: we leave the last partial page to SETE */
     stagesetsize = setsize & TARGET_PAGE_MASK;
     while (stagesetsize > 0) {
-        step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
+        step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
         toaddr += step;
         setsize -= step;
         stagesetsize -= step;
@@ -1497,7 +1360,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
     }
 #endif
     /* Easy case: just memmove the host memory */
+    set_helper_retaddr(ra);
     memmove(wmem, rmem, copysize);
+    clear_helper_retaddr();
     return copysize;
 }
 
@@ -1572,7 +1437,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
      * Easy case: just memmove the host memory. Note that wmem and
      * rmem here point to the *last* byte to copy.
      */
+    set_helper_retaddr(ra);
     memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
+    clear_helper_retaddr();
     return copysize;
 }
 
@@ -1682,7 +1549,6 @@ static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
     env->ZF = 1; /* our env->ZF encoding is inverted */
     env->CF = 0;
     env->VF = 0;
-    return;
 }
 
 void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
@@ -1867,3 +1733,42 @@ void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
 {
     do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
 }
+
+static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    return page_get_flags(addr) & PAGE_BTI;
+#else
+    CPUTLBEntryFull *full;
+    void *host;
+    int mmu_idx = cpu_mmu_index(env_cpu(env), true);
+    int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
+                                  false, &host, &full, ra);
+
+    assert(!(flags & TLB_INVALID_MASK));
+    return full->extra.arm.guarded;
+#endif
+}
+
+void HELPER(guarded_page_check)(CPUARMState *env)
+{
+    /*
+     * We have already verified that bti is enabled, and that the
+     * instruction at PC is not ok for BTYPE. This is always at
+     * the beginning of a block, so PC is always up-to-date and
+     * no unwind is required.
+     */
+    if (is_guarded_page(env, env->pc, 0)) {
+        raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype),
+                        exception_target_el(env));
+    }
+}
+
+void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc)
+{
+    /*
+     * We have already checked for branch via x16 and x17.
+     * What remains for choosing BTYPE is checking for a guarded page.
+     */
+    env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1;
+}