aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--target/arm/sve_helper.c5
-rw-r--r--target/arm/translate-sve.c49
2 files changed, 32 insertions, 22 deletions
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 9bd0694..87594a8 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -2846,11 +2846,6 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
return flags;
}
- /* Scale from predicate element count to bits. */
- count <<= esz;
- /* Bound to the bits in the predicate. */
- count = MIN(count, oprsz * 8);
-
/* Set all of the requested bits. */
for (i = 0; i < count / 64; ++i) {
d->p[i] = esz_mask;
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 9dd4c38..89efc80 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3173,19 +3173,19 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
- if (!sve_access_check(s)) {
- return true;
- }
-
- TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
- TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
- TCGv_i64 t0 = tcg_temp_new_i64();
- TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 op0, op1, t0, t1, tmax;
TCGv_i32 t2, t3;
TCGv_ptr ptr;
unsigned desc, vsz = vec_full_reg_size(s);
TCGCond cond;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ op0 = read_cpu_reg(s, a->rn, 1);
+ op1 = read_cpu_reg(s, a->rm, 1);
+
if (!a->sf) {
if (a->u) {
tcg_gen_ext32u_i64(op0, op0);
@@ -3198,32 +3198,47 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
/* For the helper, compress the different conditions into a computation
* of how many iterations for which the condition is true.
- *
- * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
- * 2**64 iterations, overflowing to 0. Of course, predicate registers
- * aren't that large, so any value >= predicate size is sufficient.
*/
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
tcg_gen_sub_i64(t0, op1, op0);
- /* t0 = MIN(op1 - op0, vsz). */
- tcg_gen_movi_i64(t1, vsz);
- tcg_gen_umin_i64(t0, t0, t1);
+ tmax = tcg_const_i64(vsz >> a->esz);
if (a->eq) {
/* Equality means one more iteration. */
tcg_gen_addi_i64(t0, t0, 1);
+
+ /* If op1 is max (un)signed integer (and the only time the addition
+ * above could overflow), then we produce an all-true predicate by
+ * setting the count to the vector length. This is because the
+ * pseudocode is described as an increment + compare loop, and the
+ * max integer would always compare true.
+ */
+ tcg_gen_movi_i64(t1, (a->sf
+ ? (a->u ? UINT64_MAX : INT64_MAX)
+ : (a->u ? UINT32_MAX : INT32_MAX)));
+ tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
}
- /* t0 = (condition true ? t0 : 0). */
+ /* Bound to the maximum. */
+ tcg_gen_umin_i64(t0, t0, tmax);
+ tcg_temp_free_i64(tmax);
+
+ /* Set the count to zero if the condition is false. */
cond = (a->u
? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
: (a->eq ? TCG_COND_LE : TCG_COND_LT));
tcg_gen_movi_i64(t1, 0);
tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
+ tcg_temp_free_i64(t1);
+ /* Since we're bounded, pass as a 32-bit type. */
t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, t0);
tcg_temp_free_i64(t0);
- tcg_temp_free_i64(t1);
+
+ /* Scale elements to bits. */
+ tcg_gen_shli_i32(t2, t2, a->esz);
desc = (vsz / 8) - 2;
desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);