diff options
Diffstat (limited to 'target')
-rw-r--r-- | target/arm/translate.c | 60 |
1 files changed, 49 insertions, 11 deletions
diff --git a/target/arm/translate.c b/target/arm/translate.c index d34c1d3..d8729e4 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7401,22 +7401,60 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) gen_smul_dual(t1, t2); if (sub) { - /* This subtraction cannot overflow. */ - tcg_gen_sub_i32(t1, t1, t2); - } else { /* - * This addition cannot overflow 32 bits; however it may - * overflow considered as a signed operation, in which case - * we must set the Q flag. + * This subtraction cannot overflow, so we can do a simple + * 32-bit subtraction and then a possible 32-bit saturating + * addition of Ra. */ - gen_helper_add_setq(t1, cpu_env, t1, t2); - } - tcg_temp_free_i32(t2); + tcg_gen_sub_i32(t1, t1, t2); + tcg_temp_free_i32(t2); - if (a->ra != 15) { - t2 = load_reg(s, a->ra); + if (a->ra != 15) { + t2 = load_reg(s, a->ra); + gen_helper_add_setq(t1, cpu_env, t1, t2); + tcg_temp_free_i32(t2); + } + } else if (a->ra == 15) { + /* Single saturation-checking addition */ gen_helper_add_setq(t1, cpu_env, t1, t2); tcg_temp_free_i32(t2); + } else { + /* + * We need to add the products and Ra together and then + * determine whether the final result overflowed. Doing + * this as two separate add-and-check-overflow steps incorrectly + * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1. + * Do all the arithmetic at 64-bits and then check for overflow. + */ + TCGv_i64 p64, q64; + TCGv_i32 t3, qf, one; + + p64 = tcg_temp_new_i64(); + q64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(p64, t1); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + load_reg_var(s, t2, a->ra); + tcg_gen_ext_i32_i64(q64, t2); + tcg_gen_add_i64(p64, p64, q64); + tcg_temp_free_i64(q64); + + tcg_gen_extr_i64_i32(t1, t2, p64); + tcg_temp_free_i64(p64); + /* + * t1 is the low half of the result which goes into Rd. + * We have overflow and must set Q if the high half (t2) + * is different from the sign-extension of t1. + */ + t3 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t3, t1, 31); + qf = load_cpu_field(QF); + one = tcg_const_i32(1); + tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf); + store_cpu_field(qf, QF); + tcg_temp_free_i32(one); + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); } store_reg(s, a->rd, t1); return true; |