aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stefan Hajnoczi <stefanha@redhat.com> 2024-12-25 08:33:33 -0500
committer Stefan Hajnoczi <stefanha@redhat.com> 2024-12-25 08:33:33 -0500
commit a7f77545d401266a6415e6e03c7738c95314f0e6 (patch)
tree 20354b9913086ac3a535e06e4d28057f88710d14
parent aa3a285b5bc56a4208b3b57d4a55291e9c260107 (diff)
parent e4a8e093dc74be049f4829831dce76e5edab0003 (diff)
download qemu-a7f77545d401266a6415e6e03c7738c95314f0e6.zip
qemu-a7f77545d401266a6415e6e03c7738c95314f0e6.tar.gz
qemu-a7f77545d401266a6415e6e03c7738c95314f0e6.tar.bz2
Merge tag 'pull-tcg-20241224' of https://gitlab.com/rth7680/qemu into staging

tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmdrE7QdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+l2Qf/aECUfMn07wns7WjX
# ebWxzIRKp//ktsIJg9InL8zrCStyRqrBj0VQE9LUfO2Vhvqf8faUdh+uh2ek/Ewa
# f1hfo0kDK7e7oWnCicSbHmdC0FQIrKpg2i+YXIsbd4XWOkmFAhkNenISuQfCrL3k
# 3UYAA12seK9uCls+fljvhK6iid3h+4ReDFW7DPg7mumFCCz6CwzYYW/4cnhcAmOn
# qVehtts8W+6SFMjTE04S8NV8OBaMisf8AbCcZf2PedRl1cHGSumLOjvjOxcQU8Hw
# nGUjL8/hYWkEetzU4YzJyfHOe6F9lPJBMnDattwIswwYrTOD/Sq7VbBWFbW0EwUy
# 7XIZ8Q==
# =DZgo
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 24 Dec 2024 15:04:04 EST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20241224' of https://gitlab.com/rth7680/qemu: (72 commits)
  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core
  target/hexagon: Simplify internal_mpyhh setup
  target/hexagon: Use mulu64 for int128_mul_6464
  target/hexagon: Remove Double
  target/hexagon: Remove Float
  target/hexagon: Expand GEN_XF_ROUND
  target/hexagon: Remove internal_fmafx
  target/hexagon: Use float32_muladd for helper_sffm[as]_lib
  target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
  target/hexagon: Use float32_muladd for helper_sffms
  target/hexagon: Use float32_muladd for helper_sffma
  target/hexagon: Use float32_mul in helper_sfmpy
  softfloat: Add float_muladd_suppress_add_product_zero
  softfloat: Add float_round_nearest_even_max
  softfloat: Remove float_muladd_halve_result
  target/sparc: Use float*_muladd_scalbn
  target/arm: Use float*_muladd_scalbn
  softfloat: Add float{16,32,64}_muladd_scalbn
  tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
  tcg/optimize: Move fold_bitsel_vec into alphabetic sort
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- accel/tcg/cpu-exec.c 8
-rw-r--r-- accel/tcg/plugin-gen.c 9
-rw-r--r-- accel/tcg/translate-all.c 8
-rw-r--r-- fpu/softfloat-parts.c.inc 16
-rw-r--r-- fpu/softfloat.c 63
-rw-r--r-- include/exec/translator.h 14
-rw-r--r-- include/fpu/softfloat-types.h 2
-rw-r--r-- include/fpu/softfloat.h 14
-rw-r--r-- include/hw/core/tcg-cpu-ops.h 13
-rw-r--r-- target/alpha/cpu.c 1
-rw-r--r-- target/alpha/cpu.h 2
-rw-r--r-- target/alpha/translate.c 4
-rw-r--r-- target/arm/cpu.c 1
-rw-r--r-- target/arm/internals.h 2
-rw-r--r-- target/arm/tcg/cpu-v7m.c 1
-rw-r--r-- target/arm/tcg/helper-a64.c 6
-rw-r--r-- target/arm/tcg/translate.c 5
-rw-r--r-- target/avr/cpu.c 1
-rw-r--r-- target/avr/cpu.h 2
-rw-r--r-- target/avr/translate.c 6
-rw-r--r-- target/hexagon/cpu.c 1
-rw-r--r-- target/hexagon/cpu.h 2
-rw-r--r-- target/hexagon/fma_emu.c 470
-rw-r--r-- target/hexagon/fma_emu.h 3
-rw-r--r-- target/hexagon/op_helper.c 125
-rw-r--r-- target/hexagon/translate.c 4
-rw-r--r-- target/hppa/cpu.c 1
-rw-r--r-- target/hppa/cpu.h 2
-rw-r--r-- target/hppa/translate.c 4
-rw-r--r-- target/i386/tcg/helper-tcg.h 2
-rw-r--r-- target/i386/tcg/tcg-cpu.c 1
-rw-r--r-- target/i386/tcg/translate.c 5
-rw-r--r-- target/loongarch/cpu.c 1
-rw-r--r-- target/loongarch/internals.h 2
-rw-r--r-- target/loongarch/tcg/translate.c 4
-rw-r--r-- target/m68k/cpu.c 1
-rw-r--r-- target/m68k/cpu.h 2
-rw-r--r-- target/m68k/translate.c 4
-rw-r--r-- target/microblaze/cpu.c 1
-rw-r--r-- target/microblaze/cpu.h 2
-rw-r--r-- target/microblaze/translate.c 4
-rw-r--r-- target/mips/cpu.c 1
-rw-r--r-- target/mips/tcg/tcg-internal.h 2
-rw-r--r-- target/mips/tcg/translate.c 4
-rw-r--r-- target/openrisc/cpu.c 1
-rw-r--r-- target/openrisc/cpu.h 2
-rw-r--r-- target/openrisc/translate.c 4
-rw-r--r-- target/ppc/cpu.h 2
-rw-r--r-- target/ppc/cpu_init.c 1
-rw-r--r-- target/ppc/translate.c 4
-rw-r--r-- target/riscv/cpu.h 3
-rw-r--r-- target/riscv/tcg/tcg-cpu.c 1
-rw-r--r-- target/riscv/translate.c 4
-rw-r--r-- target/rx/cpu.c 1
-rw-r--r-- target/rx/cpu.h 2
-rw-r--r-- target/rx/translate.c 4
-rw-r--r-- target/s390x/cpu.c 1
-rw-r--r-- target/s390x/s390x-internal.h 2
-rw-r--r-- target/s390x/tcg/translate.c 4
-rw-r--r-- target/sh4/cpu.c 1
-rw-r--r-- target/sh4/cpu.h 2
-rw-r--r-- target/sh4/translate.c 4
-rw-r--r-- target/sparc/cpu.c 1
-rw-r--r-- target/sparc/cpu.h 2
-rw-r--r-- target/sparc/fop_helper.c 8
-rw-r--r-- target/sparc/helper.h 4
-rw-r--r-- target/sparc/translate.c 84
-rw-r--r-- target/tricore/cpu.c 1
-rw-r--r-- target/tricore/cpu.h 2
-rw-r--r-- target/tricore/translate.c 5
-rw-r--r-- target/xtensa/cpu.c 1
-rw-r--r-- target/xtensa/cpu.h 2
-rw-r--r-- target/xtensa/translate.c 4
-rw-r--r-- tcg/optimize.c 847
-rw-r--r-- tests/tcg/multiarch/system/memory.c 9
75 files changed, 848 insertions, 991 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index b507049..d48b82a 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1088,11 +1088,13 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
if (!tcg_target_initialized) {
/* Check mandatory TCGCPUOps handlers */
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
#ifndef CONFIG_USER_ONLY
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
+ assert(tcg_ops->cpu_exec_halt);
+ assert(tcg_ops->cpu_exec_interrupt);
#endif /* !CONFIG_USER_ONLY */
- cpu->cc->tcg_ops->initialize();
+ assert(tcg_ops->translate_code);
+ tcg_ops->initialize();
tcg_target_initialized = true;
}
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 1ef0755..7e5f040 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -102,6 +102,15 @@ static void gen_disable_mem_helper(void)
static TCGv_i32 gen_cpu_index(void)
{
+ /*
+ * Optimize when we run with a single vcpu. All values using cpu_index,
+ * including scoreboard index, will be optimized out.
+ * User-mode calls tb_flush when setting this flag. In system-mode, all
+ * vcpus are created before generating code.
+ */
+ if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+ return tcg_constant_i32(current_cpu->cpu_index);
+ }
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 572a8a8..453eb20 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -276,8 +276,10 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
tcg_func_start(tcg_ctx);
- tcg_ctx->cpu = env_cpu(env);
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
+ CPUState *cs = env_cpu(env);
+ tcg_ctx->cpu = cs;
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
+
assert(tb->size != 0);
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
@@ -364,7 +366,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
/*
* Overflow of code_gen_buffer, or the current slice of it.
*
- * TODO: We don't need to re-do gen_intermediate_code, nor
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
* should we re-do the tcg optimization currently hidden
* inside tcg_gen_code. All that should be required is to
* flush the TBs, allocate a new TB, re-initialize it per
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index ba8de7b..ebde429 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -241,6 +241,9 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
int exp, flags = 0;
switch (s->float_rounding_mode) {
+ case float_round_nearest_even_max:
+ overflow_norm = true;
+ /* fall through */
case float_round_nearest_even:
if (N > 64 && frac_lsb == 0) {
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
@@ -562,8 +565,9 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
* Requires A and C extracted into a double-sized structure to provide the
* extra space for the widening multiply.
*/
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
- FloatPartsN *c, int flags, float_status *s)
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
+ FloatPartsN *c, int scale,
+ int flags, float_status *s)
{
int ab_mask, abc_mask;
FloatPartsW p_widen, c_widen;
@@ -611,7 +615,9 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
goto return_normal;
}
if (c->cls == float_class_zero) {
- if (a->sign != c->sign) {
+ if (flags & float_muladd_suppress_add_product_zero) {
+ a->sign = c->sign;
+ } else if (a->sign != c->sign) {
goto return_sub_zero;
}
goto return_zero;
@@ -652,9 +658,7 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
a->exp = p_widen.exp;
return_normal:
- if (flags & float_muladd_halve_result) {
- a->exp -= 1;
- }
+ a->exp += scale;
finish_sign:
if (flags & float_muladd_negate_result) {
a->sign ^= 1;
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8de8d5f..8d75d66 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -789,15 +789,15 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
PARTS_GENERIC_64_128(mul, A)(A, B, S)
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
- FloatParts64 *c, int flags,
- float_status *s);
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
- FloatParts128 *c, int flags,
- float_status *s);
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
+ FloatParts64 *c, int scale,
+ int flags, float_status *s);
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
+ FloatParts128 *c, int scale,
+ int flags, float_status *s);
-#define parts_muladd(A, B, C, Z, S) \
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
float_status *s);
@@ -2212,43 +2212,50 @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
* Fused multiply-add
*/
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
- int flags, float_status *status)
+float16 QEMU_FLATTEN
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
float16_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float16_round_pack_canonical(pr, status);
}
-static float32 QEMU_SOFTFLOAT_ATTR
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
- float_status *status)
+float16 float16_muladd(float16 a, float16 b, float16 c,
+ int flags, float_status *status)
+{
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
+}
+
+float32 QEMU_SOFTFLOAT_ATTR
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
float32_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float32_round_pack_canonical(pr, status);
}
-static float64 QEMU_SOFTFLOAT_ATTR
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
- float_status *status)
+float64 QEMU_SOFTFLOAT_ATTR
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float64_round_pack_canonical(pr, status);
}
@@ -2267,7 +2274,7 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
if (unlikely(!can_use_fpu(s))) {
goto soft;
}
- if (unlikely(flags & float_muladd_halve_result)) {
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
goto soft;
}
@@ -2323,7 +2330,7 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
return ur.s;
soft:
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
float64 QEMU_FLATTEN
@@ -2338,9 +2345,6 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
if (unlikely(!can_use_fpu(s))) {
goto soft;
}
- if (unlikely(flags & float_muladd_halve_result)) {
- goto soft;
- }
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
@@ -2394,7 +2398,7 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
return ur.s;
soft:
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
float64 float64r32_muladd(float64 a, float64 b, float64 c,
@@ -2405,7 +2409,7 @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return float64r32_round_pack_canonical(pr, status);
}
@@ -2418,7 +2422,7 @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
bfloat16_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -2431,7 +2435,7 @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
float128_unpack_canonical(&pa, a, status);
float128_unpack_canonical(&pb, b, status);
float128_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return float128_round_pack_canonical(pr, status);
}
@@ -5249,8 +5253,9 @@ float32 float32_exp2(float32 a, float_status *status)
float64_unpack_canonical(&rp, float64_one, status);
for (i = 0 ; i < 15 ; i++) {
+
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
xnp = *parts_mul(&xnp, &xp, status);
}
diff --git a/include/exec/translator.h b/include/exec/translator.h
index 41e2a41..d70942a 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -22,20 +22,6 @@
#include "exec/vaddr.h"
/**
- * gen_intermediate_code
- * @cpu: cpu context
- * @tb: translation block
- * @max_insns: max number of instructions to translate
- * @pc: guest virtual program counter address
- * @host_pc: host physical program counter address
- *
- * This function must be provided by the target, which should create
- * the target-specific DisasContext, and then invoke translator_loop.
- */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc);
-
-/**
* DisasJumpType:
* @DISAS_NEXT: Next instruction in program order.
* @DISAS_TOO_MANY: Too many instructions translated.
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 79ca44d..9d37cdf 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -140,6 +140,8 @@ typedef enum __attribute__((__packed__)) {
float_round_to_odd = 5,
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
float_round_to_odd_inf = 6,
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
+ float_round_nearest_even_max = 7,
} FloatRoundMode;
/*
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index eb64075..09a40b4 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -120,14 +120,16 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
-| We also support halving the result before rounding, as a special
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
+|
+| With float_muladd_suppress_add_product_zero, if A or B is zero
+| such that the product is a true zero, then return C without addition.
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
- float_muladd_halve_result = 8,
+ float_muladd_suppress_add_product_zero = 8,
};
/*----------------------------------------------------------------------------
@@ -238,6 +240,8 @@ float16 float16_add(float16, float16, float_status *status);
float16 float16_sub(float16, float16, float_status *status);
float16 float16_mul(float16, float16, float_status *status);
float16 float16_muladd(float16, float16, float16, int, float_status *status);
+float16 float16_muladd_scalbn(float16, float16, float16,
+ int, int, float_status *status);
float16 float16_div(float16, float16, float_status *status);
float16 float16_scalbn(float16, int, float_status *status);
float16 float16_min(float16, float16, float_status *status);
@@ -597,6 +601,8 @@ float32 float32_mul(float32, float32, float_status *status);
float32 float32_div(float32, float32, float_status *status);
float32 float32_rem(float32, float32, float_status *status);
float32 float32_muladd(float32, float32, float32, int, float_status *status);
+float32 float32_muladd_scalbn(float32, float32, float32,
+ int, int, float_status *status);
float32 float32_sqrt(float32, float_status *status);
float32 float32_exp2(float32, float_status *status);
float32 float32_log2(float32, float_status *status);
@@ -792,6 +798,8 @@ float64 float64_mul(float64, float64, float_status *status);
float64 float64_div(float64, float64, float_status *status);
float64 float64_rem(float64, float64, float_status *status);
float64 float64_muladd(float64, float64, float64, int, float_status *status);
+float64 float64_muladd_scalbn(float64, float64, float64,
+ int, int, float_status *status);
float64 float64_sqrt(float64, float_status *status);
float64 float64_log2(float64, float_status *status);
FloatRelation float64_compare(float64, float64, float_status *status);
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 663efb9..2e3f169 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -25,6 +25,19 @@ struct TCGCPUOps {
*/
void (*initialize)(void);
/**
+ * @translate_code: Translate guest instructions to TCGOps
+ * @cpu: cpu context
+ * @tb: translation block
+ * @max_insns: max number of instructions to translate
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ *
+ * This function must be provided by the target, which should create
+ * the target-specific DisasContext, and then invoke translator_loop.
+ */
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
+ /**
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
*
* This is called when we abandon execution of a TB before starting it,
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 9fa506b..e1b898e 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -224,6 +224,7 @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
static const TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
+ .translate_code = alpha_translate_code,
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
.restore_state_to_opc = alpha_restore_state_to_opc,
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index 3556d32..80562ad 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -431,6 +431,8 @@ enum {
};
void alpha_translate_init(void);
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 629ff3c..2156c02 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -2955,8 +2955,8 @@ static const TranslatorOps alpha_tr_ops = {
.tb_stop = alpha_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 019183c..dcedadc 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2682,6 +2682,7 @@ static const struct SysemuCPUOps arm_sysemu_ops = {
#ifdef CONFIG_TCG
static const TCGCPUOps arm_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/internals.h b/target/arm/internals.h
index c3a5b13..863a84e 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -357,6 +357,8 @@ void init_cpreg_list(ARMCPU *cpu);
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
void arm_translate_init(void);
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index 58e5457..03acdf8 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -234,6 +234,7 @@ static void cortex_m55_initfn(Object *obj)
static const TCGCPUOps arm_v7m_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 0e13050..3b226da 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -262,7 +262,7 @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
(float16_is_infinity(b) && float16_is_zero(a))) {
return float16_one_point_five;
}
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
}
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
@@ -275,7 +275,7 @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
(float32_is_infinity(b) && float32_is_zero(a))) {
return float32_one_point_five;
}
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
}
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
@@ -288,7 +288,7 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
(float64_is_infinity(b) && float64_is_zero(a))) {
return float64_one_point_five;
}
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index 9ee761f..c16b59a 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -8093,9 +8093,8 @@ static const TranslatorOps thumb_translator_ops = {
.tb_stop = arm_tr_tb_stop,
};
-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
const TranslatorOps *ops = &arm_translator_ops;
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 2dccb09..8a126ff 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -207,6 +207,7 @@ static const struct SysemuCPUOps avr_sysemu_ops = {
static const TCGCPUOps avr_tcg_ops = {
.initialize = avr_cpu_tcg_init,
+ .translate_code = avr_cpu_translate_code,
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
.restore_state_to_opc = avr_restore_state_to_opc,
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index 4725535..06f5ae4 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -183,6 +183,8 @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
}
void avr_cpu_tcg_init(void);
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
int cpu_avr_exec(CPUState *cpu);
diff --git a/target/avr/translate.c b/target/avr/translate.c
index f13b997..4ab71d8 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -2599,7 +2599,7 @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
*
* - translate()
* - canonicalize_skip()
- * - gen_intermediate_code()
+ * - translate_code()
* - restore_state_to_opc()
*
*/
@@ -2795,8 +2795,8 @@ static const TranslatorOps avr_tr_ops = {
.tb_stop = avr_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index c9817c7..0b7fc98 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -325,6 +325,7 @@ static void hexagon_cpu_init(Object *obj)
static const TCGCPUOps hexagon_tcg_ops = {
.initialize = hexagon_translate_init,
+ .translate_code = hexagon_translate_code,
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
.restore_state_to_opc = hexagon_restore_state_to_opc,
};
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 14e6e81..79e60d4 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -150,6 +150,8 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
typedef HexagonCPU ArchCPU;
void hexagon_translate_init(void);
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#include "exec/cpu-all.h"
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 05a56d8..c557141 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -43,112 +43,51 @@
#define WAY_BIG_EXP 4096
-typedef union {
- double f;
- uint64_t i;
- struct {
- uint64_t mant:52;
- uint64_t exp:11;
- uint64_t sign:1;
- };
-} Double;
-
-typedef union {
- float f;
- uint32_t i;
- struct {
- uint32_t mant:23;
- uint32_t exp:8;
- uint32_t sign:1;
- };
-} Float;
-
static uint64_t float64_getmant(float64 f64)
{
- Double a = { .i = f64 };
+ uint64_t mant = extract64(f64, 0, 52);
if (float64_is_normal(f64)) {
- return a.mant | 1ULL << 52;
+ return mant | 1ULL << 52;
}
if (float64_is_zero(f64)) {
return 0;
}
if (float64_is_denormal(f64)) {
- return a.mant;
+ return mant;
}
return ~0ULL;
}
int32_t float64_getexp(float64 f64)
{
- Double a = { .i = f64 };
+ int exp = extract64(f64, 52, 11);
if (float64_is_normal(f64)) {
- return a.exp;
+ return exp;
}
if (float64_is_denormal(f64)) {
- return a.exp + 1;
+ return exp + 1;
}
return -1;
}
-static uint64_t float32_getmant(float32 f32)
-{
- Float a = { .i = f32 };
- if (float32_is_normal(f32)) {
- return a.mant | 1ULL << 23;
- }
- if (float32_is_zero(f32)) {
- return 0;
- }
- if (float32_is_denormal(f32)) {
- return a.mant;
- }
- return ~0ULL;
-}
-
int32_t float32_getexp(float32 f32)
{
- Float a = { .i = f32 };
+ int exp = float32_getexp_raw(f32);
if (float32_is_normal(f32)) {
- return a.exp;
+ return exp;
}
if (float32_is_denormal(f32)) {
- return a.exp + 1;
+ return exp + 1;
}
return -1;
}
-static uint32_t int128_getw0(Int128 x)
-{
- return int128_getlo(x);
-}
-
-static uint32_t int128_getw1(Int128 x)
-{
- return int128_getlo(x) >> 32;
-}
-
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
{
- Int128 a, b;
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
-
- a = int128_make64(ai);
- b = int128_make64(bi);
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
-
- pp1s = pp1a + pp1b;
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
- pp2 += (1ULL << 32);
- }
- uint64_t ret_low = pp0 + (pp1s << 32);
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
- pp2 += 1;
- }
+ uint64_t l, h;
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
+ mulu64(&l, &h, ai, bi);
+ return int128_make128(l, h);
}
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
@@ -369,298 +308,129 @@ float32 infinite_float32(uint8_t sign)
}
/* Return a maximum finite value with the requested sign */
-static float32 maxfinite_float32(uint8_t sign)
+static float64 accum_round_float64(Accum a, float_status *fp_status)
{
- if (sign) {
- return make_float32(SF_MINUS_MAXF);
- } else {
- return make_float32(SF_MAXF);
- }
-}
-
-/* Return a zero value with requested sign */
-static float32 zero_float32(uint8_t sign)
-{
- if (sign) {
- return make_float32(0x80000000);
- } else {
- return float32_zero;
+ uint64_t ret;
+
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
+ && ((a.guard | a.round | a.sticky) == 0)) {
+ /* result zero */
+ switch (fp_status->float_rounding_mode) {
+ case float_round_down:
+ return zero_float64(1);
+ default:
+ return zero_float64(0);
+ }
}
-}
-
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
-{ \
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
- && ((a.guard | a.round | a.sticky) == 0)) { \
- /* result zero */ \
- switch (fp_status->float_rounding_mode) { \
- case float_round_down: \
- return zero_##SUFFIX(1); \
- default: \
- return zero_##SUFFIX(0); \
- } \
- } \
- /* Normalize right */ \
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
- /* So we need to normalize right while the high word is non-zero and \
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
- while ((int128_gethi(a.mant) != 0) || \
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
- a = accum_norm_right(a, 1); \
- } \
- /* \
- * OK, now normalize left \
- * We want to normalize left until we have a leading one in bit 24 \
- * Theoretically, we only need to shift a maximum of one to the left if we \
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
- * should be 0 \
- */ \
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
- a = accum_norm_left(a); \
- } \
- /* \
- * OK, now we might need to denormalize because of potential underflow. \
- * We need to do this before rounding, and rounding might make us normal \
- * again \
- */ \
- while (a.exp <= 0) { \
- a = accum_norm_right(a, 1 - a.exp); \
- /* \
- * Do we have underflow? \
- * That's when we get an inexact answer because we ran out of bits \
- * in a denormal. \
- */ \
- if (a.guard || a.round || a.sticky) { \
- float_raise(float_flag_underflow, fp_status); \
- } \
- } \
- /* OK, we're relatively canonical... now we need to round */ \
- if (a.guard || a.round || a.sticky) { \
- float_raise(float_flag_inexact, fp_status); \
- switch (fp_status->float_rounding_mode) { \
- case float_round_to_zero: \
- /* Chop and we're done */ \
- break; \
- case float_round_up: \
- if (a.sign == 0) { \
- a.mant = int128_add(a.mant, int128_one()); \
- } \
- break; \
- case float_round_down: \
- if (a.sign != 0) { \
- a.mant = int128_add(a.mant, int128_one()); \
- } \
- break; \
- default: \
- if (a.round || a.sticky) { \
- /* round up if guard is 1, down if guard is zero */ \
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
- } else if (a.guard) { \
- /* exactly .5, round up if odd */ \
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
- } \
- break; \
- } \
- } \
- /* \
- * OK, now we might have carried all the way up. \
- * So we might need to shr once \
- * at least we know that the lsb should be zero if we rounded and \
- * got a carry out... \
- */ \
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
- a = accum_norm_right(a, 1); \
- } \
- /* Overflow? */ \
- if (a.exp >= INF_EXP) { \
- /* Yep, inf result */ \
- float_raise(float_flag_overflow, fp_status); \
- float_raise(float_flag_inexact, fp_status); \
- switch (fp_status->float_rounding_mode) { \
- case float_round_to_zero: \
- return maxfinite_##SUFFIX(a.sign); \
- case float_round_up: \
- if (a.sign == 0) { \
- return infinite_##SUFFIX(a.sign); \
- } else { \
- return maxfinite_##SUFFIX(a.sign); \
- } \
- case float_round_down: \
- if (a.sign != 0) { \
- return infinite_##SUFFIX(a.sign); \
- } else { \
- return maxfinite_##SUFFIX(a.sign); \
- } \
- default: \
- return infinite_##SUFFIX(a.sign); \
- } \
- } \
- /* Underflow? */ \
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
- /* Leading one means: No, we're normal. So, we should be done... */ \
- INTERNAL_TYPE ret; \
- ret.i = 0; \
- ret.sign = a.sign; \
- ret.exp = a.exp; \
- ret.mant = int128_getlo(a.mant); \
- return ret.i; \
- } \
- assert(a.exp == 1); \
- INTERNAL_TYPE ret; \
- ret.i = 0; \
- ret.sign = a.sign; \
- ret.exp = 0; \
- ret.mant = int128_getlo(a.mant); \
- return ret.i; \
-}
-
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
-
-static bool is_inf_prod(float64 a, float64 b)
-{
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
-}
-
-static float64 special_fma(float64 a, float64 b, float64 c,
- float_status *fp_status)
-{
- float64 ret = make_float64(0);
-
/*
- * If A multiplied by B is an exact infinity and C is also an infinity
- * but with the opposite sign, FMA returns NaN and raises invalid.
+ * Normalize right
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
+ * So we need to normalize right while the high word is non-zero and
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
*/
- uint8_t a_sign = float64_is_neg(a);
- uint8_t b_sign = float64_is_neg(b);
- uint8_t c_sign = float64_is_neg(c);
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
- if ((a_sign ^ b_sign) != c_sign) {
- ret = make_float64(DF_NAN);
- float_raise(float_flag_invalid, fp_status);
- return ret;
- }
- }
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_zero(a) && float64_is_infinity(b))) {
- ret = make_float64(DF_NAN);
- float_raise(float_flag_invalid, fp_status);
- return ret;
+ while ((int128_gethi(a.mant) != 0) ||
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
+ a = accum_norm_right(a, 1);
}
/*
- * If none of the above checks are true and C is a NaN,
- * a NaN shall be returned
- * If A or B are NaN, a NAN shall be returned.
+ * OK, now normalize left
+ * We want to normalize left until we have a leading one in bit 24
+ * Theoretically, we only need to shift a maximum of one to the left if we
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
+ * should be 0
*/
- if (float64_is_any_nan(a) ||
- float64_is_any_nan(b) ||
- float64_is_any_nan(c)) {
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- ret = make_float64(DF_NAN);
- return ret;
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
+ a = accum_norm_left(a);
}
/*
- * We have checked for adding opposite-signed infinities.
- * Other infinities return infinity with the correct sign
+ * OK, now we might need to denormalize because of potential underflow.
+ * We need to do this before rounding, and rounding might make us normal
+ * again
*/
- if (float64_is_infinity(c)) {
- ret = infinite_float64(c_sign);
- return ret;
+ while (a.exp <= 0) {
+ a = accum_norm_right(a, 1 - a.exp);
+ /*
+ * Do we have underflow?
+ * That's when we get an inexact answer because we ran out of bits
+ * in a denormal.
+ */
+ if (a.guard || a.round || a.sticky) {
+ float_raise(float_flag_underflow, fp_status);
+ }
}
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
- ret = infinite_float64(a_sign ^ b_sign);
- return ret;
+ /* OK, we're relatively canonical... now we need to round */
+ if (a.guard || a.round || a.sticky) {
+ float_raise(float_flag_inexact, fp_status);
+ switch (fp_status->float_rounding_mode) {
+ case float_round_to_zero:
+ /* Chop and we're done */
+ break;
+ case float_round_up:
+ if (a.sign == 0) {
+ a.mant = int128_add(a.mant, int128_one());
+ }
+ break;
+ case float_round_down:
+ if (a.sign != 0) {
+ a.mant = int128_add(a.mant, int128_one());
+ }
+ break;
+ default:
+ if (a.round || a.sticky) {
+ /* round up if guard is 1, down if guard is zero */
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
+ } else if (a.guard) {
+ /* exactly .5, round up if odd */
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
+ }
+ break;
+ }
}
- g_assert_not_reached();
-}
-
-static float32 special_fmaf(float32 a, float32 b, float32 c,
- float_status *fp_status)
-{
- float64 aa, bb, cc;
- aa = float32_to_float64(a, fp_status);
- bb = float32_to_float64(b, fp_status);
- cc = float32_to_float64(c, fp_status);
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
-}
-
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
- float_status *fp_status)
-{
- Accum prod;
- Accum acc;
- Accum result;
- accum_init(&prod);
- accum_init(&acc);
- accum_init(&result);
-
- uint8_t a_sign = float32_is_neg(a);
- uint8_t b_sign = float32_is_neg(b);
- uint8_t c_sign = float32_is_neg(c);
- if (float32_is_infinity(a) ||
- float32_is_infinity(b) ||
- float32_is_infinity(c)) {
- return special_fmaf(a, b, c, fp_status);
- }
- if (float32_is_any_nan(a) ||
- float32_is_any_nan(b) ||
- float32_is_any_nan(c)) {
- return special_fmaf(a, b, c, fp_status);
- }
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
- float32 tmp = float32_mul(a, b, fp_status);
- tmp = float32_add(tmp, c, fp_status);
- return tmp;
- }
-
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
-
/*
- * Note: extracting the mantissa into an int is multiplying by
- * 2**23, so adjust here
+ * OK, now we might have carried all the way up.
+ * So we might need to shr once
+ * at least we know that the lsb should be zero if we rounded and
+ * got a carry out...
*/
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
- prod.sign = a_sign ^ b_sign;
- if (float32_is_zero(a) || float32_is_zero(b)) {
- prod.exp = -2 * WAY_BIG_EXP;
- }
- if ((scale > 0) && float32_is_denormal(c)) {
- acc.mant = int128_mul_6464(0, 0);
- acc.exp = -WAY_BIG_EXP;
- acc.sign = c_sign;
- acc.sticky = 1;
- result = accum_add(prod, acc);
- } else if (!float32_is_zero(c)) {
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
- acc.exp = float32_getexp(c);
- acc.sign = c_sign;
- result = accum_add(prod, acc);
- } else {
- result = prod;
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
+ a = accum_norm_right(a, 1);
+ }
+ /* Overflow? */
+ if (a.exp >= DF_INF_EXP) {
+ /* Yep, inf result */
+ float_raise(float_flag_overflow, fp_status);
+ float_raise(float_flag_inexact, fp_status);
+ switch (fp_status->float_rounding_mode) {
+ case float_round_to_zero:
+ return maxfinite_float64(a.sign);
+ case float_round_up:
+ if (a.sign == 0) {
+ return infinite_float64(a.sign);
+ } else {
+ return maxfinite_float64(a.sign);
+ }
+ case float_round_down:
+ if (a.sign != 0) {
+ return infinite_float64(a.sign);
+ } else {
+ return maxfinite_float64(a.sign);
+ }
+ default:
+ return infinite_float64(a.sign);
+ }
}
- result.exp += scale;
- return accum_round_float32(result, fp_status);
-}
-
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
-{
- if (float32_is_zero(a) || float32_is_zero(b)) {
- return float32_mul(a, b, fp_status);
+ /* Underflow? */
+ ret = int128_getlo(a.mant);
+ if (ret & (1ULL << DF_MANTBITS)) {
+ /* Leading one means: No, we're normal. So, we should be done... */
+ ret = deposit64(ret, 52, 11, a.exp);
+ } else {
+ assert(a.exp == 1);
+ ret = deposit64(ret, 52, 11, 0);
}
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
+ ret = deposit64(ret, 63, 1, a.sign);
+ return ret;
}
float64 internal_mpyhh(float64 a, float64 b,
@@ -685,7 +455,7 @@ float64 internal_mpyhh(float64 a, float64 b,
float64_is_infinity(b)) {
return float64_mul(a, b, fp_status);
}
- x.mant = int128_mul_6464(accumulated, 1);
+ x.mant = int128_make64(accumulated);
x.sticky = sticky;
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
index 91591d6..fed054b 100644
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@@ -30,9 +30,6 @@ static inline uint32_t float32_getexp_raw(float32 f32)
}
int32_t float32_getexp(float32 f32);
float32 infinite_float32(uint8_t sign);
-float32 internal_fmafx(float32 a, float32 b, float32 c,
- int scale, float_status *fp_status);
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
float_status *fp_status);
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 01d1a1b..6da8db8 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1045,7 +1045,7 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
{
float32 RdV;
arch_fpop_start(env);
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
arch_fpop_end(env);
return RdV;
}
@@ -1054,41 +1054,18 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
arch_fpop_start(env);
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
arch_fpop_end(env);
return RxV;
}
-static bool is_zero_prod(float32 a, float32 b)
-{
- return ((float32_is_zero(a) && is_finite(b)) ||
- (float32_is_zero(b) && is_finite(a)));
-}
-
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
-{
- float32 ret = dst;
- if (float32_is_any_nan(x)) {
- if (extract32(x, 22, 1) == 0) {
- float_raise(float_flag_invalid, fp_status);
- }
- ret = make_float32(0xffffffff); /* nan */
- }
- return ret;
-}
-
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV, float32 PuV)
{
- size4s_t tmp;
arch_fpop_start(env);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
+ float_muladd_suppress_add_product_zero,
+ &env->fp_status);
arch_fpop_end(env);
return RxV;
}
@@ -1096,86 +1073,50 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- float32 neg_RsV;
arch_fpop_start(env);
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
+ &env->fp_status);
arch_fpop_end(env);
return RxV;
}
-static bool is_inf_prod(int32_t a, int32_t b)
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
+ float32 RsV, float32 RtV, int negate)
{
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
-}
-
-float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
- float32 RsV, float32 RtV)
-{
- bool infinp;
- bool infminusinf;
- float32 tmp;
+ int flags;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- infminusinf = float32_is_infinity(RxV) &&
- is_inf_prod(RsV, RtV) &&
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
- infinp = float32_is_infinity(RxV) ||
- float32_is_infinity(RtV) ||
- float32_is_infinity(RsV);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
- set_float_exception_flags(0, &env->fp_status);
- if (float32_is_infinity(RxV) && !infinp) {
- RxV = RxV - 1;
- }
- if (infminusinf) {
- RxV = 0;
+
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV,
+ negate | float_muladd_suppress_add_product_zero,
+ &env->fp_status);
+
+ flags = get_float_exception_flags(&env->fp_status);
+ if (flags) {
+ /* Flags are suppressed by this instruction. */
+ set_float_exception_flags(0, &env->fp_status);
+
+ /* Return 0 for Inf - Inf. */
+ if (flags & float_flag_invalid_isi) {
+ RxV = 0;
+ }
}
+
arch_fpop_end(env);
return RxV;
}
-float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
+float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- bool infinp;
- bool infminusinf;
- float32 tmp;
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
+}
- arch_fpop_start(env);
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- infminusinf = float32_is_infinity(RxV) &&
- is_inf_prod(RsV, RtV) &&
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
- infinp = float32_is_infinity(RxV) ||
- float32_is_infinity(RtV) ||
- float32_is_infinity(RsV);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
- set_float_exception_flags(0, &env->fp_status);
- if (float32_is_infinity(RxV) && !infinp) {
- RxV = RxV - 1;
- }
- if (infminusinf) {
- RxV = 0;
- }
- arch_fpop_end(env);
- return RxV;
+float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
+ float32 RsV, float32 RtV)
+{
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
}
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 5621057..fe78587 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1026,8 +1026,8 @@ static const TranslatorOps hexagon_tr_ops = {
.tb_stop = hexagon_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index c9062e6..47d0160 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -223,6 +223,7 @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
static const TCGCPUOps hppa_tcg_ops = {
.initialize = hppa_translate_init,
+ .translate_code = hppa_translate_code,
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
.restore_state_to_opc = hppa_restore_state_to_opc,
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index e45ba50..22a6510 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -303,6 +303,8 @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
}
void hppa_translate_init(void);
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index d13f80f..dc04f9f 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -4869,8 +4869,8 @@ static const TranslatorOps hppa_tr_ops = {
#endif
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 696d6ef..54d8453 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -59,6 +59,8 @@ static inline target_long lshift(target_long x, int n)
/* translate.c */
void tcg_x86_init(void);
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
/* excp_helper.c */
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 231ecac..14ee038 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -109,6 +109,7 @@ static bool x86_debug_check_breakpoint(CPUState *cs)
static const TCGCPUOps x86_tcg_ops = {
.initialize = tcg_x86_init,
+ .translate_code = x86_translate_code,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.restore_state_to_opc = x86_restore_state_to_opc,
.cpu_exec_enter = x86_cpu_exec_enter,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 903553d..834aea1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3814,9 +3814,8 @@ static const TranslatorOps i386_tr_ops = {
.tb_stop = i386_tr_tb_stop,
};
-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index f5bc872..58415ff 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -795,6 +795,7 @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
static const TCGCPUOps loongarch_tcg_ops = {
.initialize = loongarch_translate_init,
+ .translate_code = loongarch_translate_code,
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
.restore_state_to_opc = loongarch_restore_state_to_opc,
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 0655ac9..ad9cf4f 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -17,6 +17,8 @@
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
void loongarch_translate_init(void);
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void G_NORETURN do_raise_exception(CPULoongArchState *env,
uint32_t exception,
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 1fca4af..68be999 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -333,8 +333,8 @@ static const TranslatorOps loongarch_tr_ops = {
.tb_stop = loongarch_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 9de8ce6..41dfdf5 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -551,6 +551,7 @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
static const TCGCPUOps m68k_tcg_ops = {
.initialize = m68k_tcg_init,
+ .translate_code = m68k_translate_code,
.restore_state_to_opc = m68k_restore_state_to_opc,
#ifndef CONFIG_USER_ONLY
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index b5bbeed..ddb0f29 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -193,6 +193,8 @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void m68k_tcg_init(void);
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void m68k_cpu_init_gdb(M68kCPU *cpu);
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 077151c..dec2967 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -6118,8 +6118,8 @@ static const TranslatorOps m68k_tr_ops = {
.tb_stop = m68k_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index eba8193..f114789 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -423,6 +423,7 @@ static const struct SysemuCPUOps mb_sysemu_ops = {
static const TCGCPUOps mb_tcg_ops = {
.initialize = mb_tcg_init,
+ .translate_code = mb_translate_code,
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
.restore_state_to_opc = mb_restore_state_to_opc,
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index 3e5a3e5..f6879ee 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -398,6 +398,8 @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
}
void mb_tcg_init(void);
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index d53995c..24005f0 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -1779,8 +1779,8 @@ static const TranslatorOps mb_tr_ops = {
.tb_stop = mb_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 1b0cf6d..e3af02a 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -547,6 +547,7 @@ static const Property mips_cpu_properties[] = {
#include "hw/core/tcg-cpu-ops.h"
static const TCGCPUOps mips_tcg_ops = {
.initialize = mips_tcg_init,
+ .translate_code = mips_translate_code,
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
.restore_state_to_opc = mips_restore_state_to_opc,
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
index aef032c..74fc130 100644
--- a/target/mips/tcg/tcg-internal.h
+++ b/target/mips/tcg/tcg-internal.h
@@ -16,6 +16,8 @@
#include "cpu.h"
void mips_tcg_init(void);
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index bd1ef4e..78b848a 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -15231,8 +15231,8 @@ static const TranslatorOps mips_tr_ops = {
.tb_stop = mips_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 7913a0c..b7bab0d 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -236,6 +236,7 @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
static const TCGCPUOps openrisc_tcg_ops = {
.initialize = openrisc_translate_init,
+ .translate_code = openrisc_translate_code,
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
.restore_state_to_opc = openrisc_restore_state_to_opc,
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index c9fe9ae..b97d2ff 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -301,6 +301,8 @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void openrisc_translate_init(void);
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
#ifndef CONFIG_USER_ONLY
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index 028ba66..7a6af18 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -1646,8 +1646,8 @@ static const TranslatorOps openrisc_tr_ops = {
.tb_stop = openrisc_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 2ffac2e..0b8b4c0 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1581,6 +1581,8 @@ extern const VMStateDescription vmstate_ppc_cpu;
/*****************************************************************************/
void ppc_translate_init(void);
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#if !defined(CONFIG_USER_ONLY)
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0fcef63..c05c2dc 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7431,6 +7431,7 @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
static const TCGCPUOps ppc_tcg_ops = {
.initialize = ppc_translate_init,
+ .translate_code = ppc_translate_code,
.restore_state_to_opc = ppc_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 8ab87f4..80638ab 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6669,8 +6669,8 @@ static const TranslatorOps ppc_tr_ops = {
.tb_stop = ppc_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 284b112..252fdb8 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -602,6 +602,9 @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
void riscv_translate_init(void);
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
+
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
uint32_t exception, uintptr_t pc);
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index f012981..8b89c99 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -135,6 +135,7 @@ static void riscv_restore_state_to_opc(CPUState *cs,
static const TCGCPUOps riscv_tcg_ops = {
.initialize = riscv_translate_init,
+ .translate_code = riscv_translate_code,
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
.restore_state_to_opc = riscv_restore_state_to_opc,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a76f67c..a992d4f 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1346,8 +1346,8 @@ static const TranslatorOps riscv_tr_ops = {
.tb_stop = riscv_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 558280c..8c50c7a 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -196,6 +196,7 @@ static const struct SysemuCPUOps rx_sysemu_ops = {
static const TCGCPUOps rx_tcg_ops = {
.initialize = rx_translate_init,
+ .translate_code = rx_translate_code,
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
.restore_state_to_opc = rx_restore_state_to_opc,
.tlb_fill = rx_cpu_tlb_fill,
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index c53593d..5ba1874 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -139,6 +139,8 @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void rx_translate_init(void);
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
#include "exec/cpu-all.h"
diff --git a/target/rx/translate.c b/target/rx/translate.c
index 4f43654..bbda703 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -2258,8 +2258,8 @@ static const TranslatorOps rx_tr_ops = {
.tb_stop = rx_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 0a6847b..97d41c2 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -362,6 +362,7 @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
static const TCGCPUOps s390_tcg_ops = {
.initialize = s390x_translate_init,
+ .translate_code = s390x_translate_code,
.restore_state_to_opc = s390x_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index 4cc4350..a750e7a 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -399,6 +399,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
/* translate.c */
void s390x_translate_init(void);
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void s390x_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data);
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 81554f2..00073c5 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6481,8 +6481,8 @@ static const TranslatorOps s390x_tr_ops = {
.disas_log = s390x_tr_disas_log,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index e9d3e12..24a2272 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -251,6 +251,7 @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
static const TCGCPUOps superh_tcg_ops = {
.initialize = sh4_translate_init,
+ .translate_code = sh4_translate_code,
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
.restore_state_to_opc = superh_restore_state_to_opc,
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index d928bcf..d536d5d 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -248,6 +248,8 @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
uintptr_t retaddr);
void sh4_translate_init(void);
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
#if !defined(CONFIG_USER_ONLY)
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index f076da9..bcdd558 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -2318,8 +2318,8 @@ static const TranslatorOps sh4_tr_ops = {
.tb_stop = sh4_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 373a335..fbd38ec 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -996,6 +996,7 @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
static const TCGCPUOps sparc_tcg_ops = {
.initialize = sparc_tcg_init,
+ .translate_code = sparc_translate_code,
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
.restore_state_to_opc = sparc_restore_state_to_opc,
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 5c98123..dda8115 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -609,6 +609,8 @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
/* translate.c */
void sparc_tcg_init(void);
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
/* fop_helper.c */
target_ulong cpu_get_fsr(CPUSPARCState *);
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 236d27b..c25097d 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -344,17 +344,17 @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
}
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
- float32 s2, float32 s3, uint32_t op)
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
{
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
check_ieee_exceptions(env, GETPC());
return ret;
}
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
- float64 s2, float64 s3, uint32_t op)
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
{
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
check_ieee_exceptions(env, GETPC());
return ret;
}
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 1ae3f0c..3a7f7dc 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -59,7 +59,7 @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
@@ -72,7 +72,7 @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 9be26c8..7e5c735 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -1359,93 +1359,109 @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+ TCGv_i32 z = tcg_constant_i32(0);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
}
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+ TCGv_i32 z = tcg_constant_i32(0);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
}
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_c;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_c;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_c | float_muladd_negate_result;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+ float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_c | float_muladd_negate_result;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+ float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_result;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_result;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(0);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(0);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_negate_c | float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_negate_c | float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_negate_result | float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_negate_result | float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
@@ -5803,8 +5819,8 @@ static const TranslatorOps sparc_tr_ops = {
.tb_stop = sparc_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = {};
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index 95fb546..95202fa 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -172,6 +172,7 @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
static const TCGCPUOps tricore_tcg_ops = {
.initialize = tricore_tcg_init,
+ .translate_code = tricore_translate_code,
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
.restore_state_to_opc = tricore_restore_state_to_opc,
.tlb_fill = tricore_cpu_tlb_fill,
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index 220af69..8e431d7 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -252,6 +252,8 @@ FIELD(TB_FLAGS, PRIV, 0, 2)
void cpu_state_reset(CPUTriCoreState *s);
void tricore_tcg_init(void);
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
uint64_t *cs_base, uint32_t *flags)
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index 2b67395..0ef3743 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -8460,9 +8460,8 @@ static const TranslatorOps tricore_tr_ops = {
.tb_stop = tricore_tr_tb_stop,
};
-
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
translator_loop(cs, tb, max_insns, pc, host_pc,
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index 0d4d79b..0910a3d 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -232,6 +232,7 @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
static const TCGCPUOps xtensa_tcg_ops = {
.initialize = xtensa_translate_init,
+ .translate_code = xtensa_translate_code,
.debug_excp_handler = xtensa_breakpoint_handler,
.restore_state_to_opc = xtensa_restore_state_to_opc,
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 77e48ee..0e6302c 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -617,6 +617,8 @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
void xtensa_collect_sr_names(const XtensaConfig *config);
void xtensa_translate_init(void);
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
void xtensa_breakpoint_handler(CPUState *cs);
void xtensa_register_core(XtensaConfigList *node);
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 3c62c99..4f02cef 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1228,8 +1228,8 @@ static const TranslatorOps xtensa_translator_ops = {
.tb_stop = xtensa_tr_tb_stop,
};
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = {};
translator_loop(cpu, tb, max_insns, pc, host_pc,
diff --git a/tcg/optimize.c b/tcg/optimize.c
index e9ef16b..c23f0d1 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -52,7 +52,7 @@ typedef struct TempOptInfo {
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
typedef struct OptContext {
@@ -64,70 +64,42 @@ typedef struct OptContext {
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
/* In flight values from optimization. */
- uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
- uint64_t s_mask; /* mask of clrsb(value) bits */
TCGType type;
} OptContext;
-/* Calculate the smask for a specific value. */
-static uint64_t smask_from_value(uint64_t value)
+static inline TempOptInfo *ts_info(TCGTemp *ts)
{
- int rep = clrsb64(value);
- return ~(~0ull >> rep);
+ return ts->state_ptr;
}
-/*
- * Calculate the smask for a given set of known-zeros.
- * If there are lots of zeros on the left, we can consider the remainder
- * an unsigned field, and thus the corresponding signed field is one bit
- * larger.
- */
-static uint64_t smask_from_zmask(uint64_t zmask)
+static inline TempOptInfo *arg_info(TCGArg arg)
{
- /*
- * Only the 0 bits are significant for zmask, thus the msb itself
- * must be zero, else we have no sign information.
- */
- int rep = clz64(zmask);
- if (rep == 0) {
- return 0;
- }
- rep -= 1;
- return ~(~0ull >> rep);
+ return ts_info(arg_temp(arg));
}
-/*
- * Recreate a properly left-aligned smask after manipulation.
- * Some bit-shuffling, particularly shifts and rotates, may
- * retain sign bits on the left, but may scatter disconnected
- * sign bits on the right. Retain only what remains to the left.
- */
-static uint64_t smask_from_smask(int64_t smask)
+static inline bool ti_is_const(TempOptInfo *ti)
{
- /* Only the 1 bits are significant for smask */
- return smask_from_zmask(~smask);
+ return ti->is_const;
}
-static inline TempOptInfo *ts_info(TCGTemp *ts)
+static inline uint64_t ti_const_val(TempOptInfo *ti)
{
- return ts->state_ptr;
+ return ti->val;
}
-static inline TempOptInfo *arg_info(TCGArg arg)
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
{
- return ts_info(arg_temp(arg));
+ return ti_is_const(ti) && ti_const_val(ti) == val;
}
static inline bool ts_is_const(TCGTemp *ts)
{
- return ts_info(ts)->is_const;
+ return ti_is_const(ts_info(ts));
}
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
- TempOptInfo *ti = ts_info(ts);
- return ti->is_const && ti->val == val;
+ return ti_is_const_val(ts_info(ts), val);
}
static inline bool arg_is_const(TCGArg arg)
@@ -174,7 +146,7 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->is_const = true;
ti->val = ts->val;
ti->z_mask = ts->val;
- ti->s_mask = smask_from_value(ts->val);
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
ti->is_const = false;
ti->z_mask = -1;
@@ -964,37 +936,31 @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
}
}
-static void finish_folding(OptContext *ctx, TCGOp *op)
+static void finish_bb(OptContext *ctx)
+{
+ /* We only optimize memory barriers across basic blocks. */
+ ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx)
+{
+ finish_bb(ctx);
+ /* We only optimize across extended basic blocks. */
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ remove_mem_copy_all(ctx);
+}
+
+static bool finish_folding(OptContext *ctx, TCGOp *op)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
int i, nb_oargs;
- /*
- * We only optimize extended basic blocks. If the opcode ends a BB
- * and is not a conditional branch, reset all temp data.
- */
- if (def->flags & TCG_OPF_BB_END) {
- ctx->prev_mb = NULL;
- if (!(def->flags & TCG_OPF_COND_BRANCH)) {
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
- remove_mem_copy_all(ctx);
- }
- return;
- }
-
nb_oargs = def->nb_oargs;
for (i = 0; i < nb_oargs; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
reset_ts(ctx, ts);
- /*
- * Save the corresponding known-zero/sign bits mask for the
- * first output argument (only one supported so far).
- */
- if (i == 0) {
- ts_info(ts)->z_mask = ctx->z_mask;
- ts_info(ts)->s_mask = ctx->s_mask;
- }
}
+ return true;
}
/*
@@ -1044,11 +1010,22 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
return fold_const2(ctx, op);
}
-static bool fold_masks(OptContext *ctx, TCGOp *op)
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ * The passed s_mask may be augmented by z_mask.
+ */
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, int64_t s_mask)
{
- uint64_t a_mask = ctx->a_mask;
- uint64_t z_mask = ctx->z_mask;
- uint64_t s_mask = ctx->s_mask;
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
+ TCGTemp *ts;
+ TempOptInfo *ti;
+ int rep;
+
+ /* Only single-output opcodes are supported here. */
+ tcg_debug_assert(def->nb_oargs == 1);
/*
* 32-bit ops generate 32-bit results, which for the purpose of
@@ -1058,16 +1035,49 @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
* type changing opcodes.
*/
if (ctx->type == TCG_TYPE_I32) {
- a_mask = (int32_t)a_mask;
z_mask = (int32_t)z_mask;
- s_mask |= MAKE_64BIT_MASK(32, 32);
- ctx->z_mask = z_mask;
- ctx->s_mask = s_mask;
+ s_mask |= INT32_MIN;
}
if (z_mask == 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
}
+
+ ts = arg_temp(op->args[0]);
+ reset_ts(ctx, ts);
+
+ ti = ts_info(ts);
+ ti->z_mask = z_mask;
+
+ /* Canonicalize s_mask and incorporate data from z_mask. */
+ rep = clz64(~s_mask);
+ rep = MAX(rep, clz64(z_mask));
+ rep = MAX(rep - 1, 0);
+ ti->s_mask = INT64_MIN >> rep;
+
+ return true;
+}
+
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
+{
+ return fold_masks_zs(ctx, op, z_mask, 0);
+}
+
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
+{
+ return fold_masks_zs(ctx, op, -1, s_mask);
+}
+
+/*
+ * An "affected" mask bit is 0 if and only if the result is identical
+ * to the first input. Thus if the entire mask is 0, the operation
+ * is equivalent to a copy.
+ */
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+{
+ if (ctx->type == TCG_TYPE_I32) {
+ a_mask = (uint32_t)a_mask;
+ }
if (a_mask == 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
}
@@ -1183,13 +1193,17 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
* 3) those that produce information about the result value.
*/
+static bool fold_or(OptContext *ctx, TCGOp *op);
+static bool fold_orc(OptContext *ctx, TCGOp *op);
+static bool fold_xor(OptContext *ctx, TCGOp *op);
+
static bool fold_add(OptContext *ctx, TCGOp *op)
{
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, 0)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
/* We cannot as yet do_constant_folding with vectors. */
@@ -1199,7 +1213,7 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
fold_xi_to_x(ctx, op, 0)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
@@ -1266,7 +1280,7 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
op->args[4] = arg_new_constant(ctx, bl);
op->args[5] = arg_new_constant(ctx, bh);
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_add2(OptContext *ctx, TCGOp *op)
@@ -1280,7 +1294,8 @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
static bool fold_and(OptContext *ctx, TCGOp *op)
{
- uint64_t z1, z2;
+ uint64_t z1, z2, z_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_i(ctx, op, 0) ||
@@ -1289,31 +1304,34 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
return true;
}
- z1 = arg_info(op->args[1])->z_mask;
- z2 = arg_info(op->args[2])->z_mask;
- ctx->z_mask = z1 & z2;
-
- /*
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
- * Bitwise operations preserve the relative quantity of the repetitions.
- */
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+ z1 = t1->z_mask;
+ z2 = t2->z_mask;
/*
* Known-zeros does not imply known-ones. Therefore unless
* arg2 is constant, we can't infer affected bits from it.
*/
- if (arg_is_const(op->args[2])) {
- ctx->a_mask = z1 & ~z2;
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
+ return true;
}
- return fold_masks(ctx, op);
+ z_mask = z1 & z2;
+
+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ s_mask = t1->s_mask & t2->s_mask;
+
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_andc(OptContext *ctx, TCGOp *op)
{
- uint64_t z1;
+ uint64_t z_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
fold_xx_to_i(ctx, op, 0) ||
@@ -1322,22 +1340,79 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
return true;
}
- z1 = arg_info(op->args[1])->z_mask;
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+ z_mask = t1->z_mask;
/*
* Known-zeros does not imply known-ones. Therefore unless
* arg2 is constant, we can't infer anything from it.
*/
- if (arg_is_const(op->args[2])) {
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
- ctx->a_mask = z1 & ~z2;
- z1 &= z2;
+ if (ti_is_const(t2)) {
+ uint64_t v2 = ti_const_val(t2);
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
+ return true;
+ }
+ z_mask &= ~v2;
}
- ctx->z_mask = z1;
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks(ctx, op);
+ s_mask = t1->s_mask & t2->s_mask;
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
+}
+
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
+{
+ /* If true and false values are the same, eliminate the cmp. */
+ if (args_are_copies(op->args[2], op->args[3])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+ }
+
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+ uint64_t tv = arg_info(op->args[2])->val;
+ uint64_t fv = arg_info(op->args[3])->val;
+
+ if (tv == -1 && fv == 0) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+ if (tv == 0 && fv == -1) {
+ if (TCG_TARGET_HAS_not_vec) {
+ op->opc = INDEX_op_not_vec;
+ return fold_not(ctx, op);
+ } else {
+ op->opc = INDEX_op_xor_vec;
+ op->args[2] = arg_new_constant(ctx, -1);
+ return fold_xor(ctx, op);
+ }
+ }
+ }
+ if (arg_is_const(op->args[2])) {
+ uint64_t tv = arg_info(op->args[2])->val;
+ if (tv == -1) {
+ op->opc = INDEX_op_or_vec;
+ op->args[2] = op->args[3];
+ return fold_or(ctx, op);
+ }
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
+ op->opc = INDEX_op_andc_vec;
+ op->args[2] = op->args[1];
+ op->args[1] = op->args[3];
+ return fold_andc(ctx, op);
+ }
+ }
+ if (arg_is_const(op->args[3])) {
+ uint64_t fv = arg_info(op->args[3])->val;
+ if (fv == 0) {
+ op->opc = INDEX_op_and_vec;
+ return fold_and(ctx, op);
+ }
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
+ op->opc = INDEX_op_orc_vec;
+ op->args[2] = op->args[1];
+ op->args[1] = op->args[3];
+ return fold_orc(ctx, op);
+ }
+ }
+ return finish_folding(ctx, op);
}
static bool fold_brcond(OptContext *ctx, TCGOp *op)
@@ -1351,8 +1426,11 @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
if (i > 0) {
op->opc = INDEX_op_br;
op->args[0] = op->args[3];
+ finish_ebb(ctx);
+ } else {
+ finish_bb(ctx);
}
- return false;
+ return true;
}
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
@@ -1443,24 +1521,27 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
}
op->opc = INDEX_op_br;
op->args[0] = label;
- break;
+ finish_ebb(ctx);
+ return true;
}
- return false;
+
+ finish_bb(ctx);
+ return true;
}
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
uint64_t z_mask, s_mask, sign;
+ TempOptInfo *t1 = arg_info(op->args[1]);
- if (arg_is_const(op->args[1])) {
- uint64_t t = arg_info(op->args[1])->val;
-
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ if (ti_is_const(t1)) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
+ do_constant_folding(op->opc, ctx->type,
+ ti_const_val(t1),
+ op->args[2]));
}
- z_mask = arg_info(op->args[1])->z_mask;
-
+ z_mask = t1->z_mask;
switch (op->opc) {
case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
@@ -1479,8 +1560,8 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
default:
g_assert_not_reached();
}
- s_mask = smask_from_zmask(z_mask);
+ s_mask = 0;
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
case TCG_BSWAP_OZ:
break;
@@ -1488,19 +1569,17 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
/* If the sign bit may be 1, force all the bits above to 1. */
if (z_mask & sign) {
z_mask |= sign;
- s_mask = sign << 1;
}
+ /* The value and therefore s_mask is explicitly sign-extended. */
+ s_mask = sign;
break;
default:
/* The high bits are undefined: force all bits above the sign to 1. */
z_mask |= sign << 1;
- s_mask = 0;
break;
}
- ctx->z_mask = z_mask;
- ctx->s_mask = s_mask;
- return fold_masks(ctx, op);
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_call(OptContext *ctx, TCGOp *op)
@@ -1540,12 +1619,44 @@ static bool fold_call(OptContext *ctx, TCGOp *op)
return true;
}
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
+{
+ /* Canonicalize the comparison to put immediate second. */
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[3] = tcg_swap_cond(op->args[3]);
+ }
+ return finish_folding(ctx, op);
+}
+
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
+{
+ /* If true and false values are the same, eliminate the cmp. */
+ if (args_are_copies(op->args[3], op->args[4])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+ }
+
+ /* Canonicalize the comparison to put immediate second. */
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[5] = tcg_swap_cond(op->args[5]);
+ }
+ /*
+ * Canonicalize the "false" input reg to match the destination,
+ * so that the tcg backend can implement "move if true".
+ */
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+ op->args[5] = tcg_invert_cond(op->args[5]);
+ }
+ return finish_folding(ctx, op);
+}
+
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask;
+ uint64_t z_mask, s_mask;
+ TempOptInfo *t1 = arg_info(op->args[1]);
+ TempOptInfo *t2 = arg_info(op->args[2]);
- if (arg_is_const(op->args[1])) {
- uint64_t t = arg_info(op->args[1])->val;
+ if (ti_is_const(t1)) {
+ uint64_t t = ti_const_val(t1);
if (t != 0) {
t = do_constant_folding(op->opc, ctx->type, t, 0);
@@ -1564,79 +1675,91 @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
default:
g_assert_not_reached();
}
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
- return false;
+ s_mask = ~z_mask;
+ z_mask |= t2->z_mask;
+ s_mask &= t2->s_mask;
+
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
{
+ uint64_t z_mask;
+
if (fold_const1(ctx, op)) {
return true;
}
switch (ctx->type) {
case TCG_TYPE_I32:
- ctx->z_mask = 32 | 31;
+ z_mask = 32 | 31;
break;
case TCG_TYPE_I64:
- ctx->z_mask = 64 | 63;
+ z_mask = 64 | 63;
break;
default:
g_assert_not_reached();
}
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
- return false;
+ return fold_masks_z(ctx, op, z_mask);
}
static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
+ TempOptInfo *t1 = arg_info(op->args[1]);
+ TempOptInfo *t2 = arg_info(op->args[2]);
+ int ofs = op->args[3];
+ int len = op->args[4];
+ int width;
TCGOpcode and_opc;
+ uint64_t z_mask, s_mask;
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t t1 = arg_info(op->args[1])->val;
- uint64_t t2 = arg_info(op->args[2])->val;
-
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
+ if (ti_is_const(t1) && ti_is_const(t2)) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
+ deposit64(ti_const_val(t1), ofs, len,
+ ti_const_val(t2)));
}
switch (ctx->type) {
case TCG_TYPE_I32:
and_opc = INDEX_op_and_i32;
+ width = 32;
break;
case TCG_TYPE_I64:
and_opc = INDEX_op_and_i64;
+ width = 64;
break;
default:
g_assert_not_reached();
}
/* Inserting a value into zero at offset 0. */
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
op->opc = and_opc;
op->args[1] = op->args[2];
op->args[2] = arg_new_constant(ctx, mask);
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
- return false;
+ return fold_and(ctx, op);
}
/* Inserting zero into a value. */
- if (arg_is_const_val(op->args[2], 0)) {
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
+ if (ti_is_const_val(t2, 0)) {
+ uint64_t mask = deposit64(-1, ofs, len, 0);
op->opc = and_opc;
op->args[2] = arg_new_constant(ctx, mask);
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
- return false;
+ return fold_and(ctx, op);
}
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
- op->args[3], op->args[4],
- arg_info(op->args[2])->z_mask);
- return false;
+ /* The s_mask from the top portion of the deposit is still valid. */
+ if (ofs + len == width) {
+ s_mask = t2->s_mask << ofs;
+ } else {
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
+ }
+
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_divide(OptContext *ctx, TCGOp *op)
@@ -1645,7 +1768,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
fold_xi_to_x(ctx, op, 1)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_dup(OptContext *ctx, TCGOp *op)
@@ -1655,7 +1778,7 @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
t = dup_const(TCGOP_VECE(op), t);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_dup2(OptContext *ctx, TCGOp *op)
@@ -1670,45 +1793,43 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
op->opc = INDEX_op_dup_vec;
TCGOP_VECE(op) = MO_32;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask;
+
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, -1) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return false;
+ s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks_s(ctx, op, s_mask);
}
static bool fold_extract(OptContext *ctx, TCGOp *op)
{
uint64_t z_mask_old, z_mask;
+ TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
- if (arg_is_const(op->args[1])) {
- uint64_t t;
-
- t = arg_info(op->args[1])->val;
- t = extract64(t, pos, len);
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ if (ti_is_const(t1)) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
+ extract64(ti_const_val(t1), pos, len));
}
- z_mask_old = arg_info(op->args[1])->z_mask;
+ z_mask_old = t1->z_mask;
z_mask = extract64(z_mask_old, pos, len);
- if (pos == 0) {
- ctx->a_mask = z_mask_old ^ z_mask;
+ if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+ return true;
}
- ctx->z_mask = z_mask;
- ctx->s_mask = smask_from_zmask(z_mask);
- return fold_masks(ctx, op);
+ return fold_masks_z(ctx, op, z_mask);
}
static bool fold_extract2(OptContext *ctx, TCGOp *op)
@@ -1727,54 +1848,49 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
}
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_exts(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask_old, s_mask, z_mask, sign;
+ uint64_t s_mask_old, s_mask, z_mask;
bool type_change = false;
+ TempOptInfo *t1;
if (fold_const1(ctx, op)) {
return true;
}
- z_mask = arg_info(op->args[1])->z_mask;
- s_mask = arg_info(op->args[1])->s_mask;
+ t1 = arg_info(op->args[1]);
+ z_mask = t1->z_mask;
+ s_mask = t1->s_mask;
s_mask_old = s_mask;
switch (op->opc) {
CASE_OP_32_64(ext8s):
- sign = INT8_MIN;
- z_mask = (uint8_t)z_mask;
+ s_mask |= INT8_MIN;
+ z_mask = (int8_t)z_mask;
break;
CASE_OP_32_64(ext16s):
- sign = INT16_MIN;
- z_mask = (uint16_t)z_mask;
+ s_mask |= INT16_MIN;
+ z_mask = (int16_t)z_mask;
break;
case INDEX_op_ext_i32_i64:
type_change = true;
QEMU_FALLTHROUGH;
case INDEX_op_ext32s_i64:
- sign = INT32_MIN;
- z_mask = (uint32_t)z_mask;
+ s_mask |= INT32_MIN;
+ z_mask = (int32_t)z_mask;
break;
default:
g_assert_not_reached();
}
- if (z_mask & sign) {
- z_mask |= sign;
- }
- s_mask |= sign << 1;
-
- ctx->z_mask = z_mask;
- ctx->s_mask = s_mask;
- if (!type_change) {
- ctx->a_mask = s_mask & ~s_mask_old;
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ return true;
}
- return fold_masks(ctx, op);
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_extu(OptContext *ctx, TCGOp *op)
@@ -1810,12 +1926,11 @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
g_assert_not_reached();
}
- ctx->z_mask = z_mask;
- ctx->s_mask = smask_from_zmask(z_mask);
- if (!type_change) {
- ctx->a_mask = z_mask_old ^ z_mask;
+ if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+ return true;
}
- return fold_masks(ctx, op);
+
+ return fold_masks_z(ctx, op, z_mask);
}
static bool fold_mb(OptContext *ctx, TCGOp *op)
@@ -1849,6 +1964,8 @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
+ uint64_t z_mask, s_mask;
+ TempOptInfo *tt, *ft;
int i;
/* If true and false values are the same, eliminate the cmp. */
@@ -1870,14 +1987,14 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
}
- ctx->z_mask = arg_info(op->args[3])->z_mask
- | arg_info(op->args[4])->z_mask;
- ctx->s_mask = arg_info(op->args[3])->s_mask
- & arg_info(op->args[4])->s_mask;
+ tt = arg_info(op->args[3]);
+ ft = arg_info(op->args[4]);
+ z_mask = tt->z_mask | ft->z_mask;
+ s_mask = tt->s_mask & ft->s_mask;
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
- uint64_t tv = arg_info(op->args[3])->val;
- uint64_t fv = arg_info(op->args[4])->val;
+ if (ti_is_const(tt) && ti_is_const(ft)) {
+ uint64_t tv = ti_const_val(tt);
+ uint64_t fv = ti_const_val(ft);
TCGOpcode opc, negopc = 0;
TCGCond cond = op->args[5];
@@ -1916,7 +2033,8 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
}
}
}
- return false;
+
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_mul(OptContext *ctx, TCGOp *op)
@@ -1926,7 +2044,7 @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
fold_xi_to_x(ctx, op, 1)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
@@ -1935,7 +2053,7 @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
fold_xi_to_i(ctx, op, 0)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
@@ -1980,33 +2098,30 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
tcg_opt_gen_movi(ctx, op2, rh, h);
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask;
+
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, -1)) {
return true;
}
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return false;
+ s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks_s(ctx, op, s_mask);
}
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
{
/* Set to 1 all bits to the left of the rightmost. */
uint64_t z_mask = arg_info(op->args[1])->z_mask;
- ctx->z_mask = -(z_mask & -z_mask);
+ z_mask = -(z_mask & -z_mask);
- /*
- * Because of fold_sub_to_neg, we want to always return true,
- * via finish_folding.
- */
- finish_folding(ctx, op);
- return true;
+ return fold_masks_z(ctx, op, z_mask);
}
static bool fold_neg(OptContext *ctx, TCGOp *op)
@@ -2016,14 +2131,16 @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask;
+
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return false;
+ s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks_s(ctx, op, s_mask);
}
static bool fold_not(OptContext *ctx, TCGOp *op)
@@ -2031,31 +2148,31 @@ static bool fold_not(OptContext *ctx, TCGOp *op)
if (fold_const1(ctx, op)) {
return true;
}
-
- ctx->s_mask = arg_info(op->args[1])->s_mask;
-
- /* Because of fold_to_not, we want to always return true, via finish. */
- finish_folding(ctx, op);
- return true;
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
}
static bool fold_or(OptContext *ctx, TCGOp *op)
{
+ uint64_t z_mask, s_mask;
+ TempOptInfo *t1, *t2;
+
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, 0) ||
fold_xx_to_x(ctx, op)) {
return true;
}
- ctx->z_mask = arg_info(op->args[1])->z_mask
- | arg_info(op->args[2])->z_mask;
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks(ctx, op);
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+ z_mask = t1->z_mask | t2->z_mask;
+ s_mask = t1->s_mask & t2->s_mask;
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_orc(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask;
+
if (fold_const2(ctx, op) ||
fold_xx_to_i(ctx, op, -1) ||
fold_xi_to_x(ctx, op, -1) ||
@@ -2063,36 +2180,45 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
return true;
}
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return false;
+ s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks_s(ctx, op, s_mask);
}
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
MemOp mop = get_memop(oi);
int width = 8 * memop_size(mop);
+ uint64_t z_mask = -1, s_mask = 0;
if (width < 64) {
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
- if (!(mop & MO_SIGN)) {
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
- ctx->s_mask <<= 1;
+ if (mop & MO_SIGN) {
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
+ } else {
+ z_mask = MAKE_64BIT_MASK(0, width);
}
}
/* Opcodes that touch guest memory stop the mb optimization. */
ctx->prev_mb = NULL;
- return false;
+
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
+}
+
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
+{
+ /* Opcodes that touch guest memory stop the mb optimization. */
+ ctx->prev_mb = NULL;
+ return finish_folding(ctx, op);
}
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
{
/* Opcodes that touch guest memory stop the mb optimization. */
ctx->prev_mb = NULL;
- return false;
+ return true;
}
static bool fold_remainder(OptContext *ctx, TCGOp *op)
@@ -2101,10 +2227,11 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
fold_xx_to_i(ctx, op, 0)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
{
uint64_t a_zmask, b_val;
TCGCond cond;
@@ -2199,11 +2326,10 @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
op->opc = xor_opc;
op->args[2] = arg_new_constant(ctx, 1);
}
- return false;
+ return -1;
}
}
-
- return false;
+ return 0;
}
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
@@ -2308,14 +2434,15 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
- if (fold_setcond_zmask(ctx, op, false)) {
+ i = fold_setcond_zmask(ctx, op, false);
+ if (i > 0) {
return true;
}
- fold_setcond_tst_pow2(ctx, op, false);
+ if (i == 0) {
+ fold_setcond_tst_pow2(ctx, op, false);
+ }
- ctx->z_mask = 1;
- ctx->s_mask = smask_from_zmask(1);
- return false;
+ return fold_masks_z(ctx, op, 1);
}
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
@@ -2326,14 +2453,16 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
}
- if (fold_setcond_zmask(ctx, op, true)) {
+ i = fold_setcond_zmask(ctx, op, true);
+ if (i > 0) {
return true;
}
- fold_setcond_tst_pow2(ctx, op, true);
+ if (i == 0) {
+ fold_setcond_tst_pow2(ctx, op, true);
+ }
/* Value is {0,-1} so all bits are repetitions of the sign. */
- ctx->s_mask = -1;
- return false;
+ return fold_masks_s(ctx, op, -1);
}
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
@@ -2414,77 +2543,40 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
return fold_setcond(ctx, op);
}
- ctx->z_mask = 1;
- ctx->s_mask = smask_from_zmask(1);
- return false;
+ return fold_masks_z(ctx, op, 1);
do_setcond_const:
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
-{
- /* Canonicalize the comparison to put immediate second. */
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
- op->args[3] = tcg_swap_cond(op->args[3]);
- }
- return false;
-}
-
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
-{
- /* If true and false values are the same, eliminate the cmp. */
- if (args_are_copies(op->args[3], op->args[4])) {
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
- }
-
- /* Canonicalize the comparison to put immediate second. */
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
- op->args[5] = tcg_swap_cond(op->args[5]);
- }
- /*
- * Canonicalize the "false" input reg to match the destination,
- * so that the tcg backend can implement "move if true".
- */
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
- op->args[5] = tcg_invert_cond(op->args[5]);
- }
- return false;
-}
-
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
uint64_t z_mask, s_mask, s_mask_old;
+ TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
- if (arg_is_const(op->args[1])) {
- uint64_t t;
-
- t = arg_info(op->args[1])->val;
- t = sextract64(t, pos, len);
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ if (ti_is_const(t1)) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
+ sextract64(ti_const_val(t1), pos, len));
}
- z_mask = arg_info(op->args[1])->z_mask;
- z_mask = sextract64(z_mask, pos, len);
- ctx->z_mask = z_mask;
+ s_mask_old = t1->s_mask;
+ s_mask = s_mask_old >> pos;
+ s_mask |= -1ull << (len - 1);
- s_mask_old = arg_info(op->args[1])->s_mask;
- s_mask = sextract64(s_mask_old, pos, len);
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
- ctx->s_mask = s_mask;
-
- if (pos == 0) {
- ctx->a_mask = s_mask & ~s_mask_old;
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ return true;
}
- return fold_masks(ctx, op);
+ z_mask = sextract64(t1->z_mask, pos, len);
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask, z_mask, sign;
+ uint64_t s_mask, z_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
fold_ix_to_i(ctx, op, 0) ||
@@ -2492,18 +2584,18 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
return true;
}
- s_mask = arg_info(op->args[1])->s_mask;
- z_mask = arg_info(op->args[1])->z_mask;
-
- if (arg_is_const(op->args[2])) {
- int sh = arg_info(op->args[2])->val;
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+ s_mask = t1->s_mask;
+ z_mask = t1->z_mask;
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+ if (ti_is_const(t2)) {
+ int sh = ti_const_val(t2);
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
- ctx->s_mask = smask_from_smask(s_mask);
- return fold_masks(ctx, op);
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
switch (op->opc) {
@@ -2512,23 +2604,21 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
* Arithmetic right shift will not reduce the number of
* input sign repetitions.
*/
- ctx->s_mask = s_mask;
- break;
+ return fold_masks_s(ctx, op, s_mask);
CASE_OP_32_64(shr):
/*
* If the sign bit is known zero, then logical right shift
- * will not reduced the number of input sign repetitions.
+ * will not reduce the number of input sign repetitions.
*/
- sign = (s_mask & -s_mask) >> 1;
- if (sign && !(z_mask & sign)) {
- ctx->s_mask = s_mask;
+ if (~z_mask & -s_mask) {
+ return fold_masks_s(ctx, op, s_mask);
}
break;
default:
break;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
@@ -2575,12 +2665,15 @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
fold_sub_to_neg(ctx, op)) {
return true;
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_sub(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 0) ||
+ fold_sub_to_neg(ctx, op)) {
return true;
}
@@ -2592,7 +2685,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
? INDEX_op_add_i32 : INDEX_op_add_i64);
op->args[2] = arg_new_constant(ctx, -val);
}
- return false;
+ return finish_folding(ctx, op);
}
static bool fold_sub2(OptContext *ctx, TCGOp *op)
@@ -2602,33 +2695,32 @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
+ uint64_t z_mask = -1, s_mask = 0;
+
/* We can't do any folding with a load, but we can record bits. */
switch (op->opc) {
CASE_OP_32_64(ld8s):
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+ s_mask = INT8_MIN;
break;
CASE_OP_32_64(ld8u):
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
- ctx->s_mask = MAKE_64BIT_MASK(9, 55);
+ z_mask = MAKE_64BIT_MASK(0, 8);
break;
CASE_OP_32_64(ld16s):
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
+ s_mask = INT16_MIN;
break;
CASE_OP_32_64(ld16u):
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
- ctx->s_mask = MAKE_64BIT_MASK(17, 47);
+ z_mask = MAKE_64BIT_MASK(0, 16);
break;
case INDEX_op_ld32s_i64:
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
+ s_mask = INT32_MIN;
break;
case INDEX_op_ld32u_i64:
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
- ctx->s_mask = MAKE_64BIT_MASK(33, 31);
+ z_mask = MAKE_64BIT_MASK(0, 32);
break;
default:
g_assert_not_reached();
}
- return false;
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
@@ -2638,7 +2730,7 @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
TCGType type;
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
- return false;
+ return finish_folding(ctx, op);
}
type = ctx->type;
@@ -2661,7 +2753,7 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
remove_mem_copy_all(ctx);
- return false;
+ return true;
}
switch (op->opc) {
@@ -2685,7 +2777,7 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
g_assert_not_reached();
}
remove_mem_copy_in(ctx, ofs, ofs + lm1);
- return false;
+ return true;
}
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
@@ -2695,8 +2787,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
TCGType type;
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
- fold_tcg_st(ctx, op);
- return false;
+ return fold_tcg_st(ctx, op);
}
src = arg_temp(op->args[0]);
@@ -2718,11 +2809,14 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
last = ofs + tcg_type_size(type) - 1;
remove_mem_copy_in(ctx, ofs, last);
record_mem_copy(ctx, type, src, ofs, last);
- return false;
+ return true;
}
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
+ uint64_t z_mask, s_mask;
+ TempOptInfo *t1, *t2;
+
if (fold_const2_commutative(ctx, op) ||
fold_xx_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, 0) ||
@@ -2730,66 +2824,11 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
return true;
}
- ctx->z_mask = arg_info(op->args[1])->z_mask
- | arg_info(op->args[2])->z_mask;
- ctx->s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks(ctx, op);
-}
-
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
-{
- /* If true and false values are the same, eliminate the cmp. */
- if (args_are_copies(op->args[2], op->args[3])) {
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
- }
-
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t tv = arg_info(op->args[2])->val;
- uint64_t fv = arg_info(op->args[3])->val;
-
- if (tv == -1 && fv == 0) {
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
- }
- if (tv == 0 && fv == -1) {
- if (TCG_TARGET_HAS_not_vec) {
- op->opc = INDEX_op_not_vec;
- return fold_not(ctx, op);
- } else {
- op->opc = INDEX_op_xor_vec;
- op->args[2] = arg_new_constant(ctx, -1);
- return fold_xor(ctx, op);
- }
- }
- }
- if (arg_is_const(op->args[2])) {
- uint64_t tv = arg_info(op->args[2])->val;
- if (tv == -1) {
- op->opc = INDEX_op_or_vec;
- op->args[2] = op->args[3];
- return fold_or(ctx, op);
- }
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
- op->opc = INDEX_op_andc_vec;
- op->args[2] = op->args[1];
- op->args[1] = op->args[3];
- return fold_andc(ctx, op);
- }
- }
- if (arg_is_const(op->args[3])) {
- uint64_t fv = arg_info(op->args[3])->val;
- if (fv == 0) {
- op->opc = INDEX_op_and_vec;
- return fold_and(ctx, op);
- }
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
- op->opc = INDEX_op_orc_vec;
- op->args[2] = op->args[1];
- op->args[1] = op->args[3];
- return fold_orc(ctx, op);
- }
- }
- return false;
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+ z_mask = t1->z_mask | t2->z_mask;
+ s_mask = t1->s_mask & t2->s_mask;
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
}
/* Propagate constants and copies, fold constant expressions. */
@@ -2835,11 +2874,6 @@ void tcg_optimize(TCGContext *s)
ctx.type = TCG_TYPE_I32;
}
- /* Assume all bits affected, no bits known zero, no sign reps. */
- ctx.a_mask = -1;
- ctx.z_mask = -1;
- ctx.s_mask = 0;
-
/*
* Process each opcode.
* Sorted alphabetically by opcode as much as possible.
@@ -2977,11 +3011,18 @@ void tcg_optimize(TCGContext *s)
break;
case INDEX_op_qemu_ld_a32_i32:
case INDEX_op_qemu_ld_a64_i32:
+ done = fold_qemu_ld_1reg(&ctx, op);
+ break;
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ done = fold_qemu_ld_1reg(&ctx, op);
+ break;
+ }
+ QEMU_FALLTHROUGH;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
- done = fold_qemu_ld(&ctx, op);
+ done = fold_qemu_ld_2reg(&ctx, op);
break;
case INDEX_op_qemu_st8_a32_i32:
case INDEX_op_qemu_st8_a64_i32:
@@ -3037,12 +3078,18 @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64_VEC(xor):
done = fold_xor(&ctx, op);
break;
+ case INDEX_op_set_label:
+ case INDEX_op_br:
+ case INDEX_op_exit_tb:
+ case INDEX_op_goto_tb:
+ case INDEX_op_goto_ptr:
+ finish_ebb(&ctx);
+ done = true;
+ break;
default:
+ done = finish_folding(&ctx, op);
break;
}
-
- if (!done) {
- finish_folding(&ctx, op);
- }
+ tcg_debug_assert(done);
}
}
diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index 65a6038..7508f6b 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -14,7 +14,6 @@
#include <stdint.h>
#include <stdbool.h>
-#include <inttypes.h>
#include <minilib.h>
#ifndef CHECK_UNALIGNED
@@ -511,8 +510,8 @@ int main(void)
int i;
bool ok = true;
- ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
- ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+ ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+ ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);
/* Run through the unsigned tests first */
for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -529,8 +528,8 @@ int main(void)
ok = do_signed_reads(true);
}
- ml_printf("Test data read: %"PRId32"\n", test_read_count);
- ml_printf("Test data write: %"PRId32"\n", test_write_count);
+ ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+ ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
return ok ? 0 : -1;
}