aboutsummaryrefslogtreecommitdiff
path: root/target
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2023-07-11 07:36:33 +0100
committerRichard Henderson <richard.henderson@linaro.org>2023-07-11 07:36:33 +0100
commit154e3b61ac9cfab9639e6d6207a96fff017040fe (patch)
tree9e2a6da765be2528cb9b7299f12247d123fd4740 /target
parentadc97c4b808bb23d6bb17b8871787333af0086d2 (diff)
parent752dfff5ecf35a38145c2dfbb842224177fd1afd (diff)
downloadqemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.zip
qemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.tar.gz
qemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.tar.bz2
Merge tag 'mips-20230710' of https://github.com/philmd/qemu into staging
MIPS patches queue - Use clock API & divider for cp0_timer to avoid rounding issue (Jiaxun) - Implement Loongson CSR instructions (Jiaxun) - Implement Ingenic MXU ASE v1 rev2 (Siarhei) - Enable GINVx support for I6400 and I6500 cores (Marcin) - Generalize PCI IDE controller models (Bernhard) # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEE+qvnXhKRciHc/Wuy4+MsLN6twN4FAmSsg5kACgkQ4+MsLN6t # wN6O4g/9GpirNnG1tizIEksI17PaAotgui2PYzml2nQLyQNmPs3lSfyDEfFpZLC6 # HGxglNjdvCgmIhRH1IuRKuJofp0r84NY+sktXjz2+As3opyjR66gVsSefWeupr7t # avZQQIBBOV3OYLzFkqjDpBflyKXz43MRW3r9ai4Dle/TwiE5GA1iKuQ6Rt55urtT # 045OdtFZTsIwTyg75pSXExAehOn5FQ4aqIODwfJYqvhkkVZ9lgWYSgUOsgDcGqPQ # eytpif6+m350Xme4BgqITMZkeIbyKcCcfU37JBqk/q6/gDDf18zSWpC7MNXea4ZR # so9ffZqms/xcIOfIO3uc4t9AZRHchiVjFHihCUKc0mBTzLy1QhQ4ybdQu3fUywaG # WziEFLrJ/qfWjixRxeDdBZamC2fSxYtcRNST7g+XttiMacvQC6aPFVfLDa+3Xjtt # TmIjx8oGdLB9BMrGMuHsOygfgi98eGbWQ2I5ZhzwBbJ7uFQdeTkMCswcAsVcj8pW # e7/ixw2e+SYFm0q9Z/QiZZ7LFDp/b3u7/ufXCUBX2r1gi7Xi+x60E6dm3Ge3XAsY # qSx9ZOlVNJlIs/ChP0KckHDMeFuCnRmNEvKC039syHWSy6VP8NO7fwwxK+XytyrK # aJMyPS97kVXuqriKZIGsV0KjLOz3neh0OdQTolPv1R5yb9tI6Xc= # =rtlE # -----END PGP SIGNATURE----- # gpg: Signature made Mon 10 Jul 2023 11:18:01 PM BST # gpg: using RSA key FAABE75E12917221DCFD6BB2E3E32C2CDEADC0DE # gpg: Good signature from "Philippe Mathieu-Daudé (F4BUG) <f4bug@amsat.org>" [full] * tag 'mips-20230710' of https://github.com/philmd/qemu: (44 commits) hw/ide/piix: Move registration of VMStateDescription to DeviceClass hw/ide/pci: Replace some magic numbers by constants hw/ide: Extract bmdma_status_writeb() hw/ide: Extract IDEBus assignment into bmdma_init() hw/isa/vt82c686: Remove via_isa_set_irq() hw/ide/via: Wire up IDE legacy interrupts in host device hw/ide/pci: Expose legacy interrupts as named GPIOs target/mips: enable GINVx support for I6400 and I6500 target/mips/mxu: Add Q8SAD instruction target/mips/mxu: Add S32SFL instruction target/mips/mxu: Add Q8MADL instruction target/mips/mxu: Add Q16SCOP 
instruction target/mips/mxu: Add Q8MAC Q8MACSU instructions target/mips/mxu: Add S32/D16/Q8- MOVZ/MOVN instructions target/mips/mxu: Add D32/Q16- SLLV/SLRV/SARV instructions target/mips/mxu: Add Q16SLL Q16SLR Q16SAR instructions target/mips/mxu: Add D32SLL D32SLR D32SAR instructions target/mips/mxu: Add D32SARL D32SARW instructions target/mips/mxu: Add S32ALN S32LUI insns target/mips/mxu: Add S32MUL S32MULU S32EXTR S32EXTRV insns ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target')
-rw-r--r--target/mips/cpu-defs.c.inc59
-rw-r--r--target/mips/cpu.c18
-rw-r--r--target/mips/cpu.h45
-rw-r--r--target/mips/helper.h4
-rw-r--r--target/mips/internal.h2
-rw-r--r--target/mips/sysemu/cp0_timer.c35
-rw-r--r--target/mips/tcg/lcsr.decode17
-rw-r--r--target/mips/tcg/lcsr_translate.c75
-rw-r--r--target/mips/tcg/meson.build2
-rw-r--r--target/mips/tcg/mxu_translate.c3753
-rw-r--r--target/mips/tcg/op_helper.c16
-rw-r--r--target/mips/tcg/sysemu/lcsr_helper.c45
-rw-r--r--target/mips/tcg/sysemu/meson.build4
-rw-r--r--target/mips/tcg/sysemu_helper.h.inc8
-rw-r--r--target/mips/tcg/translate.c10
-rw-r--r--target/mips/tcg/translate.h1
16 files changed, 3962 insertions, 132 deletions
diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc
index d45f245..03185d9 100644
--- a/target/mips/cpu-defs.c.inc
+++ b/target/mips/cpu-defs.c.inc
@@ -118,6 +118,26 @@ const mips_def_t mips_defs[] =
.mmu_type = MMU_TYPE_R4000,
},
{
+ .name = "XBurstR1",
+ .CP0_PRid = 0x1ed0024f,
+ .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_R4000 << CP0C0_MT),
+ .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) |
+ (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+ (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+ (0 << CP0C1_CA),
+ .CP0_Config2 = MIPS_CONFIG2,
+ .CP0_Config3 = MIPS_CONFIG3,
+ .CP0_LLAddr_rw_bitmask = 0,
+ .CP0_LLAddr_shift = 4,
+ .SYNCI_Step = 32,
+ .CCRes = 2,
+ .CP0_Status_rw_bitmask = 0x1278FF17,
+ .SEGBITS = 32,
+ .PABITS = 32,
+ .insn_flags = CPU_MIPS32R1 | ASE_MXU,
+ .mmu_type = MMU_TYPE_R4000,
+ },
+ {
.name = "4KEmR1",
.CP0_PRid = 0x00018500,
.CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_FMT << CP0C0_MT),
@@ -324,6 +344,32 @@ const mips_def_t mips_defs[] =
.mmu_type = MMU_TYPE_R4000,
},
{
+ .name = "XBurstR2",
+ .CP0_PRid = 0x2ed1024f,
+ .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+ (MMU_TYPE_R4000 << CP0C0_MT),
+ .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
+ (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+ (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+ (1 << CP0C1_CA),
+ .CP0_Config2 = MIPS_CONFIG2,
+ .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_DSP2P) | (1 << CP0C3_DSPP) |
+ (1 << CP0C3_VInt),
+ .CP0_LLAddr_rw_bitmask = 0,
+ .CP0_LLAddr_shift = 4,
+ .SYNCI_Step = 32,
+ .CCRes = 2,
+ .CP0_Status_rw_bitmask = 0x3778FF1F,
+ .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
+ (1 << FCR0_D) | (1 << FCR0_S) | (0x93 << FCR0_PRID),
+ .CP1_fcr31 = 0,
+ .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
+ .SEGBITS = 32,
+ .PABITS = 32,
+ .insn_flags = CPU_MIPS32R2 | ASE_MXU,
+ .mmu_type = MMU_TYPE_R4000,
+ },
+ {
.name = "M14K",
.CP0_PRid = 0x00019b00,
/* Config1 implemented, fixed mapping MMU,
@@ -709,7 +755,7 @@ const mips_def_t mips_defs[] =
.CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
(1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
- (1 << CP0C5_LLB) | (1 << CP0C5_MRP),
+ (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
.CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
(1 << CP0C5_FRE) | (1 << CP0C5_UFE),
.CP0_LLAddr_rw_bitmask = 0,
@@ -749,7 +795,7 @@ const mips_def_t mips_defs[] =
.CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) |
(1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist),
.CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) |
- (1 << CP0C5_LLB) | (1 << CP0C5_MRP),
+ (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI),
.CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) |
(1 << CP0C5_FRE) | (1 << CP0C5_UFE),
.CP0_LLAddr_rw_bitmask = 0,
@@ -895,6 +941,15 @@ const mips_def_t mips_defs[] =
.CP1_fcr31 = 0,
.CP1_fcr31_rw_bitmask = 0xFF83FFFF,
.MSAIR = (0x01 << MSAIR_ProcID) | (0x40 << MSAIR_Rev),
+ .lcsr_cpucfg1 = (1 << CPUCFG1_FP) | (2 << CPUCFG1_FPREV) |
+ (1 << CPUCFG1_MSA1) | (1 << CPUCFG1_LSLDR0) |
+ (1 << CPUCFG1_LSPERF) | (1 << CPUCFG1_LSPERFX) |
+ (1 << CPUCFG1_LSSYNCI) | (1 << CPUCFG1_LLEXC) |
+ (1 << CPUCFG1_SCRAND) | (1 << CPUCFG1_MUALP) |
+ (1 << CPUCFG1_KMUALEN) | (1 << CPUCFG1_ITLBT) |
+ (1 << CPUCFG1_SFBP) | (1 << CPUCFG1_CDMAP),
+ .lcsr_cpucfg2 = (1 << CPUCFG2_LEXT1) | (1 << CPUCFG2_LCSRP) |
+ (1 << CPUCFG2_LDISBLIKELY),
.SEGBITS = 48,
.PABITS = 48,
.insn_flags = CPU_MIPS64R2 | INSN_LOONGSON3A |
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 01e0fbe..63da194 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -244,6 +244,8 @@ static void mips_cpu_reset_hold(Object *obj)
env->CP0_PageGrain_rw_bitmask = env->cpu_model->CP0_PageGrain_rw_bitmask;
env->CP0_PageGrain = env->cpu_model->CP0_PageGrain;
env->CP0_EBaseWG_rw_bitmask = env->cpu_model->CP0_EBaseWG_rw_bitmask;
+ env->lcsr_cpucfg1 = env->cpu_model->lcsr_cpucfg1;
+ env->lcsr_cpucfg2 = env->cpu_model->lcsr_cpucfg2;
env->active_fpu.fcr0 = env->cpu_model->CP1_fcr0;
env->active_fpu.fcr31_rw_bitmask = env->cpu_model->CP1_fcr31_rw_bitmask;
env->active_fpu.fcr31 = env->cpu_model->CP1_fcr31;
@@ -449,9 +451,9 @@ static void mips_cp0_period_set(MIPSCPU *cpu)
{
CPUMIPSState *env = &cpu->env;
- env->cp0_count_ns = clock_ticks_to_ns(MIPS_CPU(cpu)->clock,
- env->cpu_model->CCRes);
- assert(env->cp0_count_ns);
+ clock_set_mul_div(cpu->count_div, env->cpu_model->CCRes, 1);
+ clock_set_source(cpu->count_div, cpu->clock);
+ clock_set_source(env->count_clock, cpu->count_div);
}
static void mips_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -504,7 +506,17 @@ static void mips_cpu_initfn(Object *obj)
cpu_set_cpustate_pointers(cpu);
cpu->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, cpu, 0);
+ cpu->count_div = clock_new(OBJECT(obj), "clk-div-count");
+ env->count_clock = clock_new(OBJECT(obj), "clk-count");
env->cpu_model = mcc->cpu_def;
+#ifndef CONFIG_USER_ONLY
+ if (mcc->cpu_def->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP)) {
+ memory_region_init_io(&env->iocsr.mr, OBJECT(cpu), NULL,
+ env, "iocsr", UINT64_MAX);
+ address_space_init(&env->iocsr.as,
+ &env->iocsr.mr, "IOCSR");
+ }
+#endif
}
static char *mips_cpu_type_name(const char *cpu_model)
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index a3bc646..f81bd06 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -3,6 +3,9 @@
#include "cpu-qom.h"
#include "exec/cpu-defs.h"
+#ifndef CONFIG_USER_ONLY
+#include "exec/memory.h"
+#endif
#include "fpu/softfloat-types.h"
#include "hw/clock.h"
#include "mips-defs.h"
@@ -1068,6 +1071,33 @@ typedef struct CPUArchState {
*/
int32_t CP0_DESAVE;
target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM];
+/*
+ * Loongson CSR CPUCFG registers
+ */
+ uint32_t lcsr_cpucfg1;
+#define CPUCFG1_FP 0
+#define CPUCFG1_FPREV 1
+#define CPUCFG1_MMI 4
+#define CPUCFG1_MSA1 5
+#define CPUCFG1_MSA2 6
+#define CPUCFG1_LSLDR0 16
+#define CPUCFG1_LSPERF 17
+#define CPUCFG1_LSPERFX 18
+#define CPUCFG1_LSSYNCI 19
+#define CPUCFG1_LLEXC 20
+#define CPUCFG1_SCRAND 21
+#define CPUCFG1_MUALP 25
+#define CPUCFG1_KMUALEN 26
+#define CPUCFG1_ITLBT 27
+#define CPUCFG1_SFBP 29
+#define CPUCFG1_CDMAP 30
+ uint32_t lcsr_cpucfg2;
+#define CPUCFG2_LEXT1 0
+#define CPUCFG2_LEXT2 1
+#define CPUCFG2_LEXT3 2
+#define CPUCFG2_LSPW 3
+#define CPUCFG2_LCSRP 27
+#define CPUCFG2_LDISBLIKELY 28
/* We waste some space so we can handle shadow registers like TCs. */
TCState tcs[MIPS_SHADOW_SET_MAX];
@@ -1156,12 +1186,18 @@ typedef struct CPUArchState {
void *irq[8];
struct MIPSITUState *itu;
MemoryRegion *itc_tag; /* ITC Configuration Tags */
+
+ /* Loongson IOCSR memory */
+ struct {
+ AddressSpace as;
+ MemoryRegion mr;
+ } iocsr;
#endif
const mips_def_t *cpu_model;
QEMUTimer *timer; /* Internal timer */
+ Clock *count_clock; /* CP0_Count clock */
target_ulong exception_base; /* ExceptionBase input to the core */
- uint64_t cp0_count_ns; /* CP0_Count clock period (in nanoseconds) */
} CPUMIPSState;
/**
@@ -1178,6 +1214,7 @@ struct ArchCPU {
/*< public >*/
Clock *clock;
+ Clock *count_div; /* Divider for CP0_Count clock */
CPUNegativeOffsetState neg;
CPUMIPSState env;
};
@@ -1280,6 +1317,12 @@ static inline bool ase_msa_available(CPUMIPSState *env)
return env->CP0_Config3 & (1 << CP0C3_MSAP);
}
+/* Check presence of Loongson CSR instructions */
+static inline bool ase_lcsr_available(CPUMIPSState *env)
+{
+ return env->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP);
+}
+
/* Check presence of multi-threading ASE implementation */
static inline bool ase_mt_available(CPUMIPSState *env)
{
diff --git a/target/mips/helper.h b/target/mips/helper.h
index de32d82..0f8462f 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -196,6 +196,10 @@ DEF_HELPER_1(rdhwr_xnp, tl, env)
DEF_HELPER_2(pmon, void, env, int)
DEF_HELPER_1(wait, void, env)
+#ifdef TARGET_MIPS64
+DEF_HELPER_FLAGS_2(lcsr_cpucfg, TCG_CALL_NO_RWG_SE, tl, env, tl)
+#endif
+
/* Loongson multimedia functions. */
DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(paddush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target/mips/internal.h b/target/mips/internal.h
index 4b0031d..1d0c026 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -79,6 +79,8 @@ struct mips_def_t {
int32_t CP0_PageGrain_rw_bitmask;
int32_t CP0_PageGrain;
target_ulong CP0_EBaseWG_rw_bitmask;
+ uint32_t lcsr_cpucfg1;
+ uint32_t lcsr_cpucfg2;
uint64_t insn_flags;
enum mips_mmu_types mmu_type;
int32_t SAARP;
diff --git a/target/mips/sysemu/cp0_timer.c b/target/mips/sysemu/cp0_timer.c
index 70de95d..9d2bcb0 100644
--- a/target/mips/sysemu/cp0_timer.c
+++ b/target/mips/sysemu/cp0_timer.c
@@ -28,15 +28,26 @@
#include "internal.h"
/* MIPS R4K timer */
+static uint32_t cpu_mips_get_count_val(CPUMIPSState *env)
+{
+ int64_t now_ns;
+ now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ return env->CP0_Count +
+ (uint32_t)clock_ns_to_ticks(env->count_clock, now_ns);
+}
+
static void cpu_mips_timer_update(CPUMIPSState *env)
{
uint64_t now_ns, next_ns;
uint32_t wait;
now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- wait = env->CP0_Compare - env->CP0_Count -
- (uint32_t)(now_ns / env->cp0_count_ns);
- next_ns = now_ns + (uint64_t)wait * env->cp0_count_ns;
+ wait = env->CP0_Compare - cpu_mips_get_count_val(env);
+ /* Clamp interval to overflow if virtual time had not progressed */
+ if (!wait) {
+ wait = UINT32_MAX;
+ }
+ next_ns = now_ns + clock_ticks_to_ns(env->count_clock, wait);
timer_mod(env->timer, next_ns);
}
@@ -64,7 +75,7 @@ uint32_t cpu_mips_get_count(CPUMIPSState *env)
cpu_mips_timer_expire(env);
}
- return env->CP0_Count + (uint32_t)(now_ns / env->cp0_count_ns);
+ return cpu_mips_get_count_val(env);
}
}
@@ -79,9 +90,8 @@ void cpu_mips_store_count(CPUMIPSState *env, uint32_t count)
env->CP0_Count = count;
} else {
/* Store new count register */
- env->CP0_Count = count -
- (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
- env->cp0_count_ns);
+ env->CP0_Count = count - (uint32_t)clock_ns_to_ticks(env->count_clock,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
/* Update timer timer */
cpu_mips_timer_update(env);
}
@@ -107,8 +117,8 @@ void cpu_mips_start_count(CPUMIPSState *env)
void cpu_mips_stop_count(CPUMIPSState *env)
{
/* Store the current value */
- env->CP0_Count += (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
- env->cp0_count_ns);
+ env->CP0_Count += (uint32_t)clock_ns_to_ticks(env->count_clock,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
}
static void mips_timer_cb(void *opaque)
@@ -121,14 +131,7 @@ static void mips_timer_cb(void *opaque)
return;
}
- /*
- * ??? This callback should occur when the counter is exactly equal to
- * the comparator value. Offset the count by one to avoid immediately
- * retriggering the callback before any virtual time has passed.
- */
- env->CP0_Count++;
cpu_mips_timer_expire(env);
- env->CP0_Count--;
}
void cpu_mips_clock_init(MIPSCPU *cpu)
diff --git a/target/mips/tcg/lcsr.decode b/target/mips/tcg/lcsr.decode
new file mode 100644
index 0000000..960ef8b
--- /dev/null
+++ b/target/mips/tcg/lcsr.decode
@@ -0,0 +1,17 @@
+# Loongson CSR instructions
+#
+# Copyright (C) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+
+&r rs rt rd sa
+
+@rs_rd ...... rs:5 ..... rd:5 ..... ...... &r rt=0 sa=0
+
+CPUCFG 110010 ..... 01000 ..... 00100 011000 @rs_rd
+
+RDCSR 110010 ..... 00000 ..... 00100 011000 @rs_rd
+WRCSR 110010 ..... 00001 ..... 00100 011000 @rs_rd
+DRDCSR 110010 ..... 00010 ..... 00100 011000 @rs_rd
+DWRCSR 110010 ..... 00011 ..... 00100 011000 @rs_rd
diff --git a/target/mips/tcg/lcsr_translate.c b/target/mips/tcg/lcsr_translate.c
new file mode 100644
index 0000000..9f2a5f4
--- /dev/null
+++ b/target/mips/tcg/lcsr_translate.c
@@ -0,0 +1,75 @@
+/*
+ * Loongson CSR instructions translation routines
+ *
+ * Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "exec/helper-gen.h"
+#include "translate.h"
+
+/* Include the auto-generated decoder. */
+#include "decode-lcsr.c.inc"
+
+static bool trans_CPUCFG(DisasContext *ctx, arg_CPUCFG *a)
+{
+ TCGv dest = tcg_temp_new();
+ TCGv src1 = tcg_temp_new();
+
+ gen_load_gpr(src1, a->rs);
+ gen_helper_lcsr_cpucfg(dest, cpu_env, src1);
+ gen_store_gpr(dest, a->rd);
+
+ return true;
+}
+
+#ifndef CONFIG_USER_ONLY
+static bool gen_rdcsr(DisasContext *ctx, arg_r *a,
+ void (*func)(TCGv, TCGv_ptr, TCGv))
+{
+ TCGv dest = tcg_temp_new();
+ TCGv src1 = tcg_temp_new();
+
+ check_cp0_enabled(ctx);
+ gen_load_gpr(src1, a->rs);
+ func(dest, cpu_env, src1);
+ gen_store_gpr(dest, a->rd);
+
+ return true;
+}
+
+static bool gen_wrcsr(DisasContext *ctx, arg_r *a,
+ void (*func)(TCGv_ptr, TCGv, TCGv))
+{
+ TCGv val = tcg_temp_new();
+ TCGv addr = tcg_temp_new();
+
+ check_cp0_enabled(ctx);
+ gen_load_gpr(addr, a->rs);
+ gen_load_gpr(val, a->rd);
+ func(cpu_env, addr, val);
+
+ return true;
+}
+
+TRANS(RDCSR, gen_rdcsr, gen_helper_lcsr_rdcsr)
+TRANS(DRDCSR, gen_rdcsr, gen_helper_lcsr_drdcsr)
+TRANS(WRCSR, gen_wrcsr, gen_helper_lcsr_wrcsr)
+TRANS(DWRCSR, gen_wrcsr, gen_helper_lcsr_dwrcsr)
+#else
+#define GEN_FALSE_TRANS(name) \
+static bool trans_##name(DisasContext *ctx, arg_##name * a) \
+{ \
+ return false; \
+}
+
+GEN_FALSE_TRANS(RDCSR)
+GEN_FALSE_TRANS(DRDCSR)
+GEN_FALSE_TRANS(WRCSR)
+GEN_FALSE_TRANS(DWRCSR)
+#endif
diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build
index 7ee969e..ea7fb58 100644
--- a/target/mips/tcg/meson.build
+++ b/target/mips/tcg/meson.build
@@ -4,6 +4,7 @@ gen = [
decodetree.process('tx79.decode', extra_args: '--static-decode=decode_tx79'),
decodetree.process('vr54xx.decode', extra_args: '--decode=decode_ext_vr54xx'),
decodetree.process('octeon.decode', extra_args: '--decode=decode_ext_octeon'),
+ decodetree.process('lcsr.decode', extra_args: '--decode=decode_ase_lcsr'),
]
mips_ss.add(gen)
@@ -26,6 +27,7 @@ mips_ss.add(files(
mips_ss.add(when: 'TARGET_MIPS64', if_true: files(
'tx79_translate.c',
'octeon_translate.c',
+ 'lcsr_translate.c',
), if_false: files(
'mxu_translate.c',
))
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index 39348b3..deb8060 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -237,11 +237,11 @@
* ├─ 001100 ─ OPC_MXU_D16MADL
* ├─ 001101 ─ OPC_MXU_S16MAD
* ├─ 001110 ─ OPC_MXU_Q16ADD
- * ├─ 001111 ─ OPC_MXU_D16MACE 23
+ * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care)
* │ ┌─ 0 ─ OPC_MXU_S32LDD
* ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
* │ └─ 1 ─ OPC_MXU_S32STDR
* │
@@ -253,11 +253,11 @@
* ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
* │ └─ 0001 ─ OPC_MXU_S32STDVR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
* │ └─ 1 ─ OPC_MXU_S32LDIR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
* │ └─ 1 ─ OPC_MXU_S32SDIR
* │
@@ -268,7 +268,7 @@
* │ 13..10
* ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
* │ └─ 0001 ─ OPC_MXU_S32SDIVR
- * ├─ 011000 ─ OPC_MXU_D32ADD
+ * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too)
* │ 23..22
* MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
* opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM
@@ -277,7 +277,7 @@
* │ 23..22
* ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
* │ ├─ 01 ─ OPC_MXU_Q16ACCM
- * │ └─ 10 ─ OPC_MXU_Q16ASUM
+ * │ └─ 10 ─ OPC_MXU_D16ASUM
* │
* │ 23..22
* ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
@@ -290,9 +290,9 @@
* ├─ 100010 ─ OPC_MXU_S8LDD
* ├─ 100011 ─ OPC_MXU_S8STD 15..14
* ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL
- * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 00 ─ OPC_MXU_S32MULU
- * │ ├─ 00 ─ OPC_MXU_S32EXTR
- * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV
+ * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU
+ * │ ├─ 10 ─ OPC_MXU_S32EXTR
+ * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
* │
* │ 20..18
* ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
@@ -304,7 +304,7 @@
* │ ├─ 110 ─ OPC_MXU_S32OR
* │ └─ 111 ─ OPC_MXU_S32XOR
* │
- * │ 7..5
+ * │ 8..6
* ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
* │ ├─ 001 ─ OPC_MXU_LXH
* ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW
@@ -318,15 +318,15 @@
* ├─ 110001 ─ OPC_MXU_D32SLR 20..18
* ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV
* ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV
- * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 010 ─ OPC_MXU_D32SARV
- * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 011 ─ OPC_MXU_Q16SLLV
- * │ ├─ 100 ─ OPC_MXU_Q16SLRV
- * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV
+ * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV
+ * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV
+ * │ ├─ 101 ─ OPC_MXU_Q16SLRV
+ * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
* │
* ├─ 110111 ─ OPC_MXU_Q16SAR
* │ 23..22
* ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- * │ └─ 01 ─ OPC_MXU_Q8MULSU
+ * │ └─ 10 ─ OPC_MXU_Q8MULSU
* │
* │ 20..18
* ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
@@ -353,15 +353,62 @@
*/
enum {
+ OPC_MXU_S32MADD = 0x00,
+ OPC_MXU_S32MADDU = 0x01,
OPC_MXU__POOL00 = 0x03,
+ OPC_MXU_S32MSUB = 0x04,
+ OPC_MXU_S32MSUBU = 0x05,
+ OPC_MXU__POOL01 = 0x06,
+ OPC_MXU__POOL02 = 0x07,
OPC_MXU_D16MUL = 0x08,
+ OPC_MXU__POOL03 = 0x09,
OPC_MXU_D16MAC = 0x0A,
+ OPC_MXU_D16MACF = 0x0B,
+ OPC_MXU_D16MADL = 0x0C,
+ OPC_MXU_S16MAD = 0x0D,
+ OPC_MXU_Q16ADD = 0x0E,
+ OPC_MXU_D16MACE = 0x0F,
OPC_MXU__POOL04 = 0x10,
+ OPC_MXU__POOL05 = 0x11,
+ OPC_MXU__POOL06 = 0x12,
+ OPC_MXU__POOL07 = 0x13,
+ OPC_MXU__POOL08 = 0x14,
+ OPC_MXU__POOL09 = 0x15,
+ OPC_MXU__POOL10 = 0x16,
+ OPC_MXU__POOL11 = 0x17,
+ OPC_MXU_D32ADD = 0x18,
+ OPC_MXU__POOL12 = 0x19,
+ OPC_MXU__POOL13 = 0x1B,
+ OPC_MXU__POOL14 = 0x1C,
+ OPC_MXU_Q8ACCE = 0x1D,
OPC_MXU_S8LDD = 0x22,
+ OPC_MXU_S8STD = 0x23,
+ OPC_MXU_S8LDI = 0x24,
+ OPC_MXU_S8SDI = 0x25,
+ OPC_MXU__POOL15 = 0x26,
OPC_MXU__POOL16 = 0x27,
+ OPC_MXU__POOL17 = 0x28,
+ OPC_MXU_S16LDD = 0x2A,
+ OPC_MXU_S16STD = 0x2B,
+ OPC_MXU_S16LDI = 0x2C,
+ OPC_MXU_S16SDI = 0x2D,
OPC_MXU_S32M2I = 0x2E,
OPC_MXU_S32I2M = 0x2F,
+ OPC_MXU_D32SLL = 0x30,
+ OPC_MXU_D32SLR = 0x31,
+ OPC_MXU_D32SARL = 0x32,
+ OPC_MXU_D32SAR = 0x33,
+ OPC_MXU_Q16SLL = 0x34,
+ OPC_MXU_Q16SLR = 0x35,
+ OPC_MXU__POOL18 = 0x36,
+ OPC_MXU_Q16SAR = 0x37,
OPC_MXU__POOL19 = 0x38,
+ OPC_MXU__POOL20 = 0x39,
+ OPC_MXU__POOL21 = 0x3A,
+ OPC_MXU_Q16SCOP = 0x3B,
+ OPC_MXU_Q8MADL = 0x3C,
+ OPC_MXU_S32SFL = 0x3D,
+ OPC_MXU_Q8SAD = 0x3E,
};
@@ -375,21 +422,94 @@ enum {
OPC_MXU_D16MIN = 0x03,
OPC_MXU_Q8MAX = 0x04,
OPC_MXU_Q8MIN = 0x05,
+ OPC_MXU_Q8SLT = 0x06,
+ OPC_MXU_Q8SLTU = 0x07,
};
/*
- * MXU pool 04
+ * MXU pool 01
*/
enum {
- OPC_MXU_S32LDD = 0x00,
- OPC_MXU_S32LDDR = 0x01,
+ OPC_MXU_S32SLT = 0x00,
+ OPC_MXU_D16SLT = 0x01,
+ OPC_MXU_D16AVG = 0x02,
+ OPC_MXU_D16AVGR = 0x03,
+ OPC_MXU_Q8AVG = 0x04,
+ OPC_MXU_Q8AVGR = 0x05,
+ OPC_MXU_Q8ADD = 0x07,
+};
+
+/*
+ * MXU pool 02
+ */
+enum {
+ OPC_MXU_S32CPS = 0x00,
+ OPC_MXU_D16CPS = 0x02,
+ OPC_MXU_Q8ABD = 0x04,
+ OPC_MXU_Q16SAT = 0x06,
+};
+
+/*
+ * MXU pool 03
+ */
+enum {
+ OPC_MXU_D16MULF = 0x00,
+ OPC_MXU_D16MULE = 0x01,
+};
+
+/*
+ * MXU pool 04 05 06 07 08 09 10 11
+ */
+enum {
+ OPC_MXU_S32LDST = 0x00,
+ OPC_MXU_S32LDSTR = 0x01,
+};
+
+/*
+ * MXU pool 12
+ */
+enum {
+ OPC_MXU_D32ACC = 0x00,
+ OPC_MXU_D32ACCM = 0x01,
+ OPC_MXU_D32ASUM = 0x02,
+};
+
+/*
+ * MXU pool 13
+ */
+enum {
+ OPC_MXU_Q16ACC = 0x00,
+ OPC_MXU_Q16ACCM = 0x01,
+ OPC_MXU_D16ASUM = 0x02,
+};
+
+/*
+ * MXU pool 14
+ */
+enum {
+ OPC_MXU_Q8ADDE = 0x00,
+ OPC_MXU_D8SUM = 0x01,
+ OPC_MXU_D8SUMC = 0x02,
+};
+
+/*
+ * MXU pool 15
+ */
+enum {
+ OPC_MXU_S32MUL = 0x00,
+ OPC_MXU_S32MULU = 0x01,
+ OPC_MXU_S32EXTR = 0x02,
+ OPC_MXU_S32EXTRV = 0x03,
};
/*
* MXU pool 16
*/
enum {
+ OPC_MXU_D32SARW = 0x00,
+ OPC_MXU_S32ALN = 0x01,
OPC_MXU_S32ALNI = 0x02,
+ OPC_MXU_S32LUI = 0x03,
OPC_MXU_S32NOR = 0x04,
OPC_MXU_S32AND = 0x05,
OPC_MXU_S32OR = 0x06,
@@ -397,13 +517,57 @@ enum {
};
/*
+ * MXU pool 17
+ */
+enum {
+ OPC_MXU_LXB = 0x00,
+ OPC_MXU_LXH = 0x01,
+ OPC_MXU_LXW = 0x03,
+ OPC_MXU_LXBU = 0x04,
+ OPC_MXU_LXHU = 0x05,
+};
+
+/*
+ * MXU pool 18
+ */
+enum {
+ OPC_MXU_D32SLLV = 0x00,
+ OPC_MXU_D32SLRV = 0x01,
+ OPC_MXU_D32SARV = 0x03,
+ OPC_MXU_Q16SLLV = 0x04,
+ OPC_MXU_Q16SLRV = 0x05,
+ OPC_MXU_Q16SARV = 0x07,
+};
+
+/*
* MXU pool 19
*/
enum {
OPC_MXU_Q8MUL = 0x00,
- OPC_MXU_Q8MULSU = 0x01,
+ OPC_MXU_Q8MULSU = 0x02,
};
+/*
+ * MXU pool 20
+ */
+enum {
+ OPC_MXU_Q8MOVZ = 0x00,
+ OPC_MXU_Q8MOVN = 0x01,
+ OPC_MXU_D16MOVZ = 0x02,
+ OPC_MXU_D16MOVN = 0x03,
+ OPC_MXU_S32MOVZ = 0x04,
+ OPC_MXU_S32MOVN = 0x05,
+};
+
+/*
+ * MXU pool 21
+ */
+enum {
+ OPC_MXU_Q8MAC = 0x00,
+ OPC_MXU_Q8MACSU = 0x02,
+};
+
+
/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A 0
#define MXU_APTN1_S 1
@@ -537,8 +701,11 @@ static void gen_mxu_s32m2i(DisasContext *ctx)
/*
* S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
+ *
+ * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
+ * post modify address register
*/
-static void gen_mxu_s8ldd(DisasContext *ctx)
+static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, s8, optn3;
@@ -553,6 +720,9 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
switch (optn3) {
/* XRa[7:0] = tmp8 */
@@ -610,9 +780,208 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
}
/*
- * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
+ *
+ * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
+ * post modify address register
*/
-static void gen_mxu_d16mul(DisasContext *ctx)
+static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, s8, optn3;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s8 = extract32(ctx->opcode, 10, 8);
+ optn3 = extract32(ctx->opcode, 18, 3);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ if (optn3 > 3) {
+ /* reserved, do nothing */
+ return;
+ }
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (optn3) {
+ /* XRa[7:0] => tmp8 */
+ case MXU_OPTN3_PTN0:
+ tcg_gen_extract_tl(t1, t1, 0, 8);
+ break;
+ /* XRa[15:8] => tmp8 */
+ case MXU_OPTN3_PTN1:
+ tcg_gen_extract_tl(t1, t1, 8, 8);
+ break;
+ /* XRa[23:16] => tmp8 */
+ case MXU_OPTN3_PTN2:
+ tcg_gen_extract_tl(t1, t1, 16, 8);
+ break;
+ /* XRa[31:24] => tmp8 */
+ case MXU_OPTN3_PTN3:
+ tcg_gen_extract_tl(t1, t1, 24, 8);
+ break;
+ }
+
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
+}
+
+/*
+ * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
+ *
+ * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
+ * post modify address register
+ */
+static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, optn2;
+ int32_t s10;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s10 = sextract32(ctx->opcode, 10, 9) * 2;
+ optn2 = extract32(ctx->opcode, 19, 2);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, s10);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+
+ switch (optn2) {
+ /* XRa[15:0] = tmp16 */
+ case MXU_OPTN2_PTN0:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ gen_load_mxu_gpr(t0, XRa);
+ tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
+ break;
+ /* XRa[31:16] = tmp16 */
+ case MXU_OPTN2_PTN1:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ gen_load_mxu_gpr(t0, XRa);
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ break;
+ /* XRa = sign_extend(tmp16) */
+ case MXU_OPTN2_PTN2:
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
+ break;
+ /* XRa = {tmp16, tmp16} */
+ case MXU_OPTN2_PTN3:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ tcg_gen_deposit_tl(t0, t1, t1, 0, 16);
+ tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+ break;
+ }
+
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
+ *
+ * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
+ * post modify address register
+ */
+static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, optn2;
+ int32_t s10;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s10 = sextract32(ctx->opcode, 10, 9) * 2;
+ optn2 = extract32(ctx->opcode, 19, 2);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ if (optn2 > 1) {
+ /* reserved, do nothing */
+ return;
+ }
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, s10);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (optn2) {
+ /* XRa[15:0] => tmp16 */
+ case MXU_OPTN2_PTN0:
+ tcg_gen_extract_tl(t1, t1, 0, 16);
+ break;
+ /* XRa[31:16] => tmp16 */
+ case MXU_OPTN2_PTN1:
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+ break;
+ }
+
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
+}
+
+/*
+ * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ *
+ * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ */
+static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
+{
+ TCGv t0, t1;
+ uint32_t XRa, XRd, rs, rt;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ rs = extract32(ctx->opcode, 16, 5);
+ rt = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(rs == 0 || rt == 0)) {
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_movi_tl(t1, 0);
+ } else {
+ gen_load_gpr(t0, rs);
+ gen_load_gpr(t1, rt);
+
+ if (mulu) {
+ tcg_gen_mulu2_tl(t0, t1, t0, t1);
+ } else {
+ tcg_gen_muls2_tl(t0, t1, t0, t1);
+ }
+ }
+ tcg_gen_mov_tl(cpu_HI[0], t1);
+ tcg_gen_mov_tl(cpu_LO[0], t0);
+ gen_store_mxu_gpr(t1, XRa);
+ gen_store_mxu_gpr(t0, XRd);
+}
+
+/*
+ * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
+ * with rounding and packing result
+ * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
+ * multiplication with rounding
+ */
+static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
+ bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2;
@@ -628,6 +997,12 @@ static void gen_mxu_d16mul(DisasContext *ctx)
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
+ /*
+ * TODO: XRd field isn't used for D16MULF
+ * There's no knowledge how this field affect
+ * instruction decoding/behavior
+ */
+
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
@@ -653,15 +1028,64 @@ static void gen_mxu_d16mul(DisasContext *ctx)
tcg_gen_mul_tl(t2, t1, t2);
break;
}
- gen_store_mxu_gpr(t3, XRa);
- gen_store_mxu_gpr(t2, XRd);
+ if (fractional) {
+ TCGLabel *l_done = gen_new_label();
+ TCGv rounding = tcg_temp_new();
+
+ tcg_gen_shli_tl(t3, t3, 1);
+ tcg_gen_shli_tl(t2, t2, 1);
+ tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+ if (packed_result) {
+ TCGLabel *l_apply_bias_l = gen_new_label();
+ TCGLabel *l_apply_bias_r = gen_new_label();
+ TCGLabel *l_half_done = gen_new_label();
+ TCGv bias = tcg_temp_new();
+
+ /*
+ * D16MULF supports unbiased rounding aka "bankers rounding",
+ * "round to even", "convergent rounding"
+ */
+ tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+ tcg_gen_andi_tl(t0, t3, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+ gen_set_label(l_apply_bias_l);
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ gen_set_label(l_half_done);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+ tcg_gen_andi_tl(t0, t2, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+ gen_set_label(l_apply_bias_r);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ } else {
+ /* D16MULE doesn't support unbiased rounding */
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ }
+ gen_set_label(l_done);
+ }
+ if (!packed_result) {
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+ } else {
+ tcg_gen_andi_tl(t3, t3, 0xffff0000);
+ tcg_gen_shri_tl(t2, t2, 16);
+ tcg_gen_or_tl(t3, t3, t2);
+ gen_store_mxu_gpr(t3, XRa);
+ }
}
/*
- * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
- * and accumulate
+ * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
+ * Signed 16 bit pattern multiply and accumulate
+ * D16MACF XRa, XRb, XRc, aptn2, optn2
+ * Signed Q15 fraction pattern multiply accumulate and pack
+ * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
+ * Signed Q15 fraction pattern multiply and accumulate
*/
-static void gen_mxu_d16mac(DisasContext *ctx)
+static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
+ bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
@@ -704,6 +1128,11 @@ static void gen_mxu_d16mac(DisasContext *ctx)
tcg_gen_mul_tl(t2, t1, t2);
break;
}
+
+ if (fractional) {
+ tcg_gen_shli_tl(t3, t3, 1);
+ tcg_gen_shli_tl(t2, t2, 1);
+ }
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
@@ -725,18 +1154,205 @@ static void gen_mxu_d16mac(DisasContext *ctx)
tcg_gen_sub_tl(t2, t1, t2);
break;
}
- gen_store_mxu_gpr(t3, XRa);
- gen_store_mxu_gpr(t2, XRd);
+
+ if (fractional) {
+ TCGLabel *l_done = gen_new_label();
+ TCGv rounding = tcg_temp_new();
+
+ tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+ if (packed_result) {
+ TCGLabel *l_apply_bias_l = gen_new_label();
+ TCGLabel *l_apply_bias_r = gen_new_label();
+ TCGLabel *l_half_done = gen_new_label();
+ TCGv bias = tcg_temp_new();
+
+ /*
+ * D16MACF supports unbiased rounding aka "bankers rounding",
+ * "round to even", "convergent rounding"
+ */
+ tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+ tcg_gen_andi_tl(t0, t3, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+ gen_set_label(l_apply_bias_l);
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ gen_set_label(l_half_done);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+ tcg_gen_andi_tl(t0, t2, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+ gen_set_label(l_apply_bias_r);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ } else {
+ /* D16MACE doesn't support unbiased rounding */
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ }
+ gen_set_label(l_done);
+ }
+
+ if (!packed_result) {
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+ } else {
+ tcg_gen_andi_tl(t3, t3, 0xffff0000);
+ tcg_gen_shri_tl(t2, t2, 16);
+ tcg_gen_or_tl(t3, t3, t2);
+ gen_store_mxu_gpr(t3, XRa);
+ }
}
/*
- * Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
+ * unsigned 16 bit pattern multiply and add/subtract.
*/
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+static void gen_mxu_d16madl(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3;
+ uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_sextract_tl(t0, t1, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_sextract_tl(t2, t3, 0, 16);
+ tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ }
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 0, 16);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ switch (aptn2) {
+ case MXU_APTN2_AA:
+ tcg_gen_add_tl(t3, t1, t3);
+ tcg_gen_add_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_AS:
+ tcg_gen_add_tl(t3, t1, t3);
+ tcg_gen_sub_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_SA:
+ tcg_gen_sub_tl(t3, t1, t3);
+ tcg_gen_add_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_SS:
+ tcg_gen_sub_tl(t3, t1, t3);
+ tcg_gen_sub_tl(t2, t0, t2);
+ break;
+ }
+
+ tcg_gen_andi_tl(t2, t2, 0xffff);
+ tcg_gen_shli_tl(t3, t3, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
+}
+
+/*
+ * S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 - Single packed
+ * signed 16 bit pattern multiply and 32-bit add/subtract.
+ */
+static void gen_mxu_s16mad(DisasContext *ctx)
+{
+ TCGv t0, t1;
+ uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ aptn1 = extract32(ctx->opcode, 24, 1);
+ pad = extract32(ctx->opcode, 25, 1);
+
+ if (pad) {
+ /* FIXME check if it influence the result */
+ }
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H*XRC.H */
+ tcg_gen_sextract_tl(t0, t0, 16, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L*XRC.L */
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 0, 16);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H*XRC.L */
+ tcg_gen_sextract_tl(t0, t0, 16, 16);
+ tcg_gen_sextract_tl(t1, t1, 0, 16);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L*XRC.H */
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+ break;
+ }
+ tcg_gen_mul_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (aptn1) {
+ case MXU_APTN1_A:
+ tcg_gen_add_tl(t1, t1, t0);
+ break;
+ case MXU_APTN1_S:
+ tcg_gen_sub_tl(t1, t1, t0);
+ break;
+ }
+
+ gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * and accumulate
+ * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * and accumulate
+ */
+static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
- uint32_t XRa, XRb, XRc, XRd, sel;
+ uint32_t XRa, XRb, XRc, XRd, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
@@ -751,90 +1367,311 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
- sel = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
- if (sel == 0x2) {
- /* Q8MULSU */
- tcg_gen_ext8s_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t3, t3);
+ if (su) {
+ /* Q8MULSU / Q8MACSU */
+ tcg_gen_sextract_tl(t0, t3, 0, 8);
+ tcg_gen_sextract_tl(t1, t3, 8, 8);
+ tcg_gen_sextract_tl(t2, t3, 16, 8);
+ tcg_gen_sextract_tl(t3, t3, 24, 8);
} else {
- /* Q8MUL */
- tcg_gen_ext8u_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t3, t3);
- }
-
- tcg_gen_ext8u_tl(t4, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t5, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t6, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t7, t7);
+ /* Q8MUL / Q8MAC */
+ tcg_gen_extract_tl(t0, t3, 0, 8);
+ tcg_gen_extract_tl(t1, t3, 8, 8);
+ tcg_gen_extract_tl(t2, t3, 16, 8);
+ tcg_gen_extract_tl(t3, t3, 24, 8);
+ }
+
+ tcg_gen_extract_tl(t4, t7, 0, 8);
+ tcg_gen_extract_tl(t5, t7, 8, 8);
+ tcg_gen_extract_tl(t6, t7, 16, 8);
+ tcg_gen_extract_tl(t7, t7, 24, 8);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
- tcg_gen_andi_tl(t0, t0, 0xFFFF);
- tcg_gen_andi_tl(t1, t1, 0xFFFF);
- tcg_gen_andi_tl(t2, t2, 0xFFFF);
- tcg_gen_andi_tl(t3, t3, 0xFFFF);
-
- tcg_gen_shli_tl(t1, t1, 16);
- tcg_gen_shli_tl(t3, t3, 16);
+ if (mac) {
+ gen_load_mxu_gpr(t4, XRd);
+ gen_load_mxu_gpr(t5, XRa);
+ tcg_gen_extract_tl(t6, t4, 0, 16);
+ tcg_gen_extract_tl(t7, t4, 16, 16);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t6, t0);
+ tcg_gen_sub_tl(t1, t7, t1);
+ } else {
+ tcg_gen_add_tl(t0, t6, t0);
+ tcg_gen_add_tl(t1, t7, t1);
+ }
+ tcg_gen_extract_tl(t6, t5, 0, 16);
+ tcg_gen_extract_tl(t7, t5, 16, 16);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t6, t2);
+ tcg_gen_sub_tl(t3, t7, t3);
+ } else {
+ tcg_gen_add_tl(t2, t6, t2);
+ tcg_gen_add_tl(t3, t7, t3);
+ }
+ }
- tcg_gen_or_tl(t0, t0, t1);
- tcg_gen_or_tl(t1, t2, t3);
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRd);
gen_store_mxu_gpr(t1, XRa);
}
/*
+ * Q8MADL XRd, XRa, XRb, XRc
+ * Parallel quad unsigned 8 bit multiply and accumulate.
+ * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
+ */
+static void gen_mxu_q8madl(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3, t4, t5, t6, t7;
+ uint32_t XRa, XRb, XRc, XRd, aptn2;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+ t4 = tcg_temp_new();
+ t5 = tcg_temp_new();
+ t6 = tcg_temp_new();
+ t7 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ aptn2 = extract32(ctx->opcode, 24, 2);
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t7, XRc);
+
+ tcg_gen_extract_tl(t0, t3, 0, 8);
+ tcg_gen_extract_tl(t1, t3, 8, 8);
+ tcg_gen_extract_tl(t2, t3, 16, 8);
+ tcg_gen_extract_tl(t3, t3, 24, 8);
+
+ tcg_gen_extract_tl(t4, t7, 0, 8);
+ tcg_gen_extract_tl(t5, t7, 8, 8);
+ tcg_gen_extract_tl(t6, t7, 16, 8);
+ tcg_gen_extract_tl(t7, t7, 24, 8);
+
+ tcg_gen_mul_tl(t0, t0, t4);
+ tcg_gen_mul_tl(t1, t1, t5);
+ tcg_gen_mul_tl(t2, t2, t6);
+ tcg_gen_mul_tl(t3, t3, t7);
+
+ gen_load_mxu_gpr(t4, XRa);
+ tcg_gen_extract_tl(t6, t4, 0, 8);
+ tcg_gen_extract_tl(t7, t4, 8, 8);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t6, t0);
+ tcg_gen_sub_tl(t1, t7, t1);
+ } else {
+ tcg_gen_add_tl(t0, t6, t0);
+ tcg_gen_add_tl(t1, t7, t1);
+ }
+ tcg_gen_extract_tl(t6, t4, 16, 8);
+ tcg_gen_extract_tl(t7, t4, 24, 8);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t6, t2);
+ tcg_gen_sub_tl(t3, t7, t3);
+ } else {
+ tcg_gen_add_tl(t2, t6, t2);
+ tcg_gen_add_tl(t3, t7, t3);
+ }
+
+ tcg_gen_andi_tl(t5, t0, 0xff);
+ tcg_gen_deposit_tl(t5, t5, t1, 8, 8);
+ tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
+ tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
+
+ gen_store_mxu_gpr(t5, XRd);
+}
+
+/*
* S32LDD XRa, Rb, S12 - Load a word from memory to XRF
- * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
+ * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
+ * in reversed byte seq.
+ * S32LDI XRa, Rb, S12 - Load a word from memory to XRF,
+ * post modify base address GPR.
+ * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
+ * post modify base address GPR and load in reversed byte seq.
*/
-static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
+static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
{
TCGv t0, t1;
- uint32_t XRa, Rb, s12, sel;
+ uint32_t XRa, Rb, s12;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
- s12 = extract32(ctx->opcode, 10, 10);
- sel = extract32(ctx->opcode, 20, 1);
+ s12 = sextract32(ctx->opcode, 10, 10);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
+ tcg_gen_movi_tl(t1, s12 * 4);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+ gen_store_mxu_gpr(t1, XRa);
- tcg_gen_movi_tl(t1, s12);
- tcg_gen_shli_tl(t1, t1, 2);
- if (s12 & 0x200) {
- tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
}
- tcg_gen_add_tl(t1, t0, t1);
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
- ctx->default_tcg_memop_mask);
+}
+
+/*
+ * S32STD XRa, Rb, S12 - Store a word from XRF to memory
+ * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
+ * in reversed byte seq.
+ * S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
+ * post modify base address GPR.
+ * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
+ * post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, s12;
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s12 = sextract32(ctx->opcode, 10, 10);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_movi_tl(t1, s12 * 4);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
+}
+
+/*
+ * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * in reversed byte seq.
+ * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR.
+ * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR and load in reversed byte seq.
+ */
+static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
+ bool postinc, uint32_t strd2)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
}
+/*
+ * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
+ * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ * sign extending to GPR size.
+ * LXH Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ * sign extending to GPR size.
+ * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ * zero extending to GPR size.
+ * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ * zero extending to GPR size.
+ */
+static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
+{
+ TCGv t0, t1;
+ uint32_t Ra, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ Ra = extract32(ctx->opcode, 11, 5);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
+ gen_store_gpr(t1, Ra);
+}
+
+/*
+ * S32STDV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
+ * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
+ * in reversed byte seq.
+ * S32SDIV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
+ * post modify base address GPR.
+ * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
+ * post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
+ bool postinc, uint32_t strd2)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
+}
/*
* MXU instruction category: logic
@@ -981,13 +1818,291 @@ static void gen_mxu_S32XOR(DisasContext *ctx)
}
}
+/*
+ * MXU instruction category: shift
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * D32SLL D32SLR D32SAR D32SARL
+ * D32SLLV D32SLRV D32SARV D32SARW
+ * Q16SLL Q16SLR Q16SAR
+ * Q16SLLV Q16SLRV Q16SARV
+ */
+
+/*
+ * D32SLL XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift left from XRb and XRc to SFT4
+ * bits (0..15). Store to XRa and XRd respectively.
+ * D32SLR XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift logic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ * D32SAR XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift arithmetic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRb, XRc, XRd, sft4;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ sft4 = extract32(ctx->opcode, 22, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sari_tl(t0, t0, sft4);
+ tcg_gen_sari_tl(t1, t1, sft4);
+ } else {
+ tcg_gen_shri_tl(t0, t0, sft4);
+ tcg_gen_shri_tl(t1, t1, sft4);
+ }
+ } else {
+ tcg_gen_shli_tl(t0, t0, sft4);
+ tcg_gen_shli_tl(t1, t1, sft4);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t1, XRd);
+}
/*
- * MXU instruction category max/min
+ * D32SLLV XRa, XRd, rs
+ * Dual 32-bit shift left from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ * D32SLRV XRa, XRd, rs
+ * Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ * D32SARV XRa, XRd, rs
+ * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRd, rs;
+
+ XRa = extract32(ctx->opcode, 10, 4);
+ XRd = extract32(ctx->opcode, 14, 4);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t1, XRd);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x0f);
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sar_tl(t0, t0, t2);
+ tcg_gen_sar_tl(t1, t1, t2);
+ } else {
+ tcg_gen_shr_tl(t0, t0, t2);
+ tcg_gen_shr_tl(t1, t1, t2);
+ }
+ } else {
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_shl_tl(t1, t1, t2);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ * D32SARL XRa, XRb, XRc, SFT4
+ * Dual shift arithmetic right 32-bit integers in XRb and XRc
+ * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
+ *
+ * D32SARW XRa, XRb, XRc, rb
+ * Dual shift arithmetic right 32-bit integers in XRb and XRc
+ * to rb[3:0] bits. Pack 16 LSBs of each into XRa.
+ */
+static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
+{
+ uint32_t XRa, XRb, XRc, rb;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ rb = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (!sarw) {
+ /* Make SFT4 from rb field */
+ tcg_gen_movi_tl(t2, rb >> 1);
+ } else {
+ gen_load_gpr(t2, rb);
+ tcg_gen_andi_tl(t2, t2, 0x0f);
+ }
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ tcg_gen_sar_tl(t0, t0, t2);
+ tcg_gen_sar_tl(t1, t1, t2);
+ tcg_gen_extract_tl(t2, t1, 0, 16);
+ tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q16SLL XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift left from XRb and XRc to SFT4
+ * bits (0..15). Store to XRa and XRd respectively.
+ * Q16SLR XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift logic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ * Q16SAR XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift arithmetic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRb, XRc, XRd, sft4;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ sft4 = extract32(ctx->opcode, 22, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t2, XRc);
+
+ if (arithmetic) {
+ tcg_gen_sextract_tl(t1, t0, 16, 16);
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t3, t2, 16, 16);
+ tcg_gen_sextract_tl(t2, t2, 0, 16);
+ } else {
+ tcg_gen_extract_tl(t1, t0, 16, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t3, t2, 16, 16);
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ }
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sari_tl(t0, t0, sft4);
+ tcg_gen_sari_tl(t1, t1, sft4);
+ tcg_gen_sari_tl(t2, t2, sft4);
+ tcg_gen_sari_tl(t3, t3, sft4);
+ } else {
+ tcg_gen_shri_tl(t0, t0, sft4);
+ tcg_gen_shri_tl(t1, t1, sft4);
+ tcg_gen_shri_tl(t2, t2, sft4);
+ tcg_gen_shri_tl(t3, t3, sft4);
+ }
+ } else {
+ tcg_gen_shli_tl(t0, t0, sft4);
+ tcg_gen_shli_tl(t1, t1, sft4);
+ tcg_gen_shli_tl(t2, t2, sft4);
+ tcg_gen_shli_tl(t3, t3, sft4);
+ }
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ * Q16SLLV XRa, XRd, rs
+ * Quad 16-bit shift left from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ * Q16SLRV XRa, XRd, rs
+ * Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ * Q16SARV XRa, XRd, rs
+ * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRd, rs;
+
+ XRa = extract32(ctx->opcode, 10, 4);
+ XRd = extract32(ctx->opcode, 14, 4);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t2, XRd);
+ gen_load_gpr(t5, rs);
+ tcg_gen_andi_tl(t5, t5, 0x0f);
+
+
+ if (arithmetic) {
+ tcg_gen_sextract_tl(t1, t0, 16, 16);
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t3, t2, 16, 16);
+ tcg_gen_sextract_tl(t2, t2, 0, 16);
+ } else {
+ tcg_gen_extract_tl(t1, t0, 16, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t3, t2, 16, 16);
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ }
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sar_tl(t0, t0, t5);
+ tcg_gen_sar_tl(t1, t1, t5);
+ tcg_gen_sar_tl(t2, t2, t5);
+ tcg_gen_sar_tl(t3, t3, t5);
+ } else {
+ tcg_gen_shr_tl(t0, t0, t5);
+ tcg_gen_shr_tl(t1, t1, t5);
+ tcg_gen_shr_tl(t2, t2, t5);
+ tcg_gen_shr_tl(t3, t3, t5);
+ }
+ } else {
+ tcg_gen_shl_tl(t0, t0, t5);
+ tcg_gen_shl_tl(t1, t1, t5);
+ tcg_gen_shl_tl(t2, t2, t5);
+ tcg_gen_shl_tl(t3, t3, t5);
+ }
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ * MXU instruction category max/min/avg
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32MAX D16MAX Q8MAX
* S32MIN D16MIN Q8MIN
+ * S32SLT D16SLT Q8SLT
+ * Q8SLTU
+ * D16AVG Q8AVG
+ * D16AVGR Q8AVGR
+ * S32MOVZ D16MOVZ Q8MOVZ
+ * S32MOVN D16MOVN Q8MOVN
*/
/*
@@ -1072,13 +2187,14 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* ...and do half-word-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
+ TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
@@ -1094,7 +2210,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1102,14 +2218,15 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
+ TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
@@ -1127,7 +2244,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
}
}
@@ -1163,14 +2280,15 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* ...and do byte-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
+ TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost byte (byte 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
@@ -1188,8 +2306,9 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(t2, t2, t0);
}
+ gen_store_mxu_gpr(t2, XRa);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1197,15 +2316,16 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
+ TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost bytes (bytes 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
@@ -1225,11 +2345,1741 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(t2, t2, t0);
}
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc
+ * on per-byte basis.
+ * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ *
+ * Q8SLTU
+ * Update XRa with the unsigned "set less than" comparison of XRb and XRc
+ * on per-byte basis.
+ * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ */
+static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ if (sltu) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+ } else {
+ tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
+ }
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * S32SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc.
+ * a.k.a. XRa = XRb < XRc ? 1 : 0;
+ */
+static void gen_mxu_S32SLT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
+ mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
+/*
+ * D16SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc
+ * on per-word basis.
+ * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
+ */
+static void gen_mxu_D16SLT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_sextract_tl(t0, t3, 16, 16);
+ tcg_gen_sextract_tl(t1, t4, 16, 16);
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_shli_tl(t2, t0, 16);
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t4, 0, 16);
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
+ }
+}
+
+/*
+ * D16AVG
+ * Update XRa with the signed average of XRb and XRc
+ * on per-word basis, rounding down.
+ * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
+ *
+ * D16AVGR
+ * Update XRa with the signed average of XRb and XRc
+ * on per-word basis, math rounding 4/5.
+ * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
+ */
+static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to same */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_sextract_tl(t0, t3, 16, 16);
+ tcg_gen_sextract_tl(t1, t4, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shli_tl(t2, t0, 15);
+ tcg_gen_andi_tl(t2, t2, 0xffff0000);
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t4, 0, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shri_tl(t0, t0, 1);
+ tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8AVG
+ * Update XRa with the signed average of XRb and XRc
+ * on per-byte basis, rounding down.
+ * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
+ *
+ * Q8AVGR
+ * Update XRa with the signed average of XRb and XRc
+ * on per-byte basis, math rounding 4/5.
+ * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
+ */
+static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to same */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shri_tl(t0, t0, 1);
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8MOVZ
+ * Quadruple 8-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
+ *
+ * Q8MOVN
+ * Quadruple 8-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
+ */
+static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_quarterdone = gen_new_label();
+ TCGLabel *l_halfdone = gen_new_label();
+ TCGLabel *l_quarterrest = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+ gen_load_mxu_gpr(t2, XRa);
+
+ tcg_gen_extract_tl(t3, t1, 24, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
+ tcg_gen_extract_tl(t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
+
+ gen_set_label(l_quarterdone);
+ tcg_gen_extract_tl(t3, t1, 16, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+ tcg_gen_extract_tl(t3, t0, 16, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
+
+ gen_set_label(l_halfdone);
+ tcg_gen_extract_tl(t3, t1, 8, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
+ tcg_gen_extract_tl(t3, t0, 8, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
+
+ gen_set_label(l_quarterrest);
+ tcg_gen_extract_tl(t3, t1, 0, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+ tcg_gen_extract_tl(t3, t0, 0, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ * D16MOVZ
+ * Double 16-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
+ *
+ * D16MOVN
+ * Double 16-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..1] != 0) { XRa[0..1] = XRc[0..1] }
+ */
+static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_halfdone = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+ gen_load_mxu_gpr(t2, XRa);
+
+ tcg_gen_extract_tl(t3, t1, 16, 16);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+ tcg_gen_extract_tl(t3, t0, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_set_label(l_halfdone);
+ tcg_gen_extract_tl(t3, t1, 0, 16);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+ tcg_gen_extract_tl(t3, t0, 0, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ * S32MOVZ
+ * Single 32-bit conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb == 0) { XRa = XRc }
+ *
+ * S32MOVN
+ * Single 32-bit conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb != 0) { XRa = XRc }
+ */
+static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+
+ tcg_gen_brcondi_tl(cond, t1, 0, l_done);
+ gen_store_mxu_gpr(t0, XRa);
+ gen_set_label(l_done);
+}
+
+/*
+ * MXU instruction category: Addition and subtraction
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * S32CPS D16CPS
+ * Q8ADD
+ */
+
+/*
+ * S32CPS
+ * Update XRa if XRc < 0 by value of 0 - XRb
+ * else XRa = XRb
+ */
+static void gen_mxu_S32CPS(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely(XRb == 0)) {
+ /* XRc makes no sense 0 - 0 = 0 -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRc == 0)) {
+ /* condition always false -> just move XRb to XRa */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGLabel *l_not_less = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
+ tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
+ tcg_gen_br(l_done);
+ gen_set_label(l_not_less);
+ gen_load_mxu_gpr(t0, XRb);
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
+
+/*
+ * D16CPS
+ * Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
+ * else XRa[0..1] = XRb[0..1]
+ */
+static void gen_mxu_D16CPS(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely(XRb == 0)) {
+ /* XRc makes no sense 0 - 0 = 0 -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRc == 0)) {
+ /* condition always false -> just move XRb to XRa */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGLabel *l_done_hi = gen_new_label();
+ TCGLabel *l_not_less_lo = gen_new_label();
+ TCGLabel *l_done_lo = gen_new_label();
+
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
+ tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
+ tcg_gen_subfi_tl(t1, 0, t1);
+
+ gen_set_label(l_done_hi);
+ tcg_gen_shli_i32(t1, t1, 16);
+
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
+ tcg_gen_subfi_tl(t0, 0, t0);
+ tcg_gen_br(l_done_lo);
+
+ gen_set_label(l_not_less_lo);
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
+
+ gen_set_label(l_done_lo);
+ tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
+ }
+}
+
+/*
+ * Q8ABD XRa, XRb, XRc
+ * Gets absolute difference for quadruple of 8-bit
+ * packed in XRb to another one in XRc,
+ * put the result in XRa.
+ * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
+ */
+static void gen_mxu_Q8ABD(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_abs_tl(t0, t0);
+
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8ADD XRa, XRb, XRc, ptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, put the result in XRa.
+ */
+static void gen_mxu_Q8ADD(DisasContext *ctx)
+{
+ uint32_t aptn2, pad, XRc, XRb, XRa;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_andi_tl(t0, t3, 0xff);
+ tcg_gen_andi_tl(t1, t4, 0xff);
+
+ if (i < 2) {
+ if (aptn2 & 0x01) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ }
+ } else {
+ if (aptn2 & 0x02) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ }
+ }
+ if (i < 3) {
+ tcg_gen_shri_tl(t3, t3, 8);
+ tcg_gen_shri_tl(t4, t4, 8);
+ }
+ if (i > 0) {
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ } else {
+ tcg_gen_andi_tl(t0, t0, 0xff);
+ tcg_gen_mov_tl(t2, t0);
+ }
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8ADDE XRa, XRb, XRc, XRd, aptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, with zero extending
+ * to 16-bit and put results as packed 16-bit data
+ * into XRa and XRd.
+ * aptn2 manages action add or subtract of pairs of data.
+ *
+ * Q8ACCE XRa, XRb, XRc, XRd, aptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, with zero extending
+ * to 16-bit and accumulate results as packed 16-bit data
+ * into XRa and XRd.
+ * aptn2 manages action add or subtract of pairs of data.
+ */
+static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
+{
+ uint32_t aptn2, XRd, XRc, XRb, XRa;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ if (XRa != 0) {
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ }
+ if (XRd != 0) {
+ tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
+ }
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ if (XRa != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_sub_tl(t2, t2, t3);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ if (accumulate) {
+ gen_load_mxu_gpr(t5, XRa);
+ tcg_gen_extract_tl(t1, t5, 0, 16);
+ tcg_gen_extract_tl(t3, t5, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ tcg_gen_shli_tl(t2, t2, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_or_tl(t4, t2, t0);
+ }
+ if (XRd != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_sub_tl(t2, t2, t3);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ if (accumulate) {
+ gen_load_mxu_gpr(t5, XRd);
+ tcg_gen_extract_tl(t1, t5, 0, 16);
+ tcg_gen_extract_tl(t3, t5, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ tcg_gen_shli_tl(t2, t2, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_or_tl(t5, t2, t0);
+ }
+
+ gen_store_mxu_gpr(t4, XRa);
+ gen_store_mxu_gpr(t5, XRd);
+ }
+}
+
+/*
+ * D8SUM XRa, XRb, XRc
+ * Double parallel add of quadruple unsigned 8-bit together
+ * with zero extending to 16-bit data.
+ * D8SUMC XRa, XRb, XRc
+ * Double parallel add of quadruple unsigned 8-bit together
+ * with zero extending to 16-bit data and adding 2 to each
+ * parallel result.
+ */
+static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
+{
+ uint32_t pad, pad2, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 24, 2);
+ pad2 = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0 || pad2 != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ if (XRb != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
+ tcg_gen_add_tl(t4, t0, t1);
+ tcg_gen_add_tl(t4, t4, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ } else {
+ tcg_gen_mov_tl(t4, 0);
+ }
+ if (XRc != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+ tcg_gen_add_tl(t5, t0, t1);
+ tcg_gen_add_tl(t5, t5, t2);
+ tcg_gen_add_tl(t5, t5, t3);
+ } else {
+ tcg_gen_mov_tl(t5, 0);
+ }
+
+ if (sumc) {
+ tcg_gen_addi_tl(t4, t4, 2);
+ tcg_gen_addi_tl(t5, t5, 2);
+ }
+ tcg_gen_shli_tl(t4, t4, 16);
+
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+ }
+}
+
+/*
+ * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
+ * 16-bit pattern addition.
+ */
+static void gen_mxu_q16add(DisasContext *ctx)
+{
+ uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_extract_tl(t2, t3, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 16, 16);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t1);
+ tcg_gen_mov_tl(t5, t0);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t0);
+ tcg_gen_mov_tl(t5, t0);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t1);
+ tcg_gen_mov_tl(t5, t1);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t0);
+ tcg_gen_mov_tl(t5, t1);
+ break;
+ }
+
+ switch (aptn2) {
+ case MXU_APTN2_AA: /* lop +, rop + */
+ tcg_gen_add_tl(t0, t4, t3);
+ tcg_gen_add_tl(t1, t5, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ tcg_gen_add_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_AS: /* lop +, rop - */
+ tcg_gen_sub_tl(t0, t4, t3);
+ tcg_gen_sub_tl(t1, t5, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ tcg_gen_add_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_SA: /* lop -, rop + */
+ tcg_gen_add_tl(t0, t4, t3);
+ tcg_gen_add_tl(t1, t5, t2);
+ tcg_gen_sub_tl(t4, t4, t3);
+ tcg_gen_sub_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_SS: /* lop -, rop - */
+ tcg_gen_sub_tl(t0, t4, t3);
+ tcg_gen_sub_tl(t1, t5, t2);
+ tcg_gen_sub_tl(t4, t4, t3);
+ tcg_gen_sub_tl(t5, t5, t2);
+ break;
+ }
+
+ tcg_gen_shli_tl(t0, t0, 16);
+ tcg_gen_extract_tl(t1, t1, 0, 16);
+ tcg_gen_shli_tl(t4, t4, 16);
+ tcg_gen_extract_tl(t5, t5, 0, 16);
+
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
+}
+
+/*
+ * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit addition/subtraction with accumulate.
+ */
+static void gen_mxu_q16acc(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv s3 = tcg_temp_new();
+ TCGv s2 = tcg_temp_new();
+ TCGv s1 = tcg_temp_new();
+ TCGv s0 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_extract_tl(t2, t3, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 16, 16);
+
+ switch (aptn2) {
+ case MXU_APTN2_AA: /* lop +, rop + */
+ tcg_gen_add_tl(s3, t1, t3);
+ tcg_gen_add_tl(s2, t0, t2);
+ tcg_gen_add_tl(s1, t1, t3);
+ tcg_gen_add_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_AS: /* lop +, rop - */
+ tcg_gen_sub_tl(s3, t1, t3);
+ tcg_gen_sub_tl(s2, t0, t2);
+ tcg_gen_add_tl(s1, t1, t3);
+ tcg_gen_add_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_SA: /* lop -, rop + */
+ tcg_gen_add_tl(s3, t1, t3);
+ tcg_gen_add_tl(s2, t0, t2);
+ tcg_gen_sub_tl(s1, t1, t3);
+ tcg_gen_sub_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_SS: /* lop -, rop - */
+ tcg_gen_sub_tl(s3, t1, t3);
+ tcg_gen_sub_tl(s2, t0, t2);
+ tcg_gen_sub_tl(s1, t1, t3);
+ tcg_gen_sub_tl(s0, t0, t2);
+ break;
+ }
+
+ if (XRa != 0) {
+ tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
+ tcg_gen_add_tl(t1, t1, s1);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
+ }
+
+ if (XRd != 0) {
+ tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
+ tcg_gen_add_tl(t1, t1, s3);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
+ }
+}
+
+/*
+ * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit accumulate.
+ */
+static void gen_mxu_q16accm(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+
+ if (XRa != 0) {
+ TCGv a0 = tcg_temp_new();
+ TCGv a1 = tcg_temp_new();
+
+ tcg_gen_extract_tl(t0, t2, 0, 16);
+ tcg_gen_extract_tl(t1, t2, 16, 16);
+
+ gen_load_mxu_gpr(a1, XRa);
+ tcg_gen_extract_tl(a0, a1, 0, 16);
+ tcg_gen_extract_tl(a1, a1, 16, 16);
+
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(a0, a0, t0);
+ tcg_gen_sub_tl(a1, a1, t1);
+ } else {
+ tcg_gen_add_tl(a0, a0, t0);
+ tcg_gen_add_tl(a1, a1, t1);
+ }
+ tcg_gen_extract_tl(a0, a0, 0, 16);
+ tcg_gen_shli_tl(a1, a1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
+ }
+
+ if (XRd != 0) {
+ TCGv a0 = tcg_temp_new();
+ TCGv a1 = tcg_temp_new();
+
+ tcg_gen_extract_tl(t0, t3, 0, 16);
+ tcg_gen_extract_tl(t1, t3, 16, 16);
+
+ gen_load_mxu_gpr(a1, XRd);
+ tcg_gen_extract_tl(a0, a1, 0, 16);
+ tcg_gen_extract_tl(a1, a1, 16, 16);
+
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(a0, a0, t0);
+ tcg_gen_sub_tl(a1, a1, t1);
+ } else {
+ tcg_gen_add_tl(a0, a0, t0);
+ tcg_gen_add_tl(a1, a1, t1);
+ }
+ tcg_gen_extract_tl(a0, a0, 0, 16);
+ tcg_gen_shli_tl(a1, a1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
+ }
+}
+
+
+/*
+ * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
+ * 16-bit sign extended addition and accumulate.
+ */
+static void gen_mxu_d16asum(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+
+ if (XRa != 0) {
+ tcg_gen_sextract_tl(t0, t2, 0, 16);
+ tcg_gen_sextract_tl(t1, t2, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ }
+
+ if (XRd != 0) {
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t3, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+ }
+ }
+}
+
+/*
+ * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction, set carry.
+ *
+ * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction with carry.
+ */
+static void gen_mxu_d32add(DisasContext *ctx)
+{
+ uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ addc = extract32(ctx->opcode, 22, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv cr = tcg_temp_new();
+
+ if (unlikely(addc > 1)) {
+ /* opcode incorrect -> do nothing */
+ } else if (addc == 1) {
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* FIXME ??? What if XRa == XRd ??? */
+ /* aptn2 is unused here */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_mxu_cr(cr);
+ if (XRa != 0) {
+ tcg_gen_extract_tl(t2, cr, 31, 1);
+ tcg_gen_add_tl(t0, t0, t2);
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ if (XRd != 0) {
+ tcg_gen_extract_tl(t2, cr, 30, 1);
+ tcg_gen_add_tl(t1, t1, t2);
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ }
+ }
+ } else if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ /* FIXME ??? What if XRa == XRd ??? */
+ TCGv carry = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_mxu_cr(cr);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+ } else {
+ tcg_gen_add_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+ }
+ tcg_gen_andi_tl(cr, cr, 0x7fffffff);
+ tcg_gen_shli_tl(carry, carry, 31);
+ tcg_gen_or_tl(cr, cr, carry);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+ } else {
+ tcg_gen_add_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+ }
+ tcg_gen_andi_tl(cr, cr, 0xbfffffff);
+ tcg_gen_shli_tl(carry, carry, 30);
+ tcg_gen_or_tl(cr, cr, carry);
+ gen_store_mxu_gpr(t2, XRd);
+ }
+ gen_store_mxu_cr(cr);
+ }
+}
+
+/*
+ * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32acc(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ } else {
+ tcg_gen_add_tl(t2, t0, t1);
+ }
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ } else {
+ tcg_gen_add_tl(t2, t0, t1);
+ }
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ }
+ }
+}
+
+/*
+ * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32accm(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ tcg_gen_add_tl(t2, t0, t1);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ }
+ }
+ if (XRd != 0) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ }
+ }
+ }
+}
+
+/*
+ * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction.
+ */
+static void gen_mxu_d32asum(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ }
+ }
+ }
+}
+
+/*
+ * MXU instruction category: Miscellaneous
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * S32EXTR S32LUI
+ * S32EXTRV
+ * Q16SAT
+ * Q16SCOP
+ */
+
+/*
+ * S32EXTR XRa, XRd, rs, bits5
+ * Extract bits5 bits from 64-bit pair {XRa:XRd}
+ * starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extr(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3;
+ uint32_t XRa, XRd, rs, bits5;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ bits5 = extract32(ctx->opcode, 16, 5);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
+ /* {XRa} = extract({tmp}, 0, bits5); */
+ if (bits5 > 0) {
+ TCGLabel *l_xra_only = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRd);
+ gen_load_mxu_gpr(t1, XRa);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x1f);
+ tcg_gen_subfi_tl(t2, 32, t2);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
+ tcg_gen_subfi_tl(t2, bits5, t2);
+ tcg_gen_subfi_tl(t3, 32, t2);
+ tcg_gen_shr_tl(t0, t0, t3);
+ tcg_gen_shl_tl(t1, t1, t2);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_br(l_done);
+ gen_set_label(l_xra_only);
+ tcg_gen_subi_tl(t2, t2, bits5);
+ tcg_gen_shr_tl(t0, t1, t2);
+ gen_set_label(l_done);
+ tcg_gen_extract_tl(t0, t0, 0, bits5);
+ } else {
+ /* unspecified behavior but matches tests on real hardware */
+ tcg_gen_movi_tl(t0, 0);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S32EXTRV XRa, XRd, rs, rt
+ * Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
+ * starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extrv(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3, t4;
+ uint32_t XRa, XRd, rs, rt;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+ t4 = tcg_temp_new();
+ TCGLabel *l_xra_only = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+ TCGLabel *l_zero = gen_new_label();
+ TCGLabel *l_extract = gen_new_label();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ rt = extract32(ctx->opcode, 16, 5);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
+ gen_load_mxu_gpr(t0, XRd);
+ gen_load_mxu_gpr(t1, XRa);
+ gen_load_gpr(t2, rs);
+ gen_load_gpr(t4, rt);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
+ tcg_gen_andi_tl(t2, t2, 0x1f);
+ tcg_gen_subfi_tl(t2, 32, t2);
+ tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
+ tcg_gen_sub_tl(t2, t4, t2);
+ tcg_gen_subfi_tl(t3, 32, t2);
+ tcg_gen_shr_tl(t0, t0, t3);
+ tcg_gen_shl_tl(t1, t1, t2);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_br(l_extract);
+
+ gen_set_label(l_xra_only);
+ tcg_gen_sub_tl(t2, t2, t4);
+ tcg_gen_shr_tl(t0, t1, t2);
+ tcg_gen_br(l_extract);
+
+ /* unspecified behavior but matches tests on real hardware */
+ gen_set_label(l_zero);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_done);
+
+ /* {XRa} = extract({tmp}, 0, rt) */
+ gen_set_label(l_extract);
+ tcg_gen_subfi_tl(t4, 32, t4);
+ tcg_gen_shl_tl(t0, t0, t4);
+ tcg_gen_shr_tl(t0, t0, t4);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S32LUI XRa, S8, optn3
+ * Permutate the immediate S8 value to form a word
+ * to update XRa.
+ */
+static void gen_mxu_s32lui(DisasContext *ctx)
+{
+ uint32_t XRa, s8, optn3, pad;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s8 = extract32(ctx->opcode, 10, 8);
+ pad = extract32(ctx->opcode, 21, 2);
+ optn3 = extract32(ctx->opcode, 23, 3);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ uint32_t s16;
+ TCGv t0 = tcg_temp_new();
+
+ switch (optn3) {
+ case 0:
+ tcg_gen_movi_tl(t0, s8);
+ break;
+ case 1:
+ tcg_gen_movi_tl(t0, s8 << 8);
+ break;
+ case 2:
+ tcg_gen_movi_tl(t0, s8 << 16);
+ break;
+ case 3:
+ tcg_gen_movi_tl(t0, s8 << 24);
+ break;
+ case 4:
+ tcg_gen_movi_tl(t0, (s8 << 16) | s8);
+ break;
+ case 5:
+ tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
+ break;
+ case 6:
+ s16 = (uint16_t)(int16_t)(int8_t)s8;
+ tcg_gen_movi_tl(t0, (s16 << 16) | s16);
+ break;
+ case 7:
+ tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
+ break;
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
+
+/*
+ * Q16SAT XRa, XRb, XRc
+ * Packs four 16-bit signed integers in XRb and XRc to
+ * four saturated unsigned 8-bit into XRa.
+ *
+ */
+static void gen_mxu_Q16SAT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ tcg_gen_movi_tl(t2, 0);
+ if (XRb != 0) {
+ TCGLabel *l_less_hi = gen_new_label();
+ TCGLabel *l_less_lo = gen_new_label();
+ TCGLabel *l_lo = gen_new_label();
+ TCGLabel *l_greater_hi = gen_new_label();
+ TCGLabel *l_greater_lo = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_less_hi);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_greater_hi);
+ tcg_gen_movi_tl(t0, 255);
+
+ gen_set_label(l_lo);
+ tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
+ tcg_gen_sari_tl(t1, t1, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+ tcg_gen_br(l_done);
+ gen_set_label(l_less_lo);
+ tcg_gen_movi_tl(t1, 0);
+ tcg_gen_br(l_done);
+ gen_set_label(l_greater_lo);
+ tcg_gen_movi_tl(t1, 255);
+
+ gen_set_label(l_done);
+ tcg_gen_shli_tl(t2, t0, 24);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(t2, t2, t1);
+ }
+
+ if (XRc != 0) {
+ TCGLabel *l_less_hi = gen_new_label();
+ TCGLabel *l_less_lo = gen_new_label();
+ TCGLabel *l_lo = gen_new_label();
+ TCGLabel *l_greater_hi = gen_new_label();
+ TCGLabel *l_greater_lo = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_less_hi);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_greater_hi);
+ tcg_gen_movi_tl(t0, 255);
+
+ gen_set_label(l_lo);
+ tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
+ tcg_gen_sari_tl(t1, t1, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+ tcg_gen_br(l_done);
+ gen_set_label(l_less_lo);
+ tcg_gen_movi_tl(t1, 0);
+ tcg_gen_br(l_done);
+ gen_set_label(l_greater_lo);
+ tcg_gen_movi_tl(t1, 255);
+
+ gen_set_label(l_done);
+ tcg_gen_shli_tl(t0, t0, 8);
+ tcg_gen_or_tl(t2, t2, t0);
+ tcg_gen_or_tl(t2, t2, t1);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q16SCOP XRa, XRd, XRb, XRc
+ * Determine sign of quad packed 16-bit signed values
+ * in XRb and XRc put result in XRa and XRd respectively.
+ */
+static void gen_mxu_q16scop(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ TCGLabel *l_b_hi_lt = gen_new_label();
+ TCGLabel *l_b_hi_gt = gen_new_label();
+ TCGLabel *l_b_lo = gen_new_label();
+ TCGLabel *l_b_lo_lt = gen_new_label();
+ TCGLabel *l_c_hi = gen_new_label();
+ TCGLabel *l_c_hi_lt = gen_new_label();
+ TCGLabel *l_c_hi_gt = gen_new_label();
+ TCGLabel *l_c_lo = gen_new_label();
+ TCGLabel *l_c_lo_lt = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ tcg_gen_sextract_tl(t2, t0, 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
+ tcg_gen_movi_tl(t3, 0);
+ tcg_gen_br(l_b_lo);
+ gen_set_label(l_b_hi_lt);
+ tcg_gen_movi_tl(t3, 0xffff0000);
+ tcg_gen_br(l_b_lo);
+ gen_set_label(l_b_hi_gt);
+ tcg_gen_movi_tl(t3, 0x00010000);
+
+ gen_set_label(l_b_lo);
+ tcg_gen_sextract_tl(t2, t0, 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
+ tcg_gen_ori_tl(t3, t3, 0x00000001);
+ tcg_gen_br(l_c_hi);
+ gen_set_label(l_b_lo_lt);
+ tcg_gen_ori_tl(t3, t3, 0x0000ffff);
+ tcg_gen_br(l_c_hi);
+
+ gen_set_label(l_c_hi);
+ tcg_gen_sextract_tl(t2, t1, 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
+ tcg_gen_movi_tl(t4, 0);
+ tcg_gen_br(l_c_lo);
+ gen_set_label(l_c_hi_lt);
+ tcg_gen_movi_tl(t4, 0xffff0000);
+ tcg_gen_br(l_c_lo);
+ gen_set_label(l_c_hi_gt);
+ tcg_gen_movi_tl(t4, 0x00010000);
+
+ gen_set_label(l_c_lo);
+ tcg_gen_sextract_tl(t2, t1, 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
+ tcg_gen_ori_tl(t4, t4, 0x00000001);
+ tcg_gen_br(l_done);
+ gen_set_label(l_c_lo_lt);
+ tcg_gen_ori_tl(t4, t4, 0x0000ffff);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t4, XRd);
+}
+
+/*
+ * S32SFL XRa, XRd, XRb, XRc
+ * Shuffle bytes according to one of four patterns.
+ */
+static void gen_mxu_s32sfl(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa, ptn2;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+ ptn2 = extract32(ctx->opcode, 24, 2);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ switch (ptn2) {
+ case 0:
+ tcg_gen_andi_tl(t2, t0, 0xff000000);
+ tcg_gen_andi_tl(t3, t1, 0x000000ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t0, 8, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+ break;
+ case 1:
+ tcg_gen_andi_tl(t2, t0, 0xff000000);
+ tcg_gen_andi_tl(t3, t1, 0x000000ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t3, t3, t1, 8, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 8, 8);
+ break;
+ case 2:
+ tcg_gen_andi_tl(t2, t0, 0xff00ff00);
+ tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
+ tcg_gen_shri_tl(t0, t0, 16);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_shri_tl(t1, t1, 16);
+ tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+ break;
+ case 3:
+ tcg_gen_andi_tl(t2, t0, 0xffff0000);
+ tcg_gen_andi_tl(t3, t1, 0x0000ffff);
+ tcg_gen_shri_tl(t1, t1, 16);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 16);
+ tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
+ break;
+ }
+
+ gen_store_mxu_gpr(t2, XRa);
+ gen_store_mxu_gpr(t3, XRd);
+}
+
+/*
+ * Q8SAD XRa, XRd, XRb, XRc
+ * Typical SAD operation for motion estimation.
+ */
+static void gen_mxu_q8sad(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+ gen_load_mxu_gpr(t5, XRd);
+ tcg_gen_movi_tl(t4, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_andi_tl(t0, t2, 0xff);
+ tcg_gen_andi_tl(t1, t3, 0xff);
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_abs_tl(t0, t0);
+ tcg_gen_add_tl(t4, t4, t0);
+ if (i < 3) {
+ tcg_gen_shri_tl(t2, t2, 8);
+ tcg_gen_shri_tl(t3, t3, 8);
+ }
+ }
+ tcg_gen_add_tl(t5, t5, t4);
+ gen_store_mxu_gpr(t4, XRa);
+ gen_store_mxu_gpr(t5, XRd);
+}
/*
* MXU instruction category: align
@@ -1408,6 +4258,129 @@ static void gen_mxu_S32ALNI(DisasContext *ctx)
}
}
+/*
+ * S32ALN XRc, XRb, XRa, rs
+ * Arrange bytes from XRb and XRc according to one of five sets of
+ * rules determined by rs[2:0], and place the result in XRa.
+ */
+static void gen_mxu_S32ALN(DisasContext *ctx)
+{
+ uint32_t rs, XRc, XRb, XRa;
+
+ rs = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to all 0s */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_exit = gen_new_label();
+ TCGLabel *l_b_only = gen_new_label();
+ TCGLabel *l_c_only = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x07);
+
+ /* do nothing for undefined cases */
+ tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
+
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
+
+ tcg_gen_shli_tl(t2, t2, 3);
+ tcg_gen_subfi_tl(t3, 32, t2);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_shr_tl(t1, t1, t3);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_br(l_exit);
+
+ gen_set_label(l_b_only);
+ gen_store_mxu_gpr(t0, XRa);
+ tcg_gen_br(l_exit);
+
+ gen_set_label(l_c_only);
+ gen_store_mxu_gpr(t1, XRa);
+
+ gen_set_label(l_exit);
+ }
+}
+
+/*
+ * S32MADD XRa, XRd, rb, rc
+ * 32 to 64 bit signed multiply with subsequent add
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MADDU XRa, XRd, rb, rc
+ * 32 to 64 bit unsigned multiply with subsequent add
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MSUB XRa, XRd, rb, rc
+ * 32 to 64 bit signed multiply with subsequent subtract
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MSUBU XRa, XRd, rb, rc
+ * 32 to 64 bit unsigned multiply with subsequent subtract
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ */
+static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
+{
+ uint32_t XRa, XRd, Rb, Rc;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ Rb = extract32(ctx->opcode, 16, 5);
+ Rc = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(Rb == 0 || Rc == 0)) {
+ /* do nothing because x + 0 * y => x */
+ } else if (unlikely(XRa == 0 && XRd == 0)) {
+ /* do nothing because result just dropped */
+ } else {
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+
+ if (uns) {
+ tcg_gen_extu_tl_i64(t2, t0);
+ tcg_gen_extu_tl_i64(t3, t1);
+ } else {
+ tcg_gen_ext_tl_i64(t2, t0);
+ tcg_gen_ext_tl_i64(t3, t1);
+ }
+ tcg_gen_mul_i64(t2, t2, t3);
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t1, XRd);
+
+ tcg_gen_concat_tl_i64(t3, t1, t0);
+ if (sub) {
+ tcg_gen_sub_i64(t3, t3, t2);
+ } else {
+ tcg_gen_add_i64(t3, t3, t2);
+ }
+ gen_move_low32(t1, t3);
+ gen_move_high32(t0, t3);
+
+ tcg_gen_mov_tl(cpu_HI[0], t0);
+ tcg_gen_mov_tl(cpu_LO[0], t1);
+
+ gen_store_mxu_gpr(t1, XRd);
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
/*
* Decoding engine for MXU
@@ -1431,6 +4404,116 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
case OPC_MXU_Q8MIN:
gen_mxu_Q8MAX_Q8MIN(ctx);
break;
+ case OPC_MXU_Q8SLT:
+ gen_mxu_q8slt(ctx, false);
+ break;
+ case OPC_MXU_Q8SLTU:
+ gen_mxu_q8slt(ctx, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 0, 6);
+ uint32_t pad = extract32(ctx->opcode, 14, 2);
+
+ if (pad != 2) {
+ /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
+ return false;
+ }
+
+ switch (opcode) {
+ case OPC_MXU_S32MADD:
+ gen_mxu_s32madd_sub(ctx, false, false);
+ break;
+ case OPC_MXU_S32MADDU:
+ gen_mxu_s32madd_sub(ctx, false, true);
+ break;
+ case OPC_MXU_S32MSUB:
+ gen_mxu_s32madd_sub(ctx, true, false);
+ break;
+ case OPC_MXU_S32MSUBU:
+ gen_mxu_s32madd_sub(ctx, true, true);
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static void decode_opc_mxu__pool01(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_S32SLT:
+ gen_mxu_S32SLT(ctx);
+ break;
+ case OPC_MXU_D16SLT:
+ gen_mxu_D16SLT(ctx);
+ break;
+ case OPC_MXU_D16AVG:
+ gen_mxu_d16avg(ctx, false);
+ break;
+ case OPC_MXU_D16AVGR:
+ gen_mxu_d16avg(ctx, true);
+ break;
+ case OPC_MXU_Q8AVG:
+ gen_mxu_q8avg(ctx, false);
+ break;
+ case OPC_MXU_Q8AVGR:
+ gen_mxu_q8avg(ctx, true);
+ break;
+ case OPC_MXU_Q8ADD:
+ gen_mxu_Q8ADD(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool02(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_S32CPS:
+ gen_mxu_S32CPS(ctx);
+ break;
+ case OPC_MXU_D16CPS:
+ gen_mxu_D16CPS(ctx);
+ break;
+ case OPC_MXU_Q8ABD:
+ gen_mxu_Q8ABD(ctx);
+ break;
+ case OPC_MXU_Q16SAT:
+ gen_mxu_Q16SAT(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool03(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 24, 2);
+
+ switch (opcode) {
+ case OPC_MXU_D16MULF:
+ gen_mxu_d16mul(ctx, true, true);
+ break;
+ case OPC_MXU_D16MULE:
+ gen_mxu_d16mul(ctx, true, false);
+ break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
@@ -1440,12 +4523,215 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
- uint32_t opcode = extract32(ctx->opcode, 20, 1);
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32ldxx(ctx, reversed, false);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool05(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32stxx(ctx, reversed, false);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool06(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
- case OPC_MXU_S32LDD:
- case OPC_MXU_S32LDDR:
- gen_mxu_s32ldd_s32lddr(ctx);
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool07(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32stxvx(ctx, opcode, false, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool08(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32ldxx(ctx, reversed, true);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool09(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32stxx(ctx, reversed, true);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool10(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool11(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32stxvx(ctx, opcode, true, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool12(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_D32ACC:
+ gen_mxu_d32acc(ctx);
+ break;
+ case OPC_MXU_D32ACCM:
+ gen_mxu_d32accm(ctx);
+ break;
+ case OPC_MXU_D32ASUM:
+ gen_mxu_d32asum(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool13(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q16ACC:
+ gen_mxu_q16acc(ctx);
+ break;
+ case OPC_MXU_Q16ACCM:
+ gen_mxu_q16accm(ctx);
+ break;
+ case OPC_MXU_D16ASUM:
+ gen_mxu_d16asum(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool14(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q8ADDE:
+ gen_mxu_q8adde(ctx, false);
+ break;
+ case OPC_MXU_D8SUM:
+ gen_mxu_d8sum(ctx, false);
+ break;
+ case OPC_MXU_D8SUMC:
+ gen_mxu_d8sum(ctx, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool15(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32MUL:
+ gen_mxu_s32mul(ctx, false);
+ break;
+ case OPC_MXU_S32MULU:
+ gen_mxu_s32mul(ctx, true);
+ break;
+ case OPC_MXU_S32EXTR:
+ gen_mxu_s32extr(ctx);
+ break;
+ case OPC_MXU_S32EXTRV:
+ gen_mxu_s32extrv(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
@@ -1459,9 +4745,18 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
+ case OPC_MXU_D32SARW:
+ gen_mxu_d32sarl(ctx, true);
+ break;
+ case OPC_MXU_S32ALN:
+ gen_mxu_S32ALN(ctx);
+ break;
case OPC_MXU_S32ALNI:
gen_mxu_S32ALNI(ctx);
break;
+ case OPC_MXU_S32LUI:
+ gen_mxu_s32lui(ctx);
+ break;
case OPC_MXU_S32NOR:
gen_mxu_S32NOR(ctx);
break;
@@ -1481,14 +4776,128 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
}
}
+static void decode_opc_mxu__pool17(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 6, 3);
+ uint32_t strd2 = extract32(ctx->opcode, 9, 2);
+
+ if (strd2 > 2) {
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ return;
+ }
+
+ switch (opcode) {
+ case OPC_MXU_LXW:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
+ break;
+ case OPC_MXU_LXB:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
+ break;
+ case OPC_MXU_LXH:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
+ break;
+ case OPC_MXU_LXBU:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
+ break;
+ case OPC_MXU_LXHU:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool18(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_D32SLLV:
+ gen_mxu_d32sxxv(ctx, false, false);
+ break;
+ case OPC_MXU_D32SLRV:
+ gen_mxu_d32sxxv(ctx, true, false);
+ break;
+ case OPC_MXU_D32SARV:
+ gen_mxu_d32sxxv(ctx, true, true);
+ break;
+ case OPC_MXU_Q16SLLV:
+ gen_mxu_q16sxxv(ctx, false, false);
+ break;
+ case OPC_MXU_Q16SLRV:
+ gen_mxu_q16sxxv(ctx, true, false);
+ break;
+ case OPC_MXU_Q16SARV:
+ gen_mxu_q16sxxv(ctx, true, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
- uint32_t opcode = extract32(ctx->opcode, 22, 2);
+ uint32_t opcode = extract32(ctx->opcode, 22, 4);
switch (opcode) {
case OPC_MXU_Q8MUL:
+ gen_mxu_q8mul_mac(ctx, false, false);
+ break;
case OPC_MXU_Q8MULSU:
- gen_mxu_q8mul_q8mulsu(ctx);
+ gen_mxu_q8mul_mac(ctx, true, false);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool20(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_Q8MOVZ:
+ gen_mxu_q8movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_Q8MOVN:
+ gen_mxu_q8movzn(ctx, TCG_COND_EQ);
+ break;
+ case OPC_MXU_D16MOVZ:
+ gen_mxu_d16movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_D16MOVN:
+ gen_mxu_d16movzn(ctx, TCG_COND_EQ);
+ break;
+ case OPC_MXU_S32MOVZ:
+ gen_mxu_s32movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_S32MOVN:
+ gen_mxu_s32movzn(ctx, TCG_COND_EQ);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool21(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q8MAC:
+ gen_mxu_q8mul_mac(ctx, false, true);
+ break;
+ case OPC_MXU_Q8MACSU:
+ gen_mxu_q8mul_mac(ctx, true, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
@@ -1497,6 +4906,7 @@ static void decode_opc_mxu__pool19(DisasContext *ctx)
}
}
+
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
uint32_t opcode = extract32(insn, 0, 6);
@@ -1520,30 +4930,163 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
switch (opcode) {
+ case OPC_MXU_S32MADD:
+ case OPC_MXU_S32MADDU:
+ case OPC_MXU_S32MSUB:
+ case OPC_MXU_S32MSUBU:
+ return decode_opc_mxu_s32madd_sub(ctx);
case OPC_MXU__POOL00:
decode_opc_mxu__pool00(ctx);
break;
case OPC_MXU_D16MUL:
- gen_mxu_d16mul(ctx);
+ gen_mxu_d16mul(ctx, false, false);
break;
case OPC_MXU_D16MAC:
- gen_mxu_d16mac(ctx);
+ gen_mxu_d16mac(ctx, false, false);
+ break;
+ case OPC_MXU_D16MACF:
+ gen_mxu_d16mac(ctx, true, true);
+ break;
+ case OPC_MXU_D16MADL:
+ gen_mxu_d16madl(ctx);
+ break;
+ case OPC_MXU_S16MAD:
+ gen_mxu_s16mad(ctx);
+ break;
+ case OPC_MXU_Q16ADD:
+ gen_mxu_q16add(ctx);
+ break;
+ case OPC_MXU_D16MACE:
+ gen_mxu_d16mac(ctx, true, false);
+ break;
+ case OPC_MXU__POOL01:
+ decode_opc_mxu__pool01(ctx);
+ break;
+ case OPC_MXU__POOL02:
+ decode_opc_mxu__pool02(ctx);
+ break;
+ case OPC_MXU__POOL03:
+ decode_opc_mxu__pool03(ctx);
break;
case OPC_MXU__POOL04:
decode_opc_mxu__pool04(ctx);
break;
+ case OPC_MXU__POOL05:
+ decode_opc_mxu__pool05(ctx);
+ break;
+ case OPC_MXU__POOL06:
+ decode_opc_mxu__pool06(ctx);
+ break;
+ case OPC_MXU__POOL07:
+ decode_opc_mxu__pool07(ctx);
+ break;
+ case OPC_MXU__POOL08:
+ decode_opc_mxu__pool08(ctx);
+ break;
+ case OPC_MXU__POOL09:
+ decode_opc_mxu__pool09(ctx);
+ break;
+ case OPC_MXU__POOL10:
+ decode_opc_mxu__pool10(ctx);
+ break;
+ case OPC_MXU__POOL11:
+ decode_opc_mxu__pool11(ctx);
+ break;
+ case OPC_MXU_D32ADD:
+ gen_mxu_d32add(ctx);
+ break;
+ case OPC_MXU__POOL12:
+ decode_opc_mxu__pool12(ctx);
+ break;
+ case OPC_MXU__POOL13:
+ decode_opc_mxu__pool13(ctx);
+ break;
+ case OPC_MXU__POOL14:
+ decode_opc_mxu__pool14(ctx);
+ break;
+ case OPC_MXU_Q8ACCE:
+ gen_mxu_q8adde(ctx, true);
+ break;
case OPC_MXU_S8LDD:
- gen_mxu_s8ldd(ctx);
+ gen_mxu_s8ldd(ctx, false);
+ break;
+ case OPC_MXU_S8STD:
+ gen_mxu_s8std(ctx, false);
+ break;
+ case OPC_MXU_S8LDI:
+ gen_mxu_s8ldd(ctx, true);
+ break;
+ case OPC_MXU_S8SDI:
+ gen_mxu_s8std(ctx, true);
+ break;
+ case OPC_MXU__POOL15:
+ decode_opc_mxu__pool15(ctx);
break;
case OPC_MXU__POOL16:
decode_opc_mxu__pool16(ctx);
break;
+ case OPC_MXU__POOL17:
+ decode_opc_mxu__pool17(ctx);
+ break;
+ case OPC_MXU_S16LDD:
+ gen_mxu_s16ldd(ctx, false);
+ break;
+ case OPC_MXU_S16STD:
+ gen_mxu_s16std(ctx, false);
+ break;
+ case OPC_MXU_S16LDI:
+ gen_mxu_s16ldd(ctx, true);
+ break;
+ case OPC_MXU_S16SDI:
+ gen_mxu_s16std(ctx, true);
+ break;
+ case OPC_MXU_D32SLL:
+ gen_mxu_d32sxx(ctx, false, false);
+ break;
+ case OPC_MXU_D32SLR:
+ gen_mxu_d32sxx(ctx, true, false);
+ break;
+ case OPC_MXU_D32SARL:
+ gen_mxu_d32sarl(ctx, false);
+ break;
+ case OPC_MXU_D32SAR:
+ gen_mxu_d32sxx(ctx, true, true);
+ break;
+ case OPC_MXU_Q16SLL:
+ gen_mxu_q16sxx(ctx, false, false);
+ break;
+ case OPC_MXU__POOL18:
+ decode_opc_mxu__pool18(ctx);
+ break;
+ case OPC_MXU_Q16SLR:
+ gen_mxu_q16sxx(ctx, true, false);
+ break;
+ case OPC_MXU_Q16SAR:
+ gen_mxu_q16sxx(ctx, true, true);
+ break;
case OPC_MXU__POOL19:
decode_opc_mxu__pool19(ctx);
break;
+ case OPC_MXU__POOL20:
+ decode_opc_mxu__pool20(ctx);
+ break;
+ case OPC_MXU__POOL21:
+ decode_opc_mxu__pool21(ctx);
+ break;
+ case OPC_MXU_Q16SCOP:
+ gen_mxu_q16scop(ctx);
+ break;
+ case OPC_MXU_Q8MADL:
+ gen_mxu_q8madl(ctx);
+ break;
+ case OPC_MXU_S32SFL:
+ gen_mxu_s32sfl(ctx);
+ break;
+ case OPC_MXU_Q8SAD:
+ gen_mxu_q8sad(ctx);
+ break;
default:
- MIPS_INVAL("decode_opc_mxu");
- gen_reserved_instruction(ctx);
+ return false;
}
gen_set_label(l_exit);
diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c
index ef3dafc..98935b5 100644
--- a/target/mips/tcg/op_helper.c
+++ b/target/mips/tcg/op_helper.c
@@ -257,6 +257,22 @@ void helper_pmon(CPUMIPSState *env, int function)
}
}
+#ifdef TARGET_MIPS64
+target_ulong helper_lcsr_cpucfg(CPUMIPSState *env, target_ulong rs)
+{
+ switch (rs) {
+ case 0:
+ return env->CP0_PRid;
+ case 1:
+ return env->lcsr_cpucfg1;
+ case 2:
+ return env->lcsr_cpucfg2;
+ default:
+ return 0;
+ }
+}
+#endif
+
#if !defined(CONFIG_USER_ONLY)
void mips_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
diff --git a/target/mips/tcg/sysemu/lcsr_helper.c b/target/mips/tcg/sysemu/lcsr_helper.c
new file mode 100644
index 0000000..942143d
--- /dev/null
+++ b/target/mips/tcg/sysemu/lcsr_helper.c
@@ -0,0 +1,45 @@
+/*
+ * Loongson CSR instructions translation routines
+ *
+ * Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "internal.h"
+#include "qemu/host-utils.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+
+#define GET_MEMTXATTRS(cas) \
+ ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
+
+uint64_t helper_lcsr_rdcsr(CPUMIPSState *env, target_ulong r_addr)
+{
+ return address_space_ldl(&env->iocsr.as, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+uint64_t helper_lcsr_drdcsr(CPUMIPSState *env, target_ulong r_addr)
+{
+ return address_space_ldq(&env->iocsr.as, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_lcsr_wrcsr(CPUMIPSState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stl(&env->iocsr.as, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_lcsr_dwrcsr(CPUMIPSState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stq(&env->iocsr.as, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
diff --git a/target/mips/tcg/sysemu/meson.build b/target/mips/tcg/sysemu/meson.build
index 43b35b3..ec665a4 100644
--- a/target/mips/tcg/sysemu/meson.build
+++ b/target/mips/tcg/sysemu/meson.build
@@ -4,3 +4,7 @@ mips_system_ss.add(files(
'special_helper.c',
'tlb_helper.c',
))
+
+mips_system_ss.add(when: 'TARGET_MIPS64', if_true: files(
+ 'lcsr_helper.c',
+))
diff --git a/target/mips/tcg/sysemu_helper.h.inc b/target/mips/tcg/sysemu_helper.h.inc
index af585b5..f163af1 100644
--- a/target/mips/tcg/sysemu_helper.h.inc
+++ b/target/mips/tcg/sysemu_helper.h.inc
@@ -181,3 +181,11 @@ DEF_HELPER_1(eret, void, env)
DEF_HELPER_1(eretnc, void, env)
DEF_HELPER_1(deret, void, env)
DEF_HELPER_3(cache, void, env, tl, i32)
+
+#ifdef TARGET_MIPS64
+/* Loongson CSR */
+DEF_HELPER_2(lcsr_rdcsr, i64, env, tl)
+DEF_HELPER_2(lcsr_drdcsr, i64, env, tl)
+DEF_HELPER_3(lcsr_wrcsr, void, env, tl, tl)
+DEF_HELPER_3(lcsr_dwrcsr, void, env, tl, tl)
+#endif
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 74af91e..9bb40f1 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -14644,12 +14644,9 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx)
}
#endif
if (TARGET_LONG_BITS == 32 && (ctx->insn_flags & ASE_MXU)) {
- if (MASK_SPECIAL2(ctx->opcode) == OPC_MUL) {
- gen_arith(ctx, OPC_MUL, rd, rs, rt);
- } else {
- decode_ase_mxu(ctx, ctx->opcode);
+ if (decode_ase_mxu(ctx, ctx->opcode)) {
+ break;
}
- break;
}
decode_opc_special2_legacy(env, ctx);
break;
@@ -15352,6 +15349,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx)
return;
}
#if defined(TARGET_MIPS64)
+ if (ase_lcsr_available(env) && decode_ase_lcsr(ctx, ctx->opcode)) {
+ return;
+ }
if (cpu_supports_isa(env, INSN_OCTEON) && decode_ext_octeon(ctx, ctx->opcode)) {
return;
}
diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h
index 3b0498a..db3dc93 100644
--- a/target/mips/tcg/translate.h
+++ b/target/mips/tcg/translate.h
@@ -221,6 +221,7 @@ bool decode_isa_rel6(DisasContext *ctx, uint32_t insn);
bool decode_ase_msa(DisasContext *ctx, uint32_t insn);
bool decode_ext_txx9(DisasContext *ctx, uint32_t insn);
#if defined(TARGET_MIPS64)
+bool decode_ase_lcsr(DisasContext *ctx, uint32_t insn);
bool decode_ext_tx79(DisasContext *ctx, uint32_t insn);
bool decode_ext_octeon(DisasContext *ctx, uint32_t insn);
#endif