diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-07-11 07:36:33 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-07-11 07:36:33 +0100 |
commit | 154e3b61ac9cfab9639e6d6207a96fff017040fe (patch) | |
tree | 9e2a6da765be2528cb9b7299f12247d123fd4740 /target | |
parent | adc97c4b808bb23d6bb17b8871787333af0086d2 (diff) | |
parent | 752dfff5ecf35a38145c2dfbb842224177fd1afd (diff) | |
download | qemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.zip qemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.tar.gz qemu-154e3b61ac9cfab9639e6d6207a96fff017040fe.tar.bz2 |
Merge tag 'mips-20230710' of https://github.com/philmd/qemu into staging
MIPS patches queue
- Use clock API & divider for cp0_timer to avoid rounding issue (Jiaxun)
- Implement Loongson CSR instructions (Jiaxun)
- Implement Ingenic MXU ASE v1 rev2 (Siarhei)
- Enable GINVx support for I6400 and I6500 cores (Marcin)
- Generalize PCI IDE controller models (Bernhard)
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEE+qvnXhKRciHc/Wuy4+MsLN6twN4FAmSsg5kACgkQ4+MsLN6t
# wN6O4g/9GpirNnG1tizIEksI17PaAotgui2PYzml2nQLyQNmPs3lSfyDEfFpZLC6
# HGxglNjdvCgmIhRH1IuRKuJofp0r84NY+sktXjz2+As3opyjR66gVsSefWeupr7t
# avZQQIBBOV3OYLzFkqjDpBflyKXz43MRW3r9ai4Dle/TwiE5GA1iKuQ6Rt55urtT
# 045OdtFZTsIwTyg75pSXExAehOn5FQ4aqIODwfJYqvhkkVZ9lgWYSgUOsgDcGqPQ
# eytpif6+m350Xme4BgqITMZkeIbyKcCcfU37JBqk/q6/gDDf18zSWpC7MNXea4ZR
# so9ffZqms/xcIOfIO3uc4t9AZRHchiVjFHihCUKc0mBTzLy1QhQ4ybdQu3fUywaG
# WziEFLrJ/qfWjixRxeDdBZamC2fSxYtcRNST7g+XttiMacvQC6aPFVfLDa+3Xjtt
# TmIjx8oGdLB9BMrGMuHsOygfgi98eGbWQ2I5ZhzwBbJ7uFQdeTkMCswcAsVcj8pW
# e7/ixw2e+SYFm0q9Z/QiZZ7LFDp/b3u7/ufXCUBX2r1gi7Xi+x60E6dm3Ge3XAsY
# qSx9ZOlVNJlIs/ChP0KckHDMeFuCnRmNEvKC039syHWSy6VP8NO7fwwxK+XytyrK
# aJMyPS97kVXuqriKZIGsV0KjLOz3neh0OdQTolPv1R5yb9tI6Xc=
# =rtlE
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 10 Jul 2023 11:18:01 PM BST
# gpg: using RSA key FAABE75E12917221DCFD6BB2E3E32C2CDEADC0DE
# gpg: Good signature from "Philippe Mathieu-Daudé (F4BUG) <f4bug@amsat.org>" [full]
* tag 'mips-20230710' of https://github.com/philmd/qemu: (44 commits)
hw/ide/piix: Move registration of VMStateDescription to DeviceClass
hw/ide/pci: Replace some magic numbers by constants
hw/ide: Extract bmdma_status_writeb()
hw/ide: Extract IDEBus assignment into bmdma_init()
hw/isa/vt82c686: Remove via_isa_set_irq()
hw/ide/via: Wire up IDE legacy interrupts in host device
hw/ide/pci: Expose legacy interrupts as named GPIOs
target/mips: enable GINVx support for I6400 and I6500
target/mips/mxu: Add Q8SAD instruction
target/mips/mxu: Add S32SFL instruction
target/mips/mxu: Add Q8MADL instruction
target/mips/mxu: Add Q16SCOP instruction
target/mips/mxu: Add Q8MAC Q8MACSU instructions
target/mips/mxu: Add S32/D16/Q8- MOVZ/MOVN instructions
target/mips/mxu: Add D32/Q16- SLLV/SLRV/SARV instructions
target/mips/mxu: Add Q16SLL Q16SLR Q16SAR instructions
target/mips/mxu: Add D32SLL D32SLR D32SAR instructions
target/mips/mxu: Add D32SARL D32SARW instructions
target/mips/mxu: Add S32ALN S32LUI insns
target/mips/mxu: Add S32MUL S32MULU S32EXTR S32EXTRV insns
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/mips/cpu-defs.c.inc | 59 | ||||
-rw-r--r-- | target/mips/cpu.c | 18 | ||||
-rw-r--r-- | target/mips/cpu.h | 45 | ||||
-rw-r--r-- | target/mips/helper.h | 4 | ||||
-rw-r--r-- | target/mips/internal.h | 2 | ||||
-rw-r--r-- | target/mips/sysemu/cp0_timer.c | 35 | ||||
-rw-r--r-- | target/mips/tcg/lcsr.decode | 17 | ||||
-rw-r--r-- | target/mips/tcg/lcsr_translate.c | 75 | ||||
-rw-r--r-- | target/mips/tcg/meson.build | 2 | ||||
-rw-r--r-- | target/mips/tcg/mxu_translate.c | 3753 | ||||
-rw-r--r-- | target/mips/tcg/op_helper.c | 16 | ||||
-rw-r--r-- | target/mips/tcg/sysemu/lcsr_helper.c | 45 | ||||
-rw-r--r-- | target/mips/tcg/sysemu/meson.build | 4 | ||||
-rw-r--r-- | target/mips/tcg/sysemu_helper.h.inc | 8 | ||||
-rw-r--r-- | target/mips/tcg/translate.c | 10 | ||||
-rw-r--r-- | target/mips/tcg/translate.h | 1 |
16 files changed, 3962 insertions, 132 deletions
diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc index d45f245..03185d9 100644 --- a/target/mips/cpu-defs.c.inc +++ b/target/mips/cpu-defs.c.inc @@ -118,6 +118,26 @@ const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { + .name = "XBurstR1", + .CP0_PRid = 0x1ed0024f, + .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) | + (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) | + (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) | + (0 << CP0C1_CA), + .CP0_Config2 = MIPS_CONFIG2, + .CP0_Config3 = MIPS_CONFIG3, + .CP0_LLAddr_rw_bitmask = 0, + .CP0_LLAddr_shift = 4, + .SYNCI_Step = 32, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x1278FF17, + .SEGBITS = 32, + .PABITS = 32, + .insn_flags = CPU_MIPS32R1 | ASE_MXU, + .mmu_type = MMU_TYPE_R4000, + }, + { .name = "4KEmR1", .CP0_PRid = 0x00018500, .CP0_Config0 = MIPS_CONFIG0 | (MMU_TYPE_FMT << CP0C0_MT), @@ -324,6 +344,32 @@ const mips_def_t mips_defs[] = .mmu_type = MMU_TYPE_R4000, }, { + .name = "XBurstR2", + .CP0_PRid = 0x2ed1024f, + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) | + (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) | + (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) | + (1 << CP0C1_CA), + .CP0_Config2 = MIPS_CONFIG2, + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_DSP2P) | (1 << CP0C3_DSPP) | + (1 << CP0C3_VInt), + .CP0_LLAddr_rw_bitmask = 0, + .CP0_LLAddr_shift = 4, + .SYNCI_Step = 32, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x3778FF1F, + .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) | + (1 << FCR0_D) | (1 << FCR0_S) | (0x93 << FCR0_PRID), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 32, + .PABITS = 32, + .insn_flags = CPU_MIPS32R2 | ASE_MXU, + .mmu_type = MMU_TYPE_R4000, + }, + { .name = "M14K", .CP0_PRid = 0x00019b00, /* Config1 implemented, fixed mapping MMU, @@ -709,7 
+755,7 @@ const mips_def_t mips_defs[] = .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP), + (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, @@ -749,7 +795,7 @@ const mips_def_t mips_defs[] = .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP), + (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, @@ -895,6 +941,15 @@ const mips_def_t mips_defs[] = .CP1_fcr31 = 0, .CP1_fcr31_rw_bitmask = 0xFF83FFFF, .MSAIR = (0x01 << MSAIR_ProcID) | (0x40 << MSAIR_Rev), + .lcsr_cpucfg1 = (1 << CPUCFG1_FP) | (2 << CPUCFG1_FPREV) | + (1 << CPUCFG1_MSA1) | (1 << CPUCFG1_LSLDR0) | + (1 << CPUCFG1_LSPERF) | (1 << CPUCFG1_LSPERFX) | + (1 << CPUCFG1_LSSYNCI) | (1 << CPUCFG1_LLEXC) | + (1 << CPUCFG1_SCRAND) | (1 << CPUCFG1_MUALP) | + (1 << CPUCFG1_KMUALEN) | (1 << CPUCFG1_ITLBT) | + (1 << CPUCFG1_SFBP) | (1 << CPUCFG1_CDMAP), + .lcsr_cpucfg2 = (1 << CPUCFG2_LEXT1) | (1 << CPUCFG2_LCSRP) | + (1 << CPUCFG2_LDISBLIKELY), .SEGBITS = 48, .PABITS = 48, .insn_flags = CPU_MIPS64R2 | INSN_LOONGSON3A | diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 01e0fbe..63da194 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -244,6 +244,8 @@ static void mips_cpu_reset_hold(Object *obj) env->CP0_PageGrain_rw_bitmask = env->cpu_model->CP0_PageGrain_rw_bitmask; env->CP0_PageGrain = env->cpu_model->CP0_PageGrain; env->CP0_EBaseWG_rw_bitmask = env->cpu_model->CP0_EBaseWG_rw_bitmask; + env->lcsr_cpucfg1 = 
env->cpu_model->lcsr_cpucfg1; + env->lcsr_cpucfg2 = env->cpu_model->lcsr_cpucfg2; env->active_fpu.fcr0 = env->cpu_model->CP1_fcr0; env->active_fpu.fcr31_rw_bitmask = env->cpu_model->CP1_fcr31_rw_bitmask; env->active_fpu.fcr31 = env->cpu_model->CP1_fcr31; @@ -449,9 +451,9 @@ static void mips_cp0_period_set(MIPSCPU *cpu) { CPUMIPSState *env = &cpu->env; - env->cp0_count_ns = clock_ticks_to_ns(MIPS_CPU(cpu)->clock, - env->cpu_model->CCRes); - assert(env->cp0_count_ns); + clock_set_mul_div(cpu->count_div, env->cpu_model->CCRes, 1); + clock_set_source(cpu->count_div, cpu->clock); + clock_set_source(env->count_clock, cpu->count_div); } static void mips_cpu_realizefn(DeviceState *dev, Error **errp) @@ -504,7 +506,17 @@ static void mips_cpu_initfn(Object *obj) cpu_set_cpustate_pointers(cpu); cpu->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, cpu, 0); + cpu->count_div = clock_new(OBJECT(obj), "clk-div-count"); + env->count_clock = clock_new(OBJECT(obj), "clk-count"); env->cpu_model = mcc->cpu_def; +#ifndef CONFIG_USER_ONLY + if (mcc->cpu_def->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP)) { + memory_region_init_io(&env->iocsr.mr, OBJECT(cpu), NULL, + env, "iocsr", UINT64_MAX); + address_space_init(&env->iocsr.as, + &env->iocsr.mr, "IOCSR"); + } +#endif } static char *mips_cpu_type_name(const char *cpu_model) diff --git a/target/mips/cpu.h b/target/mips/cpu.h index a3bc646..f81bd06 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -3,6 +3,9 @@ #include "cpu-qom.h" #include "exec/cpu-defs.h" +#ifndef CONFIG_USER_ONLY +#include "exec/memory.h" +#endif #include "fpu/softfloat-types.h" #include "hw/clock.h" #include "mips-defs.h" @@ -1068,6 +1071,33 @@ typedef struct CPUArchState { */ int32_t CP0_DESAVE; target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM]; +/* + * Loongson CSR CPUCFG registers + */ + uint32_t lcsr_cpucfg1; +#define CPUCFG1_FP 0 +#define CPUCFG1_FPREV 1 +#define CPUCFG1_MMI 4 +#define CPUCFG1_MSA1 5 +#define CPUCFG1_MSA2 6 +#define CPUCFG1_LSLDR0 16 +#define 
CPUCFG1_LSPERF 17 +#define CPUCFG1_LSPERFX 18 +#define CPUCFG1_LSSYNCI 19 +#define CPUCFG1_LLEXC 20 +#define CPUCFG1_SCRAND 21 +#define CPUCFG1_MUALP 25 +#define CPUCFG1_KMUALEN 26 +#define CPUCFG1_ITLBT 27 +#define CPUCFG1_SFBP 29 +#define CPUCFG1_CDMAP 30 + uint32_t lcsr_cpucfg2; +#define CPUCFG2_LEXT1 0 +#define CPUCFG2_LEXT2 1 +#define CPUCFG2_LEXT3 2 +#define CPUCFG2_LSPW 3 +#define CPUCFG2_LCSRP 27 +#define CPUCFG2_LDISBLIKELY 28 /* We waste some space so we can handle shadow registers like TCs. */ TCState tcs[MIPS_SHADOW_SET_MAX]; @@ -1156,12 +1186,18 @@ typedef struct CPUArchState { void *irq[8]; struct MIPSITUState *itu; MemoryRegion *itc_tag; /* ITC Configuration Tags */ + + /* Loongson IOCSR memory */ + struct { + AddressSpace as; + MemoryRegion mr; + } iocsr; #endif const mips_def_t *cpu_model; QEMUTimer *timer; /* Internal timer */ + Clock *count_clock; /* CP0_Count clock */ target_ulong exception_base; /* ExceptionBase input to the core */ - uint64_t cp0_count_ns; /* CP0_Count clock period (in nanoseconds) */ } CPUMIPSState; /** @@ -1178,6 +1214,7 @@ struct ArchCPU { /*< public >*/ Clock *clock; + Clock *count_div; /* Divider for CP0_Count clock */ CPUNegativeOffsetState neg; CPUMIPSState env; }; @@ -1280,6 +1317,12 @@ static inline bool ase_msa_available(CPUMIPSState *env) return env->CP0_Config3 & (1 << CP0C3_MSAP); } +/* Check presence of Loongson CSR instructions */ +static inline bool ase_lcsr_available(CPUMIPSState *env) +{ + return env->lcsr_cpucfg2 & (1 << CPUCFG2_LCSRP); +} + /* Check presence of multi-threading ASE implementation */ static inline bool ase_mt_available(CPUMIPSState *env) { diff --git a/target/mips/helper.h b/target/mips/helper.h index de32d82..0f8462f 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -196,6 +196,10 @@ DEF_HELPER_1(rdhwr_xnp, tl, env) DEF_HELPER_2(pmon, void, env, int) DEF_HELPER_1(wait, void, env) +#ifdef TARGET_MIPS64 +DEF_HELPER_FLAGS_2(lcsr_cpucfg, TCG_CALL_NO_RWG_SE, tl, env, tl) +#endif + 
/* Loongson multimedia functions. */ DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(paddush, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/target/mips/internal.h b/target/mips/internal.h index 4b0031d..1d0c026 100644 --- a/target/mips/internal.h +++ b/target/mips/internal.h @@ -79,6 +79,8 @@ struct mips_def_t { int32_t CP0_PageGrain_rw_bitmask; int32_t CP0_PageGrain; target_ulong CP0_EBaseWG_rw_bitmask; + uint32_t lcsr_cpucfg1; + uint32_t lcsr_cpucfg2; uint64_t insn_flags; enum mips_mmu_types mmu_type; int32_t SAARP; diff --git a/target/mips/sysemu/cp0_timer.c b/target/mips/sysemu/cp0_timer.c index 70de95d..9d2bcb0 100644 --- a/target/mips/sysemu/cp0_timer.c +++ b/target/mips/sysemu/cp0_timer.c @@ -28,15 +28,26 @@ #include "internal.h" /* MIPS R4K timer */ +static uint32_t cpu_mips_get_count_val(CPUMIPSState *env) +{ + int64_t now_ns; + now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + return env->CP0_Count + + (uint32_t)clock_ns_to_ticks(env->count_clock, now_ns); +} + static void cpu_mips_timer_update(CPUMIPSState *env) { uint64_t now_ns, next_ns; uint32_t wait; now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - wait = env->CP0_Compare - env->CP0_Count - - (uint32_t)(now_ns / env->cp0_count_ns); - next_ns = now_ns + (uint64_t)wait * env->cp0_count_ns; + wait = env->CP0_Compare - cpu_mips_get_count_val(env); + /* Clamp interval to overflow if virtual time had not progressed */ + if (!wait) { + wait = UINT32_MAX; + } + next_ns = now_ns + clock_ticks_to_ns(env->count_clock, wait); timer_mod(env->timer, next_ns); } @@ -64,7 +75,7 @@ uint32_t cpu_mips_get_count(CPUMIPSState *env) cpu_mips_timer_expire(env); } - return env->CP0_Count + (uint32_t)(now_ns / env->cp0_count_ns); + return cpu_mips_get_count_val(env); } } @@ -79,9 +90,8 @@ void cpu_mips_store_count(CPUMIPSState *env, uint32_t count) env->CP0_Count = count; } else { /* Store new count register */ - env->CP0_Count = count - - (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 
- env->cp0_count_ns); + env->CP0_Count = count - (uint32_t)clock_ns_to_ticks(env->count_clock, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); /* Update timer timer */ cpu_mips_timer_update(env); } @@ -107,8 +117,8 @@ void cpu_mips_start_count(CPUMIPSState *env) void cpu_mips_stop_count(CPUMIPSState *env) { /* Store the current value */ - env->CP0_Count += (uint32_t)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / - env->cp0_count_ns); + env->CP0_Count += (uint32_t)clock_ns_to_ticks(env->count_clock, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); } static void mips_timer_cb(void *opaque) @@ -121,14 +131,7 @@ static void mips_timer_cb(void *opaque) return; } - /* - * ??? This callback should occur when the counter is exactly equal to - * the comparator value. Offset the count by one to avoid immediately - * retriggering the callback before any virtual time has passed. - */ - env->CP0_Count++; cpu_mips_timer_expire(env); - env->CP0_Count--; } void cpu_mips_clock_init(MIPSCPU *cpu) diff --git a/target/mips/tcg/lcsr.decode b/target/mips/tcg/lcsr.decode new file mode 100644 index 0000000..960ef8b --- /dev/null +++ b/target/mips/tcg/lcsr.decode @@ -0,0 +1,17 @@ +# Loongson CSR instructions +# +# Copyright (C) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com> +# +# SPDX-License-Identifier: LGPL-2.1-or-later +# + +&r rs rt rd sa + +@rs_rd ...... rs:5 ..... rd:5 ..... ...... &r rt=0 sa=0 + +CPUCFG 110010 ..... 01000 ..... 00100 011000 @rs_rd + +RDCSR 110010 ..... 00000 ..... 00100 011000 @rs_rd +WRCSR 110010 ..... 00001 ..... 00100 011000 @rs_rd +DRDCSR 110010 ..... 00010 ..... 00100 011000 @rs_rd +DWRCSR 110010 ..... 00011 ..... 
00100 011000 @rs_rd diff --git a/target/mips/tcg/lcsr_translate.c b/target/mips/tcg/lcsr_translate.c new file mode 100644 index 0000000..9f2a5f4 --- /dev/null +++ b/target/mips/tcg/lcsr_translate.c @@ -0,0 +1,75 @@ +/* + * Loongson CSR instructions translation routines + * + * Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "exec/helper-gen.h" +#include "translate.h" + +/* Include the auto-generated decoder. */ +#include "decode-lcsr.c.inc" + +static bool trans_CPUCFG(DisasContext *ctx, arg_CPUCFG *a) +{ + TCGv dest = tcg_temp_new(); + TCGv src1 = tcg_temp_new(); + + gen_load_gpr(src1, a->rs); + gen_helper_lcsr_cpucfg(dest, cpu_env, src1); + gen_store_gpr(dest, a->rd); + + return true; +} + +#ifndef CONFIG_USER_ONLY +static bool gen_rdcsr(DisasContext *ctx, arg_r *a, + void (*func)(TCGv, TCGv_ptr, TCGv)) +{ + TCGv dest = tcg_temp_new(); + TCGv src1 = tcg_temp_new(); + + check_cp0_enabled(ctx); + gen_load_gpr(src1, a->rs); + func(dest, cpu_env, src1); + gen_store_gpr(dest, a->rd); + + return true; +} + +static bool gen_wrcsr(DisasContext *ctx, arg_r *a, + void (*func)(TCGv_ptr, TCGv, TCGv)) +{ + TCGv val = tcg_temp_new(); + TCGv addr = tcg_temp_new(); + + check_cp0_enabled(ctx); + gen_load_gpr(addr, a->rs); + gen_load_gpr(val, a->rd); + func(cpu_env, addr, val); + + return true; +} + +TRANS(RDCSR, gen_rdcsr, gen_helper_lcsr_rdcsr) +TRANS(DRDCSR, gen_rdcsr, gen_helper_lcsr_drdcsr) +TRANS(WRCSR, gen_wrcsr, gen_helper_lcsr_wrcsr) +TRANS(DWRCSR, gen_wrcsr, gen_helper_lcsr_dwrcsr) +#else +#define GEN_FALSE_TRANS(name) \ +static bool trans_##name(DisasContext *ctx, arg_##name * a) \ +{ \ + return false; \ +} + +GEN_FALSE_TRANS(RDCSR) +GEN_FALSE_TRANS(DRDCSR) +GEN_FALSE_TRANS(WRCSR) +GEN_FALSE_TRANS(DWRCSR) +#endif diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build index 
7ee969e..ea7fb58 100644 --- a/target/mips/tcg/meson.build +++ b/target/mips/tcg/meson.build @@ -4,6 +4,7 @@ gen = [ decodetree.process('tx79.decode', extra_args: '--static-decode=decode_tx79'), decodetree.process('vr54xx.decode', extra_args: '--decode=decode_ext_vr54xx'), decodetree.process('octeon.decode', extra_args: '--decode=decode_ext_octeon'), + decodetree.process('lcsr.decode', extra_args: '--decode=decode_ase_lcsr'), ] mips_ss.add(gen) @@ -26,6 +27,7 @@ mips_ss.add(files( mips_ss.add(when: 'TARGET_MIPS64', if_true: files( 'tx79_translate.c', 'octeon_translate.c', + 'lcsr_translate.c', ), if_false: files( 'mxu_translate.c', )) diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c index 39348b3..deb8060 100644 --- a/target/mips/tcg/mxu_translate.c +++ b/target/mips/tcg/mxu_translate.c @@ -237,11 +237,11 @@ * ├─ 001100 ─ OPC_MXU_D16MADL * ├─ 001101 ─ OPC_MXU_S16MAD * ├─ 001110 ─ OPC_MXU_Q16ADD - * ├─ 001111 ─ OPC_MXU_D16MACE 23 + * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care) * │ ┌─ 0 ─ OPC_MXU_S32LDD * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR * │ - * │ 23 + * │ 20 (13..10 don't care) * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD * │ └─ 1 ─ OPC_MXU_S32STDR * │ @@ -253,11 +253,11 @@ * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV * │ └─ 0001 ─ OPC_MXU_S32STDVR * │ - * │ 23 + * │ 20 (13..10 don't care) * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI * │ └─ 1 ─ OPC_MXU_S32LDIR * │ - * │ 23 + * │ 20 (13..10 don't care) * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI * │ └─ 1 ─ OPC_MXU_S32SDIR * │ @@ -268,7 +268,7 @@ * │ 13..10 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV * │ └─ 0001 ─ OPC_MXU_S32SDIVR - * ├─ 011000 ─ OPC_MXU_D32ADD + * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too) * │ 23..22 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM @@ -277,7 +277,7 @@ * │ 23..22 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC * │ ├─ 01 ─ 
OPC_MXU_Q16ACCM - * │ └─ 10 ─ OPC_MXU_Q16ASUM + * │ └─ 10 ─ OPC_MXU_D16ASUM * │ * │ 23..22 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE @@ -290,9 +290,9 @@ * ├─ 100010 ─ OPC_MXU_S8LDD * ├─ 100011 ─ OPC_MXU_S8STD 15..14 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL - * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 00 ─ OPC_MXU_S32MULU - * │ ├─ 00 ─ OPC_MXU_S32EXTR - * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV + * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU + * │ ├─ 10 ─ OPC_MXU_S32EXTR + * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV * │ * │ 20..18 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW @@ -304,7 +304,7 @@ * │ ├─ 110 ─ OPC_MXU_S32OR * │ └─ 111 ─ OPC_MXU_S32XOR * │ - * │ 7..5 + * │ 8..6 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB * │ ├─ 001 ─ OPC_MXU_LXH * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW @@ -318,15 +318,15 @@ * ├─ 110001 ─ OPC_MXU_D32SLR 20..18 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV - * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 010 ─ OPC_MXU_D32SARV - * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 011 ─ OPC_MXU_Q16SLLV - * │ ├─ 100 ─ OPC_MXU_Q16SLRV - * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV + * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV + * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV + * │ ├─ 101 ─ OPC_MXU_Q16SLRV + * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV * │ * ├─ 110111 ─ OPC_MXU_Q16SAR * │ 23..22 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL - * │ └─ 01 ─ OPC_MXU_Q8MULSU + * │ └─ 10 ─ OPC_MXU_Q8MULSU * │ * │ 20..18 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ @@ -353,15 +353,62 @@ */ enum { + OPC_MXU_S32MADD = 0x00, + OPC_MXU_S32MADDU = 0x01, OPC_MXU__POOL00 = 0x03, + OPC_MXU_S32MSUB = 0x04, + OPC_MXU_S32MSUBU = 0x05, + OPC_MXU__POOL01 = 0x06, + OPC_MXU__POOL02 = 0x07, OPC_MXU_D16MUL = 0x08, + OPC_MXU__POOL03 = 0x09, OPC_MXU_D16MAC = 0x0A, + OPC_MXU_D16MACF = 0x0B, + OPC_MXU_D16MADL = 0x0C, + 
OPC_MXU_S16MAD = 0x0D, + OPC_MXU_Q16ADD = 0x0E, + OPC_MXU_D16MACE = 0x0F, OPC_MXU__POOL04 = 0x10, + OPC_MXU__POOL05 = 0x11, + OPC_MXU__POOL06 = 0x12, + OPC_MXU__POOL07 = 0x13, + OPC_MXU__POOL08 = 0x14, + OPC_MXU__POOL09 = 0x15, + OPC_MXU__POOL10 = 0x16, + OPC_MXU__POOL11 = 0x17, + OPC_MXU_D32ADD = 0x18, + OPC_MXU__POOL12 = 0x19, + OPC_MXU__POOL13 = 0x1B, + OPC_MXU__POOL14 = 0x1C, + OPC_MXU_Q8ACCE = 0x1D, OPC_MXU_S8LDD = 0x22, + OPC_MXU_S8STD = 0x23, + OPC_MXU_S8LDI = 0x24, + OPC_MXU_S8SDI = 0x25, + OPC_MXU__POOL15 = 0x26, OPC_MXU__POOL16 = 0x27, + OPC_MXU__POOL17 = 0x28, + OPC_MXU_S16LDD = 0x2A, + OPC_MXU_S16STD = 0x2B, + OPC_MXU_S16LDI = 0x2C, + OPC_MXU_S16SDI = 0x2D, OPC_MXU_S32M2I = 0x2E, OPC_MXU_S32I2M = 0x2F, + OPC_MXU_D32SLL = 0x30, + OPC_MXU_D32SLR = 0x31, + OPC_MXU_D32SARL = 0x32, + OPC_MXU_D32SAR = 0x33, + OPC_MXU_Q16SLL = 0x34, + OPC_MXU_Q16SLR = 0x35, + OPC_MXU__POOL18 = 0x36, + OPC_MXU_Q16SAR = 0x37, OPC_MXU__POOL19 = 0x38, + OPC_MXU__POOL20 = 0x39, + OPC_MXU__POOL21 = 0x3A, + OPC_MXU_Q16SCOP = 0x3B, + OPC_MXU_Q8MADL = 0x3C, + OPC_MXU_S32SFL = 0x3D, + OPC_MXU_Q8SAD = 0x3E, }; @@ -375,21 +422,94 @@ enum { OPC_MXU_D16MIN = 0x03, OPC_MXU_Q8MAX = 0x04, OPC_MXU_Q8MIN = 0x05, + OPC_MXU_Q8SLT = 0x06, + OPC_MXU_Q8SLTU = 0x07, }; /* - * MXU pool 04 + * MXU pool 01 */ enum { - OPC_MXU_S32LDD = 0x00, - OPC_MXU_S32LDDR = 0x01, + OPC_MXU_S32SLT = 0x00, + OPC_MXU_D16SLT = 0x01, + OPC_MXU_D16AVG = 0x02, + OPC_MXU_D16AVGR = 0x03, + OPC_MXU_Q8AVG = 0x04, + OPC_MXU_Q8AVGR = 0x05, + OPC_MXU_Q8ADD = 0x07, +}; + +/* + * MXU pool 02 + */ +enum { + OPC_MXU_S32CPS = 0x00, + OPC_MXU_D16CPS = 0x02, + OPC_MXU_Q8ABD = 0x04, + OPC_MXU_Q16SAT = 0x06, +}; + +/* + * MXU pool 03 + */ +enum { + OPC_MXU_D16MULF = 0x00, + OPC_MXU_D16MULE = 0x01, +}; + +/* + * MXU pool 04 05 06 07 08 09 10 11 + */ +enum { + OPC_MXU_S32LDST = 0x00, + OPC_MXU_S32LDSTR = 0x01, +}; + +/* + * MXU pool 12 + */ +enum { + OPC_MXU_D32ACC = 0x00, + OPC_MXU_D32ACCM = 0x01, + OPC_MXU_D32ASUM = 0x02, +}; + +/* + * MXU 
pool 13 + */ +enum { + OPC_MXU_Q16ACC = 0x00, + OPC_MXU_Q16ACCM = 0x01, + OPC_MXU_D16ASUM = 0x02, +}; + +/* + * MXU pool 14 + */ +enum { + OPC_MXU_Q8ADDE = 0x00, + OPC_MXU_D8SUM = 0x01, + OPC_MXU_D8SUMC = 0x02, +}; + +/* + * MXU pool 15 + */ +enum { + OPC_MXU_S32MUL = 0x00, + OPC_MXU_S32MULU = 0x01, + OPC_MXU_S32EXTR = 0x02, + OPC_MXU_S32EXTRV = 0x03, }; /* * MXU pool 16 */ enum { + OPC_MXU_D32SARW = 0x00, + OPC_MXU_S32ALN = 0x01, OPC_MXU_S32ALNI = 0x02, + OPC_MXU_S32LUI = 0x03, OPC_MXU_S32NOR = 0x04, OPC_MXU_S32AND = 0x05, OPC_MXU_S32OR = 0x06, @@ -397,13 +517,57 @@ enum { }; /* + * MXU pool 17 + */ +enum { + OPC_MXU_LXB = 0x00, + OPC_MXU_LXH = 0x01, + OPC_MXU_LXW = 0x03, + OPC_MXU_LXBU = 0x04, + OPC_MXU_LXHU = 0x05, +}; + +/* + * MXU pool 18 + */ +enum { + OPC_MXU_D32SLLV = 0x00, + OPC_MXU_D32SLRV = 0x01, + OPC_MXU_D32SARV = 0x03, + OPC_MXU_Q16SLLV = 0x04, + OPC_MXU_Q16SLRV = 0x05, + OPC_MXU_Q16SARV = 0x07, +}; + +/* * MXU pool 19 */ enum { OPC_MXU_Q8MUL = 0x00, - OPC_MXU_Q8MULSU = 0x01, + OPC_MXU_Q8MULSU = 0x02, }; +/* + * MXU pool 20 + */ +enum { + OPC_MXU_Q8MOVZ = 0x00, + OPC_MXU_Q8MOVN = 0x01, + OPC_MXU_D16MOVZ = 0x02, + OPC_MXU_D16MOVN = 0x03, + OPC_MXU_S32MOVZ = 0x04, + OPC_MXU_S32MOVN = 0x05, +}; + +/* + * MXU pool 21 + */ +enum { + OPC_MXU_Q8MAC = 0x00, + OPC_MXU_Q8MACSU = 0x02, +}; + + /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */ #define MXU_APTN1_A 0 #define MXU_APTN1_S 1 @@ -537,8 +701,11 @@ static void gen_mxu_s32m2i(DisasContext *ctx) /* * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF + * + * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF, + * post modify address register */ -static void gen_mxu_s8ldd(DisasContext *ctx) +static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify) { TCGv t0, t1; uint32_t XRa, Rb, s8, optn3; @@ -553,6 +720,9 @@ static void gen_mxu_s8ldd(DisasContext *ctx) gen_load_gpr(t0, Rb); tcg_gen_addi_tl(t0, t0, (int8_t)s8); + if (postmodify) { + gen_store_gpr(t0, Rb); + } switch (optn3) { /* 
XRa[7:0] = tmp8 */ @@ -610,9 +780,208 @@ static void gen_mxu_s8ldd(DisasContext *ctx) } /* - * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication + * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory + * + * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory, + * post modify address register */ -static void gen_mxu_d16mul(DisasContext *ctx) +static void gen_mxu_s8std(DisasContext *ctx, bool postmodify) +{ + TCGv t0, t1; + uint32_t XRa, Rb, s8, optn3; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s8 = extract32(ctx->opcode, 10, 8); + optn3 = extract32(ctx->opcode, 18, 3); + Rb = extract32(ctx->opcode, 21, 5); + + if (optn3 > 3) { + /* reserved, do nothing */ + return; + } + + gen_load_gpr(t0, Rb); + tcg_gen_addi_tl(t0, t0, (int8_t)s8); + if (postmodify) { + gen_store_gpr(t0, Rb); + } + gen_load_mxu_gpr(t1, XRa); + + switch (optn3) { + /* XRa[7:0] => tmp8 */ + case MXU_OPTN3_PTN0: + tcg_gen_extract_tl(t1, t1, 0, 8); + break; + /* XRa[15:8] => tmp8 */ + case MXU_OPTN3_PTN1: + tcg_gen_extract_tl(t1, t1, 8, 8); + break; + /* XRa[23:16] => tmp8 */ + case MXU_OPTN3_PTN2: + tcg_gen_extract_tl(t1, t1, 16, 8); + break; + /* XRa[31:24] => tmp8 */ + case MXU_OPTN3_PTN3: + tcg_gen_extract_tl(t1, t1, 24, 8); + break; + } + + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB); +} + +/* + * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF + * + * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF, + * post modify address register + */ +static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify) +{ + TCGv t0, t1; + uint32_t XRa, Rb, optn2; + int32_t s10; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s10 = sextract32(ctx->opcode, 10, 9) * 2; + optn2 = extract32(ctx->opcode, 19, 2); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + tcg_gen_addi_tl(t0, t0, s10); + if (postmodify) { + gen_store_gpr(t0, Rb); + } + + 
switch (optn2) { + /* XRa[15:0] = tmp16 */ + case MXU_OPTN2_PTN0: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 0, 16); + break; + /* XRa[31:16] = tmp16 */ + case MXU_OPTN2_PTN1: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); + gen_load_mxu_gpr(t0, XRa); + tcg_gen_deposit_tl(t0, t0, t1, 16, 16); + break; + /* XRa = sign_extend(tmp16) */ + case MXU_OPTN2_PTN2: + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW); + break; + /* XRa = {tmp16, tmp16} */ + case MXU_OPTN2_PTN3: + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); + tcg_gen_deposit_tl(t0, t1, t1, 0, 16); + tcg_gen_deposit_tl(t0, t1, t1, 16, 16); + break; + } + + gen_store_mxu_gpr(t0, XRa); +} + +/* + * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory + * + * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory, + * post modify address register + */ +static void gen_mxu_s16std(DisasContext *ctx, bool postmodify) +{ + TCGv t0, t1; + uint32_t XRa, Rb, optn2; + int32_t s10; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s10 = sextract32(ctx->opcode, 10, 9) * 2; + optn2 = extract32(ctx->opcode, 19, 2); + Rb = extract32(ctx->opcode, 21, 5); + + if (optn2 > 1) { + /* reserved, do nothing */ + return; + } + + gen_load_gpr(t0, Rb); + tcg_gen_addi_tl(t0, t0, s10); + if (postmodify) { + gen_store_gpr(t0, Rb); + } + gen_load_mxu_gpr(t1, XRa); + + switch (optn2) { + /* XRa[15:0] => tmp16 */ + case MXU_OPTN2_PTN0: + tcg_gen_extract_tl(t1, t1, 0, 16); + break; + /* XRa[31:16] => tmp16 */ + case MXU_OPTN2_PTN1: + tcg_gen_extract_tl(t1, t1, 16, 16); + break; + } + + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW); +} + +/* + * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication + * of GPR's and stores result into pair of MXU registers. + * It strains HI and LO registers. 
+ * + * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication + * of GPR's and stores result into pair of MXU registers. + * It strains HI and LO registers. + */ +static void gen_mxu_s32mul(DisasContext *ctx, bool mulu) +{ + TCGv t0, t1; + uint32_t XRa, XRd, rs, rt; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRd = extract32(ctx->opcode, 10, 4); + rs = extract32(ctx->opcode, 16, 5); + rt = extract32(ctx->opcode, 21, 5); + + if (unlikely(rs == 0 || rt == 0)) { + tcg_gen_movi_tl(t0, 0); + tcg_gen_movi_tl(t1, 0); + } else { + gen_load_gpr(t0, rs); + gen_load_gpr(t1, rt); + + if (mulu) { + tcg_gen_mulu2_tl(t0, t1, t0, t1); + } else { + tcg_gen_muls2_tl(t0, t1, t0, t1); + } + } + tcg_gen_mov_tl(cpu_HI[0], t1); + tcg_gen_mov_tl(cpu_LO[0], t0); + gen_store_mxu_gpr(t1, XRa); + gen_store_mxu_gpr(t0, XRd); +} + +/* + * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication + * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication + * with rounding and packing result + * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern + * multiplication with rounding + */ +static void gen_mxu_d16mul(DisasContext *ctx, bool fractional, + bool packed_result) { TCGv t0, t1, t2, t3; uint32_t XRa, XRb, XRc, XRd, optn2; @@ -628,6 +997,12 @@ static void gen_mxu_d16mul(DisasContext *ctx) XRd = extract32(ctx->opcode, 18, 4); optn2 = extract32(ctx->opcode, 22, 2); + /* + * TODO: XRd field isn't used for D16MULF + * There's no knowledge how this field affect + * instruction decoding/behavior + */ + gen_load_mxu_gpr(t1, XRb); tcg_gen_sextract_tl(t0, t1, 0, 16); tcg_gen_sextract_tl(t1, t1, 16, 16); @@ -653,15 +1028,64 @@ static void gen_mxu_d16mul(DisasContext *ctx) tcg_gen_mul_tl(t2, t1, t2); break; } - gen_store_mxu_gpr(t3, XRa); - gen_store_mxu_gpr(t2, XRd); + if (fractional) { + TCGLabel *l_done = gen_new_label(); + TCGv rounding = tcg_temp_new(); + + tcg_gen_shli_tl(t3, t3, 1); + 
tcg_gen_shli_tl(t2, t2, 1); + tcg_gen_andi_tl(rounding, mxu_CR, 0x2); + tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); + if (packed_result) { + TCGLabel *l_apply_bias_l = gen_new_label(); + TCGLabel *l_apply_bias_r = gen_new_label(); + TCGLabel *l_half_done = gen_new_label(); + TCGv bias = tcg_temp_new(); + + /* + * D16MULF supports unbiased rounding aka "bankers rounding", + * "round to even", "convergent rounding" + */ + tcg_gen_andi_tl(bias, mxu_CR, 0x4); + tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); + tcg_gen_andi_tl(t0, t3, 0x1ffff); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); + gen_set_label(l_apply_bias_l); + tcg_gen_addi_tl(t3, t3, 0x8000); + gen_set_label(l_half_done); + tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); + tcg_gen_andi_tl(t0, t2, 0x1ffff); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); + gen_set_label(l_apply_bias_r); + tcg_gen_addi_tl(t2, t2, 0x8000); + } else { + /* D16MULE doesn't support unbiased rounding */ + tcg_gen_addi_tl(t3, t3, 0x8000); + tcg_gen_addi_tl(t2, t2, 0x8000); + } + gen_set_label(l_done); + } + if (!packed_result) { + gen_store_mxu_gpr(t3, XRa); + gen_store_mxu_gpr(t2, XRd); + } else { + tcg_gen_andi_tl(t3, t3, 0xffff0000); + tcg_gen_shri_tl(t2, t2, 16); + tcg_gen_or_tl(t3, t3, t2); + gen_store_mxu_gpr(t3, XRa); + } } /* - * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply - * and accumulate + * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 + * Signed 16 bit pattern multiply and accumulate + * D16MACF XRa, XRb, XRc, aptn2, optn2 + * Signed Q15 fraction pattern multiply accumulate and pack + * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 + * Signed Q15 fraction pattern multiply and accumulate */ -static void gen_mxu_d16mac(DisasContext *ctx) +static void gen_mxu_d16mac(DisasContext *ctx, bool fractional, + bool packed_result) { TCGv t0, t1, t2, t3; uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; @@ -704,6 +1128,11 @@ static void gen_mxu_d16mac(DisasContext 
*ctx) tcg_gen_mul_tl(t2, t1, t2); break; } + + if (fractional) { + tcg_gen_shli_tl(t3, t3, 1); + tcg_gen_shli_tl(t2, t2, 1); + } gen_load_mxu_gpr(t0, XRa); gen_load_mxu_gpr(t1, XRd); @@ -725,18 +1154,205 @@ static void gen_mxu_d16mac(DisasContext *ctx) tcg_gen_sub_tl(t2, t1, t2); break; } - gen_store_mxu_gpr(t3, XRa); - gen_store_mxu_gpr(t2, XRd); + + if (fractional) { + TCGLabel *l_done = gen_new_label(); + TCGv rounding = tcg_temp_new(); + + tcg_gen_andi_tl(rounding, mxu_CR, 0x2); + tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); + if (packed_result) { + TCGLabel *l_apply_bias_l = gen_new_label(); + TCGLabel *l_apply_bias_r = gen_new_label(); + TCGLabel *l_half_done = gen_new_label(); + TCGv bias = tcg_temp_new(); + + /* + * D16MACF supports unbiased rounding aka "bankers rounding", + * "round to even", "convergent rounding" + */ + tcg_gen_andi_tl(bias, mxu_CR, 0x4); + tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); + tcg_gen_andi_tl(t0, t3, 0x1ffff); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); + gen_set_label(l_apply_bias_l); + tcg_gen_addi_tl(t3, t3, 0x8000); + gen_set_label(l_half_done); + tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); + tcg_gen_andi_tl(t0, t2, 0x1ffff); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); + gen_set_label(l_apply_bias_r); + tcg_gen_addi_tl(t2, t2, 0x8000); + } else { + /* D16MACE doesn't support unbiased rounding */ + tcg_gen_addi_tl(t3, t3, 0x8000); + tcg_gen_addi_tl(t2, t2, 0x8000); + } + gen_set_label(l_done); + } + + if (!packed_result) { + gen_store_mxu_gpr(t3, XRa); + gen_store_mxu_gpr(t2, XRd); + } else { + tcg_gen_andi_tl(t3, t3, 0xffff0000); + tcg_gen_shri_tl(t2, t2, 16); + tcg_gen_or_tl(t3, t3, t2); + gen_store_mxu_gpr(t3, XRa); + } } /* - * Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply - * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply + * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed + * unsigned 16 bit pattern 
multiply and add/subtract. */ -static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx) +static void gen_mxu_d16madl(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3; + uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + optn2 = extract32(ctx->opcode, 22, 2); + aptn2 = extract32(ctx->opcode, 24, 2); + + gen_load_mxu_gpr(t1, XRb); + tcg_gen_sextract_tl(t0, t1, 0, 16); + tcg_gen_sextract_tl(t1, t1, 16, 16); + + gen_load_mxu_gpr(t3, XRc); + tcg_gen_sextract_tl(t2, t3, 0, 16); + tcg_gen_sextract_tl(t3, t3, 16, 16); + + switch (optn2) { + case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t0, t2); + break; + case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t1, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ + tcg_gen_mul_tl(t3, t0, t3); + tcg_gen_mul_tl(t2, t1, t2); + break; + } + tcg_gen_extract_tl(t2, t2, 0, 16); + tcg_gen_extract_tl(t3, t3, 0, 16); + + gen_load_mxu_gpr(t1, XRa); + tcg_gen_extract_tl(t0, t1, 0, 16); + tcg_gen_extract_tl(t1, t1, 16, 16); + + switch (aptn2) { + case MXU_APTN2_AA: + tcg_gen_add_tl(t3, t1, t3); + tcg_gen_add_tl(t2, t0, t2); + break; + case MXU_APTN2_AS: + tcg_gen_add_tl(t3, t1, t3); + tcg_gen_sub_tl(t2, t0, t2); + break; + case MXU_APTN2_SA: + tcg_gen_sub_tl(t3, t1, t3); + tcg_gen_add_tl(t2, t0, t2); + break; + case MXU_APTN2_SS: + tcg_gen_sub_tl(t3, t1, t3); + tcg_gen_sub_tl(t2, t0, t2); + break; + } + + tcg_gen_andi_tl(t2, t2, 0xffff); + tcg_gen_shli_tl(t3, t3, 16); + tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2); +} + 
+/* + * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed + * signed 16 bit pattern multiply and 32-bit add/subtract. + */ +static void gen_mxu_s16mad(DisasContext *ctx) +{ + TCGv t0, t1; + uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + optn2 = extract32(ctx->opcode, 22, 2); + aptn1 = extract32(ctx->opcode, 24, 1); + pad = extract32(ctx->opcode, 25, 1); + + if (pad) { + /* FIXME check if it influence the result */ + } + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + + switch (optn2) { + case MXU_OPTN2_WW: /* XRB.H*XRC.H */ + tcg_gen_sextract_tl(t0, t0, 16, 16); + tcg_gen_sextract_tl(t1, t1, 16, 16); + break; + case MXU_OPTN2_LW: /* XRB.L*XRC.L */ + tcg_gen_sextract_tl(t0, t0, 0, 16); + tcg_gen_sextract_tl(t1, t1, 0, 16); + break; + case MXU_OPTN2_HW: /* XRB.H*XRC.L */ + tcg_gen_sextract_tl(t0, t0, 16, 16); + tcg_gen_sextract_tl(t1, t1, 0, 16); + break; + case MXU_OPTN2_XW: /* XRB.L*XRC.H */ + tcg_gen_sextract_tl(t0, t0, 0, 16); + tcg_gen_sextract_tl(t1, t1, 16, 16); + break; + } + tcg_gen_mul_tl(t0, t0, t1); + + gen_load_mxu_gpr(t1, XRa); + + switch (aptn1) { + case MXU_APTN1_A: + tcg_gen_add_tl(t1, t1, t0); + break; + case MXU_APTN1_S: + tcg_gen_sub_tl(t1, t1, t0); + break; + } + + gen_store_mxu_gpr(t1, XRd); +} + +/* + * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply + * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply + * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply + * and accumulate + * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply + * and accumulate + */ +static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac) { TCGv t0, t1, t2, t3, t4, t5, t6, t7; - uint32_t XRa, XRb, XRc, XRd, sel; + uint32_t XRa, XRb, XRc, XRd, aptn2; t0 = tcg_temp_new(); t1 = 
tcg_temp_new(); @@ -751,90 +1367,311 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx) XRb = extract32(ctx->opcode, 10, 4); XRc = extract32(ctx->opcode, 14, 4); XRd = extract32(ctx->opcode, 18, 4); - sel = extract32(ctx->opcode, 22, 2); + aptn2 = extract32(ctx->opcode, 24, 2); gen_load_mxu_gpr(t3, XRb); gen_load_mxu_gpr(t7, XRc); - if (sel == 0x2) { - /* Q8MULSU */ - tcg_gen_ext8s_tl(t0, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8s_tl(t1, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8s_tl(t2, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8s_tl(t3, t3); + if (su) { + /* Q8MULSU / Q8MACSU */ + tcg_gen_sextract_tl(t0, t3, 0, 8); + tcg_gen_sextract_tl(t1, t3, 8, 8); + tcg_gen_sextract_tl(t2, t3, 16, 8); + tcg_gen_sextract_tl(t3, t3, 24, 8); } else { - /* Q8MUL */ - tcg_gen_ext8u_tl(t0, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8u_tl(t1, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8u_tl(t2, t3); - tcg_gen_shri_tl(t3, t3, 8); - tcg_gen_ext8u_tl(t3, t3); - } - - tcg_gen_ext8u_tl(t4, t7); - tcg_gen_shri_tl(t7, t7, 8); - tcg_gen_ext8u_tl(t5, t7); - tcg_gen_shri_tl(t7, t7, 8); - tcg_gen_ext8u_tl(t6, t7); - tcg_gen_shri_tl(t7, t7, 8); - tcg_gen_ext8u_tl(t7, t7); + /* Q8MUL / Q8MAC */ + tcg_gen_extract_tl(t0, t3, 0, 8); + tcg_gen_extract_tl(t1, t3, 8, 8); + tcg_gen_extract_tl(t2, t3, 16, 8); + tcg_gen_extract_tl(t3, t3, 24, 8); + } + + tcg_gen_extract_tl(t4, t7, 0, 8); + tcg_gen_extract_tl(t5, t7, 8, 8); + tcg_gen_extract_tl(t6, t7, 16, 8); + tcg_gen_extract_tl(t7, t7, 24, 8); tcg_gen_mul_tl(t0, t0, t4); tcg_gen_mul_tl(t1, t1, t5); tcg_gen_mul_tl(t2, t2, t6); tcg_gen_mul_tl(t3, t3, t7); - tcg_gen_andi_tl(t0, t0, 0xFFFF); - tcg_gen_andi_tl(t1, t1, 0xFFFF); - tcg_gen_andi_tl(t2, t2, 0xFFFF); - tcg_gen_andi_tl(t3, t3, 0xFFFF); - - tcg_gen_shli_tl(t1, t1, 16); - tcg_gen_shli_tl(t3, t3, 16); + if (mac) { + gen_load_mxu_gpr(t4, XRd); + gen_load_mxu_gpr(t5, XRa); + tcg_gen_extract_tl(t6, t4, 0, 16); + tcg_gen_extract_tl(t7, t4, 16, 16); + if (aptn2 & 1) { + 
tcg_gen_sub_tl(t0, t6, t0); + tcg_gen_sub_tl(t1, t7, t1); + } else { + tcg_gen_add_tl(t0, t6, t0); + tcg_gen_add_tl(t1, t7, t1); + } + tcg_gen_extract_tl(t6, t5, 0, 16); + tcg_gen_extract_tl(t7, t5, 16, 16); + if (aptn2 & 2) { + tcg_gen_sub_tl(t2, t6, t2); + tcg_gen_sub_tl(t3, t7, t3); + } else { + tcg_gen_add_tl(t2, t6, t2); + tcg_gen_add_tl(t3, t7, t3); + } + } - tcg_gen_or_tl(t0, t0, t1); - tcg_gen_or_tl(t1, t2, t3); + tcg_gen_deposit_tl(t0, t0, t1, 16, 16); + tcg_gen_deposit_tl(t1, t2, t3, 16, 16); gen_store_mxu_gpr(t0, XRd); gen_store_mxu_gpr(t1, XRa); } /* + * Q8MADL XRd, XRa, XRb, XRc + * Parallel quad unsigned 8 bit multiply and accumulate. + * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3] + */ +static void gen_mxu_q8madl(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3, t4, t5, t6, t7; + uint32_t XRa, XRb, XRc, XRd, aptn2; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + t4 = tcg_temp_new(); + t5 = tcg_temp_new(); + t6 = tcg_temp_new(); + t7 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + aptn2 = extract32(ctx->opcode, 24, 2); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t7, XRc); + + tcg_gen_extract_tl(t0, t3, 0, 8); + tcg_gen_extract_tl(t1, t3, 8, 8); + tcg_gen_extract_tl(t2, t3, 16, 8); + tcg_gen_extract_tl(t3, t3, 24, 8); + + tcg_gen_extract_tl(t4, t7, 0, 8); + tcg_gen_extract_tl(t5, t7, 8, 8); + tcg_gen_extract_tl(t6, t7, 16, 8); + tcg_gen_extract_tl(t7, t7, 24, 8); + + tcg_gen_mul_tl(t0, t0, t4); + tcg_gen_mul_tl(t1, t1, t5); + tcg_gen_mul_tl(t2, t2, t6); + tcg_gen_mul_tl(t3, t3, t7); + + gen_load_mxu_gpr(t4, XRa); + tcg_gen_extract_tl(t6, t4, 0, 8); + tcg_gen_extract_tl(t7, t4, 8, 8); + if (aptn2 & 1) { + tcg_gen_sub_tl(t0, t6, t0); + tcg_gen_sub_tl(t1, t7, t1); + } else { + tcg_gen_add_tl(t0, t6, t0); + tcg_gen_add_tl(t1, t7, t1); + } + tcg_gen_extract_tl(t6, t4, 16, 8); 
+ tcg_gen_extract_tl(t7, t4, 24, 8); + if (aptn2 & 2) { + tcg_gen_sub_tl(t2, t6, t2); + tcg_gen_sub_tl(t3, t7, t3); + } else { + tcg_gen_add_tl(t2, t6, t2); + tcg_gen_add_tl(t3, t7, t3); + } + + tcg_gen_andi_tl(t5, t0, 0xff); + tcg_gen_deposit_tl(t5, t5, t1, 8, 8); + tcg_gen_deposit_tl(t5, t5, t2, 16, 8); + tcg_gen_deposit_tl(t5, t5, t3, 24, 8); + + gen_store_mxu_gpr(t5, XRd); +} + +/* * S32LDD XRa, Rb, S12 - Load a word from memory to XRF - * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq. + * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF + * in reversed byte seq. + * S32LDI XRa, Rb, S12 - Load a word from memory to XRF, + * post modify base address GPR. + * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF, + * post modify base address GPR and load in reversed byte seq. */ -static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx) +static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc) { TCGv t0, t1; - uint32_t XRa, Rb, s12, sel; + uint32_t XRa, Rb, s12; t0 = tcg_temp_new(); t1 = tcg_temp_new(); XRa = extract32(ctx->opcode, 6, 4); - s12 = extract32(ctx->opcode, 10, 10); - sel = extract32(ctx->opcode, 20, 1); + s12 = sextract32(ctx->opcode, 10, 10); Rb = extract32(ctx->opcode, 21, 5); gen_load_gpr(t0, Rb); + tcg_gen_movi_tl(t1, s12 * 4); + tcg_gen_add_tl(t0, t0, t1); + + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, + (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | + ctx->default_tcg_memop_mask); + gen_store_mxu_gpr(t1, XRa); - tcg_gen_movi_tl(t1, s12); - tcg_gen_shli_tl(t1, t1, 2); - if (s12 & 0x200) { - tcg_gen_ori_tl(t1, t1, 0xFFFFF000); + if (postinc) { + gen_store_gpr(t0, Rb); } - tcg_gen_add_tl(t1, t0, t1); - tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) | - ctx->default_tcg_memop_mask); +} + +/* + * S32STD XRa, Rb, S12 - Store a word from XRF to memory + * S32STDR XRa, Rb, S12 - Store a word from XRF to memory + * in reversed byte seq. 
+ * S32SDI XRa, Rb, S12 - Store a word from XRF to memory, + * post modify base address GPR. + * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory, + * post modify base address GPR and store in reversed byte seq. + */ +static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc) +{ + TCGv t0, t1; + uint32_t XRa, Rb, s12; + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + s12 = sextract32(ctx->opcode, 10, 10); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + tcg_gen_movi_tl(t1, s12 * 4); + tcg_gen_add_tl(t0, t0, t1); + + gen_load_mxu_gpr(t1, XRa); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, + (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | + ctx->default_tcg_memop_mask); + + if (postinc) { + gen_store_gpr(t0, Rb); + } +} + +/* + * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF + * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF + * in reversed byte seq. + * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF, + * post modify base address GPR. + * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF, + * post modify base address GPR and load in reversed byte seq. + */ +static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed, + bool postinc, uint32_t strd2) +{ + TCGv t0, t1; + uint32_t XRa, Rb, Rc; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + Rc = extract32(ctx->opcode, 16, 5); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + gen_load_gpr(t1, Rc); + tcg_gen_shli_tl(t1, t1, strd2); + tcg_gen_add_tl(t0, t0, t1); + + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, + (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | + ctx->default_tcg_memop_mask); gen_store_mxu_gpr(t1, XRa); + + if (postinc) { + gen_store_gpr(t0, Rb); + } } +/* + * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR + * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR, + * sign extending to GPR size. 
+ * LXH Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR, + * sign extending to GPR size. + * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR, + * zero extending to GPR size. + * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR, + * zero extending to GPR size. + */ +static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop) +{ + TCGv t0, t1; + uint32_t Ra, Rb, Rc; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + Ra = extract32(ctx->opcode, 11, 5); + Rc = extract32(ctx->opcode, 16, 5); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + gen_load_gpr(t1, Rc); + tcg_gen_shli_tl(t1, t1, strd2); + tcg_gen_add_tl(t0, t0, t1); + + tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask); + gen_store_gpr(t1, Ra); +} + +/* + * S32STDV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory + * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory + * in reversed byte seq. + * S32SDIV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory, + * post modify base address GPR. + * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory, + * post modify base address GPR and store in reversed byte seq. + */ +static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed, + bool postinc, uint32_t strd2) +{ + TCGv t0, t1; + uint32_t XRa, Rb, Rc; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + Rc = extract32(ctx->opcode, 16, 5); + Rb = extract32(ctx->opcode, 21, 5); + + gen_load_gpr(t0, Rb); + gen_load_gpr(t1, Rc); + tcg_gen_shli_tl(t1, t1, strd2); + tcg_gen_add_tl(t0, t0, t1); + + gen_load_mxu_gpr(t1, XRa); + tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, + (MO_TESL ^ (reversed ? 
MO_BSWAP : 0)) | + ctx->default_tcg_memop_mask); + + if (postinc) { + gen_store_gpr(t0, Rb); + } +} /* * MXU instruction category: logic @@ -981,13 +1818,291 @@ static void gen_mxu_S32XOR(DisasContext *ctx) } } +/* + * MXU instruction category: shift + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * D32SLL D32SLR D32SAR D32SARL + * D32SLLV D32SLRV D32SARV D32SARW + * Q16SLL Q16SLR Q16SAR + * Q16SLLV Q16SLRV Q16SARV + */ + +/* + * D32SLL XRa, XRd, XRb, XRc, SFT4 + * Dual 32-bit shift left from XRb and XRc to SFT4 + * bits (0..15). Store to XRa and XRd respectively. + * D32SLR XRa, XRd, XRb, XRc, SFT4 + * Dual 32-bit shift logic right from XRb and XRc + * to SFT4 bits (0..15). Store to XRa and XRd respectively. + * D32SAR XRa, XRd, XRb, XRc, SFT4 + * Dual 32-bit shift arithmetic right from XRb and XRc + * to SFT4 bits (0..15). Store to XRa and XRd respectively. + */ +static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic) +{ + uint32_t XRa, XRb, XRc, XRd, sft4; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + sft4 = extract32(ctx->opcode, 22, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + + if (right) { + if (arithmetic) { + tcg_gen_sari_tl(t0, t0, sft4); + tcg_gen_sari_tl(t1, t1, sft4); + } else { + tcg_gen_shri_tl(t0, t0, sft4); + tcg_gen_shri_tl(t1, t1, sft4); + } + } else { + tcg_gen_shli_tl(t0, t0, sft4); + tcg_gen_shli_tl(t1, t1, sft4); + } + gen_store_mxu_gpr(t0, XRa); + gen_store_mxu_gpr(t1, XRd); +} /* - * MXU instruction category max/min + * D32SLLV XRa, XRd, rs + * Dual 32-bit shift left from XRa and XRd to rs[3:0] + * bits. Store back to XRa and XRd respectively. + * D32SLRV XRa, XRd, rs + * Dual 32-bit shift logic right from XRa and XRd to rs[3:0] + * bits. Store back to XRa and XRd respectively. 
+ * D32SARV XRa, XRd, rs + * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0] + * bits. Store back to XRa and XRd respectively. + */ +static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic) +{ + uint32_t XRa, XRd, rs; + + XRa = extract32(ctx->opcode, 10, 4); + XRd = extract32(ctx->opcode, 14, 4); + rs = extract32(ctx->opcode, 21, 5); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRa); + gen_load_mxu_gpr(t1, XRd); + gen_load_gpr(t2, rs); + tcg_gen_andi_tl(t2, t2, 0x0f); + + if (right) { + if (arithmetic) { + tcg_gen_sar_tl(t0, t0, t2); + tcg_gen_sar_tl(t1, t1, t2); + } else { + tcg_gen_shr_tl(t0, t0, t2); + tcg_gen_shr_tl(t1, t1, t2); + } + } else { + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_shl_tl(t1, t1, t2); + } + gen_store_mxu_gpr(t0, XRa); + gen_store_mxu_gpr(t1, XRd); +} + +/* + * D32SARL XRa, XRb, XRc, SFT4 + * Dual shift arithmetic right 32-bit integers in XRb and XRc + * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa. + * + * D32SARW XRa, XRb, XRc, rb + * Dual shift arithmetic right 32-bit integers in XRb and XRc + * to rb[3:0] bits. Pack 16 LSBs of each into XRa. 
+ */ +static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw) +{ + uint32_t XRa, XRb, XRc, rb; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + rb = extract32(ctx->opcode, 21, 5); + + if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else { + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + if (!sarw) { + /* Make SFT4 from rb field */ + tcg_gen_movi_tl(t2, rb >> 1); + } else { + gen_load_gpr(t2, rb); + tcg_gen_andi_tl(t2, t2, 0x0f); + } + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + tcg_gen_sar_tl(t0, t0, t2); + tcg_gen_sar_tl(t1, t1, t2); + tcg_gen_extract_tl(t2, t1, 0, 16); + tcg_gen_deposit_tl(t2, t2, t0, 16, 16); + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q16SLL XRa, XRd, XRb, XRc, SFT4 + * Quad 16-bit shift left from XRb and XRc to SFT4 + * bits (0..15). Store to XRa and XRd respectively. + * Q16SLR XRa, XRd, XRb, XRc, SFT4 + * Quad 16-bit shift logic right from XRb and XRc + * to SFT4 bits (0..15). Store to XRa and XRd respectively. + * Q16SAR XRa, XRd, XRb, XRc, SFT4 + * Quad 16-bit shift arithmetic right from XRb and XRc + * to SFT4 bits (0..15). Store to XRa and XRd respectively. 
+ */ +static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic) +{ + uint32_t XRa, XRb, XRc, XRd, sft4; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRd = extract32(ctx->opcode, 18, 4); + sft4 = extract32(ctx->opcode, 22, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t2, XRc); + + if (arithmetic) { + tcg_gen_sextract_tl(t1, t0, 16, 16); + tcg_gen_sextract_tl(t0, t0, 0, 16); + tcg_gen_sextract_tl(t3, t2, 16, 16); + tcg_gen_sextract_tl(t2, t2, 0, 16); + } else { + tcg_gen_extract_tl(t1, t0, 16, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_extract_tl(t3, t2, 16, 16); + tcg_gen_extract_tl(t2, t2, 0, 16); + } + + if (right) { + if (arithmetic) { + tcg_gen_sari_tl(t0, t0, sft4); + tcg_gen_sari_tl(t1, t1, sft4); + tcg_gen_sari_tl(t2, t2, sft4); + tcg_gen_sari_tl(t3, t3, sft4); + } else { + tcg_gen_shri_tl(t0, t0, sft4); + tcg_gen_shri_tl(t1, t1, sft4); + tcg_gen_shri_tl(t2, t2, sft4); + tcg_gen_shri_tl(t3, t3, sft4); + } + } else { + tcg_gen_shli_tl(t0, t0, sft4); + tcg_gen_shli_tl(t1, t1, sft4); + tcg_gen_shli_tl(t2, t2, sft4); + tcg_gen_shli_tl(t3, t3, sft4); + } + tcg_gen_deposit_tl(t0, t0, t1, 16, 16); + tcg_gen_deposit_tl(t2, t2, t3, 16, 16); + + gen_store_mxu_gpr(t0, XRa); + gen_store_mxu_gpr(t2, XRd); +} + +/* + * Q16SLLV XRa, XRd, rs + * Quad 16-bit shift left from XRa and XRd to rs[3:0] + * bits. Store to XRa and XRd respectively. + * Q16SLRV XRa, XRd, rs + * Quad 16-bit shift logic right from XRa and XRd to rs[3:0] + * bits. Store to XRa and XRd respectively. + * Q16SARV XRa, XRd, rs + * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0] + * bits. Store to XRa and XRd respectively. 
+ */ +static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic) +{ + uint32_t XRa, XRd, rs; + + XRa = extract32(ctx->opcode, 10, 4); + XRd = extract32(ctx->opcode, 14, 4); + rs = extract32(ctx->opcode, 21, 5); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRa); + gen_load_mxu_gpr(t2, XRd); + gen_load_gpr(t5, rs); + tcg_gen_andi_tl(t5, t5, 0x0f); + + + if (arithmetic) { + tcg_gen_sextract_tl(t1, t0, 16, 16); + tcg_gen_sextract_tl(t0, t0, 0, 16); + tcg_gen_sextract_tl(t3, t2, 16, 16); + tcg_gen_sextract_tl(t2, t2, 0, 16); + } else { + tcg_gen_extract_tl(t1, t0, 16, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_extract_tl(t3, t2, 16, 16); + tcg_gen_extract_tl(t2, t2, 0, 16); + } + + if (right) { + if (arithmetic) { + tcg_gen_sar_tl(t0, t0, t5); + tcg_gen_sar_tl(t1, t1, t5); + tcg_gen_sar_tl(t2, t2, t5); + tcg_gen_sar_tl(t3, t3, t5); + } else { + tcg_gen_shr_tl(t0, t0, t5); + tcg_gen_shr_tl(t1, t1, t5); + tcg_gen_shr_tl(t2, t2, t5); + tcg_gen_shr_tl(t3, t3, t5); + } + } else { + tcg_gen_shl_tl(t0, t0, t5); + tcg_gen_shl_tl(t1, t1, t5); + tcg_gen_shl_tl(t2, t2, t5); + tcg_gen_shl_tl(t3, t3, t5); + } + tcg_gen_deposit_tl(t0, t0, t1, 16, 16); + tcg_gen_deposit_tl(t2, t2, t3, 16, 16); + + gen_store_mxu_gpr(t0, XRa); + gen_store_mxu_gpr(t2, XRd); +} + +/* + * MXU instruction category max/min/avg * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * S32MAX D16MAX Q8MAX * S32MIN D16MIN Q8MIN + * S32SLT D16SLT Q8SLT + * Q8SLTU + * D16AVG Q8AVG + * D16AVGR Q8AVGR + * S32MOVZ D16MOVZ Q8MOVZ + * S32MOVN D16MOVN Q8MOVN */ /* @@ -1072,13 +2187,14 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx) /* ...and do half-word-wise max/min with one operand 0 */ TCGv_i32 t0 = tcg_temp_new(); TCGv_i32 t1 = tcg_constant_i32(0); + TCGv_i32 t2 = tcg_temp_new(); /* the left half-word first */ tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000); if (opc == 
OPC_MXU_D16MAX) { - tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smax_i32(t2, t0, t1); } else { - tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smin_i32(t2, t0, t1); } /* the right half-word */ @@ -1094,7 +2210,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx) /* return resulting half-words to its original position */ tcg_gen_shri_i32(t0, t0, 16); /* finally update the destination */ - tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0); } else if (unlikely(XRb == XRc)) { /* both operands same -> just set destination to one of them */ tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); @@ -1102,14 +2218,15 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx) /* the most general case */ TCGv_i32 t0 = tcg_temp_new(); TCGv_i32 t1 = tcg_temp_new(); + TCGv_i32 t2 = tcg_temp_new(); /* the left half-word first */ tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000); tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000); if (opc == OPC_MXU_D16MAX) { - tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smax_i32(t2, t0, t1); } else { - tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smin_i32(t2, t0, t1); } /* the right half-word */ @@ -1127,7 +2244,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx) /* return resulting half-words to its original position */ tcg_gen_shri_i32(t0, t0, 16); /* finally update the destination */ - tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0); } } @@ -1163,14 +2280,15 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx) /* ...and do byte-wise max/min with one operand 0 */ TCGv_i32 t0 = tcg_temp_new(); TCGv_i32 t1 = tcg_constant_i32(0); + TCGv_i32 t2 = tcg_temp_new(); int32_t i; /* the leftmost byte (byte 3) first */ tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000); if (opc == OPC_MXU_Q8MAX) { - tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smax_i32(t2, t0, t1); } else { - tcg_gen_smin_i32(mxu_gpr[XRa 
- 1], t0, t1); + tcg_gen_smin_i32(t2, t0, t1); } /* bytes 2, 1, 0 */ @@ -1188,8 +2306,9 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx) /* return resulting byte to its original position */ tcg_gen_shri_i32(t0, t0, 8 * (3 - i)); /* finally update the destination */ - tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + tcg_gen_or_i32(t2, t2, t0); } + gen_store_mxu_gpr(t2, XRa); } else if (unlikely(XRb == XRc)) { /* both operands same -> just set destination to one of them */ tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); @@ -1197,15 +2316,16 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx) /* the most general case */ TCGv_i32 t0 = tcg_temp_new(); TCGv_i32 t1 = tcg_temp_new(); + TCGv_i32 t2 = tcg_temp_new(); int32_t i; /* the leftmost bytes (bytes 3) first */ tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000); tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000); if (opc == OPC_MXU_Q8MAX) { - tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smax_i32(t2, t0, t1); } else { - tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_smin_i32(t2, t0, t1); } /* bytes 2, 1, 0 */ @@ -1225,11 +2345,1741 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx) /* return resulting byte to its original position */ tcg_gen_shri_i32(t0, t0, 8 * (3 - i)); /* finally update the destination */ - tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + tcg_gen_or_i32(t2, t2, t0); } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q8SLT + * Update XRa with the signed "set less than" comparison of XRb and XRc + * on per-byte basis. + * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0; + * + * Q8SLTU + * Update XRa with the unsigned "set less than" comparison of XRb and XRc + * on per-byte basis. + * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 
1 : 0; + */ +static void gen_mxu_q8slt(DisasContext *ctx, bool sltu) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRb == XRc)) { + /* both operands same registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + tcg_gen_movi_tl(t2, 0); + + for (int i = 0; i < 4; i++) { + if (sltu) { + tcg_gen_extract_tl(t0, t3, 8 * i, 8); + tcg_gen_extract_tl(t1, t4, 8 * i, 8); + } else { + tcg_gen_sextract_tl(t0, t3, 8 * i, 8); + tcg_gen_sextract_tl(t1, t4, 8 * i, 8); + } + tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); + tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); + } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * S32SLT + * Update XRa with the signed "set less than" comparison of XRb and XRc. + * a.k.a. XRa = XRb < XRc ? 
1 : 0; + */ +static void gen_mxu_S32SLT(DisasContext *ctx) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRb == XRc)) { + /* both operands same registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], + mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); } } +/* + * D16SLT + * Update XRa with the signed "set less than" comparison of XRb and XRc + * on a per-halfword basis. + * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 
1 : 0; + */ +static void gen_mxu_D16SLT(DisasContext *ctx) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRb == XRc)) { + /* both operands same registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + tcg_gen_sextract_tl(t0, t3, 16, 16); + tcg_gen_sextract_tl(t1, t4, 16, 16); + tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); + tcg_gen_shli_tl(t2, t0, 16); + tcg_gen_sextract_tl(t0, t3, 0, 16); + tcg_gen_sextract_tl(t1, t4, 0, 16); + tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); + tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0); + } +} + +/* + * D16AVG + * Update XRa with the signed average of XRb and XRc + * on a per-halfword basis, rounding down. + * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1; + * + * D16AVGR + * Update XRa with the signed average of XRb and XRc + * on a per-halfword basis, math rounding 4/5. + * a.k.a. 
XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1; + */ +static void gen_mxu_d16avg(DisasContext *ctx, bool round45) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRb == XRc)) { + /* both operands same registers -> just set destination to same */ + tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + tcg_gen_sextract_tl(t0, t3, 16, 16); + tcg_gen_sextract_tl(t1, t4, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + if (round45) { + tcg_gen_addi_tl(t0, t0, 1); + } + tcg_gen_shli_tl(t2, t0, 15); + tcg_gen_andi_tl(t2, t2, 0xffff0000); + tcg_gen_sextract_tl(t0, t3, 0, 16); + tcg_gen_sextract_tl(t1, t4, 0, 16); + tcg_gen_add_tl(t0, t0, t1); + if (round45) { + tcg_gen_addi_tl(t0, t0, 1); + } + tcg_gen_shri_tl(t0, t0, 1); + tcg_gen_deposit_tl(t2, t2, t0, 0, 16); + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q8AVG + * Update XRa with the signed average of XRb and XRc + * on per-byte basis, rounding down. + * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1; + * + * Q8AVGR + * Update XRa with the signed average of XRb and XRc + * on per-byte basis, math rounding 4/5. + * a.k.a. 
XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1; + */ +static void gen_mxu_q8avg(DisasContext *ctx, bool round45) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRb == XRc)) { + /* both operands same registers -> just set destination to same */ + tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + tcg_gen_movi_tl(t2, 0); + + for (int i = 0; i < 4; i++) { + tcg_gen_extract_tl(t0, t3, 8 * i, 8); + tcg_gen_extract_tl(t1, t4, 8 * i, 8); + tcg_gen_add_tl(t0, t0, t1); + if (round45) { + tcg_gen_addi_tl(t0, t0, 1); + } + tcg_gen_shri_tl(t0, t0, 1); + tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); + } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q8MOVZ + * Quadruple 8-bit packed conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] } + * + * Q8MOVN + * Quadruple 8-bit packed conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. 
if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] } + */ +static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond) +{ + uint32_t XRc, XRb, XRa; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGLabel *l_quarterdone = gen_new_label(); + TCGLabel *l_halfdone = gen_new_label(); + TCGLabel *l_quarterrest = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + gen_load_mxu_gpr(t0, XRc); + gen_load_mxu_gpr(t1, XRb); + gen_load_mxu_gpr(t2, XRa); + + tcg_gen_extract_tl(t3, t1, 24, 8); + tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone); + tcg_gen_extract_tl(t3, t0, 24, 8); + tcg_gen_deposit_tl(t2, t2, t3, 24, 8); + + gen_set_label(l_quarterdone); + tcg_gen_extract_tl(t3, t1, 16, 8); + tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone); + tcg_gen_extract_tl(t3, t0, 16, 8); + tcg_gen_deposit_tl(t2, t2, t3, 16, 8); + + gen_set_label(l_halfdone); + tcg_gen_extract_tl(t3, t1, 8, 8); + tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest); + tcg_gen_extract_tl(t3, t0, 8, 8); + tcg_gen_deposit_tl(t2, t2, t3, 8, 8); + + gen_set_label(l_quarterrest); + tcg_gen_extract_tl(t3, t1, 0, 8); + tcg_gen_brcondi_tl(cond, t3, 0, l_done); + tcg_gen_extract_tl(t3, t0, 0, 8); + tcg_gen_deposit_tl(t2, t2, t3, 0, 8); + + gen_set_label(l_done); + gen_store_mxu_gpr(t2, XRa); +} + +/* + * D16MOVZ + * Double 16-bit packed conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] } + * + * D16MOVN + * Double 16-bit packed conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. 
if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] } + */ +static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond) +{ + uint32_t XRc, XRb, XRa; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGLabel *l_halfdone = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + gen_load_mxu_gpr(t0, XRc); + gen_load_mxu_gpr(t1, XRb); + gen_load_mxu_gpr(t2, XRa); + + tcg_gen_extract_tl(t3, t1, 16, 16); + tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone); + tcg_gen_extract_tl(t3, t0, 16, 16); + tcg_gen_deposit_tl(t2, t2, t3, 16, 16); + + gen_set_label(l_halfdone); + tcg_gen_extract_tl(t3, t1, 0, 16); + tcg_gen_brcondi_tl(cond, t3, 0, l_done); + tcg_gen_extract_tl(t3, t0, 0, 16); + tcg_gen_deposit_tl(t2, t2, t3, 0, 16); + + gen_set_label(l_done); + gen_store_mxu_gpr(t2, XRa); +} + +/* + * S32MOVZ + * Quadruple 32-bit conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. if (XRb == 0) { XRa = XRc } + * + * S32MOVN + * Single 32-bit conditional move where + * XRb contains conditions, XRc what to move and + * XRa is the destination. + * a.k.a. 
if (XRb != 0) { XRa = XRc } + */ +static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond) +{ + uint32_t XRc, XRb, XRa; + + XRa = extract32(ctx->opcode, 6, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRc = extract32(ctx->opcode, 14, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGLabel *l_done = gen_new_label(); + + gen_load_mxu_gpr(t0, XRc); + gen_load_mxu_gpr(t1, XRb); + + tcg_gen_brcondi_tl(cond, t1, 0, l_done); + gen_store_mxu_gpr(t0, XRa); + gen_set_label(l_done); +} + +/* + * MXU instruction category: Addition and subtraction + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * S32CPS D16CPS + * Q8ADD + */ + +/* + * S32CPS + * Update XRa if XRc < 0 by value of 0 - XRb + * else XRa = XRb + */ +static void gen_mxu_S32CPS(DisasContext *ctx) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely(XRb == 0)) { + /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRc == 0)) { + /* condition always false -> just move XRb to XRa */ + tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGLabel *l_not_less = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less); + tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]); + tcg_gen_br(l_done); + gen_set_label(l_not_less); + gen_load_mxu_gpr(t0, XRb); + gen_set_label(l_done); + gen_store_mxu_gpr(t0, XRa); + } +} + +/* + * D16CPS + * Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1] + * else XRa[0..1] = XRb[0..1] + */ +static void gen_mxu_D16CPS(DisasContext *ctx) +{ + 
uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely(XRb == 0)) { + /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else if (unlikely(XRc == 0)) { + /* condition always false -> just move XRb to XRa */ + tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGLabel *l_done_hi = gen_new_label(); + TCGLabel *l_not_less_lo = gen_new_label(); + TCGLabel *l_done_lo = gen_new_label(); + + tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16); + tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16); + tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi); + tcg_gen_subfi_tl(t1, 0, t1); + + gen_set_label(l_done_hi); + tcg_gen_shli_i32(t1, t1, 16); + + tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16); + tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo); + tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16); + tcg_gen_subfi_tl(t0, 0, t0); + tcg_gen_br(l_done_lo); + + gen_set_label(l_not_less_lo); + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16); + + gen_set_label(l_done_lo); + tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16); + } +} + +/* + * Q8ABD XRa, XRb, XRc + * Gets absolute difference for quadruple of 8-bit + * packed in XRb to another one in XRc, + * put the result in XRa. + * a.k.a. 
XRa[0..3] = abs(XRb[0..3] - XRc[0..3]); + */ +static void gen_mxu_Q8ABD(DisasContext *ctx) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 3); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + tcg_gen_movi_tl(t2, 0); + + for (int i = 0; i < 4; i++) { + tcg_gen_extract_tl(t0, t3, 8 * i, 8); + tcg_gen_extract_tl(t1, t4, 8 * i, 8); + + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_abs_tl(t0, t0); + + tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); + } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q8ADD XRa, XRb, XRc, ptn2 + * Add/subtract quadruple of 8-bit packed in XRb + * to another one in XRc, put the result in XRa. 
+ */ +static void gen_mxu_Q8ADD(DisasContext *ctx) +{ + uint32_t aptn2, pad, XRc, XRb, XRa; + + aptn2 = extract32(ctx->opcode, 24, 2); + pad = extract32(ctx->opcode, 21, 3); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + gen_load_mxu_gpr(t3, XRb); + gen_load_mxu_gpr(t4, XRc); + + for (int i = 0; i < 4; i++) { + tcg_gen_andi_tl(t0, t3, 0xff); + tcg_gen_andi_tl(t1, t4, 0xff); + + if (i < 2) { + if (aptn2 & 0x01) { + tcg_gen_sub_tl(t0, t0, t1); + } else { + tcg_gen_add_tl(t0, t0, t1); + } + } else { + if (aptn2 & 0x02) { + tcg_gen_sub_tl(t0, t0, t1); + } else { + tcg_gen_add_tl(t0, t0, t1); + } + } + if (i < 3) { + tcg_gen_shri_tl(t3, t3, 8); + tcg_gen_shri_tl(t4, t4, 8); + } + if (i > 0) { + tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); + } else { + tcg_gen_andi_tl(t0, t0, 0xff); + tcg_gen_mov_tl(t2, t0); + } + } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q8ADDE XRa, XRb, XRc, XRd, aptn2 + * Add/subtract quadruple of 8-bit packed in XRb + * to another one in XRc, with zero extending + * to 16-bit and put results as packed 16-bit data + * into XRa and XRd. + * aptn2 manages action add or subract of pairs of data. + * + * Q8ACCE XRa, XRb, XRc, XRd, aptn2 + * Add/subtract quadruple of 8-bit packed in XRb + * to another one in XRc, with zero extending + * to 16-bit and accumulate results as packed 16-bit data + * into XRa and XRd. + * aptn2 manages action add or subract of pairs of data. 
+ */ +static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate) +{ + uint32_t aptn2, XRd, XRc, XRb, XRa; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + if (XRa != 0) { + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } + if (XRd != 0) { + tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0); + } + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + if (XRa != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); + if (aptn2 & 2) { + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_sub_tl(t2, t2, t3); + } else { + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + if (accumulate) { + gen_load_mxu_gpr(t5, XRa); + tcg_gen_extract_tl(t1, t5, 0, 16); + tcg_gen_extract_tl(t3, t5, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + tcg_gen_shli_tl(t2, t2, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_or_tl(t4, t2, t0); + } + if (XRd != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8); + if (aptn2 & 1) { + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_sub_tl(t2, t2, t3); + } else { + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + if (accumulate) { + gen_load_mxu_gpr(t5, XRd); + tcg_gen_extract_tl(t1, t5, 0, 16); + tcg_gen_extract_tl(t3, t5, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + tcg_gen_add_tl(t2, t2, t3); + } + 
tcg_gen_shli_tl(t2, t2, 16); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_or_tl(t5, t2, t0); + } + + gen_store_mxu_gpr(t4, XRa); + gen_store_mxu_gpr(t5, XRd); + } +} + +/* + * D8SUM XRa, XRb, XRc + * Double parallel add of quadruple unsigned 8-bit together + * with zero extending to 16-bit data. + * D8SUMC XRa, XRb, XRc + * Double parallel add of quadruple unsigned 8-bit together + * with zero extending to 16-bit data and adding 2 to each + * parallel result. + */ +static void gen_mxu_d8sum(DisasContext *ctx, bool sumc) +{ + uint32_t pad, pad2, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 24, 2); + pad2 = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0 || pad2 != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to zero */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + if (XRb != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8); + tcg_gen_add_tl(t4, t0, t1); + tcg_gen_add_tl(t4, t4, t2); + tcg_gen_add_tl(t4, t4, t3); + } else { + tcg_gen_mov_tl(t4, 0); + } + if (XRc != 0) { + tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8); + tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8); + tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8); + tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); + tcg_gen_add_tl(t5, t0, t1); + tcg_gen_add_tl(t5, t5, t2); + tcg_gen_add_tl(t5, t5, t3); + } else { + 
tcg_gen_mov_tl(t5, 0); + } + + if (sumc) { + tcg_gen_addi_tl(t4, t4, 2); + tcg_gen_addi_tl(t5, t5, 2); + } + tcg_gen_shli_tl(t4, t4, 16); + + tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); + } +} + +/* + * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed + * 16-bit pattern addition. + */ +static void gen_mxu_q16add(DisasContext *ctx) +{ + uint32_t aptn2, optn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + optn2 = extract32(ctx->opcode, 22, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + gen_load_mxu_gpr(t1, XRb); + tcg_gen_extract_tl(t0, t1, 0, 16); + tcg_gen_extract_tl(t1, t1, 16, 16); + + gen_load_mxu_gpr(t3, XRc); + tcg_gen_extract_tl(t2, t3, 0, 16); + tcg_gen_extract_tl(t3, t3, 16, 16); + + switch (optn2) { + case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */ + tcg_gen_mov_tl(t4, t1); + tcg_gen_mov_tl(t5, t0); + break; + case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */ + tcg_gen_mov_tl(t4, t0); + tcg_gen_mov_tl(t5, t0); + break; + case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */ + tcg_gen_mov_tl(t4, t1); + tcg_gen_mov_tl(t5, t1); + break; + case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */ + tcg_gen_mov_tl(t4, t0); + tcg_gen_mov_tl(t5, t1); + break; + } + + switch (aptn2) { + case MXU_APTN2_AA: /* lop +, rop + */ + tcg_gen_add_tl(t0, t4, t3); + tcg_gen_add_tl(t1, t5, t2); + tcg_gen_add_tl(t4, t4, t3); + tcg_gen_add_tl(t5, t5, t2); + break; + case MXU_APTN2_AS: /* lop +, rop + */ + tcg_gen_sub_tl(t0, t4, t3); + tcg_gen_sub_tl(t1, t5, t2); + tcg_gen_add_tl(t4, t4, t3); + tcg_gen_add_tl(t5, t5, t2); + break; + case MXU_APTN2_SA: /* lop +, rop + */ + tcg_gen_add_tl(t0, t4, t3); + tcg_gen_add_tl(t1, t5, t2); + 
tcg_gen_sub_tl(t4, t4, t3); + tcg_gen_sub_tl(t5, t5, t2); + break; + case MXU_APTN2_SS: /* lop +, rop + */ + tcg_gen_sub_tl(t0, t4, t3); + tcg_gen_sub_tl(t1, t5, t2); + tcg_gen_sub_tl(t4, t4, t3); + tcg_gen_sub_tl(t5, t5, t2); + break; + } + + tcg_gen_shli_tl(t0, t0, 16); + tcg_gen_extract_tl(t1, t1, 0, 16); + tcg_gen_shli_tl(t4, t4, 16); + tcg_gen_extract_tl(t5, t5, 0, 16); + + tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); + tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1); +} + +/* + * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed + * 16-bit addition/subtraction with accumulate. + */ +static void gen_mxu_q16acc(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv s3 = tcg_temp_new(); + TCGv s2 = tcg_temp_new(); + TCGv s1 = tcg_temp_new(); + TCGv s0 = tcg_temp_new(); + + gen_load_mxu_gpr(t1, XRb); + tcg_gen_extract_tl(t0, t1, 0, 16); + tcg_gen_extract_tl(t1, t1, 16, 16); + + gen_load_mxu_gpr(t3, XRc); + tcg_gen_extract_tl(t2, t3, 0, 16); + tcg_gen_extract_tl(t3, t3, 16, 16); + + switch (aptn2) { + case MXU_APTN2_AA: /* lop +, rop + */ + tcg_gen_add_tl(s3, t1, t3); + tcg_gen_add_tl(s2, t0, t2); + tcg_gen_add_tl(s1, t1, t3); + tcg_gen_add_tl(s0, t0, t2); + break; + case MXU_APTN2_AS: /* lop +, rop - */ + tcg_gen_sub_tl(s3, t1, t3); + tcg_gen_sub_tl(s2, t0, t2); + tcg_gen_add_tl(s1, t1, t3); + tcg_gen_add_tl(s0, t0, t2); + break; + case MXU_APTN2_SA: /* lop -, rop + */ + tcg_gen_add_tl(s3, t1, t3); + tcg_gen_add_tl(s2, t0, t2); + tcg_gen_sub_tl(s1, t1, t3); + tcg_gen_sub_tl(s0, t0, t2); + break; + case MXU_APTN2_SS: /* lop -, rop - */ + tcg_gen_sub_tl(s3, t1, t3); + tcg_gen_sub_tl(s2, t0, t2); + tcg_gen_sub_tl(s1, t1, t3); + tcg_gen_sub_tl(s0, t0, t2); + 
break; + } + + if (XRa != 0) { + tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16); + tcg_gen_add_tl(t1, t1, s1); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0); + } + + if (XRd != 0) { + tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2); + tcg_gen_extract_tl(t0, t0, 0, 16); + tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16); + tcg_gen_add_tl(t1, t1, s3); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0); + } +} + +/* + * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed + * 16-bit accumulate. + */ +static void gen_mxu_q16accm(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + + gen_load_mxu_gpr(t2, XRb); + gen_load_mxu_gpr(t3, XRc); + + if (XRa != 0) { + TCGv a0 = tcg_temp_new(); + TCGv a1 = tcg_temp_new(); + + tcg_gen_extract_tl(t0, t2, 0, 16); + tcg_gen_extract_tl(t1, t2, 16, 16); + + gen_load_mxu_gpr(a1, XRa); + tcg_gen_extract_tl(a0, a1, 0, 16); + tcg_gen_extract_tl(a1, a1, 16, 16); + + if (aptn2 & 2) { + tcg_gen_sub_tl(a0, a0, t0); + tcg_gen_sub_tl(a1, a1, t1); + } else { + tcg_gen_add_tl(a0, a0, t0); + tcg_gen_add_tl(a1, a1, t1); + } + tcg_gen_extract_tl(a0, a0, 0, 16); + tcg_gen_shli_tl(a1, a1, 16); + tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0); + } + + if (XRd != 0) { + TCGv a0 = tcg_temp_new(); + TCGv a1 = tcg_temp_new(); + + tcg_gen_extract_tl(t0, t3, 0, 16); + tcg_gen_extract_tl(t1, t3, 16, 16); + + gen_load_mxu_gpr(a1, XRd); + tcg_gen_extract_tl(a0, a1, 0, 16); + tcg_gen_extract_tl(a1, a1, 16, 16); + + if (aptn2 & 1) { + tcg_gen_sub_tl(a0, a0, t0); + tcg_gen_sub_tl(a1, a1, t1); + } else { + tcg_gen_add_tl(a0, a0, t0); + 
tcg_gen_add_tl(a1, a1, t1); + } + tcg_gen_extract_tl(a0, a0, 0, 16); + tcg_gen_shli_tl(a1, a1, 16); + tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0); + } +} + + +/* + * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed + * 16-bit sign extended addition and accumulate. + */ +static void gen_mxu_d16asum(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + + gen_load_mxu_gpr(t2, XRb); + gen_load_mxu_gpr(t3, XRc); + + if (XRa != 0) { + tcg_gen_sextract_tl(t0, t2, 0, 16); + tcg_gen_sextract_tl(t1, t2, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + if (aptn2 & 2) { + tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + } else { + tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + } + } + + if (XRd != 0) { + tcg_gen_sextract_tl(t0, t3, 0, 16); + tcg_gen_sextract_tl(t1, t3, 16, 16); + tcg_gen_add_tl(t0, t0, t1); + if (aptn2 & 1) { + tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); + } else { + tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); + } + } +} + +/* + * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double + * 32 bit pattern addition/subtraction, set carry. + * + * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double + * 32 bit pattern addition/subtraction with carry. 
+ */ +static void gen_mxu_d32add(DisasContext *ctx) +{ + uint32_t aptn2, addc, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + addc = extract32(ctx->opcode, 22, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv cr = tcg_temp_new(); + + if (unlikely(addc > 1)) { + /* opcode incorrect -> do nothing */ + } else if (addc == 1) { + if (unlikely(XRa == 0 && XRd == 0)) { + /* destinations are zero register -> do nothing */ + } else { + /* FIXME ??? What if XRa == XRd ??? */ + /* aptn2 is unused here */ + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + gen_load_mxu_cr(cr); + if (XRa != 0) { + tcg_gen_extract_tl(t2, cr, 31, 1); + tcg_gen_add_tl(t0, t0, t2); + tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + } + if (XRd != 0) { + tcg_gen_extract_tl(t2, cr, 30, 1); + tcg_gen_add_tl(t1, t1, t2); + tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); + } + } + } else if (unlikely(XRa == 0 && XRd == 0)) { + /* destinations are zero register -> do nothing */ + } else { + /* common case */ + /* FIXME ??? What if XRa == XRd ??? 
*/ + TCGv carry = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + gen_load_mxu_cr(cr); + if (XRa != 0) { + if (aptn2 & 2) { + tcg_gen_sub_i32(t2, t0, t1); + tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1); + } else { + tcg_gen_add_i32(t2, t0, t1); + tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2); + } + tcg_gen_andi_tl(cr, cr, 0x7fffffff); + tcg_gen_shli_tl(carry, carry, 31); + tcg_gen_or_tl(cr, cr, carry); + gen_store_mxu_gpr(t2, XRa); + } + if (XRd != 0) { + if (aptn2 & 1) { + tcg_gen_sub_i32(t2, t0, t1); + tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1); + } else { + tcg_gen_add_i32(t2, t0, t1); + tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2); + } + tcg_gen_andi_tl(cr, cr, 0xbfffffff); + tcg_gen_shli_tl(carry, carry, 30); + tcg_gen_or_tl(cr, cr, carry); + gen_store_mxu_gpr(t2, XRd); + } + gen_store_mxu_cr(cr); + } +} + +/* + * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double + * 32 bit pattern addition/subtraction and accumulate. + */ +static void gen_mxu_d32acc(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + if (unlikely(XRa == 0 && XRd == 0)) { + /* destinations are zero register -> do nothing */ + } else { + /* common case */ + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + if (XRa != 0) { + if (aptn2 & 2) { + tcg_gen_sub_tl(t2, t0, t1); + } else { + tcg_gen_add_tl(t2, t0, t1); + } + tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); + } + if (XRd != 0) { + if (aptn2 & 1) { + tcg_gen_sub_tl(t2, t0, t1); + } else { + tcg_gen_add_tl(t2, t0, t1); + } + tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); + } + } +} + +/* + * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double + * 32 bit pattern addition/subtraction and accumulate. 
+ */ +static void gen_mxu_d32accm(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + if (unlikely(XRa == 0 && XRd == 0)) { + /* destinations are zero register -> do nothing */ + } else { + /* common case */ + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + if (XRa != 0) { + tcg_gen_add_tl(t2, t0, t1); + if (aptn2 & 2) { + tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); + } else { + tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); + } + } + if (XRd != 0) { + tcg_gen_sub_tl(t2, t0, t1); + if (aptn2 & 1) { + tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); + } else { + tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); + } + } + } +} + +/* + * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double + * 32 bit pattern addition/subtraction. 
+ */ +static void gen_mxu_d32asum(DisasContext *ctx) +{ + uint32_t aptn2, XRc, XRb, XRa, XRd; + + aptn2 = extract32(ctx->opcode, 24, 2); + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + if (unlikely(XRa == 0 && XRd == 0)) { + /* destinations are zero register -> do nothing */ + } else { + /* common case */ + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + if (XRa != 0) { + if (aptn2 & 2) { + tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + } else { + tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); + } + } + if (XRd != 0) { + if (aptn2 & 1) { + tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); + } else { + tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); + } + } + } +} + +/* + * MXU instruction category: Miscellaneous + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * S32EXTR S32LUI + * S32EXTRV + * Q16SAT + * Q16SCOP + */ + +/* + * S32EXTR XRa, XRd, rs, bits5 + * Extract bits5 bits from 64-bit pair {XRa:XRd} + * starting from rs[4:0] offset and put to the XRa. 
+ */ +static void gen_mxu_s32extr(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3; + uint32_t XRa, XRd, rs, bits5; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + + XRa = extract32(ctx->opcode, 6, 4); + XRd = extract32(ctx->opcode, 10, 4); + bits5 = extract32(ctx->opcode, 16, 5); + rs = extract32(ctx->opcode, 21, 5); + + /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */ + /* {XRa} = extract({tmp}, 0, bits5); */ + if (bits5 > 0) { + TCGLabel *l_xra_only = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + gen_load_mxu_gpr(t0, XRd); + gen_load_mxu_gpr(t1, XRa); + gen_load_gpr(t2, rs); + tcg_gen_andi_tl(t2, t2, 0x1f); + tcg_gen_subfi_tl(t2, 32, t2); + tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only); + tcg_gen_subfi_tl(t2, bits5, t2); + tcg_gen_subfi_tl(t3, 32, t2); + tcg_gen_shr_tl(t0, t0, t3); + tcg_gen_shl_tl(t1, t1, t2); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_br(l_done); + gen_set_label(l_xra_only); + tcg_gen_subi_tl(t2, t2, bits5); + tcg_gen_shr_tl(t0, t1, t2); + gen_set_label(l_done); + tcg_gen_extract_tl(t0, t0, 0, bits5); + } else { + /* unspecified behavior but matches tests on real hardware*/ + tcg_gen_movi_tl(t0, 0); + } + gen_store_mxu_gpr(t0, XRa); +} + +/* + * S32EXTRV XRa, XRd, rs, rt + * Extract rt[4:0] bits from 64-bit pair {XRa:XRd} + * starting from rs[4:0] offset and put to the XRa. 
+ */ +static void gen_mxu_s32extrv(DisasContext *ctx) +{ + TCGv t0, t1, t2, t3, t4; + uint32_t XRa, XRd, rs, rt; + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + t3 = tcg_temp_new(); + t4 = tcg_temp_new(); + TCGLabel *l_xra_only = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + TCGLabel *l_zero = gen_new_label(); + TCGLabel *l_extract = gen_new_label(); + + XRa = extract32(ctx->opcode, 6, 4); + XRd = extract32(ctx->opcode, 10, 4); + rt = extract32(ctx->opcode, 16, 5); + rs = extract32(ctx->opcode, 21, 5); + + /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */ + gen_load_mxu_gpr(t0, XRd); + gen_load_mxu_gpr(t1, XRa); + gen_load_gpr(t2, rs); + gen_load_gpr(t4, rt); + tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero); + tcg_gen_andi_tl(t2, t2, 0x1f); + tcg_gen_subfi_tl(t2, 32, t2); + tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only); + tcg_gen_sub_tl(t2, t4, t2); + tcg_gen_subfi_tl(t3, 32, t2); + tcg_gen_shr_tl(t0, t0, t3); + tcg_gen_shl_tl(t1, t1, t2); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_br(l_extract); + + gen_set_label(l_xra_only); + tcg_gen_sub_tl(t2, t2, t4); + tcg_gen_shr_tl(t0, t1, t2); + tcg_gen_br(l_extract); + + /* unspecified behavior but matches tests on real hardware*/ + gen_set_label(l_zero); + tcg_gen_movi_tl(t0, 0); + tcg_gen_br(l_done); + + /* {XRa} = extract({tmp}, 0, rt) */ + gen_set_label(l_extract); + tcg_gen_subfi_tl(t4, 32, t4); + tcg_gen_shl_tl(t0, t0, t4); + tcg_gen_shr_tl(t0, t0, t4); + + gen_set_label(l_done); + gen_store_mxu_gpr(t0, XRa); +} + +/* + * S32LUI XRa, S8, optn3 + * Permutate the immediate S8 value to form a word + * to update XRa. 
+ */ +static void gen_mxu_s32lui(DisasContext *ctx) +{ + uint32_t XRa, s8, optn3, pad; + + XRa = extract32(ctx->opcode, 6, 4); + s8 = extract32(ctx->opcode, 10, 8); + pad = extract32(ctx->opcode, 21, 2); + optn3 = extract32(ctx->opcode, 23, 3); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else { + uint32_t s16; + TCGv t0 = tcg_temp_new(); + + switch (optn3) { + case 0: + tcg_gen_movi_tl(t0, s8); + break; + case 1: + tcg_gen_movi_tl(t0, s8 << 8); + break; + case 2: + tcg_gen_movi_tl(t0, s8 << 16); + break; + case 3: + tcg_gen_movi_tl(t0, s8 << 24); + break; + case 4: + tcg_gen_movi_tl(t0, (s8 << 16) | s8); + break; + case 5: + tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8)); + break; + case 6: + s16 = (uint16_t)(int16_t)(int8_t)s8; + tcg_gen_movi_tl(t0, (s16 << 16) | s16); + break; + case 7: + tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8); + break; + } + gen_store_mxu_gpr(t0, XRa); + } +} + +/* + * Q16SAT XRa, XRb, XRc + * Packs four 16-bit signed integers in XRb and XRc to + * four saturated unsigned 8-bit into XRa. 
+ * + */ +static void gen_mxu_Q16SAT(DisasContext *ctx) +{ + uint32_t pad, XRc, XRb, XRa; + + pad = extract32(ctx->opcode, 21, 3); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(pad != 0)) { + /* opcode padding incorrect -> do nothing */ + } else if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + tcg_gen_movi_tl(t2, 0); + if (XRb != 0) { + TCGLabel *l_less_hi = gen_new_label(); + TCGLabel *l_less_lo = gen_new_label(); + TCGLabel *l_lo = gen_new_label(); + TCGLabel *l_greater_hi = gen_new_label(); + TCGLabel *l_greater_lo = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi); + tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi); + tcg_gen_br(l_lo); + gen_set_label(l_less_hi); + tcg_gen_movi_tl(t0, 0); + tcg_gen_br(l_lo); + gen_set_label(l_greater_hi); + tcg_gen_movi_tl(t0, 255); + + gen_set_label(l_lo); + tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16); + tcg_gen_sari_tl(t1, t1, 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo); + tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo); + tcg_gen_br(l_done); + gen_set_label(l_less_lo); + tcg_gen_movi_tl(t1, 0); + tcg_gen_br(l_done); + gen_set_label(l_greater_lo); + tcg_gen_movi_tl(t1, 255); + + gen_set_label(l_done); + tcg_gen_shli_tl(t2, t0, 24); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(t2, t2, t1); + } + + if (XRc != 0) { + TCGLabel *l_less_hi = gen_new_label(); + TCGLabel *l_less_lo = gen_new_label(); + TCGLabel *l_lo = gen_new_label(); + TCGLabel *l_greater_hi = gen_new_label(); + TCGLabel *l_greater_lo = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi); + 
tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi); + tcg_gen_br(l_lo); + gen_set_label(l_less_hi); + tcg_gen_movi_tl(t0, 0); + tcg_gen_br(l_lo); + gen_set_label(l_greater_hi); + tcg_gen_movi_tl(t0, 255); + + gen_set_label(l_lo); + tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16); + tcg_gen_sari_tl(t1, t1, 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo); + tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo); + tcg_gen_br(l_done); + gen_set_label(l_less_lo); + tcg_gen_movi_tl(t1, 0); + tcg_gen_br(l_done); + gen_set_label(l_greater_lo); + tcg_gen_movi_tl(t1, 255); + + gen_set_label(l_done); + tcg_gen_shli_tl(t0, t0, 8); + tcg_gen_or_tl(t2, t2, t0); + tcg_gen_or_tl(t2, t2, t1); + } + gen_store_mxu_gpr(t2, XRa); + } +} + +/* + * Q16SCOP XRa, XRd, XRb, XRc + * Determine sign of quad packed 16-bit signed values + * in XRb and XRc put result in XRa and XRd respectively. + */ +static void gen_mxu_q16scop(DisasContext *ctx) +{ + uint32_t XRd, XRc, XRb, XRa; + + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + + TCGLabel *l_b_hi_lt = gen_new_label(); + TCGLabel *l_b_hi_gt = gen_new_label(); + TCGLabel *l_b_lo = gen_new_label(); + TCGLabel *l_b_lo_lt = gen_new_label(); + TCGLabel *l_c_hi = gen_new_label(); + TCGLabel *l_c_hi_lt = gen_new_label(); + TCGLabel *l_c_hi_gt = gen_new_label(); + TCGLabel *l_c_lo = gen_new_label(); + TCGLabel *l_c_lo_lt = gen_new_label(); + TCGLabel *l_done = gen_new_label(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + + tcg_gen_sextract_tl(t2, t0, 16, 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt); + tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt); + tcg_gen_movi_tl(t3, 0); + tcg_gen_br(l_b_lo); + gen_set_label(l_b_hi_lt); + tcg_gen_movi_tl(t3, 0xffff0000); + 
tcg_gen_br(l_b_lo); + gen_set_label(l_b_hi_gt); + tcg_gen_movi_tl(t3, 0x00010000); + + gen_set_label(l_b_lo); + tcg_gen_sextract_tl(t2, t0, 0, 16); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi); + tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt); + tcg_gen_ori_tl(t3, t3, 0x00000001); + tcg_gen_br(l_c_hi); + gen_set_label(l_b_lo_lt); + tcg_gen_ori_tl(t3, t3, 0x0000ffff); + tcg_gen_br(l_c_hi); + + gen_set_label(l_c_hi); + tcg_gen_sextract_tl(t2, t1, 16, 16); + tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt); + tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt); + tcg_gen_movi_tl(t4, 0); + tcg_gen_br(l_c_lo); + gen_set_label(l_c_hi_lt); + tcg_gen_movi_tl(t4, 0xffff0000); + tcg_gen_br(l_c_lo); + gen_set_label(l_c_hi_gt); + tcg_gen_movi_tl(t4, 0x00010000); + + gen_set_label(l_c_lo); + tcg_gen_sextract_tl(t2, t1, 0, 16); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done); + tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt); + tcg_gen_ori_tl(t4, t4, 0x00000001); + tcg_gen_br(l_done); + gen_set_label(l_c_lo_lt); + tcg_gen_ori_tl(t4, t4, 0x0000ffff); + + gen_set_label(l_done); + gen_store_mxu_gpr(t3, XRa); + gen_store_mxu_gpr(t4, XRd); +} + +/* + * S32SFL XRa, XRd, XRb, XRc + * Shuffle bytes according to one of four patterns. 
+ */ +static void gen_mxu_s32sfl(DisasContext *ctx) +{ + uint32_t XRd, XRc, XRb, XRa, ptn2; + + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + ptn2 = extract32(ctx->opcode, 24, 2); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + + switch (ptn2) { + case 0: + tcg_gen_andi_tl(t2, t0, 0xff000000); + tcg_gen_andi_tl(t3, t1, 0x000000ff); + tcg_gen_deposit_tl(t3, t3, t0, 8, 8); + tcg_gen_shri_tl(t0, t0, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t3, t3, t0, 24, 8); + tcg_gen_deposit_tl(t3, t3, t1, 16, 8); + tcg_gen_shri_tl(t0, t0, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t2, t2, t0, 8, 8); + tcg_gen_deposit_tl(t2, t2, t1, 0, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t2, t2, t1, 16, 8); + break; + case 1: + tcg_gen_andi_tl(t2, t0, 0xff000000); + tcg_gen_andi_tl(t3, t1, 0x000000ff); + tcg_gen_deposit_tl(t3, t3, t0, 16, 8); + tcg_gen_shri_tl(t0, t0, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t2, t2, t0, 16, 8); + tcg_gen_deposit_tl(t2, t2, t1, 0, 8); + tcg_gen_shri_tl(t0, t0, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t3, t3, t0, 24, 8); + tcg_gen_deposit_tl(t3, t3, t1, 8, 8); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t2, t2, t1, 8, 8); + break; + case 2: + tcg_gen_andi_tl(t2, t0, 0xff00ff00); + tcg_gen_andi_tl(t3, t1, 0x00ff00ff); + tcg_gen_deposit_tl(t3, t3, t0, 8, 8); + tcg_gen_shri_tl(t0, t0, 16); + tcg_gen_shri_tl(t1, t1, 8); + tcg_gen_deposit_tl(t2, t2, t1, 0, 8); + tcg_gen_deposit_tl(t3, t3, t0, 24, 8); + tcg_gen_shri_tl(t1, t1, 16); + tcg_gen_deposit_tl(t2, t2, t1, 16, 8); + break; + case 3: + tcg_gen_andi_tl(t2, t0, 0xffff0000); + tcg_gen_andi_tl(t3, t1, 0x0000ffff); + tcg_gen_shri_tl(t1, t1, 16); + tcg_gen_deposit_tl(t2, t2, t1, 0, 16); + tcg_gen_deposit_tl(t3, 
t3, t0, 16, 16); + break; + } + + gen_store_mxu_gpr(t2, XRa); + gen_store_mxu_gpr(t3, XRd); +} + +/* + * Q8SAD XRa, XRd, XRb, XRc + * Typical SAD opration for motion estimation. + */ +static void gen_mxu_q8sad(DisasContext *ctx) +{ + uint32_t XRd, XRc, XRb, XRa; + + XRd = extract32(ctx->opcode, 18, 4); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGv t4 = tcg_temp_new(); + TCGv t5 = tcg_temp_new(); + + gen_load_mxu_gpr(t2, XRb); + gen_load_mxu_gpr(t3, XRc); + gen_load_mxu_gpr(t5, XRd); + tcg_gen_movi_tl(t4, 0); + + for (int i = 0; i < 4; i++) { + tcg_gen_andi_tl(t0, t2, 0xff); + tcg_gen_andi_tl(t1, t3, 0xff); + tcg_gen_sub_tl(t0, t0, t1); + tcg_gen_abs_tl(t0, t0); + tcg_gen_add_tl(t4, t4, t0); + if (i < 3) { + tcg_gen_shri_tl(t2, t2, 8); + tcg_gen_shri_tl(t3, t3, 8); + } + } + tcg_gen_add_tl(t5, t5, t4); + gen_store_mxu_gpr(t4, XRa); + gen_store_mxu_gpr(t5, XRd); +} /* * MXU instruction category: align @@ -1408,6 +4258,129 @@ static void gen_mxu_S32ALNI(DisasContext *ctx) } } +/* + * S32ALN XRc, XRb, XRa, rs + * Arrange bytes from XRb and XRc according to one of five sets of + * rules determined by rs[2:0], and place the result in XRa. 
+ */ +static void gen_mxu_S32ALN(DisasContext *ctx) +{ + uint32_t rs, XRc, XRb, XRa; + + rs = extract32(ctx->opcode, 21, 5); + XRc = extract32(ctx->opcode, 14, 4); + XRb = extract32(ctx->opcode, 10, 4); + XRa = extract32(ctx->opcode, 6, 4); + + if (unlikely(XRa == 0)) { + /* destination is zero register -> do nothing */ + } else if (unlikely((XRb == 0) && (XRc == 0))) { + /* both operands zero registers -> just set destination to all 0s */ + tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); + } else { + /* the most general case */ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv t3 = tcg_temp_new(); + TCGLabel *l_exit = gen_new_label(); + TCGLabel *l_b_only = gen_new_label(); + TCGLabel *l_c_only = gen_new_label(); + + gen_load_mxu_gpr(t0, XRb); + gen_load_mxu_gpr(t1, XRc); + gen_load_gpr(t2, rs); + tcg_gen_andi_tl(t2, t2, 0x07); + + /* do nothing for undefined cases */ + tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit); + + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only); + + tcg_gen_shli_tl(t2, t2, 3); + tcg_gen_subfi_tl(t3, 32, t2); + + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_shr_tl(t1, t1, t3); + tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1); + tcg_gen_br(l_exit); + + gen_set_label(l_b_only); + gen_store_mxu_gpr(t0, XRa); + tcg_gen_br(l_exit); + + gen_set_label(l_c_only); + gen_store_mxu_gpr(t1, XRa); + + gen_set_label(l_exit); + } +} + +/* + * S32MADD XRa, XRd, rb, rc + * 32 to 64 bit signed multiply with subsequent add + * result stored in {XRa, XRd} pair, stain HI/LO. + * S32MADDU XRa, XRd, rb, rc + * 32 to 64 bit unsigned multiply with subsequent add + * result stored in {XRa, XRd} pair, stain HI/LO. + * S32MSUB XRa, XRd, rb, rc + * 32 to 64 bit signed multiply with subsequent subtract + * result stored in {XRa, XRd} pair, stain HI/LO. + * S32MSUBU XRa, XRd, rb, rc + * 32 to 64 bit unsigned multiply with subsequent subtract + * result stored in {XRa, XRd} pair, stain HI/LO. 
+ */ +static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns) +{ + uint32_t XRa, XRd, Rb, Rc; + + XRa = extract32(ctx->opcode, 6, 4); + XRd = extract32(ctx->opcode, 10, 4); + Rb = extract32(ctx->opcode, 16, 5); + Rc = extract32(ctx->opcode, 21, 5); + + if (unlikely(Rb == 0 || Rc == 0)) { + /* do nothing because x + 0 * y => x */ + } else if (unlikely(XRa == 0 && XRd == 0)) { + /* do nothing because result just dropped */ + } else { + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + + gen_load_gpr(t0, Rb); + gen_load_gpr(t1, Rc); + + if (uns) { + tcg_gen_extu_tl_i64(t2, t0); + tcg_gen_extu_tl_i64(t3, t1); + } else { + tcg_gen_ext_tl_i64(t2, t0); + tcg_gen_ext_tl_i64(t3, t1); + } + tcg_gen_mul_i64(t2, t2, t3); + + gen_load_mxu_gpr(t0, XRa); + gen_load_mxu_gpr(t1, XRd); + + tcg_gen_concat_tl_i64(t3, t1, t0); + if (sub) { + tcg_gen_sub_i64(t3, t3, t2); + } else { + tcg_gen_add_i64(t3, t3, t2); + } + gen_move_low32(t1, t3); + gen_move_high32(t0, t3); + + tcg_gen_mov_tl(cpu_HI[0], t0); + tcg_gen_mov_tl(cpu_LO[0], t1); + + gen_store_mxu_gpr(t1, XRd); + gen_store_mxu_gpr(t0, XRa); + } +} /* * Decoding engine for MXU @@ -1431,6 +4404,116 @@ static void decode_opc_mxu__pool00(DisasContext *ctx) case OPC_MXU_Q8MIN: gen_mxu_Q8MAX_Q8MIN(ctx); break; + case OPC_MXU_Q8SLT: + gen_mxu_q8slt(ctx, false); + break; + case OPC_MXU_Q8SLTU: + gen_mxu_q8slt(ctx, true); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 0, 6); + uint32_t pad = extract32(ctx->opcode, 14, 2); + + if (pad != 2) { + /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */ + return false; + } + + switch (opcode) { + case OPC_MXU_S32MADD: + gen_mxu_s32madd_sub(ctx, false, false); + break; + case OPC_MXU_S32MADDU: + gen_mxu_s32madd_sub(ctx, false, true); + break; + case 
OPC_MXU_S32MSUB: + gen_mxu_s32madd_sub(ctx, true, false); + break; + case OPC_MXU_S32MSUBU: + gen_mxu_s32madd_sub(ctx, true, true); + break; + default: + return false; + } + return true; +} + +static void decode_opc_mxu__pool01(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_S32SLT: + gen_mxu_S32SLT(ctx); + break; + case OPC_MXU_D16SLT: + gen_mxu_D16SLT(ctx); + break; + case OPC_MXU_D16AVG: + gen_mxu_d16avg(ctx, false); + break; + case OPC_MXU_D16AVGR: + gen_mxu_d16avg(ctx, true); + break; + case OPC_MXU_Q8AVG: + gen_mxu_q8avg(ctx, false); + break; + case OPC_MXU_Q8AVGR: + gen_mxu_q8avg(ctx, true); + break; + case OPC_MXU_Q8ADD: + gen_mxu_Q8ADD(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool02(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_S32CPS: + gen_mxu_S32CPS(ctx); + break; + case OPC_MXU_D16CPS: + gen_mxu_D16CPS(ctx); + break; + case OPC_MXU_Q8ABD: + gen_mxu_Q8ABD(ctx); + break; + case OPC_MXU_Q16SAT: + gen_mxu_Q16SAT(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool03(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 24, 2); + + switch (opcode) { + case OPC_MXU_D16MULF: + gen_mxu_d16mul(ctx, true, true); + break; + case OPC_MXU_D16MULE: + gen_mxu_d16mul(ctx, true, false); + break; default: MIPS_INVAL("decode_opc_mxu"); gen_reserved_instruction(ctx); @@ -1440,12 +4523,215 @@ static void decode_opc_mxu__pool00(DisasContext *ctx) static void decode_opc_mxu__pool04(DisasContext *ctx) { - uint32_t opcode = extract32(ctx->opcode, 20, 1); + uint32_t reversed = extract32(ctx->opcode, 20, 1); + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + /* Don't care about opcode bits as their meaning is unknown yet */ + switch (opcode) { + default: + 
gen_mxu_s32ldxx(ctx, reversed, false); + break; + } +} + +static void decode_opc_mxu__pool05(DisasContext *ctx) +{ + uint32_t reversed = extract32(ctx->opcode, 20, 1); + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + /* Don't care about opcode bits as their meaning is unknown yet */ + switch (opcode) { + default: + gen_mxu_s32stxx(ctx, reversed, false); + break; + } +} + +static void decode_opc_mxu__pool06(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + uint32_t strd2 = extract32(ctx->opcode, 14, 2); switch (opcode) { - case OPC_MXU_S32LDD: - case OPC_MXU_S32LDDR: - gen_mxu_s32ldd_s32lddr(ctx); + case OPC_MXU_S32LDST: + case OPC_MXU_S32LDSTR: + if (strd2 <= 2) { + gen_mxu_s32ldxvx(ctx, opcode, false, strd2); + break; + } + /* fallthrough */ + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool07(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + uint32_t strd2 = extract32(ctx->opcode, 14, 2); + + switch (opcode) { + case OPC_MXU_S32LDST: + case OPC_MXU_S32LDSTR: + if (strd2 <= 2) { + gen_mxu_s32stxvx(ctx, opcode, false, strd2); + break; + } + /* fallthrough */ + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool08(DisasContext *ctx) +{ + uint32_t reversed = extract32(ctx->opcode, 20, 1); + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + /* Don't care about opcode bits as their meaning is unknown yet */ + switch (opcode) { + default: + gen_mxu_s32ldxx(ctx, reversed, true); + break; + } +} + +static void decode_opc_mxu__pool09(DisasContext *ctx) +{ + uint32_t reversed = extract32(ctx->opcode, 20, 1); + uint32_t opcode = extract32(ctx->opcode, 10, 4); + + /* Don't care about opcode bits as their meaning is unknown yet */ + switch (opcode) { + default: + gen_mxu_s32stxx(ctx, reversed, true); + break; + } +} + +static void decode_opc_mxu__pool10(DisasContext 
*ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + uint32_t strd2 = extract32(ctx->opcode, 14, 2); + + switch (opcode) { + case OPC_MXU_S32LDST: + case OPC_MXU_S32LDSTR: + if (strd2 <= 2) { + gen_mxu_s32ldxvx(ctx, opcode, true, strd2); + break; + } + /* fallthrough */ + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool11(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 10, 4); + uint32_t strd2 = extract32(ctx->opcode, 14, 2); + + switch (opcode) { + case OPC_MXU_S32LDST: + case OPC_MXU_S32LDSTR: + if (strd2 <= 2) { + gen_mxu_s32stxvx(ctx, opcode, true, strd2); + break; + } + /* fallthrough */ + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool12(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_D32ACC: + gen_mxu_d32acc(ctx); + break; + case OPC_MXU_D32ACCM: + gen_mxu_d32accm(ctx); + break; + case OPC_MXU_D32ASUM: + gen_mxu_d32asum(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool13(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q16ACC: + gen_mxu_q16acc(ctx); + break; + case OPC_MXU_Q16ACCM: + gen_mxu_q16accm(ctx); + break; + case OPC_MXU_D16ASUM: + gen_mxu_d16asum(ctx); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool14(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8ADDE: + gen_mxu_q8adde(ctx, false); + break; + case OPC_MXU_D8SUM: + gen_mxu_d8sum(ctx, false); + break; + case OPC_MXU_D8SUMC: + gen_mxu_d8sum(ctx, true); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static 
void decode_opc_mxu__pool15(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 14, 2); + + switch (opcode) { + case OPC_MXU_S32MUL: + gen_mxu_s32mul(ctx, false); + break; + case OPC_MXU_S32MULU: + gen_mxu_s32mul(ctx, true); + break; + case OPC_MXU_S32EXTR: + gen_mxu_s32extr(ctx); + break; + case OPC_MXU_S32EXTRV: + gen_mxu_s32extrv(ctx); break; default: MIPS_INVAL("decode_opc_mxu"); @@ -1459,9 +4745,18 @@ static void decode_opc_mxu__pool16(DisasContext *ctx) uint32_t opcode = extract32(ctx->opcode, 18, 3); switch (opcode) { + case OPC_MXU_D32SARW: + gen_mxu_d32sarl(ctx, true); + break; + case OPC_MXU_S32ALN: + gen_mxu_S32ALN(ctx); + break; case OPC_MXU_S32ALNI: gen_mxu_S32ALNI(ctx); break; + case OPC_MXU_S32LUI: + gen_mxu_s32lui(ctx); + break; case OPC_MXU_S32NOR: gen_mxu_S32NOR(ctx); break; @@ -1481,14 +4776,128 @@ static void decode_opc_mxu__pool16(DisasContext *ctx) } } +static void decode_opc_mxu__pool17(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 6, 3); + uint32_t strd2 = extract32(ctx->opcode, 9, 2); + + if (strd2 > 2) { + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + return; + } + + switch (opcode) { + case OPC_MXU_LXW: + gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL); + break; + case OPC_MXU_LXB: + gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB); + break; + case OPC_MXU_LXH: + gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW); + break; + case OPC_MXU_LXBU: + gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB); + break; + case OPC_MXU_LXHU: + gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool18(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_D32SLLV: + gen_mxu_d32sxxv(ctx, false, false); + break; + case OPC_MXU_D32SLRV: + gen_mxu_d32sxxv(ctx, true, false); + break; + case OPC_MXU_D32SARV: + gen_mxu_d32sxxv(ctx, true, true); + break; + case OPC_MXU_Q16SLLV: + 
gen_mxu_q16sxxv(ctx, false, false); + break; + case OPC_MXU_Q16SLRV: + gen_mxu_q16sxxv(ctx, true, false); + break; + case OPC_MXU_Q16SARV: + gen_mxu_q16sxxv(ctx, true, true); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + static void decode_opc_mxu__pool19(DisasContext *ctx) { - uint32_t opcode = extract32(ctx->opcode, 22, 2); + uint32_t opcode = extract32(ctx->opcode, 22, 4); switch (opcode) { case OPC_MXU_Q8MUL: + gen_mxu_q8mul_mac(ctx, false, false); + break; case OPC_MXU_Q8MULSU: - gen_mxu_q8mul_q8mulsu(ctx); + gen_mxu_q8mul_mac(ctx, true, false); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool20(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 18, 3); + + switch (opcode) { + case OPC_MXU_Q8MOVZ: + gen_mxu_q8movzn(ctx, TCG_COND_NE); + break; + case OPC_MXU_Q8MOVN: + gen_mxu_q8movzn(ctx, TCG_COND_EQ); + break; + case OPC_MXU_D16MOVZ: + gen_mxu_d16movzn(ctx, TCG_COND_NE); + break; + case OPC_MXU_D16MOVN: + gen_mxu_d16movzn(ctx, TCG_COND_EQ); + break; + case OPC_MXU_S32MOVZ: + gen_mxu_s32movzn(ctx, TCG_COND_NE); + break; + case OPC_MXU_S32MOVN: + gen_mxu_s32movzn(ctx, TCG_COND_EQ); + break; + default: + MIPS_INVAL("decode_opc_mxu"); + gen_reserved_instruction(ctx); + break; + } +} + +static void decode_opc_mxu__pool21(DisasContext *ctx) +{ + uint32_t opcode = extract32(ctx->opcode, 22, 2); + + switch (opcode) { + case OPC_MXU_Q8MAC: + gen_mxu_q8mul_mac(ctx, false, true); + break; + case OPC_MXU_Q8MACSU: + gen_mxu_q8mul_mac(ctx, true, true); break; default: MIPS_INVAL("decode_opc_mxu"); @@ -1497,6 +4906,7 @@ static void decode_opc_mxu__pool19(DisasContext *ctx) } } + bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) { uint32_t opcode = extract32(insn, 0, 6); @@ -1520,30 +4930,163 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit); 
switch (opcode) { + case OPC_MXU_S32MADD: + case OPC_MXU_S32MADDU: + case OPC_MXU_S32MSUB: + case OPC_MXU_S32MSUBU: + return decode_opc_mxu_s32madd_sub(ctx); case OPC_MXU__POOL00: decode_opc_mxu__pool00(ctx); break; case OPC_MXU_D16MUL: - gen_mxu_d16mul(ctx); + gen_mxu_d16mul(ctx, false, false); break; case OPC_MXU_D16MAC: - gen_mxu_d16mac(ctx); + gen_mxu_d16mac(ctx, false, false); + break; + case OPC_MXU_D16MACF: + gen_mxu_d16mac(ctx, true, true); + break; + case OPC_MXU_D16MADL: + gen_mxu_d16madl(ctx); + break; + case OPC_MXU_S16MAD: + gen_mxu_s16mad(ctx); + break; + case OPC_MXU_Q16ADD: + gen_mxu_q16add(ctx); + break; + case OPC_MXU_D16MACE: + gen_mxu_d16mac(ctx, true, false); + break; + case OPC_MXU__POOL01: + decode_opc_mxu__pool01(ctx); + break; + case OPC_MXU__POOL02: + decode_opc_mxu__pool02(ctx); + break; + case OPC_MXU__POOL03: + decode_opc_mxu__pool03(ctx); break; case OPC_MXU__POOL04: decode_opc_mxu__pool04(ctx); break; + case OPC_MXU__POOL05: + decode_opc_mxu__pool05(ctx); + break; + case OPC_MXU__POOL06: + decode_opc_mxu__pool06(ctx); + break; + case OPC_MXU__POOL07: + decode_opc_mxu__pool07(ctx); + break; + case OPC_MXU__POOL08: + decode_opc_mxu__pool08(ctx); + break; + case OPC_MXU__POOL09: + decode_opc_mxu__pool09(ctx); + break; + case OPC_MXU__POOL10: + decode_opc_mxu__pool10(ctx); + break; + case OPC_MXU__POOL11: + decode_opc_mxu__pool11(ctx); + break; + case OPC_MXU_D32ADD: + gen_mxu_d32add(ctx); + break; + case OPC_MXU__POOL12: + decode_opc_mxu__pool12(ctx); + break; + case OPC_MXU__POOL13: + decode_opc_mxu__pool13(ctx); + break; + case OPC_MXU__POOL14: + decode_opc_mxu__pool14(ctx); + break; + case OPC_MXU_Q8ACCE: + gen_mxu_q8adde(ctx, true); + break; case OPC_MXU_S8LDD: - gen_mxu_s8ldd(ctx); + gen_mxu_s8ldd(ctx, false); + break; + case OPC_MXU_S8STD: + gen_mxu_s8std(ctx, false); + break; + case OPC_MXU_S8LDI: + gen_mxu_s8ldd(ctx, true); + break; + case OPC_MXU_S8SDI: + gen_mxu_s8std(ctx, true); + break; + case OPC_MXU__POOL15: + 
decode_opc_mxu__pool15(ctx); break; case OPC_MXU__POOL16: decode_opc_mxu__pool16(ctx); break; + case OPC_MXU__POOL17: + decode_opc_mxu__pool17(ctx); + break; + case OPC_MXU_S16LDD: + gen_mxu_s16ldd(ctx, false); + break; + case OPC_MXU_S16STD: + gen_mxu_s16std(ctx, false); + break; + case OPC_MXU_S16LDI: + gen_mxu_s16ldd(ctx, true); + break; + case OPC_MXU_S16SDI: + gen_mxu_s16std(ctx, true); + break; + case OPC_MXU_D32SLL: + gen_mxu_d32sxx(ctx, false, false); + break; + case OPC_MXU_D32SLR: + gen_mxu_d32sxx(ctx, true, false); + break; + case OPC_MXU_D32SARL: + gen_mxu_d32sarl(ctx, false); + break; + case OPC_MXU_D32SAR: + gen_mxu_d32sxx(ctx, true, true); + break; + case OPC_MXU_Q16SLL: + gen_mxu_q16sxx(ctx, false, false); + break; + case OPC_MXU__POOL18: + decode_opc_mxu__pool18(ctx); + break; + case OPC_MXU_Q16SLR: + gen_mxu_q16sxx(ctx, true, false); + break; + case OPC_MXU_Q16SAR: + gen_mxu_q16sxx(ctx, true, true); + break; case OPC_MXU__POOL19: decode_opc_mxu__pool19(ctx); break; + case OPC_MXU__POOL20: + decode_opc_mxu__pool20(ctx); + break; + case OPC_MXU__POOL21: + decode_opc_mxu__pool21(ctx); + break; + case OPC_MXU_Q16SCOP: + gen_mxu_q16scop(ctx); + break; + case OPC_MXU_Q8MADL: + gen_mxu_q8madl(ctx); + break; + case OPC_MXU_S32SFL: + gen_mxu_s32sfl(ctx); + break; + case OPC_MXU_Q8SAD: + gen_mxu_q8sad(ctx); + break; default: - MIPS_INVAL("decode_opc_mxu"); - gen_reserved_instruction(ctx); + return false; } gen_set_label(l_exit); diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c index ef3dafc..98935b5 100644 --- a/target/mips/tcg/op_helper.c +++ b/target/mips/tcg/op_helper.c @@ -257,6 +257,22 @@ void helper_pmon(CPUMIPSState *env, int function) } } +#ifdef TARGET_MIPS64 +target_ulong helper_lcsr_cpucfg(CPUMIPSState *env, target_ulong rs) +{ + switch (rs) { + case 0: + return env->CP0_PRid; + case 1: + return env->lcsr_cpucfg1; + case 2: + return env->lcsr_cpucfg2; + default: + return 0; + } +} +#endif + #if !defined(CONFIG_USER_ONLY) 
void mips_cpu_do_unaligned_access(CPUState *cs, vaddr addr, diff --git a/target/mips/tcg/sysemu/lcsr_helper.c b/target/mips/tcg/sysemu/lcsr_helper.c new file mode 100644 index 0000000..942143d --- /dev/null +++ b/target/mips/tcg/sysemu/lcsr_helper.c @@ -0,0 +1,45 @@ +/* + * Loongson CSR instructions translation routines + * + * Copyright (c) 2023 Jiaxun Yang <jiaxun.yang@flygoat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "internal.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" + +#define GET_MEMTXATTRS(cas) \ + ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index}) + +uint64_t helper_lcsr_rdcsr(CPUMIPSState *env, target_ulong r_addr) +{ + return address_space_ldl(&env->iocsr.as, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +uint64_t helper_lcsr_drdcsr(CPUMIPSState *env, target_ulong r_addr) +{ + return address_space_ldq(&env->iocsr.as, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +void helper_lcsr_wrcsr(CPUMIPSState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stl(&env->iocsr.as, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} + +void helper_lcsr_dwrcsr(CPUMIPSState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stq(&env->iocsr.as, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} diff --git a/target/mips/tcg/sysemu/meson.build b/target/mips/tcg/sysemu/meson.build index 43b35b3..ec665a4 100644 --- a/target/mips/tcg/sysemu/meson.build +++ b/target/mips/tcg/sysemu/meson.build @@ -4,3 +4,7 @@ mips_system_ss.add(files( 'special_helper.c', 'tlb_helper.c', )) + +mips_system_ss.add(when: 'TARGET_MIPS64', if_true: files( + 'lcsr_helper.c', +)) diff --git a/target/mips/tcg/sysemu_helper.h.inc b/target/mips/tcg/sysemu_helper.h.inc index af585b5..f163af1 100644 --- a/target/mips/tcg/sysemu_helper.h.inc +++ b/target/mips/tcg/sysemu_helper.h.inc @@ -181,3 +181,11 @@ 
DEF_HELPER_1(eret, void, env) DEF_HELPER_1(eretnc, void, env) DEF_HELPER_1(deret, void, env) DEF_HELPER_3(cache, void, env, tl, i32) + +#ifdef TARGET_MIPS64 +/* Longson CSR */ +DEF_HELPER_2(lcsr_rdcsr, i64, env, tl) +DEF_HELPER_2(lcsr_drdcsr, i64, env, tl) +DEF_HELPER_3(lcsr_wrcsr, void, env, tl, tl) +DEF_HELPER_3(lcsr_dwrcsr, void, env, tl, tl) +#endif diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 74af91e..9bb40f1 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -14644,12 +14644,9 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx) } #endif if (TARGET_LONG_BITS == 32 && (ctx->insn_flags & ASE_MXU)) { - if (MASK_SPECIAL2(ctx->opcode) == OPC_MUL) { - gen_arith(ctx, OPC_MUL, rd, rs, rt); - } else { - decode_ase_mxu(ctx, ctx->opcode); + if (decode_ase_mxu(ctx, ctx->opcode)) { + break; } - break; } decode_opc_special2_legacy(env, ctx); break; @@ -15352,6 +15349,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) return; } #if defined(TARGET_MIPS64) + if (ase_lcsr_available(env) && decode_ase_lcsr(ctx, ctx->opcode)) { + return; + } if (cpu_supports_isa(env, INSN_OCTEON) && decode_ext_octeon(ctx, ctx->opcode)) { return; } diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h index 3b0498a..db3dc93 100644 --- a/target/mips/tcg/translate.h +++ b/target/mips/tcg/translate.h @@ -221,6 +221,7 @@ bool decode_isa_rel6(DisasContext *ctx, uint32_t insn); bool decode_ase_msa(DisasContext *ctx, uint32_t insn); bool decode_ext_txx9(DisasContext *ctx, uint32_t insn); #if defined(TARGET_MIPS64) +bool decode_ase_lcsr(DisasContext *ctx, uint32_t insn); bool decode_ext_tx79(DisasContext *ctx, uint32_t insn); bool decode_ext_octeon(DisasContext *ctx, uint32_t insn); #endif |