diff options
50 files changed, 1922 insertions, 630 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 49f9bce818..e41f0eb92c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1729,6 +1729,9 @@ F: hw/riscv/opentitan.c F: hw/*/ibex_*.c F: include/hw/riscv/opentitan.h F: include/hw/*/ibex_*.h +F: target/riscv/insn_trans/trans_xthead.c.inc +F: target/riscv/xlrbr.decode +F: tests/tcg/riscv64/test-crc32.S Microchip PolarFire SoC Icicle Kit L: qemu-riscv@nongnu.org @@ -4132,7 +4135,7 @@ R: Daniel Henrique Barboza <daniel.barboza@oss.qualcomm.com> L: qemu-riscv@nongnu.org S: Maintained F: tcg/riscv64/ -F: disas/riscv.[ch] +F: disas/riscv*.[ch] S390 TCG target M: Richard Henderson <richard.henderson@linaro.org> diff --git a/configs/targets/riscv32-linux-user.mak b/configs/targets/riscv32-linux-user.mak index f069ab9a0f..d88fdf5e1b 100644 --- a/configs/targets/riscv32-linux-user.mak +++ b/configs/targets/riscv32-linux-user.mak @@ -8,3 +8,4 @@ TARGET_SYSTBL_ABI=32 TARGET_SYSTBL_ABI=common,32,riscv,memfd_secret TARGET_SYSTBL=syscall.tbl TARGET_LONG_BITS=32 +TARGET_NOT_USING_LEGACY_NATIVE_ENDIAN_API=y diff --git a/configs/targets/riscv32-softmmu.mak b/configs/targets/riscv32-softmmu.mak index 26080599be..5d5016d008 100644 --- a/configs/targets/riscv32-softmmu.mak +++ b/configs/targets/riscv32-softmmu.mak @@ -5,3 +5,4 @@ TARGET_XML_FILES= riscv-32bit-cpu.xml riscv-32bit-fpu.xml riscv-64bit-fpu.xml ri TARGET_NEED_FDT=y TARGET_LONG_BITS=32 TARGET_NOT_USING_LEGACY_LDST_PHYS_API=y +TARGET_NOT_USING_LEGACY_NATIVE_ENDIAN_API=y diff --git a/configs/targets/riscv64-bsd-user.mak b/configs/targets/riscv64-bsd-user.mak index bc85d9ed04..5b4e138099 100644 --- a/configs/targets/riscv64-bsd-user.mak +++ b/configs/targets/riscv64-bsd-user.mak @@ -3,3 +3,4 @@ TARGET_BASE_ARCH=riscv TARGET_ABI_DIR=riscv TARGET_XML_FILES= riscv-64bit-cpu.xml riscv-32bit-fpu.xml riscv-64bit-fpu.xml riscv-64bit-virtual.xml TARGET_LONG_BITS=64 +TARGET_NOT_USING_LEGACY_NATIVE_ENDIAN_API=y diff --git a/configs/targets/riscv64-linux-user.mak b/configs/targets/riscv64-linux-user.mak index bca0864512..35621520c5 100644 --- a/configs/targets/riscv64-linux-user.mak +++ b/configs/targets/riscv64-linux-user.mak @@ -8,3 +8,4 @@ TARGET_SYSTBL_ABI=64 TARGET_SYSTBL_ABI=common,64,riscv,rlimit,memfd_secret TARGET_SYSTBL=syscall.tbl TARGET_LONG_BITS=64 +TARGET_NOT_USING_LEGACY_NATIVE_ENDIAN_API=y diff --git a/configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak index 5059c55048..a10dc03c04 100644 --- a/configs/targets/riscv64-softmmu.mak +++ b/configs/targets/riscv64-softmmu.mak @@ -6,3 +6,4 @@ TARGET_XML_FILES= riscv-64bit-cpu.xml riscv-32bit-fpu.xml riscv-64bit-fpu.xml ri TARGET_NEED_FDT=y TARGET_LONG_BITS=64 TARGET_NOT_USING_LEGACY_LDST_PHYS_API=y +TARGET_NOT_USING_LEGACY_NATIVE_ENDIAN_API=y diff --git a/disas/meson.build b/disas/meson.build index bbfa119783..42977a1f74 100644 --- a/disas/meson.build +++ b/disas/meson.build @@ -7,7 +7,8 @@ common_ss.add(when: 'CONFIG_MIPS_DIS', if_true: files('mips.c', 'nanomips.c')) common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files( 'riscv.c', 'riscv-xthead.c', - 'riscv-xventana.c' + 'riscv-xventana.c', + 'riscv-xlrbr.c' )) common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c')) common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c')) diff --git a/disas/riscv-xlrbr.c b/disas/riscv-xlrbr.c new file mode 100644 index 0000000000..57cb434523 --- /dev/null +++ b/disas/riscv-xlrbr.c @@ -0,0 +1,79 @@ +/* + * QEMU RISC-V Disassembler for xlrbr matching the unratified Zbr CRC32 + * bitmanip extension v0.93. + * + * Copyright (c) 2023 Rivos Inc + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" + +#include "disas/riscv.h" +#include "disas/riscv-xlrbr.h" + +typedef enum { + /* 0 is reserved for rv_op_illegal. */ + rv_op_crc32_b = 1, + rv_op_crc32_h = 2, + rv_op_crc32_w = 3, + rv_op_crc32_d = 4, + rv_op_crc32c_b = 5, + rv_op_crc32c_h = 6, + rv_op_crc32c_w = 7, + rv_op_crc32c_d = 8, +} rv_xlrbr_op; + +const rv_opcode_data rv_xlrbr_opcode_data[] = { + { "illegal", rv_codec_illegal, rv_fmt_none, NULL, 0, 0, 0 }, + { "crc32.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32.w", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32.d", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32c.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32c.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32c.w", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "crc32c.d", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, +}; + +void decode_xlrbr(rv_decode *dec, rv_isa isa) +{ + rv_inst inst = dec->inst; + rv_opcode op = rv_op_illegal; + + switch ((inst >> 0) & 0b1111111) { + case 0b0010011: + switch ((inst >> 12) & 0b111) { + case 0b001: + switch ((inst >> 20 & 0b111111111111)) { + case 0b011000010000: + op = rv_op_crc32_b; + break; + case 0b011000010001: + op = rv_op_crc32_h; + break; + case 0b011000010010: + op = rv_op_crc32_w; + break; + case 0b011000010011: + op = rv_op_crc32_d; + break; + case 0b011000011000: + op = rv_op_crc32c_b; + break; + case 0b011000011001: + op = rv_op_crc32c_h; + break; + case 0b011000011010: + op = rv_op_crc32c_w; + break; + case 0b011000011011: + op = rv_op_crc32c_d; + break; + } + break; + } + break; + } + dec->op = op; +} diff --git a/disas/riscv-xlrbr.h b/disas/riscv-xlrbr.h new file mode 100644 index 0000000000..939a69ea6d --- /dev/null +++ b/disas/riscv-xlrbr.h @@ -0,0 +1,19 @@ +/* + * QEMU RISC-V Disassembler for xlrbr matching the unratified Zbr CRC32 + * bitmanip extension v0.93. + * + * Copyright (c) 2023 Rivos Inc + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef DISAS_RISCV_XLRBR_H +#define DISAS_RISCV_XLRBR_H + +#include "disas/riscv.h" + +extern const rv_opcode_data rv_xlrbr_opcode_data[]; + +void decode_xlrbr(rv_decode *, rv_isa); + +#endif /* DISAS_RISCV_XLRBR_H */ diff --git a/disas/riscv.c b/disas/riscv.c index 6f2667482d..d416a4d6b3 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -26,6 +26,7 @@ /* Vendor extensions */ #include "disas/riscv-xthead.h" #include "disas/riscv-xventana.h" +#include "disas/riscv-xlrbr.h" typedef enum { /* 0 is reserved for rv_op_illegal. */ @@ -5434,6 +5435,7 @@ static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst, { has_xtheadmempair_p, xthead_opcode_data, decode_xtheadmempair }, { has_xtheadsync_p, xthead_opcode_data, decode_xtheadsync }, { has_XVentanaCondOps_p, ventana_opcode_data, decode_xventanacondops }, + { has_xlrbr_p, rv_xlrbr_opcode_data, decode_xlrbr }, }; for (size_t i = 0; i < ARRAY_SIZE(decoders); i++) { diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index e9efab16e9..a53d2ace02 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -171,6 +171,12 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) * begin/end_signature symbols exist. */ if (sig_file && begin_sig_addr && end_sig_addr) { + if (end_sig_addr <= begin_sig_addr) { + error_report("Invalid HTIF signature range:" + " begin=0x%" PRIx64 " end=0x%" PRIx64, + begin_sig_addr, end_sig_addr); + return; + } uint64_t sig_len = end_sig_addr - begin_sig_addr; char *sig_data = g_malloc(sig_len); dma_memory_read(&address_space_memory, begin_sig_addr, diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index 9c1491bd04..e27e5fb394 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -131,6 +131,7 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); + size_t hartid_offset = hartid - mtimer->hartid_base; CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu_env(cpu) : NULL; if (!env) { @@ -138,11 +139,11 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, "aclint-mtimer: invalid hartid: %zu", hartid); } else if ((addr & 0x7) == 0) { /* timecmp_lo for RV32/RV64 or timecmp for RV64 */ - uint64_t timecmp = mtimer->timecmp[hartid]; + uint64_t timecmp = mtimer->timecmp[hartid_offset]; return (size == 4) ? (timecmp & 0xFFFFFFFF) : timecmp; } else if ((addr & 0x7) == 4) { /* timecmp_hi */ - uint64_t timecmp = mtimer->timecmp[hartid]; + uint64_t timecmp = mtimer->timecmp[hartid_offset]; return (timecmp >> 32) & 0xFFFFFFFF; } else { qemu_log_mask(LOG_UNIMP, @@ -174,6 +175,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); + size_t hartid_offset = hartid - mtimer->hartid_base; CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu_env(cpu) : NULL; if (!env) { @@ -182,7 +184,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, } else if ((addr & 0x7) == 0) { if (size == 4) { /* timecmp_lo for RV32/RV64 */ - uint64_t timecmp_hi = mtimer->timecmp[hartid] >> 32; + uint64_t timecmp_hi = mtimer->timecmp[hartid_offset] >> 32; riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, timecmp_hi << 32 | (value & 0xFFFFFFFF)); } else { @@ -193,7 +195,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, } else if ((addr & 0x7) == 4) { if (size == 4) { /* timecmp_hi for RV32/RV64 */ - uint64_t timecmp_lo = mtimer->timecmp[hartid]; + uint64_t timecmp_lo = mtimer->timecmp[hartid_offset]; riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, value << 32 | (timecmp_lo & 0xFFFFFFFF)); } else { diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index e5490beda0..9086793b7a 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -163,13 +163,27 @@ hwaddr riscv_load_firmware(const char *firmware_filename, g_assert(firmware_filename != NULL); - if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL, - &firmware_entry, NULL, &firmware_end, NULL, - 0, EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { + firmware_size = load_elf_ram_sym(firmware_filename, NULL, NULL, NULL, + &firmware_entry, NULL, &firmware_end, + NULL, 0, EM_RISCV, 1, 0, NULL, false, + sym_cb); + if (firmware_size > 0) { *firmware_load_addr = firmware_entry; return firmware_end; } + if (firmware_size != ELF_LOAD_NOT_ELF) { + /* + * If the user specified an ELF format firmware that could not be + * loaded as an ELF, it's possible that loading it as a binary is + * not what was intended. + */ + warn_report("could not load ELF format firmware '%s' (%s). " + "Attempting to load as binary.", + firmware_filename, + load_elf_strerror(firmware_size)); + } + firmware_size = load_image_targphys_as(firmware_filename, *firmware_load_addr, current_machine->ram_size, NULL, @@ -179,7 +193,8 @@ hwaddr riscv_load_firmware(const char *firmware_filename, return *firmware_load_addr + firmware_size; } - error_report("could not load firmware '%s'", firmware_filename); + error_report("could not load firmware '%s': %s", firmware_filename, + load_elf_strerror(firmware_size)); exit(1); } diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index 47fe01bee5..a938fd3eb4 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -189,6 +189,7 @@ enum riscv_iommu_ddtp_modes { #define RISCV_IOMMU_REG_IPSR 0x0054 #define RISCV_IOMMU_IPSR_CIP BIT(0) #define RISCV_IOMMU_IPSR_FIP BIT(1) +#define RISCV_IOMMU_IPSR_PMIP BIT(2) #define RISCV_IOMMU_IPSR_PIP BIT(3) enum { diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c index c5034bff79..e8d284ac8b 100644 --- a/hw/riscv/riscv-iommu-hpm.c +++ b/hw/riscv/riscv-iommu-hpm.c @@ -228,6 +228,7 @@ static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value) } overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns; + s->irq_overflow_left = 0; if (overflow_at > INT64_MAX) { s->irq_overflow_left = overflow_at - INT64_MAX; diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index c3c9ed6469..7ba3240552 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -1572,11 +1572,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ); iot_cache = g_hash_table_ref(s->iot_cache); - /* - * TC[32] is reserved for custom extensions, used here to temporarily - * enable automatic page-request generation for ATS queries. - */ - enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); + enable_pri = (iotlb->perm == IOMMU_NONE) && + (ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI); enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); /* Check for ATS request. */ @@ -2153,6 +2150,10 @@ static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) ipsr_clr |= RISCV_IOMMU_IPSR_FIP; } + if (!(data & RISCV_IOMMU_IPSR_PMIP)) { + ipsr_clr |= RISCV_IOMMU_IPSR_PMIP; + } + if (data & RISCV_IOMMU_IPSR_PIP) { pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index f1406cb683..fd6ca5dbc4 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -35,9 +35,11 @@ #include "hw/riscv/virt.h" #include "hw/riscv/numa.h" #include "hw/virtio/virtio-acpi.h" +#include "kvm/kvm_riscv.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "system/kvm.h" #include "system/reset.h" #define ACPI_BUILD_TABLE_SIZE 0x20000 @@ -296,7 +298,10 @@ static void build_rhct(GArray *table_data, /* Time Base Frequency */ build_append_int_noprefix(table_data, - RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, 8); + kvm_enabled() ? + kvm_riscv_get_timebase_frequency(&s->soc->harts[0]) : + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, + 8); /* ISA + N hart info */ num_rhct_nodes = 1 + ms->smp.cpus; diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h index 4f83d5bec9..40cc699031 100644 --- a/include/exec/translation-block.h +++ b/include/exec/translation-block.h @@ -65,6 +65,7 @@ struct TranslationBlock { * arm: an extension of tb->flags, * s390x: instruction data for EXECUTE, * sparc: the next pc of the instruction queue (for delay slots). + * riscv: an extension of tb->flags, */ uint64_t cs_base; diff --git a/include/qemu/crc32.h b/include/qemu/crc32.h new file mode 100644 index 0000000000..9824a8cb5d --- /dev/null +++ b/include/qemu/crc32.h @@ -0,0 +1,14 @@ +/* + * CRC32 Checksum + * + * Copyright (c) 2026 QEMU contributors + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_CRC32_H +#define QEMU_CRC32_H + +extern const uint32_t crc32_table[256]; + +#endif diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h index 88b4d2b3b3..3d5ba189ef 100644 --- a/include/qemu/crc32c.h +++ b/include/qemu/crc32c.h @@ -28,6 +28,7 @@ #ifndef QEMU_CRC32C_H #define QEMU_CRC32C_H +extern const uint32_t crc32c_table[256]; uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length); uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt); diff --git a/target/riscv/bitmanip_helper.c b/target/riscv/bitmanip_helper.c index e9c8d7f778..1156a87dd3 100644 --- a/target/riscv/bitmanip_helper.c +++ b/target/riscv/bitmanip_helper.c @@ -23,6 +23,8 @@ #include "exec/target_long.h" #include "exec/helper-proto.h" #include "tcg/tcg.h" +#include "qemu/crc32.h" +#include "qemu/crc32c.h" target_ulong HELPER(clmul)(target_ulong rs1, target_ulong rs2) { @@ -129,3 +131,21 @@ target_ulong HELPER(xperm8)(target_ulong rs1, target_ulong rs2) { return do_xperm(rs1, rs2, 3); } + +target_ulong HELPER(crc32)(target_ulong rs1, target_ulong sz) +{ + for (target_ulong i = 0; i < sz; i++) { + rs1 = crc32_table[rs1 & 0xFF] ^ (rs1 >> 8); + } + + return rs1; +} + +target_ulong HELPER(crc32c)(target_ulong rs1, target_ulong sz) +{ + for (target_ulong i = 0; i < sz; i++) { + rs1 = crc32c_table[rs1 & 0xFF] ^ (rs1 >> 8); + } + + return rs1; +} diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 8ac935ac06..ce15a17c37 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -189,6 +189,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), ISA_EXT_DATA_ENTRY(zve64x, PRIV_VERSION_1_10_0, ext_zve64x), + ISA_EXT_DATA_ENTRY(zvfbfa, PRIV_VERSION_1_13_0, ext_zvfbfa), ISA_EXT_DATA_ENTRY(zvfbfmin, PRIV_VERSION_1_12_0, ext_zvfbfmin), ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), @@ -327,60 +328,61 @@ const char * const riscv_rvv_regnames[] = { }; static const char * const riscv_excp_names[] = { - "misaligned_fetch", - "fault_fetch", - "illegal_instruction", - "breakpoint", - "misaligned_load", - "fault_load", - "misaligned_store", - "fault_store", - "user_ecall", - "supervisor_ecall", - "hypervisor_ecall", - "machine_ecall", - "exec_page_fault", - "load_page_fault", - "reserved", - "store_page_fault", - "double_trap", - "reserved", - "reserved", - "reserved", - "guest_exec_page_fault", - "guest_load_page_fault", - "reserved", - "guest_store_page_fault", + [RISCV_EXCP_INST_ADDR_MIS] = "misaligned_fetch", + [RISCV_EXCP_INST_ACCESS_FAULT] = "fault_fetch", + [RISCV_EXCP_ILLEGAL_INST] = "illegal_instruction", + [RISCV_EXCP_BREAKPOINT] = "breakpoint", + [RISCV_EXCP_LOAD_ADDR_MIS] = "misaligned_load", + [RISCV_EXCP_LOAD_ACCESS_FAULT] = "fault_load", + [RISCV_EXCP_STORE_AMO_ADDR_MIS] = "misaligned_store", + [RISCV_EXCP_STORE_AMO_ACCESS_FAULT] = "fault_store", + [RISCV_EXCP_U_ECALL] = "user_ecall", + [RISCV_EXCP_S_ECALL] = "supervisor_ecall", + [RISCV_EXCP_VS_ECALL] = "hypervisor_ecall", + [RISCV_EXCP_M_ECALL] = "machine_ecall", + [RISCV_EXCP_INST_PAGE_FAULT] = "exec_page_fault", + [RISCV_EXCP_LOAD_PAGE_FAULT] = "load_page_fault", + [RISCV_EXCP_STORE_PAGE_FAULT] = "store_page_fault", + [RISCV_EXCP_DOUBLE_TRAP] = "double_trap", + [RISCV_EXCP_SW_CHECK] = "sw_check", + [RISCV_EXCP_HW_ERR] = "hw_error", + [RISCV_EXCP_INST_GUEST_PAGE_FAULT] = "guest_exec_page_fault", + [RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT] = "guest_load_page_fault", + [RISCV_EXCP_VIRT_INSTRUCTION_FAULT] = "virt_illegal_instruction", + [RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT] = "guest_store_page_fault", + [RISCV_EXCP_SEMIHOST] = "semihost", }; static const char * const riscv_intr_names[] = { - "u_software", - "s_software", - "vs_software", - "m_software", - "u_timer", - "s_timer", - "vs_timer", - "m_timer", - "u_external", - "s_external", - "vs_external", - "m_external", - "reserved", - "reserved", - "reserved", - "reserved" + [IRQ_U_SOFT] = "u_software", + [IRQ_S_SOFT] = "s_software", + [IRQ_VS_SOFT] = "vs_software", + [IRQ_M_SOFT] = "m_software", + [IRQ_U_TIMER] = "u_timer", + [IRQ_S_TIMER] = "s_timer", + [IRQ_VS_TIMER] = "vs_timer", + [IRQ_M_TIMER] = "m_timer", + [IRQ_U_EXT] = "u_external", + [IRQ_S_EXT] = "s_external", + [IRQ_VS_EXT] = "vs_external", + [IRQ_M_EXT] = "m_external", + [IRQ_S_GEXT] = "s_guest_external", + [IRQ_PMU_OVF] = "counter_overflow", }; const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) { if (async) { - return (cause < ARRAY_SIZE(riscv_intr_names)) ? - riscv_intr_names[cause] : "(unknown)"; + if ((cause < ARRAY_SIZE(riscv_intr_names)) && riscv_intr_names[cause]) { + return riscv_intr_names[cause]; + } } else { - return (cause < ARRAY_SIZE(riscv_excp_names)) ? - riscv_excp_names[cause] : "(unknown)"; + if ((cause < ARRAY_SIZE(riscv_excp_names)) && riscv_excp_names[cause]) { + return riscv_excp_names[cause]; + } } + + return "(unknown)"; } void riscv_cpu_set_misa_ext(CPURISCVState *env, uint32_t ext) @@ -1265,6 +1267,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false), MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false), MULTI_EXT_CFG_BOOL("zve64x", ext_zve64x, false), + MULTI_EXT_CFG_BOOL("zvfbfa", ext_zvfbfa, false), MULTI_EXT_CFG_BOOL("zvfbfmin", ext_zvfbfmin, false), MULTI_EXT_CFG_BOOL("zvfbfwma", ext_zvfbfwma, false), MULTI_EXT_CFG_BOOL("zvfh", ext_zvfh, false), @@ -1373,6 +1376,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = { MULTI_EXT_CFG_BOOL("xmipscbop", ext_xmipscbop, false), MULTI_EXT_CFG_BOOL("xmipscmov", ext_xmipscmov, false), MULTI_EXT_CFG_BOOL("xmipslsp", ext_xmipslsp, false), + MULTI_EXT_CFG_BOOL("xlrbr", ext_xlrbr, false), { }, }; @@ -2620,6 +2624,15 @@ static RISCVCPUImpliedExtsRule SSCTR_IMPLIED = { }, }; +static RISCVCPUImpliedExtsRule ZVFBFA_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfbfa), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), CPU_CFG_OFFSET(ext_zfbfmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[] = { &RVA_IMPLIED, &RVD_IMPLIED, &RVF_IMPLIED, &RVM_IMPLIED, &RVV_IMPLIED, NULL @@ -2633,8 +2646,8 @@ RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = { &ZHINX_IMPLIED, &ZHINXMIN_IMPLIED, &ZICNTR_IMPLIED, &ZIHPM_IMPLIED, &ZK_IMPLIED, &ZKN_IMPLIED, &ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED, - &ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, - &ZVE64X_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED, + &ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, &ZVE64X_IMPLIED, + &ZVFBFA_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED, &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED, &ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED, &ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, &SSCFG_IMPLIED, @@ -3059,7 +3072,8 @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.ext_zba = true, .cfg.ext_zbb = true, .cfg.ext_zbc = true, - .cfg.ext_zbs = true + .cfg.ext_zbs = true, + .cfg.ext_xlrbr = true ), DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E31, TYPE_RISCV_CPU_SIFIVE_E, @@ -3194,6 +3208,8 @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.ext_svnapot = true, .cfg.ext_svpbmt = true, + .cfg.mvendorid = TENSTORRENT_VENDOR_ID, + .cfg.max_satp_mode = VM_1_10_SV57, ), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 35d1f6362c..81c41e3429 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -191,8 +191,8 @@ FIELD(VTYPE, VLMUL, 0, 3) FIELD(VTYPE, VSEW, 3, 3) FIELD(VTYPE, VTA, 6, 1) FIELD(VTYPE, VMA, 7, 1) -FIELD(VTYPE, VEDIV, 8, 2) -FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11) +FIELD(VTYPE, ALTFMT, 8, 1) +FIELD(VTYPE, RESERVED, 9, sizeof(target_ulong) * 8 - 10) typedef struct PMUCTRState { /* Current value of a counter */ @@ -703,6 +703,9 @@ FIELD(TB_FLAGS, BCFI_ENABLED, 28, 1) FIELD(TB_FLAGS, PM_PMM, 29, 2) FIELD(TB_FLAGS, PM_SIGNEXTEND, 31, 1) +FIELD(EXT_TB_FLAGS, MISA_EXT, 0, 32) +FIELD(EXT_TB_FLAGS, ALTFMT, 32, 1) + #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) #else @@ -803,6 +806,43 @@ static inline RISCVMXL riscv_cpu_sxl(CPURISCVState *env) } #endif +/* + * Returns the current effective privilege mode. + * + * @env: CPURISCVState + * @priv: The returned effective privilege mode. + * @virt: The returned effective virtualization mode. + * + * Returns true if the effective privilege mode is modified. + */ +static inline QEMU_ALWAYS_INLINE +bool riscv_cpu_eff_priv(CPURISCVState *env, int *priv, bool *virt) +{ + int mode = env->priv; + bool virt_enabled = false; + bool mode_modified = false; + +#ifndef CONFIG_USER_ONLY + if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) { + mode = get_field(env->mstatus, MSTATUS_MPP); + virt_enabled = get_field(env->mstatus, MSTATUS_MPV) && (mode != PRV_M); + mode_modified = true; + } else { + virt_enabled = env->virt_enabled; + } +#endif + + if (priv) { + *priv = mode; + } + + if (virt) { + *virt = virt_enabled; + } + + return mode_modified; +} + static inline bool riscv_cpu_allow_16bit_insn(const RISCVCPUConfig *cfg, target_long priv_ver, uint32_t misa_ext) @@ -848,9 +888,9 @@ static inline uint32_t vext_get_vlmax(uint32_t vlenb, uint32_t vsew, bool riscv_cpu_is_32bit(RISCVCPU *cpu); -bool riscv_cpu_virt_mem_enabled(CPURISCVState *env); +bool riscv_cpu_virt_mem_enabled(CPURISCVState *env, bool is_vm_ldst); RISCVPmPmm riscv_pm_get_pmm(CPURISCVState *env); -RISCVPmPmm riscv_pm_get_virt_pmm(CPURISCVState *env); +RISCVPmPmm riscv_pm_get_vm_ldst_pmm(CPURISCVState *env); uint32_t riscv_pm_get_pmlen(RISCVPmPmm pmm); RISCVException riscv_csrr(CPURISCVState *env, int csrno, diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index cd1cba797c..211d0708ba 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -69,5 +69,6 @@ MATERIALISE_EXT_PREDICATE(xtheadmemidx) MATERIALISE_EXT_PREDICATE(xtheadmempair) MATERIALISE_EXT_PREDICATE(xtheadsync) MATERIALISE_EXT_PREDICATE(XVentanaCondOps) +MATERIALISE_EXT_PREDICATE(xlrbr); #endif diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index cd1a5ec56b..734fa079f2 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -100,6 +100,7 @@ BOOL_FIELD(ext_zvks) BOOL_FIELD(ext_zvksc) BOOL_FIELD(ext_zvksg) BOOL_FIELD(ext_zmmul) +BOOL_FIELD(ext_zvfbfa) BOOL_FIELD(ext_zvfbfmin) BOOL_FIELD(ext_zvfbfwma) BOOL_FIELD(ext_zvfh) @@ -154,6 +155,7 @@ BOOL_FIELD(ext_XVentanaCondOps) BOOL_FIELD(ext_xmipscbop) BOOL_FIELD(ext_xmipscmov) BOOL_FIELD(ext_xmipslsp) +BOOL_FIELD(ext_xlrbr) BOOL_FIELD(mmu) BOOL_FIELD(pmp) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index dd6c861a90..39c3486ae0 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -45,19 +45,14 @@ int riscv_env_mmu_index(CPURISCVState *env, bool ifetch) #else bool virt = env->virt_enabled; int mode = env->priv; + bool mode_modified = false; /* All priv -> mmu_idx mapping are here */ if (!ifetch) { - uint64_t status = env->mstatus; - - if (mode == PRV_M && get_field(status, MSTATUS_MPRV)) { - mode = get_field(env->mstatus, MSTATUS_MPP); - virt = get_field(env->mstatus, MSTATUS_MPV) && - (mode != PRV_M); - if (virt) { - status = env->vsstatus; - } - } + mode_modified = riscv_cpu_eff_priv(env, &mode, &virt); + uint64_t status = (mode_modified && virt) ? env->vsstatus : + env->mstatus; + if (mode == PRV_S && get_field(status, MSTATUS_SUM)) { mode = MMUIdx_S_SUM; } @@ -136,13 +131,47 @@ bool riscv_env_smode_dbltrp_enabled(CPURISCVState *env, bool virt) #endif } +/* + * Returns the effective PMM field. + * + * @env: CPURISCVState + * + * The PMM field selection logic for each effective privilege mode + * is as follows: + * + * - mstatus.MXR = 1: disabled + * + * - Smmpm + Smnpm + Ssnpm: + * M-mode: mseccfg.PMM + * S-mode: menvcfg.PMM + * U-mode: senvcfg.PMM + * VS-mode: henvcfg.PMM + * VU-mode: senvcfg.PMM + * + * - Smmpm + Smnpm (RVS implemented): + * M-mode: mseccfg.PMM + * S-mode: menvcfg.PMM + * U/VS/VU: disabled (Ssnpm not present) + * + * - Smmpm + Smnpm (RVS not implemented): + * M-mode: mseccfg.PMM + * U-mode: menvcfg.PMM + * S/VS/VU: disabled (no S-mode) + * + * - Smmpm only: + * M-mode: mseccfg.PMM + * Other existing modes: disabled + */ RISCVPmPmm riscv_pm_get_pmm(CPURISCVState *env) { #ifndef CONFIG_USER_ONLY - int priv_mode = cpu_address_mode(env); + int priv_mode; + bool virt; + + riscv_cpu_eff_priv(env, &priv_mode, &virt); - if (get_field(env->mstatus, MSTATUS_MPRV) && - get_field(env->mstatus, MSTATUS_MXR)) { + if ((priv_mode != PRV_M && get_field(env->mstatus, MSTATUS_MXR)) || + (virt && get_field(env->vsstatus, MSTATUS_MXR))) { return PMM_FIELD_DISABLED; } @@ -154,12 +183,14 @@ RISCVPmPmm riscv_pm_get_pmm(CPURISCVState *env) } break; case PRV_S: - if (riscv_cpu_cfg(env)->ext_smnpm) { - if (get_field(env->mstatus, MSTATUS_MPV)) { - return get_field(env->henvcfg, HENVCFG_PMM); - } else { + if (!virt) { + if (riscv_cpu_cfg(env)->ext_smnpm) { return get_field(env->menvcfg, MENVCFG_PMM); } + } else { + if (riscv_cpu_cfg(env)->ext_ssnpm) { + return get_field(env->henvcfg, HENVCFG_PMM); + } } break; case PRV_U: @@ -176,41 +207,60 @@ RISCVPmPmm riscv_pm_get_pmm(CPURISCVState *env) default: g_assert_not_reached(); } + return PMM_FIELD_DISABLED; #else return PMM_FIELD_DISABLED; #endif } -RISCVPmPmm riscv_pm_get_virt_pmm(CPURISCVState *env) +RISCVPmPmm riscv_pm_get_vm_ldst_pmm(CPURISCVState *env) { #ifndef CONFIG_USER_ONLY - int priv_mode = cpu_address_mode(env); + int priv_mode; + + if (!riscv_cpu_cfg(env)->ext_ssnpm || + get_field(env->mstatus, MSTATUS_MXR) || + get_field(env->vsstatus, MSTATUS_MXR)) { + return PMM_FIELD_DISABLED; + } + + priv_mode = get_field(env->hstatus, HSTATUS_SPVP); - if (priv_mode == PRV_U) { - return get_field(env->hstatus, HSTATUS_HUPMM); + if (priv_mode == PRV_S) { + /* Effective privilege mode: VS */ + return get_field(env->henvcfg, HENVCFG_PMM); } else { - if (get_field(env->hstatus, HSTATUS_SPVP)) { - return get_field(env->henvcfg, HENVCFG_PMM); - } else { - return get_field(env->senvcfg, SENVCFG_PMM); - } + /* Effective privilege mode: VU */ + return (env->priv == PRV_U) ? get_field(env->hstatus, HSTATUS_HUPMM) : + get_field(env->senvcfg, SENVCFG_PMM); } #else return PMM_FIELD_DISABLED; #endif } -bool riscv_cpu_virt_mem_enabled(CPURISCVState *env) +bool riscv_cpu_virt_mem_enabled(CPURISCVState *env, bool is_vm_ldst) { #ifndef CONFIG_USER_ONLY int satp_mode = 0; - int priv_mode = cpu_address_mode(env); + uint64_t satp; + int priv_mode; + bool virt = false; + + if (!is_vm_ldst) { + riscv_cpu_eff_priv(env, &priv_mode, &virt); + } else { + priv_mode = get_field(env->hstatus, HSTATUS_SPVP); + virt = true; + } + + satp = virt ? env->vsatp : env->satp; if (riscv_cpu_mxl(env) == MXL_RV32) { - satp_mode = get_field(env->satp, SATP32_MODE); + satp_mode = get_field(satp, SATP32_MODE); } else { - satp_mode = get_field(env->satp, SATP64_MODE); + satp_mode = get_field(satp, SATP64_MODE); } return ((satp_mode != VM_1_10_MBARE) && (priv_mode != PRV_M)); @@ -1316,12 +1366,15 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, adue = adue && (env->henvcfg & HENVCFG_ADUE); } - int ptshift = (levels - 1) * ptidxbits; + int ptshift; target_ulong pte; hwaddr pte_addr; + const hwaddr base_root = base; int i; restart: + ptshift = (levels - 1) * ptidxbits; + base = base_root; for (i = 0; i < levels; i++, ptshift -= ptidxbits) { target_ulong idx; if (i == 0) { @@ -1365,9 +1418,9 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } if (riscv_cpu_mxl(env) == MXL_RV32) { - pte = address_space_ldl(cs->as, pte_addr, attrs, &res); + pte = address_space_ldl_le(cs->as, pte_addr, attrs, &res); } else { - pte = address_space_ldq(cs->as, pte_addr, attrs, &res); + pte = address_space_ldq_le(cs->as, pte_addr, attrs, &res); } if (res != MEMTX_OK) { diff --git a/target/riscv/cpu_vendorid.h b/target/riscv/cpu_vendorid.h index f1ffc66542..751a13aace 100644 --- a/target/riscv/cpu_vendorid.h +++ b/target/riscv/cpu_vendorid.h @@ -8,4 +8,6 @@ #define VEYRON_V1_MIMPID 0x111 #define VEYRON_V1_MVENDORID 0x61f +#define TENSTORRENT_VENDOR_ID 0x7a1 + #endif /* TARGET_RISCV_CPU_VENDORID_H */ diff --git a/target/riscv/csr.c b/target/riscv/csr.c index a75281539b..da366cf562 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -502,6 +502,15 @@ static RISCVException mstateen(CPURISCVState *env, int csrno) return any(env, csrno); } +static RISCVException mstateen_32(CPURISCVState *env, int csrno) +{ + if (riscv_cpu_mxl(env) != MXL_RV32) { + return RISCV_EXCP_ILLEGAL_INST; + } + + return mstateen(env, csrno); +} + static RISCVException hstateen_pred(CPURISCVState *env, int csrno, int base) { if (!riscv_cpu_cfg(env)->ext_smstateen) { @@ -533,6 +542,10 @@ static RISCVException hstateen(CPURISCVState *env, int csrno) static RISCVException hstateenh(CPURISCVState *env, int csrno) { + if (riscv_cpu_mxl(env) != MXL_RV32) { + return RISCV_EXCP_ILLEGAL_INST; + } + return hstateen_pred(env, csrno, CSR_HSTATEEN0H); } @@ -3353,7 +3366,15 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, } } - env->henvcfg = val & mask; + if (riscv_cpu_mxl(env) == MXL_RV32) { + /* + * RV32 stores STCE/ADUE/PBMTE/DTE in henvcfgh, so a low-half henvcfg + * write must not clobber the upper 32 bits. + */ + env->henvcfg = (env->henvcfg & ~0xFFFFFFFFULL) | (val & mask); + } else { + env->henvcfg = val & mask; + } if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } @@ -3439,25 +3460,29 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno, wr_mask |= SMSTATEEN0_FCSR; } - if (env->priv_ver >= PRIV_VERSION_1_13_0) { - wr_mask |= SMSTATEEN0_P1P13; - } + if (riscv_cpu_mxl(env) == MXL_RV64) { + if (env->priv_ver >= PRIV_VERSION_1_13_0) { + wr_mask |= SMSTATEEN0_P1P13; + } - if (riscv_cpu_cfg(env)->ext_smaia || riscv_cpu_cfg(env)->ext_smcsrind) { - wr_mask |= SMSTATEEN0_SVSLCT; - } + if (riscv_cpu_cfg(env)->ext_smaia || + riscv_cpu_cfg(env)->ext_smcsrind) { + wr_mask |= SMSTATEEN0_SVSLCT; + } - /* - * As per the AIA specification, SMSTATEEN0_IMSIC is valid only if IMSIC is - * implemented. However, that information is with MachineState and we can't - * figure that out in csr.c. Just enable if Smaia is available. - */ - if (riscv_cpu_cfg(env)->ext_smaia) { - wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); - } + /* + * As per the AIA specification, SMSTATEEN0_IMSIC is valid + * only if IMSIC is implemented. However, that information is + * with MachineState and we can't figure that out in csr.c. + * Just enable if Smaia is available. + */ + if (riscv_cpu_cfg(env)->ext_smaia) { + wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); + } - if (riscv_cpu_cfg(env)->ext_ssctr) { - wr_mask |= SMSTATEEN0_CTR; + if (riscv_cpu_cfg(env)->ext_ssctr) { + wr_mask |= SMSTATEEN0_CTR; + } } return write_mstateen(env, csrno, wr_mask, new_val); @@ -3499,6 +3524,20 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, wr_mask |= SMSTATEEN0_P1P13; } + if (riscv_cpu_cfg(env)->ext_smaia || riscv_cpu_cfg(env)->ext_smcsrind) { + wr_mask |= SMSTATEEN0_SVSLCT; + } + + /* + * As per the AIA specification, SMSTATEEN0_IMSIC is valid only if + * IMSIC is implemented. However, that information is with + * MachineState and we can't figure that out in csr.c. Just enable + * if Smaia is available. + */ + if (riscv_cpu_cfg(env)->ext_smaia) { + wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); + } + if (riscv_cpu_cfg(env)->ext_ssctr) { wr_mask |= SMSTATEEN0_CTR; } @@ -3544,21 +3583,25 @@ static RISCVException write_hstateen0(CPURISCVState *env, int csrno, wr_mask |= SMSTATEEN0_FCSR; } - if (riscv_cpu_cfg(env)->ext_ssaia || riscv_cpu_cfg(env)->ext_sscsrind) { - wr_mask |= SMSTATEEN0_SVSLCT; - } + if (riscv_cpu_mxl(env) == MXL_RV64) { + if (riscv_cpu_cfg(env)->ext_ssaia || + riscv_cpu_cfg(env)->ext_sscsrind) { + wr_mask |= SMSTATEEN0_SVSLCT; + } - /* - * As per the AIA specification, SMSTATEEN0_IMSIC is valid only if IMSIC is - * implemented. However, that information is with MachineState and we can't - * figure that out in csr.c. Just enable if Ssaia is available. - */ - if (riscv_cpu_cfg(env)->ext_ssaia) { - wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); - } + /* + * As per the AIA specification, SMSTATEEN0_IMSIC is valid + * only if IMSIC is implemented. However, that information is + * with MachineState and we can't figure that out in csr.c. + * Just enable if Ssaia is available. + */ + if (riscv_cpu_cfg(env)->ext_ssaia) { + wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); + } - if (riscv_cpu_cfg(env)->ext_ssctr) { - wr_mask |= SMSTATEEN0_CTR; + if (riscv_cpu_cfg(env)->ext_ssctr) { + wr_mask |= SMSTATEEN0_CTR; + } } return write_hstateen(env, csrno, wr_mask, new_val); @@ -3600,6 +3643,20 @@ static RISCVException write_hstateen0h(CPURISCVState *env, int csrno, { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + if (riscv_cpu_cfg(env)->ext_ssaia || riscv_cpu_cfg(env)->ext_sscsrind) { + wr_mask |= SMSTATEEN0_SVSLCT; + } + + /* + * As per the AIA specification, SMSTATEEN0_IMSIC is valid only if + * IMSIC is implemented. However, that information is with + * MachineState and we can't figure that out in csr.c. Just enable + * if Ssaia is available. + */ + if (riscv_cpu_cfg(env)->ext_ssaia) { + wr_mask |= (SMSTATEEN0_AIA | SMSTATEEN0_IMSIC); + } + if (riscv_cpu_cfg(env)->ext_ssctr) { wr_mask |= SMSTATEEN0_CTR; } @@ -3649,7 +3706,7 @@ static RISCVException write_sstateen(CPURISCVState *env, int csrno, static RISCVException write_sstateen0(CPURISCVState *env, int csrno, target_ulong new_val, uintptr_t ra) { - uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + uint64_t wr_mask = 0; if (!riscv_has_ext(env, RVF)) { wr_mask |= SMSTATEEN0_FCSR; @@ -3671,6 +3728,14 @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, uint64_t old_mip, mask = wr_mask & delegable_ints; uint32_t gin; + /* + * When mvien[9]=1, mip.SEIP is read-only and reflects only + * the external interrupt signal from the interrupt controller. + */ + if (env->mvien & MIP_SEIP) { + mask &= ~MIP_SEIP; + } + if (mask & MIP_SEIP) { env->software_seip = new_val & MIP_SEIP; new_val |= env->external_seip * MIP_SEIP; @@ -5929,25 +5994,25 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { /* Smstateen extension CSRs */ [CSR_MSTATEEN0] = { "mstateen0", mstateen, read_mstateen, write_mstateen0, .min_priv_ver = PRIV_VERSION_1_12_0 }, - [CSR_MSTATEEN0H] = { "mstateen0h", mstateen, read_mstateenh, + [CSR_MSTATEEN0H] = { "mstateen0h", mstateen_32, read_mstateenh, write_mstateen0h, .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_MSTATEEN1] = { "mstateen1", mstateen, read_mstateen, write_mstateen_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, - [CSR_MSTATEEN1H] = { "mstateen1h", mstateen, read_mstateenh, + [CSR_MSTATEEN1H] = { "mstateen1h", mstateen_32, read_mstateenh, write_mstateenh_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_MSTATEEN2] = { "mstateen2", mstateen, read_mstateen, write_mstateen_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, - [CSR_MSTATEEN2H] = { "mstateen2h", mstateen, read_mstateenh, + [CSR_MSTATEEN2H] = { "mstateen2h", mstateen_32, read_mstateenh, write_mstateenh_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_MSTATEEN3] = { "mstateen3", mstateen, read_mstateen, write_mstateen_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, - [CSR_MSTATEEN3H] = { "mstateen3h", mstateen, read_mstateenh, + [CSR_MSTATEEN3H] = { "mstateen3h", mstateen_32, read_mstateenh, write_mstateenh_1_3, .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_HSTATEEN0] = { "hstateen0", hstateen, read_hstateen, write_hstateen0, diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c index 6a5b7a82fd..2c6ccd4761 100644 --- a/target/riscv/gdbstub.c +++ b/target/riscv/gdbstub.c @@ -20,6 +20,7 @@ #include "exec/gdbstub.h" #include "gdbstub/helpers.h" #include "cpu.h" +#include "internals.h" struct TypeSize { const char *gdb_type; @@ -47,6 +48,11 @@ static const struct TypeSize vec_lanes[] = { { "uint8", "bytes", 8, 'b' }, }; +static uint64_t ldn(CPURISCVState *env, uint8_t *mem_buf, size_t regsz) +{ + return (mo_endian_env(env) == MO_LE ? ldn_le_p : ldn_be_p)(mem_buf, regsz); +} + int riscv_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cs); @@ -79,33 +85,20 @@ int riscv_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cs); RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; - int length = 0; - uint64_t tmp; + const size_t regsize = mcc->def->misa_mxl_max == MXL_RV32 ? 4 : 8; + uint64_t tmp = ldn(env, mem_buf, regsize); - switch (mcc->def->misa_mxl_max) { - case MXL_RV32: - tmp = (int32_t)ldl_p(mem_buf); - length = 4; - break; - case MXL_RV64: - case MXL_RV128: - if (env->xl < MXL_RV64) { - tmp = (int32_t)ldq_p(mem_buf); - } else { - tmp = ldq_p(mem_buf); - } - length = 8; - break; - default: - g_assert_not_reached(); + if (env->xl < MXL_RV64) { + tmp = (int32_t)tmp; } + if (n > 0 && n < 32) { env->gpr[n] = tmp; } else if (n == 32) { env->pc = tmp; } - return length; + return regsize; } static int riscv_gdb_get_fpu(CPUState *cs, GByteArray *buf, int n) @@ -130,7 +123,7 @@ static int riscv_gdb_set_fpu(CPUState *cs, uint8_t *mem_buf, int n) CPURISCVState *env = &cpu->env; if (n < 32) { - env->fpr[n] = ldq_p(mem_buf); /* always 64-bit */ + env->fpr[n] = ldn(env, mem_buf, 8); /* always 64-bit */ return sizeof(uint64_t); } return 0; @@ -162,7 +155,7 @@ static int riscv_gdb_set_vector(CPUState *cs, uint8_t *mem_buf, int n) if (n < 32) { int i; for (i = 0; i < vlenb; i += 8) { - env->vreg[(n * vlenb + i) / 8] = ldq_p(mem_buf + i); + env->vreg[(n * vlenb + i) / 8] = ldn(env, mem_buf + i, 8); } return vlenb; } @@ -194,7 +187,7 @@ static int riscv_gdb_set_csr(CPUState *cs, uint8_t *mem_buf, int n) const unsigned regsz = riscv_cpu_is_32bit(cpu) ? 4 : 8; if (n < CSR_TABLE_SIZE) { - uint64_t val = ldn_p(mem_buf, regsz); + uint64_t val = ldn(env, mem_buf, regsz); int result; result = riscv_csrrw_debug(env, n, NULL, val, -1); @@ -230,8 +223,7 @@ static int riscv_gdb_set_virtual(CPUState *cs, uint8_t *mem_buf, int n) const unsigned regsz = riscv_cpu_is_32bit(cpu) ? 4 : 8; #ifndef CONFIG_USER_ONLY CPURISCVState *env = &cpu->env; - - target_ulong new_priv = ldn_p(mem_buf, regsz) & 0x3; + uint64_t new_priv = ldn(env, mem_buf, regsz) & 0x3; bool new_virt = 0; if (new_priv == PRV_RESERVED) { @@ -239,7 +231,7 @@ static int riscv_gdb_set_virtual(CPUState *cs, uint8_t *mem_buf, int n) } if (new_priv != PRV_M) { - new_virt = (ldn_p(mem_buf, regsz) & BIT(2)) >> 2; + new_virt = (ldn(env, mem_buf, regsz) & BIT(2)) >> 2; } if (riscv_has_ext(env, RVH) && new_virt != env->virt_enabled) { diff --git a/target/riscv/helper.h b/target/riscv/helper.h index b785456ee0..36cdacfb0e 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -84,6 +84,8 @@ DEF_HELPER_FLAGS_1(unzip, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(zip, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(xperm4, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(xperm8, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(crc32, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(crc32c, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* Floating Point - Half Precision */ DEF_HELPER_FLAGS_3(fadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64) @@ -766,45 +768,60 @@ DEF_HELPER_6(vnclip_wx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnclip_wx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnclip_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) @@ -815,74 +832,98 @@ DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmul_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) @@ -890,23 +931,29 @@ DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfrsqrt7_v_h_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrsqrt7_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrsqrt7_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrsqrt7_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfrec7_v_h_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrec7_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrec7_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfrec7_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) @@ -930,37 +977,48 @@ DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_h_bf16, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_h_bf16, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_5(vfclass_v_h_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) @@ -987,18 +1045,22 @@ DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_xu_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_b_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_x_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_b_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_xu_f_w_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_xu_f_w_b_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_xu_f_w_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_xu_f_w_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_x_f_w_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_w_b_bf16, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_x_f_w_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_x_f_w_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_f_xu_w_h, void, ptr, ptr, ptr, env, i32) @@ -1289,3 +1351,6 @@ DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_1(ssamoswap_disabled, void, env) #endif + +/* Zalrsc SC write probe */ +DEF_HELPER_FLAGS_3(sc_probe_write, TCG_CALL_NO_WG, void, env, tl, tl) diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc index a7a3278d24..62c0fe673d 100644 --- a/target/riscv/insn_trans/trans_rva.c.inc +++ b/target/riscv/insn_trans/trans_rva.c.inc @@ -90,6 +90,12 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop) */ TCGBar bar_strl = (ctx->ztso || a->rl) ? TCG_BAR_STRL : 0; tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + bar_strl); + /* + * "For the purposes of memory protection, a failed SC.W may be treated + * like a store." so let's check the write access permissions + */ + gen_helper_sc_probe_write(tcg_env, src1, + tcg_constant_tl(memop_size(mop))); gen_set_gpr(ctx, a->rd, tcg_constant_tl(1)); gen_set_label(l2); diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc index 54b9b4f241..2c82ae41a7 100644 --- a/target/riscv/insn_trans/trans_rvi.c.inc +++ b/target/riscv/insn_trans/trans_rvi.c.inc @@ -377,6 +377,9 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop) TCGv destl = dest_gpr(ctx, a->rd); TCGv desth = dest_gprh(ctx, a->rd); TCGv addrl = tcg_temp_new(); + TCGv_i128 t16 = tcg_temp_new_i128(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); tcg_gen_addi_tl(addrl, src1l, a->imm); @@ -388,10 +391,14 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop) tcg_gen_movi_tl(desth, 0); } } else { - /* assume little-endian memory access for now */ - tcg_gen_qemu_ld_tl(destl, addrl, ctx->mem_idx, MO_TEUQ); - tcg_gen_addi_tl(addrl, addrl, 8); - tcg_gen_qemu_ld_tl(desth, addrl, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_ld_i128(t16, addrl, ctx->mem_idx, memop); + if (mo_endian(ctx) == MO_LE) { + tcg_gen_extr_i128_i64(tl, th, t16); + } else { + tcg_gen_extr_i128_i64(th, tl, t16); + } + tcg_gen_trunc_i64_tl(destl, tl); + tcg_gen_trunc_i64_tl(desth, th); } gen_set_gpr128(ctx, a->rd, destl, desth); @@ -488,16 +495,25 @@ static bool gen_store_i128(DisasContext *ctx, arg_sb *a, MemOp memop) TCGv src2l = get_gpr(ctx, a->rs2, EXT_NONE); TCGv src2h = get_gprh(ctx, a->rs2); TCGv addrl = tcg_temp_new(); + TCGv_i128 t16 = tcg_temp_new_i128(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); tcg_gen_addi_tl(addrl, src1l, a->imm); if ((memop & MO_SIZE) <= MO_64) { tcg_gen_qemu_st_tl(src2l, addrl, ctx->mem_idx, memop); } else { - /* little-endian memory access assumed for now */ - tcg_gen_qemu_st_tl(src2l, addrl, ctx->mem_idx, MO_TEUQ); - tcg_gen_addi_tl(addrl, addrl, 8); - tcg_gen_qemu_st_tl(src2h, addrl, ctx->mem_idx, MO_TEUQ); + + tcg_gen_ext_tl_i64(tl, src2l); + tcg_gen_ext_tl_i64(th, src2h); + + if (mo_endian(ctx) == MO_LE) { + tcg_gen_concat_i64_i128(t16, tl, th); + } else { + tcg_gen_concat_i64_i128(t16, th, tl); + } + tcg_gen_qemu_st_i128(t16, addrl, ctx->mem_idx, memop); } return true; } diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 4df9a40b44..e65356eb7c 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -40,6 +40,9 @@ static bool require_rvf(DisasContext *s) switch (s->sew) { case MO_16: + if (s->altfmt) { + return s->cfg_ptr->ext_zvfbfa; + } return s->cfg_ptr->ext_zvfh; case MO_32: return s->cfg_ptr->ext_zve32f; @@ -58,6 +61,9 @@ static bool require_rvfmin(DisasContext *s) switch (s->sew) { case MO_16: + if (s->altfmt) { + return s->cfg_ptr->ext_zvfbfa; + } return s->cfg_ptr->ext_zvfhmin; case MO_32: return s->cfg_ptr->ext_zve32f; @@ -74,6 +80,9 @@ static bool require_scale_rvf(DisasContext *s) switch (s->sew) { case MO_8: + if (s->altfmt) { + return s->cfg_ptr->ext_zvfbfa; + } return s->cfg_ptr->ext_zvfh; case MO_16: return s->cfg_ptr->ext_zve32f; @@ -2319,40 +2328,54 @@ GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx) */ static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in) { - switch (s->sew) { - case 1: - gen_check_nanbox_h(out, in); - break; - case 2: + if (s->sew == MO_16) { + if (s->altfmt) { + gen_check_nanbox_h_bf16(out, in); + } else { + gen_check_nanbox_h(out, in); + } + } else if (s->sew == MO_32) { gen_check_nanbox_s(out, in); - break; - case 3: + } else if (s->sew == MO_64) { tcg_gen_mov_i64(out, in); - break; - default: + } else { g_assert_not_reached(); } } +/* + * Check altfmt & sew combinations when Zvfbfa extension is enabled. + */ +static bool vext_check_altfmt(DisasContext *s, int8_t valid_vsew) +{ + if (s->cfg_ptr->ext_zvfbfa) { + if (s->altfmt && (valid_vsew == -1 || s->sew != valid_vsew)) { + return false; + } + } + return true; +} + /* Vector Single-Width Floating-Point Add/Subtract Instructions */ /* * If the current SEW does not correspond to a supported IEEE floating-point * type, an illegal instruction exception is raised. */ -static bool opfvv_check(DisasContext *s, arg_rmrr *a) +static bool opfvv_check(DisasContext *s, arg_rmrr *a, int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && - vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); + vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } /* OPFVV without GVEC IR */ #define GEN_OPFVV_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ { \ - if (CHECK(s, a)) { \ + if (CHECK(s, a, -1)) { \ uint32_t data = 0; \ static gen_helper_gvec_4_ptr * const fns[3] = { \ gen_helper_##NAME##_h, \ @@ -2378,8 +2401,41 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ } \ return false; \ } -GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) -GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) + +#define GEN_OPFVV_BFA_TRANS(NAME, CHECK, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1])); \ + tcg_gen_movi_tl(cpu_vstart, 0); \ + finalize_rvv_inst(s); \ + \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_BFA_TRANS(vfadd_vv, opfvv_check, vfadd_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfsub_vv, opfvv_check, vfsub_vv_h_bf16) typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, TCGv_env, TCGv_i32); @@ -2415,244 +2471,316 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, * If the current SEW does not correspond to a supported IEEE floating-point * type, an illegal instruction exception is raised */ -static bool opfvf_check(DisasContext *s, arg_rmrr *a) +static bool opfvf_check(DisasContext *s, arg_rmrr *a, int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && - vext_check_ss(s, a->rd, a->rs2, a->vm); + vext_check_ss(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } /* OPFVF without GVEC IR */ -#define GEN_OPFVF_TRANS(NAME, CHECK) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_opfvf *const fns[3] = { \ - gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w, \ - gen_helper_##NAME##_d, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ - s->cfg_vta_all_1s); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ - fns[s->sew - 1], s); \ - } \ - return false; \ -} - -GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) -GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) -GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) +#define GEN_OPFVF_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, -1)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ + s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +#define GEN_OPFVF_BFA_TRANS(NAME, CHECK, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ + s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1]), \ + s); \ + } \ + return false; \ +} + +GEN_OPFVF_BFA_TRANS(vfadd_vf, opfvf_check, vfadd_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfsub_vf, opfvf_check, vfsub_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfrsub_vf, opfvf_check, vfrsub_vf_h_bf16) /* Vector Widening Floating-Point Add/Subtract Instructions */ -static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) +static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && - vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { - return require_rvv(s) && - require_rvf(s) && - require_scale_rvf(s) && - vext_check_isa_ill(s) && - vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + return opfvv_widen_check(s, a, valid_bfa_vsew) && vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); } /* OPFVV with WIDEN */ -#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_4_ptr * const fns[2] = { \ - gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs1), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew - 1]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ -} - -GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) - -static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, -1)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + fns[s->sew - 1]); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ +} + +#define GEN_OPFVV_WIDEN_BFA_TRANS(NAME, CHECK, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_WIDEN_BFA_TRANS(vfwadd_vv, opfvv_widen_check, vfwadd_vv_h_bf16) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwsub_vv, opfvv_widen_check, vfwsub_vv_h_bf16) + +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && - vext_check_ds(s, a->rd, a->rs2, a->vm); + vext_check_ds(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { - return require_rvv(s) && - require_rvf(s) && - require_scale_rvf(s) && - vext_check_isa_ill(s) && - vext_check_ds(s, a->rd, a->rs2, a->vm) && + return opfvf_widen_check(s, a, valid_bfa_vsew) && vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); } /* OPFVF with WIDEN */ -#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_opfvf *const fns[2] = { \ - gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ - fns[s->sew - 1], s); \ - } \ - return false; \ -} - -GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) - -static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) +#define GEN_OPFVF_WIDEN_BFA_TRANS(NAME, CHECK, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1]), \ + s); \ + } \ + return false; \ +} + +GEN_OPFVF_WIDEN_BFA_TRANS(vfwadd_vf, opfvf_widen_check, vfwadd_vf_h_bf16) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwsub_vf, opfvf_widen_check, vfwsub_vf_h_bf16) + +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && - vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); + vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } /* WIDEN OPFVV with WIDEN */ -#define GEN_OPFWV_WIDEN_TRANS(NAME) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (opfwv_widen_check(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_4_ptr * const fns[2] = { \ - gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs1), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew - 1]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ +#define GEN_OPFWV_WIDEN_BFA_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwv_widen_check(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##NAME##_h_bf16 : \ + fns[s->sew - 1])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ } -GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) -GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) +GEN_OPFWV_WIDEN_BFA_TRANS(vfwadd_wv) +GEN_OPFWV_WIDEN_BFA_TRANS(vfwsub_wv) -static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && - vext_check_dd(s, a->rd, a->rs2, a->vm); + vext_check_dd(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } /* WIDEN OPFVF with WIDEN */ -#define GEN_OPFWF_WIDEN_TRANS(NAME) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (opfwf_widen_check(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_opfvf *const fns[2] = { \ - gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ - fns[s->sew - 1], s); \ - } \ - return false; \ -} - -GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) -GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) +#define GEN_OPFWF_WIDEN_BFA_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwf_widen_check(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + (s->altfmt ? gen_helper_##NAME##_h_bf16 : \ + fns[s->sew - 1]), \ + s); \ + } \ + return false; \ +} + +GEN_OPFWF_WIDEN_BFA_TRANS(vfwadd_wf) +GEN_OPFWF_WIDEN_BFA_TRANS(vfwsub_wf) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ -GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) +GEN_OPFVV_BFA_TRANS(vfmul_vv, opfvv_check, vfmul_vv_h_bf16) GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) -GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) +GEN_OPFVF_BFA_TRANS(vfmul_vf, opfvf_check, vfmul_vf_h_bf16) GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) /* Vector Widening Floating-Point Multiply */ -GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwmul_vv, opfvv_widen_check, vfwmul_vv_h_bf16) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwmul_vf, opfvf_widen_check, vfwmul_vf_h_bf16) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ -GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) -GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) -GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) -GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) -GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) -GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) -GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) -GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) -GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) -GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) -GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) -GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) -GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) -GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) -GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) -GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) +GEN_OPFVV_BFA_TRANS(vfmacc_vv, opfvv_check, vfmacc_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfnmacc_vv, opfvv_check, vfnmacc_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfmsac_vv, opfvv_check, vfmsac_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfnmsac_vv, opfvv_check, vfnmsac_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfmadd_vv, opfvv_check, vfmadd_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfnmadd_vv, opfvv_check, vfnmadd_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfmsub_vv, opfvv_check, vfmsub_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfnmsub_vv, opfvv_check, vfnmsub_vv_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmacc_vf, opfvf_check, vfmacc_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfnmacc_vf, opfvf_check, vfnmacc_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmsac_vf, opfvf_check, vfmsac_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfnmsac_vf, opfvf_check, vfnmsac_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmadd_vf, opfvf_check, vfmadd_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfnmadd_vf, opfvf_check, vfnmadd_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmsub_vf, opfvf_check, vfmsub_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfnmsub_vf, opfvf_check, vfnmsub_vf_h_bf16) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ -GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check, + vfwmaccbf16_vv) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check, + vfwnmacc_vv_h_bf16) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check, + vfwmsac_vv_h_bf16) +GEN_OPFVV_WIDEN_BFA_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check, + vfwnmsac_vv_h_bf16) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check, + vfwmaccbf16_vf) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check, + vfwnmacc_vf_h_bf16) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check, + vfwmsac_vf_h_bf16) +GEN_OPFVF_WIDEN_BFA_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check, + vfwnmsac_vf_h_bf16) /* Vector Floating-Point Square-Root Instruction */ @@ -2660,21 +2788,23 @@ GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) * If the current SEW does not correspond to a supported IEEE floating-point * type, an illegal instruction exception is raised */ -static bool opfv_check(DisasContext *s, arg_rmr *a) +static bool opfv_check(DisasContext *s, arg_rmr *a, int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && /* OPFV instructions ignore vs1 check */ - vext_check_ss(s, a->rd, a->rs2, a->vm); + vext_check_ss(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } static bool do_opfv(DisasContext *s, arg_rmr *a, gen_helper_gvec_3_ptr *fn, - bool (*checkfn)(DisasContext *, arg_rmr *), - int rm) + bool (*checkfn)(DisasContext *, arg_rmr *, int8_t), + int rm, + int8_t valid_bfa_vsew) { - if (checkfn(s, a)) { + if (checkfn(s, a, valid_bfa_vsew)) { uint32_t data = 0; gen_set_rm_chkfrm(s, rm); @@ -2692,76 +2822,95 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, return false; } -#define GEN_OPFV_TRANS(NAME, CHECK, FRM) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - static gen_helper_gvec_3_ptr * const fns[3] = { \ - gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w, \ - gen_helper_##NAME##_d \ - }; \ - return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM); \ +#define GEN_OPFV_TRANS(NAME, CHECK, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d \ + }; \ + return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM, -1); \ +} + +#define GEN_OPFV_BFA_TRANS(NAME, CHECK, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d \ + }; \ + return do_opfv(s, a, \ + (s->altfmt ? gen_helper_##NAME##_h_bf16 : \ + fns[s->sew - 1]), \ + CHECK, FRM, MO_16); \ } GEN_OPFV_TRANS(vfsqrt_v, opfv_check, RISCV_FRM_DYN) -GEN_OPFV_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN) -GEN_OPFV_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN) +GEN_OPFV_BFA_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN) +GEN_OPFV_BFA_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN) /* Vector Floating-Point MIN/MAX Instructions */ -GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) -GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) -GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) -GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) +GEN_OPFVV_BFA_TRANS(vfmin_vv, opfvv_check, vfmin_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vfmax_vv, opfvv_check, vfmax_vv_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmin_vf, opfvf_check, vfmin_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vfmax_vf, opfvf_check, vfmax_vf_h_bf16) /* Vector Floating-Point Sign-Injection Instructions */ -GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) -GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) -GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) -GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) -GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) -GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) +GEN_OPFVV_BFA_TRANS(vfsgnj_vv, opfvv_check, vfsgnj_vv_h) +GEN_OPFVV_BFA_TRANS(vfsgnjn_vv, opfvv_check, vfsgnjn_vv_h) +GEN_OPFVV_BFA_TRANS(vfsgnjx_vv, opfvv_check, vfsgnjx_vv_h) +GEN_OPFVF_BFA_TRANS(vfsgnj_vf, opfvf_check, vfsgnj_vf_h) +GEN_OPFVF_BFA_TRANS(vfsgnjn_vf, opfvf_check, vfsgnjn_vf_h) +GEN_OPFVF_BFA_TRANS(vfsgnjx_vf, opfvf_check, vfsgnjx_vf_h) /* Vector Floating-Point Compare Instructions */ -static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && - vext_check_mss(s, a->rd, a->rs1, a->rs2); + vext_check_mss(s, a->rd, a->rs1, a->rs2) && + vext_check_altfmt(s, valid_bfa_vsew); } -GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) -GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) -GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) -GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) +GEN_OPFVV_BFA_TRANS(vmfeq_vv, opfvv_cmp_check, vmfeq_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vmfne_vv, opfvv_cmp_check, vmfne_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vmflt_vv, opfvv_cmp_check, vmflt_vv_h_bf16) +GEN_OPFVV_BFA_TRANS(vmfle_vv, opfvv_cmp_check, vmfle_vv_h_bf16) -static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && - vext_check_ms(s, a->rd, a->rs2); + vext_check_ms(s, a->rd, a->rs2) && + vext_check_altfmt(s, valid_bfa_vsew); } -GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) -GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) -GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) -GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) -GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) -GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) +GEN_OPFVF_BFA_TRANS(vmfeq_vf, opfvf_cmp_check, vmfeq_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vmfne_vf, opfvf_cmp_check, vmfne_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vmflt_vf, opfvf_cmp_check, vmflt_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vmfle_vf, opfvf_cmp_check, vmfle_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vmfgt_vf, opfvf_cmp_check, vmfgt_vf_h_bf16) +GEN_OPFVF_BFA_TRANS(vmfge_vf, opfvf_cmp_check, vmfge_vf_h_bf16) /* Vector Floating-Point Classify Instruction */ -GEN_OPFV_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN) +GEN_OPFV_BFA_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN) /* Vector Floating-Point Merge Instruction */ -GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) +GEN_OPFVF_BFA_TRANS(vfmerge_vfm, opfvf_check, vfmerge_vfm_h) static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) { if (require_rvv(s) && require_rvf(s) && vext_check_isa_ill(s) && - require_align(a->rd, s->lmul)) { + require_align(a->rd, s->lmul) && + vext_check_altfmt(s, MO_16)) { gen_set_rm(s, RISCV_FRM_DYN); TCGv_i64 t1; @@ -2782,7 +2931,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) static gen_helper_vmv_vx * const fns[3] = { gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, - gen_helper_vmv_v_x_d, + gen_helper_vmv_v_x_d }; t1 = tcg_temp_new_i64(); @@ -2803,15 +2952,15 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) } /* Single-Width Floating-Point/Integer Type-Convert Instructions */ -#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - static gen_helper_gvec_3_ptr * const fns[3] = { \ - gen_helper_##HELPER##_h, \ - gen_helper_##HELPER##_w, \ - gen_helper_##HELPER##_d \ - }; \ - return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM); \ +#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w, \ + gen_helper_##HELPER##_d \ + }; \ + return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM, -1); \ } GEN_OPFV_CVT_TRANS(vfcvt_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_DYN) @@ -2835,95 +2984,129 @@ static bool opfv_widen_check(DisasContext *s, arg_rmr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } -static bool opxfv_widen_check(DisasContext *s, arg_rmr *a) +static bool opxfv_widen_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return opfv_widen_check(s, a) && - require_rvf(s); + require_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool opffv_widen_check(DisasContext *s, arg_rmr *a) +static bool opffv_widen_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return opfv_widen_check(s, a) && require_rvfmin(s) && - require_scale_rvfmin(s); -} - -#define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_3_ptr * const fns[2] = { \ - gen_helper_##HELPER##_h, \ - gen_helper_##HELPER##_w, \ - }; \ - gen_set_rm_chkfrm(s, FRM); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew - 1]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ + require_scale_rvfmin(s) && + vext_check_altfmt(s, valid_bfa_vsew); +} + +#define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (CHECK(s, a, -1)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w, \ + }; \ + gen_set_rm_chkfrm(s, FRM); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + fns[s->sew - 1]); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ +} + +#define GEN_OPFV_WIDEN_BFA_TRANS(NAME, CHECK, HELPER, FRM, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w, \ + }; \ + gen_set_rm_chkfrm(s, FRM); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ } GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v, opxfv_widen_check, vfwcvt_xu_f_v, RISCV_FRM_DYN) GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, RISCV_FRM_DYN) -GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v, opffv_widen_check, vfwcvt_f_f_v, - RISCV_FRM_DYN) +GEN_OPFV_WIDEN_BFA_TRANS(vfwcvt_f_f_v, opffv_widen_check, vfwcvt_f_f_v, + RISCV_FRM_DYN, vfwcvtbf16_f_f_v) /* Reuse the helper functions from vfwcvt.xu.f.v and vfwcvt.x.f.v */ GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_xu_f_v, opxfv_widen_check, vfwcvt_xu_f_v, RISCV_FRM_RTZ) GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, RISCV_FRM_RTZ) -static bool opfxv_widen_check(DisasContext *s, arg_rmr *a) +static bool opfxv_widen_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && /* OPFV widening instructions ignore vs1 check */ - vext_check_ds(s, a->rd, a->rs2, a->vm); + vext_check_ds(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } -#define GEN_OPFXV_WIDEN_TRANS(NAME) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - if (opfxv_widen_check(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_3_ptr * const fns[3] = { \ - gen_helper_##NAME##_b, \ - gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w, \ - }; \ - gen_set_rm(s, RISCV_FRM_DYN); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ +#define GEN_OPFXV_WIDEN_BFA_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (opfxv_widen_check(s, a, MO_8)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + gen_set_rm(s, RISCV_FRM_DYN); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##NAME##_b_bf16 : \ + fns[s->sew])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ } -GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_xu_v) -GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_x_v) +GEN_OPFXV_WIDEN_BFA_TRANS(vfwcvt_f_xu_v) +GEN_OPFXV_WIDEN_BFA_TRANS(vfwcvt_f_x_v) /* Narrowing Floating-Point/Integer Type-Convert Instructions */ @@ -2939,104 +3122,140 @@ static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) vext_check_sd(s, a->rd, a->rs2, a->vm); } -static bool opfxv_narrow_check(DisasContext *s, arg_rmr *a) +static bool opfxv_narrow_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return opfv_narrow_check(s, a) && require_rvf(s) && - (s->sew != MO_64); + (s->sew != MO_64) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool opffv_narrow_check(DisasContext *s, arg_rmr *a) +static bool opffv_narrow_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return opfv_narrow_check(s, a) && require_rvfmin(s) && - require_scale_rvfmin(s); + require_scale_rvfmin(s) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a) +static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return opfv_narrow_check(s, a) && require_rvf(s) && - require_scale_rvf(s); -} - -#define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_3_ptr * const fns[2] = { \ - gen_helper_##HELPER##_h, \ - gen_helper_##HELPER##_w, \ - }; \ - gen_set_rm_chkfrm(s, FRM); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew - 1]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ + require_scale_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); +} + +#define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (CHECK(s, a, -1)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w, \ + }; \ + gen_set_rm_chkfrm(s, FRM); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + fns[s->sew - 1]); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ +} + +#define GEN_OPFV_NARROW_BFA_TRANS(NAME, CHECK, HELPER, FRM, BFA_HELPER) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (CHECK(s, a, MO_16)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w, \ + }; \ + gen_set_rm_chkfrm(s, FRM); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##BFA_HELPER : \ + fns[s->sew - 1])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ } GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_w, opfxv_narrow_check, vfncvt_f_xu_w, RISCV_FRM_DYN) GEN_OPFV_NARROW_TRANS(vfncvt_f_x_w, opfxv_narrow_check, vfncvt_f_x_w, RISCV_FRM_DYN) -GEN_OPFV_NARROW_TRANS(vfncvt_f_f_w, opffv_narrow_check, vfncvt_f_f_w, - RISCV_FRM_DYN) +GEN_OPFV_NARROW_BFA_TRANS(vfncvt_f_f_w, opffv_narrow_check, vfncvt_f_f_w, + RISCV_FRM_DYN, vfncvtbf16_f_f_w) /* Reuse the helper function from vfncvt.f.f.w */ -GEN_OPFV_NARROW_TRANS(vfncvt_rod_f_f_w, opffv_rod_narrow_check, vfncvt_f_f_w, - RISCV_FRM_ROD) +GEN_OPFV_NARROW_BFA_TRANS(vfncvt_rod_f_f_w, opffv_rod_narrow_check, + vfncvt_f_f_w, RISCV_FRM_ROD, vfncvtbf16_f_f_w) -static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a) +static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a, + int8_t valid_bfa_vsew) { return require_rvv(s) && require_scale_rvf(s) && vext_check_isa_ill(s) && /* OPFV narrowing instructions ignore vs1 check */ - vext_check_sd(s, a->rd, a->rs2, a->vm); + vext_check_sd(s, a->rd, a->rs2, a->vm) && + vext_check_altfmt(s, valid_bfa_vsew); } -#define GEN_OPXFV_NARROW_TRANS(NAME, HELPER, FRM) \ -static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ -{ \ - if (opxfv_narrow_check(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_3_ptr * const fns[3] = { \ - gen_helper_##HELPER##_b, \ - gen_helper_##HELPER##_h, \ - gen_helper_##HELPER##_w, \ - }; \ - gen_set_rm_chkfrm(s, FRM); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs2), tcg_env, \ - s->cfg_ptr->vlenb, \ - s->cfg_ptr->vlenb, data, \ - fns[s->sew]); \ - finalize_rvv_inst(s); \ - return true; \ - } \ - return false; \ +#define GEN_OPXFV_NARROW_BFA_TRANS(NAME, HELPER, FRM) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (opxfv_narrow_check(s, a, MO_8)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##HELPER##_b, \ + gen_helper_##HELPER##_h, \ + gen_helper_##HELPER##_w \ + }; \ + gen_set_rm_chkfrm(s, FRM); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_env, \ + s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data, \ + (s->altfmt ? gen_helper_##HELPER##_b_bf16 : \ + fns[s->sew])); \ + finalize_rvv_inst(s); \ + return true; \ + } \ + return false; \ } -GEN_OPXFV_NARROW_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN) -GEN_OPXFV_NARROW_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN) +GEN_OPXFV_NARROW_BFA_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN) +GEN_OPXFV_NARROW_BFA_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN) /* Reuse the helper functions from vfncvt.xu.f.w and vfncvt.x.f.w */ -GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ) -GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ) +GEN_OPXFV_NARROW_BFA_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ) +GEN_OPXFV_NARROW_BFA_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ) /* *** Vector Reduction Operations @@ -3046,6 +3265,8 @@ static bool reduction_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && vext_check_isa_ill(s) && + require_vm(a->vm, a->rs1) && + require_vm(a->vm, a->rs2) && vext_check_reduction(s, a->rs2); } @@ -3069,10 +3290,12 @@ GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check) GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check) /* Vector Single-Width Floating-Point Reduction Instructions */ -static bool freduction_check(DisasContext *s, arg_rmrr *a) +static bool freduction_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return reduction_check(s, a) && - require_rvf(s); + require_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); } GEN_OPFVV_TRANS(vfredusum_vs, freduction_check) @@ -3081,11 +3304,13 @@ GEN_OPFVV_TRANS(vfredmax_vs, freduction_check) GEN_OPFVV_TRANS(vfredmin_vs, freduction_check) /* Vector Widening Floating-Point Reduction Instructions */ -static bool freduction_widen_check(DisasContext *s, arg_rmrr *a) +static bool freduction_widen_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return reduction_widen_check(s, a) && require_rvf(s) && - require_scale_rvf(s); + require_scale_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); } GEN_OPFVV_WIDEN_TRANS(vfwredusum_vs, freduction_widen_check) @@ -3500,7 +3725,8 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) { if (require_rvv(s) && require_rvf(s) && - vext_check_isa_ill(s)) { + vext_check_isa_ill(s) && + vext_check_altfmt(s, MO_16)) { gen_set_rm(s, RISCV_FRM_DYN); /* The instructions ignore LMUL and vector register group. */ @@ -3594,20 +3820,24 @@ GEN_OPIVX_VSLIDE1_TRANS(vslide1up_vx, slideup_check) GEN_OPIVX_VSLIDE1_TRANS(vslide1down_vx, slidedown_check) /* Vector Floating-Point Slide Instructions */ -static bool fslideup_check(DisasContext *s, arg_rmrr *a) +static bool fslideup_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return slideup_check(s, a) && - require_rvf(s); + require_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); } -static bool fslidedown_check(DisasContext *s, arg_rmrr *a) +static bool fslidedown_check(DisasContext *s, arg_rmrr *a, + int8_t valid_bfa_vsew) { return slidedown_check(s, a) && - require_rvf(s); + require_rvf(s) && + vext_check_altfmt(s, valid_bfa_vsew); } -GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check) -GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check) +GEN_OPFVF_BFA_TRANS(vfslide1up_vf, fslideup_check, vfslide1up_vf_h) +GEN_OPFVF_BFA_TRANS(vfslide1down_vf, fslidedown_check, vfslide1down_vf_h) /* Vector Register Gather Instruction */ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index 27bf3f0b68..32255d3aa0 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -426,8 +426,7 @@ static bool vsha_check(DisasContext *s, arg_rmrr *a) vsha_check_sew(s) && MAXSZ(s) >= egw_bytes && !is_overlapped(a->rd, mult, a->rs1, mult) && - !is_overlapped(a->rd, mult, a->rs2, mult) && - s->lmul >= 0; + !is_overlapped(a->rd, mult, a->rs2, mult); } GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS) diff --git a/target/riscv/insn_trans/trans_rvzalasr.c.inc b/target/riscv/insn_trans/trans_rvzalasr.c.inc index bf86805cef..0f307affec 100644 --- a/target/riscv/insn_trans/trans_rvzalasr.c.inc +++ b/target/riscv/insn_trans/trans_rvzalasr.c.inc @@ -29,6 +29,7 @@ static bool gen_load_acquire(DisasContext *ctx, arg_lb_aqrl *a, MemOp memop) return false; } + memop |= MO_ALIGN | mo_endian(ctx); memop |= (ctx->cfg_ptr->ext_zama16b) ? MO_ATOM_WITHIN16 : 0; tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, memop); @@ -43,26 +44,26 @@ static bool gen_load_acquire(DisasContext *ctx, arg_lb_aqrl *a, MemOp memop) static bool trans_lb_aqrl(DisasContext *ctx, arg_lb_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_load_acquire(ctx, a, (MO_ALIGN | MO_SB)); + return gen_load_acquire(ctx, a, MO_SB); } static bool trans_lh_aqrl(DisasContext *ctx, arg_lh_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_load_acquire(ctx, a, (MO_ALIGN | MO_TESW)); + return gen_load_acquire(ctx, a, MO_SW); } static bool trans_lw_aqrl(DisasContext *ctx, arg_lw_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_load_acquire(ctx, a, (MO_ALIGN | MO_TESL)); + return gen_load_acquire(ctx, a, MO_SL); } static bool trans_ld_aqrl(DisasContext *ctx, arg_ld_aqrl *a) { REQUIRE_64BIT(ctx); REQUIRE_ZALASR(ctx); - return gen_load_acquire(ctx, a, (MO_ALIGN | MO_TEUQ)); + return gen_load_acquire(ctx, a, MO_UQ); } static bool gen_store_release(DisasContext *ctx, arg_sb_aqrl *a, MemOp memop) @@ -78,6 +79,7 @@ static bool gen_store_release(DisasContext *ctx, arg_sb_aqrl *a, MemOp memop) return false; } + memop |= MO_ALIGN | mo_endian(ctx); memop |= (ctx->cfg_ptr->ext_zama16b) ? MO_ATOM_WITHIN16 : 0; /* Add a memory barrier implied by RL (mandatory) and AQ (optional) */ @@ -90,24 +92,24 @@ static bool gen_store_release(DisasContext *ctx, arg_sb_aqrl *a, MemOp memop) static bool trans_sb_aqrl(DisasContext *ctx, arg_sb_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_store_release(ctx, a, (MO_ALIGN | MO_SB)); + return gen_store_release(ctx, a, MO_SB); } static bool trans_sh_aqrl(DisasContext *ctx, arg_sh_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_store_release(ctx, a, (MO_ALIGN | MO_TESW)); + return gen_store_release(ctx, a, MO_SW); } static bool trans_sw_aqrl(DisasContext *ctx, arg_sw_aqrl *a) { REQUIRE_ZALASR(ctx); - return gen_store_release(ctx, a, (MO_ALIGN | MO_TESL)); + return gen_store_release(ctx, a, MO_SL); } static bool trans_sd_aqrl(DisasContext *ctx, arg_sd_aqrl *a) { REQUIRE_64BIT(ctx); REQUIRE_ZALASR(ctx); - return gen_store_release(ctx, a, (MO_ALIGN | MO_TEUQ)); + return gen_store_release(ctx, a, MO_UQ); } diff --git a/target/riscv/insn_trans/trans_xlrbr.c.inc b/target/riscv/insn_trans/trans_xlrbr.c.inc new file mode 100644 index 0000000000..01da2b6ce1 --- /dev/null +++ b/target/riscv/insn_trans/trans_xlrbr.c.inc @@ -0,0 +1,45 @@ +/* + * RISC-V translation routines for xlrbr matching the unratified Zbr CRC32 + * bitmanip extension v0.93. + * + * Copyright (c) 2026 Rivos Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#define REQUIRE_XLRBR(ctx) do { \ + if (!ctx->cfg_ptr->ext_xlrbr) { \ + return false; \ + } \ +} while (0) + +static bool gen_crc(DisasContext *ctx, arg_r2 *a, + void (*func)(TCGv, TCGv, TCGv), TCGv tsz) +{ + REQUIRE_XLRBR(ctx); + TCGv dest = dest_gpr(ctx, a->rd); + TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE); + + func(dest, src1, tsz); + gen_set_gpr(ctx, a->rd, dest); + + return true; +} + +#define TRANS_CRC32(NAME, SIZE) \ + static bool trans_crc32_##NAME(DisasContext *ctx, arg_r2 *a) \ + { if (SIZE == 8) { REQUIRE_64BIT(ctx); }; \ + return gen_crc(ctx, a, gen_helper_crc32, tcg_constant_tl(SIZE)); } +#define TRANS_CRC32C(NAME, SIZE) \ + static bool trans_crc32c_##NAME(DisasContext *ctx, arg_r2 *a) \ + { if (SIZE == 8) { REQUIRE_64BIT(ctx); }; \ + return gen_crc(ctx, a, gen_helper_crc32c, tcg_constant_tl(SIZE)); } + +TRANS_CRC32(b, 1); +TRANS_CRC32(h, 2); +TRANS_CRC32(w, 4); +TRANS_CRC32(d, 8); +TRANS_CRC32C(b, 1); +TRANS_CRC32C(h, 2); +TRANS_CRC32C(w, 4); +TRANS_CRC32C(d, 8); diff --git a/target/riscv/insn_trans/trans_xmips.c.inc b/target/riscv/insn_trans/trans_xmips.c.inc index 9a72f3392f..c1a30156d3 100644 --- a/target/riscv/insn_trans/trans_xmips.c.inc +++ b/target/riscv/insn_trans/trans_xmips.c.inc @@ -47,6 +47,8 @@ static bool trans_ccmov(DisasContext *ctx, arg_ccmov *a) /* Load Doubleword Pair. */ static bool trans_ldp(DisasContext *ctx, arg_ldp *a) { + MemOp memop = MO_SQ | mo_endian(ctx); + REQUIRE_XMIPSLSP(ctx); REQUIRE_64_OR_128BIT(ctx); @@ -56,11 +58,11 @@ static bool trans_ldp(DisasContext *ctx, arg_ldp *a) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, src, a->imm_y); - tcg_gen_qemu_ld_tl(dest0, addr, ctx->mem_idx, MO_TESQ); + tcg_gen_qemu_ld_tl(dest0, addr, ctx->mem_idx, memop); gen_set_gpr(ctx, a->rd, dest0); tcg_gen_addi_tl(addr, addr, 8); - tcg_gen_qemu_ld_tl(dest1, addr, ctx->mem_idx, MO_TESQ); + tcg_gen_qemu_ld_tl(dest1, addr, ctx->mem_idx, memop); gen_set_gpr(ctx, a->rs3, dest1); return true; @@ -69,6 +71,8 @@ static bool trans_ldp(DisasContext *ctx, arg_ldp *a) /* Load Word Pair. */ static bool trans_lwp(DisasContext *ctx, arg_lwp *a) { + MemOp memop = MO_SL | mo_endian(ctx); + REQUIRE_XMIPSLSP(ctx); TCGv src = get_gpr(ctx, a->rs1, EXT_NONE); @@ -77,11 +81,11 @@ static bool trans_lwp(DisasContext *ctx, arg_lwp *a) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, src, a->imm_x); - tcg_gen_qemu_ld_tl(dest0, addr, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(dest0, addr, ctx->mem_idx, memop); gen_set_gpr(ctx, a->rd, dest0); tcg_gen_addi_tl(addr, addr, 4); - tcg_gen_qemu_ld_tl(dest1, addr, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_ld_tl(dest1, addr, ctx->mem_idx, memop); gen_set_gpr(ctx, a->rs3, dest1); return true; @@ -90,6 +94,8 @@ static bool trans_lwp(DisasContext *ctx, arg_lwp *a) /* Store Doubleword Pair. */ static bool trans_sdp(DisasContext *ctx, arg_sdp *a) { + MemOp memop = MO_UQ | mo_endian(ctx); + REQUIRE_XMIPSLSP(ctx); REQUIRE_64_OR_128BIT(ctx); @@ -99,10 +105,10 @@ static bool trans_sdp(DisasContext *ctx, arg_sdp *a) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, src, a->imm_w); - tcg_gen_qemu_st_tl(data0, addr, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(data0, addr, ctx->mem_idx, memop); tcg_gen_addi_tl(addr, addr, 8); - tcg_gen_qemu_st_tl(data1, addr, ctx->mem_idx, MO_TEUQ); + tcg_gen_qemu_st_tl(data1, addr, ctx->mem_idx, memop); return true; } @@ -110,6 +116,8 @@ static bool trans_sdp(DisasContext *ctx, arg_sdp *a) /* Store Word Pair. */ static bool trans_swp(DisasContext *ctx, arg_swp *a) { + MemOp memop = MO_SL | mo_endian(ctx); + REQUIRE_XMIPSLSP(ctx); TCGv src = get_gpr(ctx, a->rs1, EXT_NONE); @@ -118,10 +126,10 @@ static bool trans_swp(DisasContext *ctx, arg_swp *a) TCGv addr = tcg_temp_new(); tcg_gen_addi_tl(addr, src, a->imm_v); - tcg_gen_qemu_st_tl(data0, addr, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_st_tl(data0, addr, ctx->mem_idx, memop); tcg_gen_addi_tl(addr, addr, 4); - tcg_gen_qemu_st_tl(data1, addr, ctx->mem_idx, MO_TESL); + tcg_gen_qemu_st_tl(data1, addr, ctx->mem_idx, memop); return true; } diff --git a/target/riscv/insn_trans/trans_zilsd.c.inc b/target/riscv/insn_trans/trans_zilsd.c.inc index 369c33004b..f50c52f22c 100644 --- a/target/riscv/insn_trans/trans_zilsd.c.inc +++ b/target/riscv/insn_trans/trans_zilsd.c.inc @@ -30,7 +30,7 @@ static bool gen_load_i64(DisasContext *ctx, arg_ld *a) TCGv addr = get_address(ctx, a->rs1, a->imm); TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tmp, addr, ctx->mem_idx, MO_TESQ); + tcg_gen_qemu_ld_i64(tmp, addr, ctx->mem_idx, MO_SQ | mo_endian(ctx)); if (a->rd == 0) { return true; @@ -85,7 +85,7 @@ static bool gen_store_i64(DisasContext *ctx, arg_sd *a) } else { tcg_gen_concat_tl_i64(tmp, data_low, data_high); } - tcg_gen_qemu_st_i64(tmp, addr, ctx->mem_idx, MO_TESQ); + tcg_gen_qemu_st_i64(tmp, addr, ctx->mem_idx, MO_SQ | mo_endian(ctx)); return true; } diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 35b923c4bf..bac6c8032a 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -62,6 +62,18 @@ static inline bool mmuidx_2stage(int mmu_idx) return mmu_idx & MMU_2STAGE_BIT; } +static inline MemOp mo_endian_env(CPURISCVState *env) +{ + /* + * A couple of bits in MSTATUS set the endianness: + * - MSTATUS_UBE (User-mode), + * - MSTATUS_SBE (Supervisor-mode), + * - MSTATUS_MBE (Machine-mode) + * but we don't implement that yet. + */ + return MO_LE; +} + /* share data between vector helpers and decode code */ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) @@ -72,6 +84,7 @@ FIELD(VDATA, NF, 7, 4) FIELD(VDATA, WD, 7, 1) /* float point classify helpers */ +target_ulong fclass_h_bf16(uint64_t frs1); target_ulong fclass_h(uint64_t frs1); target_ulong fclass_s(uint64_t frs1); target_ulong fclass_d(uint64_t frs1); @@ -160,9 +173,15 @@ static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) static inline target_ulong get_xepc_mask(CPURISCVState *env) { - /* When IALIGN=32, both low bits must be zero. - * When IALIGN=16 (has C extension), only bit 0 must be zero. */ - if (riscv_has_ext(env, RVC)) { + RISCVCPU *cpu = env_archcpu(env); + + /* + * When IALIGN=32, both low bits must be zero. + * When IALIGN=16 (has C or Zc* extensions), only bit 0 must be zero. + */ + if (riscv_has_ext(env, RVC) || cpu->cfg.ext_zca || + cpu->cfg.ext_zcb || cpu->cfg.ext_zcd || cpu->cfg.ext_zce || + cpu->cfg.ext_zcf || cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt) { return ~(target_ulong)1; } else { return ~(target_ulong)3; @@ -190,7 +209,7 @@ static inline target_ulong adjust_addr_body(CPURISCVState *env, /* get pmm field depending on whether addr is */ if (is_virt_addr) { - pmm = riscv_pm_get_virt_pmm(env); + pmm = riscv_pm_get_vm_ldst_pmm(env); } else { pmm = riscv_pm_get_pmm(env); } @@ -200,11 +219,9 @@ static inline target_ulong adjust_addr_body(CPURISCVState *env, return addr; } - if (!is_virt_addr) { - signext = riscv_cpu_virt_mem_enabled(env); - } - addr = addr << pmlen; + signext = riscv_cpu_virt_mem_enabled(env, is_virt_addr); pmlen = riscv_pm_get_pmlen(pmm); + addr = addr << pmlen; /* sign/zero extend masked address by N-1 bit */ if (signext) { diff --git a/target/riscv/meson.build b/target/riscv/meson.build index 3842c7c1a8..79f36abd63 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -5,6 +5,7 @@ gen = [ decodetree.process('xthead.decode', extra_args: '--static-decode=decode_xthead'), decodetree.process('XVentanaCondOps.decode', extra_args: '--static-decode=decode_XVentanaCodeOps'), decodetree.process('xmips.decode', extra_args: '--static-decode=decode_xmips'), + decodetree.process('xlrbr.decode', extra_args: '--static-decode=decode_xlrbr'), ] riscv_ss = ss.source_set() diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 6ccc127c30..81873014cb 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -28,20 +28,6 @@ #include "exec/tlb-flags.h" #include "trace.h" -#ifndef CONFIG_USER_ONLY -static inline MemOp mo_endian_env(CPURISCVState *env) -{ - /* - * A couple of bits in MSTATUS set the endianness: - * - MSTATUS_UBE (User-mode), - * - MSTATUS_SBE (Supervisor-mode), - * - MSTATUS_MBE (Machine-mode) - * but we don't implement that yet. - */ - return MO_TE; -} -#endif - /* Exceptions processing helpers */ G_NORETURN void riscv_raise_exception(CPURISCVState *env, RISCVException exception, @@ -281,6 +267,20 @@ void helper_cbo_inval(CPURISCVState *env, target_ulong address) /* We don't emulate the cache-hierarchy, so we're done. */ } +void helper_sc_probe_write(CPURISCVState *env, target_ulong addr, + target_ulong size) +{ + uintptr_t ra = GETPC(); + int mmu_idx = riscv_env_mmu_index(env, false); + + if (addr & (size - 1)) { + env->badaddr = addr; + riscv_raise_exception(env, RISCV_EXCP_STORE_AMO_ADDR_MIS, ra); + } + + probe_write(env, addr, size, mmu_idx, ra); +} + #ifndef CONFIG_USER_ONLY target_ulong helper_sret(CPURISCVState *env) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 988b2d905f..02c98cc2db 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -104,7 +104,8 @@ static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) RISCVCPU *cpu = env_archcpu(env); RISCVExtStatus fs, vs; uint32_t flags = 0; - bool pm_signext = riscv_cpu_virt_mem_enabled(env); + uint64_t ext_flags = 0; + bool pm_signext = riscv_cpu_virt_mem_enabled(env, false); if (cpu->cfg.ext_zve32x) { /* @@ -118,6 +119,7 @@ static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) /* lmul encoded as in DisasContext::lmul */ int8_t lmul = sextract32(FIELD_EX64(env->vtype, VTYPE, VLMUL), 0, 3); + uint8_t altfmt = FIELD_EX64(env->vtype, VTYPE, ALTFMT); uint32_t vsew = FIELD_EX64(env->vtype, VTYPE, VSEW); uint32_t vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul); uint32_t maxsz = vlmax << vsew; @@ -133,6 +135,7 @@ static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) flags = FIELD_DP32(flags, TB_FLAGS, VMA, FIELD_EX64(env->vtype, VTYPE, VMA)); flags = FIELD_DP32(flags, TB_FLAGS, VSTART_EQ_ZERO, env->vstart == 0); + ext_flags = FIELD_DP64(ext_flags, EXT_TB_FLAGS, ALTFMT, altfmt); } else { flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); } @@ -189,10 +192,12 @@ static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) flags = FIELD_DP32(flags, TB_FLAGS, PM_PMM, riscv_pm_get_pmm(env)); flags = FIELD_DP32(flags, TB_FLAGS, PM_SIGNEXTEND, pm_signext); + ext_flags = FIELD_DP64(ext_flags, EXT_TB_FLAGS, MISA_EXT, env->misa_ext); + return (TCGTBCPUState){ .pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc, .flags = flags, - .cs_base = env->misa_ext, + .cs_base = ext_flags, }; } @@ -255,7 +260,7 @@ static vaddr riscv_pointer_wrap(CPUState *cs, int mmu_idx, return result; } - pm_signext = riscv_cpu_virt_mem_enabled(env); + pm_signext = riscv_cpu_virt_mem_enabled(env, false); if (pm_signext) { return sextract64(result, 0, 64 - pm_len); } @@ -720,6 +725,14 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } + if (cpu->cfg.ext_zvfbfa) { + if (!cpu->cfg.ext_zve32f || !cpu->cfg.ext_zfbfmin) { + error_setg(errp, "Zvfbfa extension requires Zve32f extension " + "and Zfbfmin extension"); + return; + } + } + if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) { error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx"); return; @@ -1305,16 +1318,6 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) } #ifndef CONFIG_USER_ONLY - RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); - - if (mcc->def->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) { - /* Missing 128-bit aligned atomics */ - error_setg(errp, - "128-bit RISC-V currently does not work with Multi " - "Threaded TCG. Please use: -accel tcg,thread=single"); - return false; - } - CPURISCVState *env = &cpu->env; tcg_cflags_set(CPU(cs), CF_PCREL); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index f42e53df88..1e4f340256 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -101,6 +101,7 @@ typedef struct DisasContext { bool cfg_vta_all_1s; bool vstart_eq_zero; bool vl_eq_vlmax; + bool altfmt; CPUState *cs; TCGv zero; /* actual address width */ @@ -135,7 +136,7 @@ static inline MemOp mo_endian(DisasContext *ctx) * - MSTATUS_MBE (Machine-mode) * but we don't implement that yet. */ - return MO_TE; + return MO_LE; } #ifdef TARGET_RISCV32 @@ -213,6 +214,14 @@ static void gen_check_nanbox_h(TCGv_i64 out, TCGv_i64 in) tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan); } +static void gen_check_nanbox_h_bf16(TCGv_i64 out, TCGv_i64 in) +{ + TCGv_i64 t_max = tcg_constant_i64(0xffffffffffff0000ull); + TCGv_i64 t_nan = tcg_constant_i64(0xffffffffffff7fc0ull); + + tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan); +} + static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in) { TCGv_i64 t_max = tcg_constant_i64(0xffffffff00000000ull); @@ -1213,9 +1222,11 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_rvbf16.c.inc" #include "decode-xthead.c.inc" #include "decode-xmips.c.inc" +#include "decode-xlrbr.c.inc" #include "insn_trans/trans_xthead.c.inc" #include "insn_trans/trans_xventanacondops.c.inc" #include "insn_trans/trans_xmips.c.inc" +#include "insn_trans/trans_xlrbr.c.inc" /* Include the auto-generated decoder for 16 bit insn */ #include "decode-insn16.c.inc" @@ -1235,6 +1246,7 @@ const RISCVDecoder decoder_table[] = { { has_xmips_p, decode_xmips}, { has_xthead_p, decode_xthead}, { has_XVentanaCondOps_p, decode_XVentanaCodeOps}, + { has_xlrbr_p, decode_xlrbr}, }; const size_t decoder_table_size = ARRAY_SIZE(decoder_table); @@ -1255,7 +1267,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx) * additional page fault. */ opcode = translator_ldl_end(env, &ctx->base, ctx->base.pc_next, - mo_endian(ctx)); + MO_LE); } else { /* * For unaligned pc, instruction preload may trigger additional @@ -1263,7 +1275,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx) */ opcode = (uint32_t) translator_lduw_end(env, &ctx->base, ctx->base.pc_next, - mo_endian(ctx)); + MO_LE); } ctx->ol = ctx->xl; @@ -1285,7 +1297,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx) opcode = deposit32(opcode, 16, 16, translator_lduw_end(env, &ctx->base, ctx->base.pc_next + 2, - mo_endian(ctx))); + MO_LE)); } ctx->opcode = opcode; @@ -1307,6 +1319,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cs); RISCVCPU *cpu = RISCV_CPU(cs); uint32_t tb_flags = ctx->base.tb->flags; + uint64_t ext_tb_flags = ctx->base.tb->cs_base; ctx->pc_save = ctx->base.pc_first; ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV); @@ -1326,6 +1339,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s; ctx->vstart_eq_zero = FIELD_EX32(tb_flags, TB_FLAGS, VSTART_EQ_ZERO); ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); + ctx->altfmt = FIELD_EX64(ext_tb_flags, EXT_TB_FLAGS, ALTFMT); ctx->misa_mxl_max = mcc->def->misa_mxl_max; ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL); ctx->address_xl = FIELD_EX32(tb_flags, TB_FLAGS, AXL); @@ -1401,7 +1415,7 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) { uint16_t next_insn = translator_lduw_end(env, &ctx->base, ctx->base.pc_next, - mo_endian(ctx)); + MO_LE); int len = insn_len(next_insn); if (!translator_is_same_page(&ctx->base, ctx->base.pc_next + len - 1)) { diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 83dd26314d..5a3554dd71 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -33,6 +33,31 @@ #include "vector_internals.h" #include <math.h> +static target_ulong vtype_reserved(CPURISCVState *env, target_ulong vtype) +{ + int xlen = riscv_cpu_xlen(env); + target_ulong reserved = 0; + + if (riscv_cpu_cfg(env)->ext_zvfbfa) { + reserved = vtype & MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, + xlen - 1 - R_VTYPE_RESERVED_SHIFT); + } else { + reserved = vtype & MAKE_64BIT_MASK(R_VTYPE_ALTFMT_SHIFT, + xlen - 1 - R_VTYPE_ALTFMT_SHIFT); + } + + return reserved; +} + +static inline void reset_ill_vtype(CPURISCVState *env) +{ + /* only set vill bit. */ + env->vill = 1; + env->vtype = 0; + env->vl = 0; + env->vstart = 0; +} + target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, target_ulong s2, target_ulong x0) { @@ -41,34 +66,42 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL); uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW); uint16_t sew = 8 << vsew; - uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); + uint8_t altfmt = FIELD_EX64(s2, VTYPE, ALTFMT); + bool ill_altfmt = true; int xlen = riscv_cpu_xlen(env); bool vill = (s2 >> (xlen - 1)) & 0x1; - target_ulong reserved = s2 & - MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, - xlen - 1 - R_VTYPE_RESERVED_SHIFT); - uint16_t vlen = cpu->cfg.vlenb << 3; int8_t lmul; if (vlmul & 4) { /* * Fractional LMUL, check: * - * VLEN * LMUL >= SEW - * VLEN >> (8 - lmul) >= sew - * (vlenb << 3) >> (8 - lmul) >= sew + * ELEN * LMUL >= SEW + * ELEN >> (8 - vlmul) >= sew */ - if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) { + if (vlmul == 4 || + (cpu->cfg.elen >> (8 - vlmul)) < sew) { vill = true; } } - if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { - /* only set vill bit. */ - env->vill = 1; - env->vtype = 0; - env->vl = 0; - env->vstart = 0; + switch (vsew) { + case MO_8: + ill_altfmt &= !(cpu->cfg.ext_zvfbfa); + break; + case MO_16: + ill_altfmt &= !(cpu->cfg.ext_zvfbfa); + break; + default: + break; + } + + if (altfmt && ill_altfmt) { + vill = true; + } + + if ((sew > cpu->cfg.elen) || vill || (vtype_reserved(env, s2) != 0)) { + reset_ill_vtype(env); return 0; } @@ -84,11 +117,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) { - /* only set vill bit. */ - env->vill = 1; - env->vtype = 0; - env->vl = 0; - env->vstart = 0; + reset_ill_vtype(env); return 0; } @@ -3138,9 +3167,11 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ total_elems * ESZ); \ } +RVVCALL(OPFVV2, vfadd_vv_h_bf16, OP_UUU_H, H2, H2, H2, bfloat16_add) RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) +GEN_VEXT_VV_ENV(vfadd_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfadd_vv_h, 2) GEN_VEXT_VV_ENV(vfadd_vv_w, 4) GEN_VEXT_VV_ENV(vfadd_vv_d, 8) @@ -3183,26 +3214,37 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ total_elems * ESZ); \ } +RVVCALL(OPFVF2, vfadd_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_add) RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) +GEN_VEXT_VF(vfadd_vf_h_bf16, 2) GEN_VEXT_VF(vfadd_vf_h, 2) GEN_VEXT_VF(vfadd_vf_w, 4) GEN_VEXT_VF(vfadd_vf_d, 8) +RVVCALL(OPFVV2, vfsub_vv_h_bf16, OP_UUU_H, H2, H2, H2, bfloat16_sub) RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) +GEN_VEXT_VV_ENV(vfsub_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfsub_vv_h, 2) GEN_VEXT_VV_ENV(vfsub_vv_w, 4) GEN_VEXT_VV_ENV(vfsub_vv_d, 8) +RVVCALL(OPFVF2, vfsub_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_sub) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) +GEN_VEXT_VF(vfsub_vf_h_bf16, 2) GEN_VEXT_VF(vfsub_vf_h, 2) GEN_VEXT_VF(vfsub_vf_w, 4) GEN_VEXT_VF(vfsub_vf_d, 8) +static uint16_t bfloat16_rsub(uint16_t a, uint16_t b, float_status * s) +{ + return bfloat16_sub(b, a, s); +} + static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { return float16_sub(b, a, s); @@ -3218,14 +3260,22 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) return float64_sub(b, a, s); } +RVVCALL(OPFVF2, vfrsub_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_rsub) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) +GEN_VEXT_VF(vfrsub_vf_h_bf16, 2) GEN_VEXT_VF(vfrsub_vf_h, 2) GEN_VEXT_VF(vfrsub_vf_w, 4) GEN_VEXT_VF(vfrsub_vf_d, 8) /* Vector Widening Floating-Point Add/Subtract Instructions */ +static uint32_t vfwadd16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_add(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), s); +} + static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) { return float32_add(float16_to_float32(a, true, s), @@ -3239,15 +3289,25 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) } +RVVCALL(OPFVV2, vfwadd_vv_h_bf16, WOP_UUU_H, H4, H2, H2, vfwadd16_bf16) RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) +GEN_VEXT_VV_ENV(vfwadd_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) +RVVCALL(OPFVF2, vfwadd_vf_h_bf16, WOP_UUU_H, H4, H2, vfwadd16_bf16) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) +GEN_VEXT_VF(vfwadd_vf_h_bf16, 4) GEN_VEXT_VF(vfwadd_vf_h, 4) GEN_VEXT_VF(vfwadd_vf_w, 8) +static uint32_t vfwsub16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_sub(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), s); +} + static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { return float32_sub(float16_to_float32(a, true, s), @@ -3261,15 +3321,24 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) } +RVVCALL(OPFVV2, vfwsub_vv_h_bf16, WOP_UUU_H, H4, H2, H2, vfwsub16_bf16) RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) +GEN_VEXT_VV_ENV(vfwsub_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) +RVVCALL(OPFVF2, vfwsub_vf_h_bf16, WOP_UUU_H, H4, H2, vfwsub16_bf16) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) +GEN_VEXT_VF(vfwsub_vf_h_bf16, 4) GEN_VEXT_VF(vfwsub_vf_h, 4) GEN_VEXT_VF(vfwsub_vf_w, 8) +static uint32_t vfwaddw16_bf16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_add(a, bfloat16_to_float32(b, s), s); +} + static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { return float32_add(a, float16_to_float32(b, true, s), s); @@ -3280,15 +3349,24 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) return float64_add(a, float32_to_float64(b, s), s); } +RVVCALL(OPFVV2, vfwadd_wv_h_bf16, WOP_WUUU_H, H4, H2, H2, vfwaddw16_bf16) RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) +GEN_VEXT_VV_ENV(vfwadd_wv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) +RVVCALL(OPFVF2, vfwadd_wf_h_bf16, WOP_WUUU_H, H4, H2, vfwaddw16_bf16) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) +GEN_VEXT_VF(vfwadd_wf_h_bf16, 4) GEN_VEXT_VF(vfwadd_wf_h, 4) GEN_VEXT_VF(vfwadd_wf_w, 8) +static uint32_t vfwsubw16_bf16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_sub(a, bfloat16_to_float32(b, s), s); +} + static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { return float32_sub(a, float16_to_float32(b, true, s), s); @@ -3299,25 +3377,33 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) return float64_sub(a, float32_to_float64(b, s), s); } +RVVCALL(OPFVV2, vfwsub_wv_h_bf16, WOP_WUUU_H, H4, H2, H2, vfwsubw16_bf16) RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) +GEN_VEXT_VV_ENV(vfwsub_wv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) +RVVCALL(OPFVF2, vfwsub_wf_h_bf16, WOP_WUUU_H, H4, H2, vfwsubw16_bf16) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) +GEN_VEXT_VF(vfwsub_wf_h_bf16, 4) GEN_VEXT_VF(vfwsub_wf_h, 4) GEN_VEXT_VF(vfwsub_wf_w, 8) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +RVVCALL(OPFVV2, vfmul_vv_h_bf16, OP_UUU_H, H2, H2, H2, bfloat16_mul) RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) +GEN_VEXT_VV_ENV(vfmul_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmul_vv_h, 2) GEN_VEXT_VV_ENV(vfmul_vv_w, 4) GEN_VEXT_VV_ENV(vfmul_vv_d, 8) +RVVCALL(OPFVF2, vfmul_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_mul) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) +GEN_VEXT_VF(vfmul_vf_h_bf16, 2) GEN_VEXT_VF(vfmul_vf_h, 2) GEN_VEXT_VF(vfmul_vf_w, 4) GEN_VEXT_VF(vfmul_vf_d, 8) @@ -3358,6 +3444,12 @@ GEN_VEXT_VF(vfrdiv_vf_w, 4) GEN_VEXT_VF(vfrdiv_vf_d, 8) /* Vector Widening Floating-Point Multiply */ +static uint32_t vfwmul16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_mul(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), s); +} + static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) { return float32_mul(float16_to_float32(a, true, s), @@ -3370,12 +3462,17 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) float32_to_float64(b, s), s); } + +RVVCALL(OPFVV2, vfwmul_vv_h_bf16, WOP_UUU_H, H4, H2, H2, vfwmul16_bf16) RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) +GEN_VEXT_VV_ENV(vfwmul_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) +RVVCALL(OPFVF2, vfwmul_vf_h_bf16, WOP_UUU_H, H4, H2, vfwmul16_bf16) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) +GEN_VEXT_VF(vfwmul_vf_h_bf16, 4) GEN_VEXT_VF(vfwmul_vf_h, 4) GEN_VEXT_VF(vfwmul_vf_w, 8) @@ -3390,6 +3487,12 @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ } +static uint16_t fmacc16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(a, b, d, 0, s); +} + static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(a, b, d, 0, s); @@ -3405,9 +3508,11 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(a, b, d, 0, s); } +RVVCALL(OPFVV3, vfmacc_vv_h_bf16, OP_UUU_H, H2, H2, H2, fmacc16_bf16) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) +GEN_VEXT_VV_ENV(vfmacc_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) @@ -3421,13 +3526,22 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ } +RVVCALL(OPFVF3, vfmacc_vf_h_bf16, OP_UUU_H, H2, H2, fmacc16_bf16) RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) +GEN_VEXT_VF(vfmacc_vf_h_bf16, 2) GEN_VEXT_VF(vfmacc_vf_h, 2) GEN_VEXT_VF(vfmacc_vf_w, 4) GEN_VEXT_VF(vfmacc_vf_d, 8) +static uint16_t fnmacc16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); +} + static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(a, b, d, float_muladd_negate_c | @@ -3446,19 +3560,29 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfnmacc_vv_h_bf16, OP_UUU_H, H2, H2, H2, fnmacc16_bf16) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) +GEN_VEXT_VV_ENV(vfnmacc_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) +RVVCALL(OPFVF3, vfnmacc_vf_h_bf16, OP_UUU_H, H2, H2, fnmacc16_bf16) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) +GEN_VEXT_VF(vfnmacc_vf_h_bf16, 2) GEN_VEXT_VF(vfnmacc_vf_h, 2) GEN_VEXT_VF(vfnmacc_vf_w, 4) GEN_VEXT_VF(vfnmacc_vf_d, 8) +static uint16_t fmsac16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(a, b, d, float_muladd_negate_c, s); +} + static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(a, b, d, float_muladd_negate_c, s); @@ -3474,19 +3598,29 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(a, b, d, float_muladd_negate_c, s); } +RVVCALL(OPFVV3, vfmsac_vv_h_bf16, OP_UUU_H, H2, H2, H2, fmsac16_bf16) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) +GEN_VEXT_VV_ENV(vfmsac_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) +RVVCALL(OPFVF3, vfmsac_vf_h_bf16, OP_UUU_H, H2, H2, fmsac16_bf16) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) +GEN_VEXT_VF(vfmsac_vf_h_bf16, 2) GEN_VEXT_VF(vfmsac_vf_h, 2) GEN_VEXT_VF(vfmsac_vf_w, 4) GEN_VEXT_VF(vfmsac_vf_d, 8) +static uint16_t fnmsac16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(a, b, d, float_muladd_negate_product, s); +} + static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(a, b, d, float_muladd_negate_product, s); @@ -3502,19 +3636,29 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(a, b, d, float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfnmsac_vv_h_bf16, OP_UUU_H, H2, H2, H2, fnmsac16_bf16) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) +GEN_VEXT_VV_ENV(vfnmsac_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) +RVVCALL(OPFVF3, vfnmsac_vf_h_bf16, OP_UUU_H, H2, H2, fnmsac16_bf16) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) +GEN_VEXT_VF(vfnmsac_vf_h_bf16, 2) GEN_VEXT_VF(vfnmsac_vf_h, 2) GEN_VEXT_VF(vfnmsac_vf_w, 4) GEN_VEXT_VF(vfnmsac_vf_d, 8) +static uint16_t fmadd16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(d, b, a, 0, s); +} + static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(d, b, a, 0, s); @@ -3530,19 +3674,30 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(d, b, a, 0, s); } +RVVCALL(OPFVV3, vfmadd_vv_h_bf16, OP_UUU_H, H2, H2, H2, fmadd16_bf16) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) +GEN_VEXT_VV_ENV(vfmadd_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) +RVVCALL(OPFVF3, vfmadd_vf_h_bf16, OP_UUU_H, H2, H2, fmadd16_bf16) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) +GEN_VEXT_VF(vfmadd_vf_h_bf16, 2) GEN_VEXT_VF(vfmadd_vf_h, 2) GEN_VEXT_VF(vfmadd_vf_w, 4) GEN_VEXT_VF(vfmadd_vf_d, 8) +static uint16_t fnmadd16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); +} + static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(d, b, a, float_muladd_negate_c | @@ -3561,19 +3716,29 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfnmadd_vv_h_bf16, OP_UUU_H, H2, H2, H2, fnmadd16_bf16) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) +GEN_VEXT_VV_ENV(vfnmadd_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) +RVVCALL(OPFVF3, vfnmadd_vf_h_bf16, OP_UUU_H, H2, H2, fnmadd16_bf16) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) +GEN_VEXT_VF(vfnmadd_vf_h_bf16, 2) GEN_VEXT_VF(vfnmadd_vf_h, 2) GEN_VEXT_VF(vfnmadd_vf_w, 4) GEN_VEXT_VF(vfnmadd_vf_d, 8) +static uint16_t fmsub16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(d, b, a, float_muladd_negate_c, s); +} + static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(d, b, a, float_muladd_negate_c, s); @@ -3589,19 +3754,29 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(d, b, a, float_muladd_negate_c, s); } +RVVCALL(OPFVV3, vfmsub_vv_h_bf16, OP_UUU_H, H2, H2, H2, fmsub16_bf16) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) +GEN_VEXT_VV_ENV(vfmsub_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) +RVVCALL(OPFVF3, vfmsub_vf_h_bf16, OP_UUU_H, H2, H2, fmsub16_bf16) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) +GEN_VEXT_VF(vfmsub_vf_h_bf16, 2) GEN_VEXT_VF(vfmsub_vf_h, 2) GEN_VEXT_VF(vfmsub_vf_w, 4) GEN_VEXT_VF(vfmsub_vf_d, 8) +static uint16_t fnmsub16_bf16(uint16_t a, uint16_t b, uint16_t d, + float_status *s) +{ + return bfloat16_muladd(d, b, a, float_muladd_negate_product, s); +} + static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { return float16_muladd(d, b, a, float_muladd_negate_product, s); @@ -3617,15 +3792,19 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) return float64_muladd(d, b, a, float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfnmsub_vv_h_bf16, OP_UUU_H, H2, H2, H2, fnmsub16_bf16) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) +GEN_VEXT_VV_ENV(vfnmsub_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) +RVVCALL(OPFVF3, vfnmsub_vf_h_bf16, OP_UUU_H, H2, H2, fnmsub16_bf16) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) +GEN_VEXT_VF(vfnmsub_vf_h_bf16, 2) GEN_VEXT_VF(vfnmsub_vf_h, 2) GEN_VEXT_VF(vfnmsub_vf_w, 4) GEN_VEXT_VF(vfnmsub_vf_d, 8) @@ -3663,6 +3842,15 @@ GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) GEN_VEXT_VF(vfwmaccbf16_vf, 4) +static uint32_t fwnmacc16_bf16(uint16_t a, uint16_t b, uint32_t d, + float_status *s) +{ + return float32_muladd(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), d, + float_muladd_negate_c | float_muladd_negate_product, + s); +} + static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), @@ -3678,15 +3866,27 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfwnmacc_vv_h_bf16, WOP_UUU_H, H4, H2, H2, fwnmacc16_bf16) RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) +RVVCALL(OPFVF3, vfwnmacc_vf_h_bf16, WOP_UUU_H, H4, H2, fwnmacc16_bf16) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_VEXT_VF(vfwnmacc_vf_h_bf16, 4) GEN_VEXT_VF(vfwnmacc_vf_h, 4) GEN_VEXT_VF(vfwnmacc_vf_w, 8) +static uint32_t fwmsac16_bf16(uint16_t a, uint16_t b, uint32_t d, + float_status *s) +{ + return float32_muladd(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), d, + float_muladd_negate_c, s); +} + static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), @@ -3701,15 +3901,27 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) float_muladd_negate_c, s); } +RVVCALL(OPFVV3, vfwmsac_vv_h_bf16, WOP_UUU_H, H4, H2, H2, fwmsac16_bf16) RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_VEXT_VV_ENV(vfwmsac_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) +RVVCALL(OPFVF3, vfwmsac_vf_h_bf16, WOP_UUU_H, H4, H2, fwmsac16_bf16) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_VEXT_VF(vfwmsac_vf_h_bf16, 4) GEN_VEXT_VF(vfwmsac_vf_h, 4) GEN_VEXT_VF(vfwmsac_vf_w, 8) +static uint32_t fwnmsac16_bf16(uint16_t a, uint16_t b, uint32_t d, + float_status *s) +{ + return float32_muladd(bfloat16_to_float32(a, s), + bfloat16_to_float32(b, s), d, + float_muladd_negate_product, s); +} + static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), @@ -3724,12 +3936,16 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) float_muladd_negate_product, s); } +RVVCALL(OPFVV3, vfwnmsac_vv_h_bf16, WOP_UUU_H, H4, H2, H2, fwnmsac16_bf16) RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h_bf16, 4) GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) +RVVCALL(OPFVF3, vfwnmsac_vf_h_bf16, WOP_UUU_H, H4, H2, fwnmsac16_bf16) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_VEXT_VF(vfwnmsac_vf_h_bf16, 4) GEN_VEXT_VF(vfwnmsac_vf_h, 4) GEN_VEXT_VF(vfwnmsac_vf_w, 8) @@ -3835,6 +4051,46 @@ static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) return val; } +static bfloat16 frsqrt7_h_bf16(bfloat16 f, float_status *s) +{ + int exp_size = 8, frac_size = 7; + bool sign = bfloat16_is_neg(f); + + /* + * frsqrt7(sNaN) = canonical NaN + * frsqrt7(-inf) = canonical NaN + * frsqrt7(-normal) = canonical NaN + * frsqrt7(-subnormal) = canonical NaN + */ + if (bfloat16_is_signaling_nan(f, s) || + (bfloat16_is_infinity(f) && sign) || + (bfloat16_is_normal(f) && sign) || + (bfloat16_is_zero_or_denormal(f) && !bfloat16_is_zero(f) && sign)) { + s->float_exception_flags |= float_flag_invalid; + return bfloat16_default_nan(s); + } + + /* frsqrt7(qNaN) = canonical NaN */ + if (bfloat16_is_quiet_nan(f, s)) { + return bfloat16_default_nan(s); + } + + /* frsqrt7(+-0) = +-inf */ + if (bfloat16_is_zero(f)) { + s->float_exception_flags |= float_flag_divbyzero; + return bfloat16_set_sign(bfloat16_infinity, sign); + } + + /* frsqrt7(+inf) = +0 */ + if (bfloat16_is_infinity(f) && !sign) { + return bfloat16_set_sign(bfloat16_zero, sign); + } + + /* +normal, +subnormal */ + uint64_t val = frsqrt7(f, exp_size, frac_size); + return make_float16(val); +} + static float16 frsqrt7_h(float16 f, float_status *s) { int exp_size = 5, frac_size = 10; @@ -3955,9 +4211,11 @@ static float64 frsqrt7_d(float64 f, float_status *s) return make_float64(val); } +RVVCALL(OPFVV1, vfrsqrt7_v_h_bf16, OP_UU_H, H2, H2, frsqrt7_h_bf16) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) +GEN_VEXT_V_ENV(vfrsqrt7_v_h_bf16, 2) GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) @@ -4050,6 +4308,38 @@ static uint64_t frec7(uint64_t f, int exp_size, int frac_size, return val; } +static bfloat16 frec7_h_bf16(bfloat16 f, float_status *s) +{ + int exp_size = 8, frac_size = 7; + bool sign = bfloat16_is_neg(f); + + /* frec7(+-inf) = +-0 */ + if (bfloat16_is_infinity(f)) { + return bfloat16_set_sign(bfloat16_zero, sign); + } + + /* frec7(+-0) = +-inf */ + if (bfloat16_is_zero(f)) { + s->float_exception_flags |= float_flag_divbyzero; + return bfloat16_set_sign(bfloat16_infinity, sign); + } + + /* frec7(sNaN) = canonical NaN */ + if (bfloat16_is_signaling_nan(f, s)) { + s->float_exception_flags |= float_flag_invalid; + return bfloat16_default_nan(s); + } + + /* frec7(qNaN) = canonical NaN */ + if (bfloat16_is_quiet_nan(f, s)) { + return bfloat16_default_nan(s); + } + + /* +-normal, +-subnormal */ + uint64_t val = frec7(f, exp_size, frac_size, s); + return make_float16(val); +} + static float16 frec7_h(float16 f, float_status *s) { int exp_size = 5, frac_size = 10; @@ -4146,36 +4436,46 @@ static float64 frec7_d(float64 f, float_status *s) return make_float64(val); } +RVVCALL(OPFVV1, vfrec7_v_h_bf16, OP_UU_H, H2, H2, frec7_h_bf16) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) +GEN_VEXT_V_ENV(vfrec7_v_h_bf16, 2) GEN_VEXT_V_ENV(vfrec7_v_h, 2) GEN_VEXT_V_ENV(vfrec7_v_w, 4) GEN_VEXT_V_ENV(vfrec7_v_d, 8) /* Vector Floating-Point MIN/MAX Instructions */ +RVVCALL(OPFVV2, vfmin_vv_h_bf16, OP_UUU_H, H2, H2, H2, bfloat16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) +GEN_VEXT_VV_ENV(vfmin_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmin_vv_h, 2) GEN_VEXT_VV_ENV(vfmin_vv_w, 4) GEN_VEXT_VV_ENV(vfmin_vv_d, 8) +RVVCALL(OPFVF2, vfmin_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) +GEN_VEXT_VF(vfmin_vf_h_bf16, 2) GEN_VEXT_VF(vfmin_vf_h, 2) GEN_VEXT_VF(vfmin_vf_w, 4) GEN_VEXT_VF(vfmin_vf_d, 8) +RVVCALL(OPFVV2, vfmax_vv_h_bf16, OP_UUU_H, H2, H2, H2, bfloat16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) +GEN_VEXT_VV_ENV(vfmax_vv_h_bf16, 2) GEN_VEXT_VV_ENV(vfmax_vv_h, 2) GEN_VEXT_VV_ENV(vfmax_vv_w, 4) GEN_VEXT_VV_ENV(vfmax_vv_d, 8) +RVVCALL(OPFVF2, vfmax_vf_h_bf16, OP_UUU_H, H2, H2, bfloat16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) +GEN_VEXT_VF(vfmax_vf_h_bf16, 2) GEN_VEXT_VF(vfmax_vf_h, 2) GEN_VEXT_VF(vfmax_vf_w, 4) GEN_VEXT_VF(vfmax_vf_d, 8) @@ -4304,6 +4604,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ } +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h_bf16, uint16_t, H2, bfloat16_eq_quiet) GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) @@ -4345,10 +4646,17 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ } \ } +GEN_VEXT_CMP_VF(vmfeq_vf_h_bf16, uint16_t, H2, bfloat16_eq_quiet) GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) +static bool vmfne16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = bfloat16_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + static bool vmfne16(uint16_t a, uint16_t b, float_status *s) { FloatRelation compare = float16_compare_quiet(a, b, s); @@ -4367,27 +4675,39 @@ static bool vmfne64(uint64_t a, uint64_t b, float_status *s) return compare != float_relation_equal; } +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h_bf16, uint16_t, H2, vmfne16_bf16) GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VF(vmfne_vf_h_bf16, uint16_t, H2, vmfne16_bf16) GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h_bf16, uint16_t, H2, bfloat16_lt) GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VF(vmflt_vf_h_bf16, uint16_t, H2, bfloat16_lt) GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h_bf16, uint16_t, H2, bfloat16_le) GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) +GEN_VEXT_CMP_VF(vmfle_vf_h_bf16, uint16_t, H2, bfloat16_le) GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) +static bool vmfgt16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = bfloat16_compare(a, b, s); + return compare == float_relation_greater; +} + static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) { FloatRelation compare = float16_compare(a, b, s); @@ -4406,10 +4726,18 @@ static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) return compare == float_relation_greater; } +GEN_VEXT_CMP_VF(vmfgt_vf_h_bf16, uint16_t, H2, vmfgt16_bf16) GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) +static bool vmfge16_bf16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = bfloat16_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + static bool vmfge16(uint16_t a, uint16_t b, float_status *s) { FloatRelation compare = float16_compare(a, b, s); @@ -4431,11 +4759,31 @@ static bool vmfge64(uint64_t a, uint64_t b, float_status *s) compare == float_relation_equal; } +GEN_VEXT_CMP_VF(vmfge_vf_h_bf16, uint16_t, H2, vmfge16_bf16) GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) /* Vector Floating-Point Classify Instruction */ +target_ulong fclass_h_bf16(uint64_t frs1) +{ + bfloat16 f = frs1; + bool sign = bfloat16_is_neg(f); + + if (bfloat16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (bfloat16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (bfloat16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (bfloat16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return bfloat16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + target_ulong fclass_h(uint64_t frs1) { float16 f = frs1; @@ -4493,9 +4841,11 @@ target_ulong fclass_d(uint64_t frs1) } } +RVVCALL(OPIVV1, vfclass_v_h_bf16, OP_UU_H, H2, H2, fclass_h_bf16) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) +GEN_VEXT_V(vfclass_v_h_bf16, 2) GEN_VEXT_V(vfclass_v_h, 2) GEN_VEXT_V(vfclass_v_w, 4) GEN_VEXT_V(vfclass_v_d, 8) @@ -4586,17 +4936,21 @@ GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) +RVVCALL(OPFVV1, vfwcvt_f_xu_v_b_bf16, WOP_UU_B, H2, H1, uint8_to_bfloat16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b_bf16, 2) GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) +RVVCALL(OPFVV1, vfwcvt_f_x_v_b_bf16, WOP_UU_B, H2, H1, int8_to_bfloat16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b_bf16, 2) GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) @@ -4623,17 +4977,21 @@ GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) #define NOP_UU_W uint32_t, uint64_t, uint64_t /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) +RVVCALL(OPFVV1, vfncvt_xu_f_w_b_bf16, NOP_UU_B, H1, H2, bfloat16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b_bf16, 1) GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) +RVVCALL(OPFVV1, vfncvt_x_f_w_b_bf16, NOP_UU_B, H1, H2, bfloat16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b_bf16, 1) GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) diff --git a/target/riscv/xlrbr.decode b/target/riscv/xlrbr.decode new file mode 100644 index 0000000000..893ce6ec71 --- /dev/null +++ b/target/riscv/xlrbr.decode @@ -0,0 +1,30 @@ +# +# Translation routines for the instructions of the xlrbr ISA extension +# (matching the draft encodings in the standard reserved encoding space for the +# unratified Zbr CRC32 bitmanip extension version 0.93). +# +# Copyright (c) 2026 Rivos Inc. +# +# SPDX-License-Identifier: GPL-2.0-or-later + +# Fields: +%rs1 15:5 +%rd 7:5 + +# Argument sets: +&r2 rd rs1 !extern + +# Formats 32: +@r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd + +# *** RV32 xlrbr extension *** +crc32_b 0110000 10000 ..... 001 ..... 0010011 @r2 +crc32_h 0110000 10001 ..... 001 ..... 0010011 @r2 +crc32_w 0110000 10010 ..... 001 ..... 0010011 @r2 +crc32c_b 0110000 11000 ..... 001 ..... 0010011 @r2 +crc32c_h 0110000 11001 ..... 001 ..... 0010011 @r2 +crc32c_w 0110000 11010 ..... 001 ..... 0010011 @r2 + +# *** RV64 xlrbr extension (in addition to RV32) *** +crc32_d 0110000 10011 ..... 001 ..... 0010011 @r2 +crc32c_d 0110000 11011 ..... 001 ..... 0010011 @r2 diff --git a/tests/tcg/riscv64/Makefile.softmmu-target b/tests/tcg/riscv64/Makefile.softmmu-target index eb1ce6504a..82be8a2c91 100644 --- a/tests/tcg/riscv64/Makefile.softmmu-target +++ b/tests/tcg/riscv64/Makefile.softmmu-target @@ -36,5 +36,10 @@ run-plugin-interruptedmemory: interruptedmemory $(QEMU) -plugin ../plugins/libdiscons.so -d plugin -D $<.pout \ $(QEMU_OPTS)$<) +EXTRA_RUNS += run-test-crc32 +comma:= , +run-test-crc32: test-crc32 + $(call run-test, $<, $(QEMU) -cpu rv64$(comma)xlrbr=true $(QEMU_OPTS)$<) + # We don't currently support the multiarch system tests undefine MULTIARCH_TESTS diff --git a/tests/tcg/riscv64/test-crc32.S b/tests/tcg/riscv64/test-crc32.S new file mode 100644 index 0000000000..70d70b16a9 --- /dev/null +++ b/tests/tcg/riscv64/test-crc32.S @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2026 lowRISC CIC + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#define crc32(op, rd, rs1) .insn r 19, 1, 48, rd, rs1, x##op + +#define crc32_b(rd, rs1) crc32(16, rd, rs1) +#define crc32_h(rd, rs1) crc32(17, rd, rs1) +#define crc32_w(rd, rs1) crc32(18, rd, rs1) +#define crc32_d(rd, rs1) crc32(19, rd, rs1) +#define crc32c_b(rd, rs1) crc32(24, rd, rs1) +#define crc32c_h(rd, rs1) crc32(25, rd, rs1) +#define crc32c_w(rd, rs1) crc32(26, rd, rs1) +#define crc32c_d(rd, rs1) crc32(27, rd, rs1) + + .option norvc + + .text + .globl _start +_start: + lla t0, trap + csrw mtvec, t0 + + li t0, 0x34e24a2cd65650d4 + + crc32_b (t0, t0) + crc32_h (t0, t0) + crc32_w (t0, t0) + crc32_d (t0, t0) + crc32c_b (t0, t0) + crc32c_h (t0, t0) + crc32c_w (t0, t0) + crc32c_d (t0, t0) + + li t1, 0x68167e78 + + li a0, 0 + beq t0, t1, _exit +fail: + li a0, 1 +_exit: + lla a1, semiargs + li t0, 0x20026 # ADP_Stopped_ApplicationExit + sd t0, 0(a1) + sd a0, 8(a1) + li a0, 0x20 # TARGET_SYS_EXIT_EXTENDED + + # Semihosting call sequence + .balign 16 + slli zero, zero, 0x1f + ebreak + srai zero, zero, 0x7 + j . + + .data + .balign 16 +semiargs: + .space 16 + +trap: + csrr t0, mepc + addi t0, t0, 4 + mret diff --git a/util/crc32.c b/util/crc32.c new file mode 100644 index 0000000000..a0f91ff09c --- /dev/null +++ b/util/crc32.c @@ -0,0 +1,81 @@ +/* + * Constants for computing CRC32 checksums + * + * Copyright (c) 2026 QEMU contributors + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/crc32.h" + +/* + * CRC-32 table (reversed; polynomial 0xEDB88320). + */ + +const uint32_t crc32_table[256] = { + 0x00000000u, 0x77073096u, 0xee0e612cu, 0x990951bau, + 0x076dc419u, 0x706af48fu, 0xe963a535u, 0x9e6495a3u, + 0x0edb8832u, 0x79dcb8a4u, 0xe0d5e91eu, 0x97d2d988u, + 0x09b64c2bu, 0x7eb17cbdu, 0xe7b82d07u, 0x90bf1d91u, + 0x1db71064u, 0x6ab020f2u, 0xf3b97148u, 0x84be41deu, + 0x1adad47du, 0x6ddde4ebu, 0xf4d4b551u, 0x83d385c7u, + 0x136c9856u, 0x646ba8c0u, 0xfd62f97au, 0x8a65c9ecu, + 0x14015c4fu, 0x63066cd9u, 0xfa0f3d63u, 0x8d080df5u, + 0x3b6e20c8u, 0x4c69105eu, 0xd56041e4u, 0xa2677172u, + 0x3c03e4d1u, 0x4b04d447u, 0xd20d85fdu, 0xa50ab56bu, + 0x35b5a8fau, 0x42b2986cu, 0xdbbbc9d6u, 0xacbcf940u, + 0x32d86ce3u, 0x45df5c75u, 0xdcd60dcfu, 0xabd13d59u, + 0x26d930acu, 0x51de003au, 0xc8d75180u, 0xbfd06116u, + 0x21b4f4b5u, 0x56b3c423u, 0xcfba9599u, 0xb8bda50fu, + 0x2802b89eu, 0x5f058808u, 0xc60cd9b2u, 0xb10be924u, + 0x2f6f7c87u, 0x58684c11u, 0xc1611dabu, 0xb6662d3du, + 0x76dc4190u, 0x01db7106u, 0x98d220bcu, 0xefd5102au, + 0x71b18589u, 0x06b6b51fu, 0x9fbfe4a5u, 0xe8b8d433u, + 0x7807c9a2u, 0x0f00f934u, 0x9609a88eu, 0xe10e9818u, + 0x7f6a0dbbu, 0x086d3d2du, 0x91646c97u, 0xe6635c01u, + 0x6b6b51f4u, 0x1c6c6162u, 0x856530d8u, 0xf262004eu, + 0x6c0695edu, 0x1b01a57bu, 0x8208f4c1u, 0xf50fc457u, + 0x65b0d9c6u, 0x12b7e950u, 0x8bbeb8eau, 0xfcb9887cu, + 0x62dd1ddfu, 0x15da2d49u, 0x8cd37cf3u, 0xfbd44c65u, + 0x4db26158u, 0x3ab551ceu, 0xa3bc0074u, 0xd4bb30e2u, + 0x4adfa541u, 0x3dd895d7u, 0xa4d1c46du, 0xd3d6f4fbu, + 0x4369e96au, 0x346ed9fcu, 0xad678846u, 0xda60b8d0u, + 0x44042d73u, 0x33031de5u, 0xaa0a4c5fu, 0xdd0d7cc9u, + 0x5005713cu, 0x270241aau, 0xbe0b1010u, 0xc90c2086u, + 0x5768b525u, 0x206f85b3u, 0xb966d409u, 0xce61e49fu, + 0x5edef90eu, 0x29d9c998u, 0xb0d09822u, 0xc7d7a8b4u, + 0x59b33d17u, 0x2eb40d81u, 0xb7bd5c3bu, 0xc0ba6cadu, + 0xedb88320u, 0x9abfb3b6u, 0x03b6e20cu, 0x74b1d29au, + 0xead54739u, 0x9dd277afu, 0x04db2615u, 0x73dc1683u, + 0xe3630b12u, 0x94643b84u, 0x0d6d6a3eu, 0x7a6a5aa8u, + 0xe40ecf0bu, 0x9309ff9du, 0x0a00ae27u, 0x7d079eb1u, + 0xf00f9344u, 0x8708a3d2u, 0x1e01f268u, 0x6906c2feu, + 0xf762575du, 0x806567cbu, 0x196c3671u, 0x6e6b06e7u, + 0xfed41b76u, 0x89d32be0u, 0x10da7a5au, 0x67dd4accu, + 0xf9b9df6fu, 0x8ebeeff9u, 0x17b7be43u, 0x60b08ed5u, + 0xd6d6a3e8u, 0xa1d1937eu, 0x38d8c2c4u, 0x4fdff252u, + 0xd1bb67f1u, 0xa6bc5767u, 0x3fb506ddu, 0x48b2364bu, + 0xd80d2bdau, 0xaf0a1b4cu, 0x36034af6u, 0x41047a60u, + 0xdf60efc3u, 0xa867df55u, 0x316e8eefu, 0x4669be79u, + 0xcb61b38cu, 0xbc66831au, 0x256fd2a0u, 0x5268e236u, + 0xcc0c7795u, 0xbb0b4703u, 0x220216b9u, 0x5505262fu, + 0xc5ba3bbeu, 0xb2bd0b28u, 0x2bb45a92u, 0x5cb36a04u, + 0xc2d7ffa7u, 0xb5d0cf31u, 0x2cd99e8bu, 0x5bdeae1du, + 0x9b64c2b0u, 0xec63f226u, 0x756aa39cu, 0x026d930au, + 0x9c0906a9u, 0xeb0e363fu, 0x72076785u, 0x05005713u, + 0x95bf4a82u, 0xe2b87a14u, 0x7bb12baeu, 0x0cb61b38u, + 0x92d28e9bu, 0xe5d5be0du, 0x7cdcefb7u, 0x0bdbdf21u, + 0x86d3d2d4u, 0xf1d4e242u, 0x68ddb3f8u, 0x1fda836eu, + 0x81be16cdu, 0xf6b9265bu, 0x6fb077e1u, 0x18b74777u, + 0x88085ae6u, 0xff0f6a70u, 0x66063bcau, 0x11010b5cu, + 0x8f659effu, 0xf862ae69u, 0x616bffd3u, 0x166ccf45u, + 0xa00ae278u, 0xd70dd2eeu, 0x4e048354u, 0x3903b3c2u, + 0xa7672661u, 0xd06016f7u, 0x4969474du, 0x3e6e77dbu, + 0xaed16a4au, 0xd9d65adcu, 0x40df0b66u, 0x37d83bf0u, + 0xa9bcae53u, 0xdebb9ec5u, 0x47b2cf7fu, 0x30b5ffe9u, + 0xbdbdf21cu, 0xcabac28au, 0x53b39330u, 0x24b4a3a6u, + 0xbad03605u, 0xcdd70693u, 0x54de5729u, 0x23d967bfu, + 0xb3667a2eu, 0xc4614ab8u, 0x5d681b02u, 0x2a6f2b94u, + 0xb40bbe37u, 0xc30c8ea1u, 0x5a05df1bu, 0x2d02ef8du +}; diff --git a/util/crc32c.c b/util/crc32c.c index ea7f345de8..f40597f80d 100644 --- a/util/crc32c.c +++ b/util/crc32c.c @@ -1,7 +1,7 @@ /* * Castagnoli CRC32C Checksum Algorithm * - * Polynomial: 0x11EDC6F41 + * Polynomial: 0x1EDC6F41 * * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman * "Optimization of Cyclic Redundancy-Check Codes with 24 @@ -37,7 +37,7 @@ * reflect output bytes = true */ -static const uint32_t crc32c_table[256] = { +const uint32_t crc32c_table[256] = { 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, diff --git a/util/meson.build b/util/meson.build index 8bed0267c0..e29cbd948a 100644 --- a/util/meson.build +++ b/util/meson.build @@ -45,6 +45,7 @@ util_ss.add(files('id.c')) util_ss.add(files('qemu-config.c', 'notify.c')) util_ss.add(files('qemu-option.c', 'qemu-progress.c')) util_ss.add(files('keyval.c')) +util_ss.add(files('crc32.c')) util_ss.add(files('crc32c.c')) util_ss.add(files('uuid.c')) util_ss.add(files('getauxval.c')) |
