diff options
author | Andrew Waterman <andrew@sifive.com> | 2025-08-05 19:03:43 -0700 |
---|---|---|
committer | Andrew Waterman <andrew@sifive.com> | 2025-08-05 19:03:43 -0700 |
commit | 107adc15a1e7ab2d83bc95e359f460a998b72d5d (patch) | |
tree | 3dfc0ce49fc6567fb795f309db7163147cb44a21 | |
parent | 113ef64f6724b769ef9298e6a4a9802f7671021f (diff) | |
download | riscv-isa-sim-zvmm.zip riscv-isa-sim-zvmm.tar.gz riscv-isa-sim-zvmm.tar.bz2 |
Implement Zvbdot draft (zvmm)
-rw-r--r-- | disasm/isa_parser.cc | 8 | ||||
-rw-r--r-- | riscv/encoding.h | 51 | ||||
-rw-r--r-- | riscv/insns/vfbdot_vv.h | 13 | ||||
-rw-r--r-- | riscv/insns/vfwbdot_vv.h | 17 | ||||
-rw-r--r-- | riscv/insns/vqbdots_vv.h | 23 | ||||
-rw-r--r-- | riscv/insns/vqbdotu_vv.h | 23 | ||||
-rw-r--r-- | riscv/isa_parser.h | 4 | ||||
-rw-r--r-- | riscv/riscv.mk.in | 7 | ||||
-rw-r--r-- | riscv/v_ext_macros.h | 45 | ||||
-rw-r--r-- | riscv/vector_unit.cc | 9 | ||||
-rw-r--r-- | riscv/vector_unit.h | 2 |
11 files changed, 195 insertions, 7 deletions
diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 24eb5f2..71eea59 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -326,6 +326,14 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvqdotq") { extension_table[EXT_ZVQDOTQ] = true; + } else if (ext_str == "zvqbdot8i") { + extension_table[EXT_ZVQBDOT8I] = true; + } else if (ext_str == "zvqbdot16i") { + extension_table[EXT_ZVQBDOT16I] = true; + } else if (ext_str == "zvfwbdot16bf") { + extension_table[EXT_ZVFWBDOT16BF] = true; + } else if (ext_str == "zvfbdot32f") { + extension_table[EXT_ZVFBDOT32F] = true; } else if (ext_str == "zvkt") { } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; diff --git a/riscv/encoding.h b/riscv/encoding.h index bcc1ace..225aabb 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (8899b32) + * https://github.com/riscv/riscv-opcodes (1d4b38c) */ #ifndef RISCV_CSR_ENCODING_H @@ -199,6 +199,7 @@ #define MSTATEEN0_CTR 0x0040000000000000 #define MSTATEEN0_PRIV113 0x0100000000000000 #define MSTATEEN0_PRIV114 0x0080000000000000 +#define MSTATEEN0_IMSIC 0x0400000000000000 #define MSTATEEN0_HCONTEXT 0x0200000000000000 #define MSTATEEN0_AIA 0x0800000000000000 #define MSTATEEN0_CSRIND 0x1000000000000000 @@ -208,6 +209,7 @@ #define MSTATEEN0H_CTR 0x00400000 #define MSTATEEN0H_PRIV113 0x01000000 #define MSTATEEN0H_PRIV114 0x00800000 +#define MSTATEEN0H_IMSIC 0x04000000 #define MSTATEEN0H_HCONTEXT 0x02000000 #define MSTATEEN0H_AIA 0x08000000 #define MSTATEEN0H_CSRIND 0x10000000 @@ -268,11 +270,25 @@ #define SISELECT_SMCDELEG_HPMEVENT_3 0x43 #define SISELECT_SMCDELEG_END 0x5f +#define MISELECT_IPRIO 0x30 +#define MISELECT_IPRIO_TOP 0x3f +#define MISELECT_IMSIC 0x70 +#define MISELECT_IMSIC_TOP 0xff + +#define SISELECT_IPRIO 0x30 +#define SISELECT_IPRIO_TOP 0x3f +#define 
SISELECT_IMSIC 0x70 +#define SISELECT_IMSIC_TOP 0xff + +#define VSISELECT_IMSIC 0x70 +#define VSISELECT_IMSIC_TOP 0xff + #define HSTATEEN0_CS 0x00000001 #define HSTATEEN0_FCSR 0x00000002 #define HSTATEEN0_JVT 0x00000004 #define HSTATEEN0_CTR 0x0040000000000000 #define HSTATEEN0_SCONTEXT 0x0200000000000000 +#define HSTATEEN0_IMSIC 0x0400000000000000 #define HSTATEEN0_AIA 0x0800000000000000 #define HSTATEEN0_CSRIND 0x1000000000000000 #define HSTATEEN0_SENVCFG 0x4000000000000000 @@ -280,6 +296,7 @@ #define HSTATEEN0H_CTR 0x00400000 #define HSTATEEN0H_SCONTEXT 0x02000000 +#define HSTATEEN0H_IMSIC 0x04000000 #define HSTATEEN0H_AIA 0x08000000 #define HSTATEEN0H_CSRIND 0x10000000 #define HSTATEEN0H_SENVCFG 0x40000000 @@ -1671,6 +1688,8 @@ #define MASK_VFADD_VF 0xfc00707f #define MATCH_VFADD_VV 0x1057 #define MASK_VFADD_VV 0xfc00707f +#define MATCH_VFBDOT_VV 0xac001077 +#define MASK_VFBDOT_VV 0xfc00707f #define MATCH_VFCLASS_V 0x4c081057 #define MASK_VFCLASS_V 0xfc0ff07f #define MATCH_VFCVT_F_X_V 0x48019057 @@ -1743,8 +1762,14 @@ #define MASK_VFNCVT_X_F_W 0xfc0ff07f #define MATCH_VFNCVT_XU_F_W 0x48081057 #define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVTBF16_F_F_Q 0x480c9057 +#define MASK_VFNCVTBF16_F_F_Q 0xfc0ff07f #define MATCH_VFNCVTBF16_F_F_W 0x480e9057 #define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f +#define MATCH_VFNCVTBF16_SAT_F_F_Q 0x480d9057 +#define MASK_VFNCVTBF16_SAT_F_F_Q 0xfc0ff07f +#define MATCH_VFNCVTBF16_SAT_F_F_W 0x480f9057 +#define MASK_VFNCVTBF16_SAT_F_F_W 0xfc0ff07f #define MATCH_VFNMACC_VF 0xb4005057 #define MASK_VFNMACC_VF 0xfc00707f #define MATCH_VFNMACC_VV 0xb4001057 @@ -1807,6 +1832,8 @@ #define MASK_VFWADD_WF 0xfc00707f #define MATCH_VFWADD_WV 0xd0001057 #define MASK_VFWADD_WV 0xfc00707f +#define MATCH_VFWBDOT_VV 0xb0001077 +#define MASK_VFWBDOT_VV 0xfc00707f #define MATCH_VFWCVT_F_F_V 0x48061057 #define MASK_VFWCVT_F_F_V 0xfc0ff07f #define MATCH_VFWCVT_F_X_V 0x48059057 @@ -2145,6 +2172,10 @@ #define MASK_VOR_VV 0xfc00707f #define MATCH_VOR_VX 
0x28004057 #define MASK_VOR_VX 0xfc00707f +#define MATCH_VQBDOTS_VV 0xbc000077 +#define MASK_VQBDOTS_VV 0xfc00707f +#define MATCH_VQBDOTU_VV 0xb8000077 +#define MASK_VQBDOTU_VV 0xfc00707f #define MATCH_VQDOT_VV 0xb0002057 #define MASK_VQDOT_VV 0xfc00707f #define MATCH_VQDOT_VX 0xb0006057 @@ -2498,8 +2529,6 @@ #define CSR_VTYPE 0xc21 #define CSR_VLENB 0xc22 #define CSR_SSTATUS 0x100 -#define CSR_SEDELEG 0x102 -#define CSR_SIDELEG 0x103 #define CSR_SIE 0x104 #define CSR_STVEC 0x105 #define CSR_SCOUNTEREN 0x106 @@ -2967,7 +2996,6 @@ #define INSN_FIELD_IMM4 0xf00000 #define INSN_FIELD_IMM5 0x1f00000 #define INSN_FIELD_IMM6 0x3f00000 -#define INSN_FIELD_ZIMM 0xf8000 #define INSN_FIELD_OPCODE 0x7f #define INSN_FIELD_FUNCT7 0xfe000000 #define INSN_FIELD_VD 0xf80 @@ -3033,6 +3061,12 @@ #define INSN_FIELD_C_RS2 0x7c #define INSN_FIELD_C_SREG1 0x380 #define INSN_FIELD_C_SREG2 0x1c +#define INSN_FIELD_RD_P_E 0x18 +#define INSN_FIELD_RS2_P_E 0x18 +#define INSN_FIELD_RD_N0_E 0xf00 +#define INSN_FIELD_C_RS2_E 0x78 +#define INSN_FIELD_RD_E 0xf00 +#define INSN_FIELD_RS2_E 0x1e00000 #define INSN_FIELD_MOP_R_T_30 0x40000000 #define INSN_FIELD_MOP_R_T_27_26 0xc000000 #define INSN_FIELD_MOP_R_T_21_20 0x300000 @@ -3613,6 +3647,7 @@ DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) +DECLARE_INSN(vfbdot_vv, MATCH_VFBDOT_VV, MASK_VFBDOT_VV) DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) @@ -3649,7 +3684,10 @@ DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, 
MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfncvtbf16_f_f_q, MATCH_VFNCVTBF16_F_F_Q, MASK_VFNCVTBF16_F_F_Q) DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W) +DECLARE_INSN(vfncvtbf16_sat_f_f_q, MATCH_VFNCVTBF16_SAT_F_F_Q, MASK_VFNCVTBF16_SAT_F_F_Q) +DECLARE_INSN(vfncvtbf16_sat_f_f_w, MATCH_VFNCVTBF16_SAT_F_F_W, MASK_VFNCVTBF16_SAT_F_F_W) DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) @@ -3681,6 +3719,7 @@ DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) +DECLARE_INSN(vfwbdot_vv, MATCH_VFWBDOT_VV, MASK_VFWBDOT_VV) DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) @@ -3850,6 +3889,8 @@ DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV) DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) +DECLARE_INSN(vqbdots_vv, MATCH_VQBDOTS_VV, MASK_VQBDOTS_VV) +DECLARE_INSN(vqbdotu_vv, MATCH_VQBDOTU_VV, MASK_VQBDOTU_VV) DECLARE_INSN(vqdot_vv, MATCH_VQDOT_VV, MASK_VQDOT_VV) DECLARE_INSN(vqdot_vx, MATCH_VQDOT_VX, MASK_VQDOT_VX) DECLARE_INSN(vqdotsu_vv, MATCH_VQDOTSU_VV, MASK_VQDOTSU_VV) @@ -4051,8 +4092,6 @@ DECLARE_CSR(vl, CSR_VL) DECLARE_CSR(vtype, CSR_VTYPE) DECLARE_CSR(vlenb, CSR_VLENB) DECLARE_CSR(sstatus, CSR_SSTATUS) -DECLARE_CSR(sedeleg, CSR_SEDELEG) -DECLARE_CSR(sideleg, CSR_SIDELEG) DECLARE_CSR(sie, CSR_SIE) DECLARE_CSR(stvec, CSR_STVEC) DECLARE_CSR(scounteren, CSR_SCOUNTEREN) diff --git a/riscv/insns/vfbdot_vv.h b/riscv/insns/vfbdot_vv.h new file mode 100644 index 0000000..c4db161 --- /dev/null +++ 
b/riscv/insns/vfbdot_vv.h @@ -0,0 +1,13 @@ +ZVBDOT_INIT(1); +require_fp; + +switch (P.VU.vsew) { + case 32: { + // TODO replace with bulk-norm routine and invoke ZVBDOT_LOOP instead of ZVBDOT_GENERIC_LOOP + require_extension(EXT_ZVFBDOT32F); + auto macc = [](auto a, auto b, auto c) { return f32_add(c, f32_mul(a, b)); }; + ZVBDOT_GENERIC_LOOP(float32_t, float32_t, float32_t, macc); + break; + } + default: require(false); +} diff --git a/riscv/insns/vfwbdot_vv.h b/riscv/insns/vfwbdot_vv.h new file mode 100644 index 0000000..295aee9 --- /dev/null +++ b/riscv/insns/vfwbdot_vv.h @@ -0,0 +1,17 @@ +ZVBDOT_INIT(2); +require_fp; + +switch (P.VU.vsew) { + case 16: { + if (P.VU.altfmt()) { + // TODO replace with bulk-norm routine and invoke ZVBDOT_LOOP instead of ZVBDOT_GENERIC_LOOP + require_extension(EXT_ZVFWBDOT16BF); + auto macc = [](auto a, auto b, auto c) { return f32_add(c, f32_mul(bf16_to_f32(a), bf16_to_f32(b))); }; + ZVBDOT_GENERIC_LOOP(bfloat16_t, bfloat16_t, float32_t, macc); + } else { + require(false); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vqbdots_vv.h b/riscv/insns/vqbdots_vv.h new file mode 100644 index 0000000..2daf868 --- /dev/null +++ b/riscv/insns/vqbdots_vv.h @@ -0,0 +1,23 @@ +ZVBDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQBDOT8I); + if (P.VU.altfmt()) { + ZVBDOT_SIMPLE_LOOP(int8_t, int8_t, uint32_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint8_t, int8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQBDOT16I); + if (P.VU.altfmt()) { + ZVBDOT_SIMPLE_LOOP(int16_t, int16_t, uint64_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint16_t, int16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vqbdotu_vv.h b/riscv/insns/vqbdotu_vv.h new file mode 100644 index 0000000..d6e0c4b --- /dev/null +++ b/riscv/insns/vqbdotu_vv.h @@ -0,0 +1,23 @@ +ZVBDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQBDOT8I); + if (P.VU.altfmt()) { + 
ZVBDOT_SIMPLE_LOOP(int8_t, uint8_t, uint32_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint8_t, uint8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQBDOT16I); + if (P.VU.altfmt()) { + ZVBDOT_SIMPLE_LOOP(int16_t, uint16_t, uint64_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint16_t, uint16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index e99f720..376a528 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -68,6 +68,10 @@ typedef enum { EXT_ZVKSED, EXT_ZVKSH, EXT_ZVQDOTQ, + EXT_ZVQBDOT8I, + EXT_ZVQBDOT16I, + EXT_ZVFWBDOT16BF, + EXT_ZVFBDOT32F, EXT_SSTC, EXT_ZAAMO, EXT_ZALRSC, diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 4239a66..ab68335 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1074,6 +1074,12 @@ riscv_insn_ext_zvksh = \ vsm3c_vi \ vsm3me_vv \ +riscv_insn_ext_zvbdot = \ + vqbdotu_vv \ + vqbdots_vv \ + vfwbdot_vv \ + vfbdot_vv \ + riscv_insn_ext_zimop = \ mop_r_N \ mop_rr_N \ @@ -1130,6 +1136,7 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ $(riscv_insn_ext_zvk) \ + $(riscv_insn_ext_zvbdot) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 1e33232..86e6033 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -4,6 +4,7 @@ #define _RISCV_V_EXT_MACROS_H #include "vector_unit.h" +#include <functional> // // vector: masking skip helper @@ -2089,6 +2090,50 @@ VI_VX_ULOOP({ \ break; \ } +#define ZVBDOT_INIT(widen) \ + unsigned vd_eew = P.VU.vsew * (widen); \ + unsigned vd_emul = std::max(1U, unsigned((8 * vd_eew) / P.VU.VLEN)); \ + unsigned vs2 = insn.rs2() & ~7; \ + unsigned ci = (insn.rs2() & 7) * 8; \ + require_vector(true); \ + require(P.VU.vstart->read() == 0); \ + require(P.VU.vflmul == 1); \ + require(ci * vd_eew < P.VU.VLEN); \ + require_align(insn.rd(), vd_emul); \ + require_vm; \ + require_noover(insn.rd(), vd_emul, insn.rs1(), 
1); \ + require_noover(insn.rd(), vd_emul, vs2, 8) + +template<typename a_t, typename b_t, typename c_t> +c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_t c, std::function<c_t(a_t, b_t, c_t)> macc) +{ + for (size_t i = 0; i < a.size(); i++) + c = macc(a[i], b[i], c); + return c; +} + +#define ZVBDOT_LOOP(a_t, b_t, c_t, dot) \ + for (reg_t idx = 0; idx < 8; idx++) { \ + reg_t i = ci + idx; \ + VI_LOOP_ELEMENT_SKIP(); \ + std::vector<a_t> a(P.VU.vl->read(), a_t()); \ + std::vector<b_t> b(P.VU.vl->read(), b_t()); \ + for (reg_t k = 0; k < a.size(); k++) { \ + a[k] = P.VU.elt<a_t>(insn.rs1(), k); \ + b[k] = P.VU.elt<b_t>(vs2 + idx, k); \ + } \ + auto& acc = P.VU.elt<c_t>(insn.rd(), i, true); \ + acc = dot(a, b, acc); \ + } + +#define ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) \ + auto dot = std::bind(generic_dot_product<a_t, b_t, c_t>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, macc); \ + ZVBDOT_LOOP(a_t, b_t, c_t, dot) + +#define ZVBDOT_SIMPLE_LOOP(a_t, b_t, c_t) \ + auto macc = [](auto a, auto b, auto c) { return c + decltype(c)(a) * decltype(c)(b); }; \ + ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) + #define P_SET_OV(ov) \ if (ov) P.VU.vxsat->write(1); diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index 7c6633c..7a988bb 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -39,9 +39,16 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new vta = extract64(newType, 6, 1); vma = extract64(newType, 7, 1); + bool altfmt_supported = + p->extension_enabled(EXT_ZVQBDOT8I) || + p->extension_enabled(EXT_ZVQBDOT16I) || + p->extension_enabled(EXT_ZVFWBDOT16BF) || + false; + vill = !(vflmul >= 0.125 && vflmul <= 8) || vsew > std::min(vflmul, 1.0f) * ELEN - || (newType >> 8) != 0 + || (newType >> 9) != 0 + || (!altfmt_supported && (newType & 0x100)) || (rd == 0 && rs1 == 0 && old_vlmax != vlmax); if (vill) { diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h index 
0e80618..1c370ce 100644 --- a/riscv/vector_unit.h +++ b/riscv/vector_unit.h @@ -156,6 +156,8 @@ public: reg_t get_elen() { return ELEN; } reg_t get_slen() { return VLEN; } + bool altfmt() { return vtype->read() & 0x100; } + VRM get_vround_mode() { return (VRM)(vxrm->read()); } |