diff options
author | Andrew Waterman <andrew@sifive.com> | 2023-06-06 15:14:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 15:14:02 -0700 |
commit | 97fbfec1c21895ccf0b82f777fba684457fde8fe (patch) | |
tree | ab8bd46acfd5bf688546fd5b9bd3a8c761f8cabb | |
parent | 58d2dbe17da09d515db50c20d923ab9d0f81de49 (diff) | |
parent | 8e800d05a4e4322048c942a9925684d6ce4f9de4 (diff) | |
download | riscv-isa-sim-97fbfec1c21895ccf0b82f777fba684457fde8fe.zip riscv-isa-sim-97fbfec1c21895ccf0b82f777fba684457fde8fe.tar.gz riscv-isa-sim-97fbfec1c21895ccf0b82f777fba684457fde8fe.tar.bz2 |
Merge pull request #1321 from plctlab/plct-bf16-dev
Add support for BF16 extensions
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | disasm/disasm.cc | 22 | ||||
-rw-r--r-- | riscv/decode_macros.h | 2 | ||||
-rw-r--r-- | riscv/encoding.h | 23 | ||||
-rw-r--r-- | riscv/insns/fcvt_bf16_s.h | 5 | ||||
-rw-r--r-- | riscv/insns/fcvt_s_bf16.h | 5 | ||||
-rw-r--r-- | riscv/insns/flh.h | 2 | ||||
-rw-r--r-- | riscv/insns/fmv_h_x.h | 2 | ||||
-rw-r--r-- | riscv/insns/fmv_x_h.h | 2 | ||||
-rw-r--r-- | riscv/insns/fsh.h | 2 | ||||
-rw-r--r-- | riscv/insns/vfncvtbf16_f_f_w.h | 5 | ||||
-rw-r--r-- | riscv/insns/vfwcvtbf16_f_f_v.h | 5 | ||||
-rw-r--r-- | riscv/insns/vfwmaccbf16_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vfwmaccbf16_vv.h | 5 | ||||
-rw-r--r-- | riscv/isa_parser.cc | 18 | ||||
-rw-r--r-- | riscv/isa_parser.h | 4 | ||||
-rw-r--r-- | riscv/riscv.mk.in | 22 | ||||
-rw-r--r-- | riscv/v_ext_macros.h | 76 | ||||
-rw-r--r-- | softfloat/bf16_to_f32.c | 80 | ||||
-rw-r--r-- | softfloat/f32_to_bf16.c | 92 | ||||
-rw-r--r-- | softfloat/internals.h | 6 | ||||
-rw-r--r-- | softfloat/s_roundPackToBF16.c | 113 | ||||
-rw-r--r-- | softfloat/softfloat.h | 2 | ||||
-rw-r--r-- | softfloat/softfloat.mk.in | 3 | ||||
-rw-r--r-- | softfloat/softfloat_types.h | 1 | ||||
-rw-r--r-- | softfloat/specialize.h | 19 |
26 files changed, 515 insertions, 9 deletions
@@ -49,6 +49,9 @@ Spike supports the following RISC-V ISA features: - Zcd extension, v1.0 - Zcmp extension, v1.0 - Zcmt extension, v1.0 + - Zfbfmin extension, v0.6 + - Zvfbfmin extension, v0.6 + - Zvfbfwma extension, v0.6 As a Spike extension, the remainder of the proposed [Bit-Manipulation Extensions](https://github.com/riscv/riscv-bitmanip) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 2fce1a1..25de783 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -1196,14 +1196,17 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) } if (isa->extension_enabled(EXT_ZFHMIN)) { - DEFINE_FLOAD(flh) - DEFINE_FSTORE(fsh) DEFINE_FR1TYPE(fcvt_h_s); DEFINE_FR1TYPE(fcvt_h_d); DEFINE_FR1TYPE(fcvt_h_q); DEFINE_FR1TYPE(fcvt_s_h); DEFINE_FR1TYPE(fcvt_d_h); DEFINE_FR1TYPE(fcvt_q_h); + } + + if (isa->extension_enabled(EXT_INTERNAL_ZFH_MOVE)) { + DEFINE_FLOAD(flh) + DEFINE_FSTORE(fsh) DEFINE_XFTYPE(fmv_h_x); DEFINE_FXTYPE(fmv_x_h); } @@ -1251,6 +1254,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_FX2TYPE(fle_q); } + if (isa->extension_enabled(EXT_ZFBFMIN)) { + DEFINE_FR1TYPE(fcvt_bf16_s); + DEFINE_FR1TYPE(fcvt_s_bf16); + } + // ext-h if (isa->extension_enabled('H')) { DEFINE_XLOAD_BASE(hlv_b) @@ -1799,6 +1807,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) } } + if (isa->extension_enabled(EXT_ZVFBFMIN)) { + DEFINE_VECTOR_V(vfncvtbf16_f_f_w); + DEFINE_VECTOR_V(vfwcvtbf16_f_f_v); + } + + if (isa->extension_enabled(EXT_ZVFBFWMA)) { + DEFINE_VECTOR_VV(vfwmaccbf16_vv); + DEFINE_VECTOR_VF(vfwmaccbf16_vf); + } + #define DEFINE_PI3TYPE(code) add_pitype3_insn(this, #code, match_##code, mask_##code); #define DEFINE_PI4TYPE(code) add_pitype4_insn(this, #code, match_##code, mask_##code); #define DEFINE_PI5TYPE(code) add_pitype5_insn(this, #code, match_##code, mask_##code); diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 6bdd574..7ba132c 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -74,6 
+74,7 @@ typedef unsigned __int128 uint128_t; #define FRS2 READ_FREG(insn.rs2()) #define FRS3 READ_FREG(insn.rs3()) #define FRS1_H READ_FREG_H(insn.rs1()) +#define FRS1_BF FRS1_H #define FRS1_F READ_FREG_F(insn.rs1()) #define FRS1_D READ_FREG_D(insn.rs1()) #define FRS2_H READ_FREG_H(insn.rs2()) @@ -95,6 +96,7 @@ do { \ WRITE_FRD(value); \ } \ } while (0) +#define WRITE_FRD_BF WRITE_FRD_H #define WRITE_FRD_F(value) \ do { \ if (p->extension_enabled(EXT_ZFINX)) \ diff --git a/riscv/encoding.h b/riscv/encoding.h index 48cb5c0..e39f535 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (5adef50) + * https://github.com/riscv/riscv-opcodes (8d70e77) */ #ifndef RISCV_CSR_ENCODING_H @@ -751,6 +751,8 @@ #define MASK_FCLASS_Q 0xfff0707f #define MATCH_FCLASS_S 0xe0001053 #define MASK_FCLASS_S 0xfff0707f +#define MATCH_FCVT_BF16_S 0x44800053 +#define MASK_FCVT_BF16_S 0xfff0007f #define MATCH_FCVT_D_H 0x42200053 #define MASK_FCVT_D_H 0xfff0007f #define MATCH_FCVT_D_L 0xd2200053 @@ -809,6 +811,8 @@ #define MASK_FCVT_Q_W 0xfff0007f #define MATCH_FCVT_Q_WU 0xd6100053 #define MASK_FCVT_Q_WU 0xfff0007f +#define MATCH_FCVT_S_BF16 0x40600053 +#define MASK_FCVT_S_BF16 0xfff0007f #define MATCH_FCVT_S_D 0x40100053 #define MASK_FCVT_S_D 0xfff0007f #define MATCH_FCVT_S_H 0x40200053 @@ -2165,6 +2169,8 @@ #define MASK_VFNCVT_X_F_W 0xfc0ff07f #define MATCH_VFNCVT_XU_F_W 0x48081057 #define MASK_VFNCVT_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVTBF16_F_F_W 0x480e9057 +#define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f #define MATCH_VFNMACC_VF 0xb4005057 #define MASK_VFNMACC_VF 0xfc00707f #define MATCH_VFNMACC_VV 0xb4001057 @@ -2241,10 +2247,16 @@ #define MASK_VFWCVT_X_F_V 0xfc0ff07f #define MATCH_VFWCVT_XU_F_V 0x48041057 #define MASK_VFWCVT_XU_F_V 0xfc0ff07f +#define MATCH_VFWCVTBF16_F_F_V 0x48069057 +#define MASK_VFWCVTBF16_F_F_V 0xfc0ff07f #define MATCH_VFWMACC_VF 0xf0005057 #define 
MASK_VFWMACC_VF 0xfc00707f #define MATCH_VFWMACC_VV 0xf0001057 #define MASK_VFWMACC_VV 0xfc00707f +#define MATCH_VFWMACCBF16_VF 0xec005057 +#define MASK_VFWMACCBF16_VF 0xfc00707f +#define MATCH_VFWMACCBF16_VV 0xec001057 +#define MASK_VFWMACCBF16_VV 0xfc00707f #define MATCH_VFWMSAC_VF 0xf8005057 #define MASK_VFWMSAC_VF 0xfc00707f #define MATCH_VFWMSAC_VV 0xf8001057 @@ -3392,8 +3404,11 @@ #define INSN_FIELD_AMOOP 0xf8000000 #define INSN_FIELD_NF 0xe0000000 #define INSN_FIELD_SIMM5 0xf8000 +#define INSN_FIELD_ZIMM5 0xf8000 #define INSN_FIELD_ZIMM10 0x3ff00000 #define INSN_FIELD_ZIMM11 0x7ff00000 +#define INSN_FIELD_ZIMM6HI 0x4000000 +#define INSN_FIELD_ZIMM6LO 0xf8000 #define INSN_FIELD_C_NZUIMM10 0x1fe0 #define INSN_FIELD_C_UIMM7LO 0x60 #define INSN_FIELD_C_UIMM7HI 0x1c00 @@ -3636,6 +3651,7 @@ DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) DECLARE_INSN(fclass_h, MATCH_FCLASS_H, MASK_FCLASS_H) DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) +DECLARE_INSN(fcvt_bf16_s, MATCH_FCVT_BF16_S, MASK_FCVT_BF16_S) DECLARE_INSN(fcvt_d_h, MATCH_FCVT_D_H, MASK_FCVT_D_H) DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) @@ -3665,6 +3681,7 @@ DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) +DECLARE_INSN(fcvt_s_bf16, MATCH_FCVT_S_BF16, MASK_FCVT_S_BF16) DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) DECLARE_INSN(fcvt_s_h, MATCH_FCVT_S_H, MASK_FCVT_S_H) DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) @@ -4343,6 +4360,7 @@ DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) DECLARE_INSN(vfncvt_xu_f_w, 
MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) +DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W) DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) @@ -4381,8 +4399,11 @@ DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V) DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) +DECLARE_INSN(vfwcvtbf16_f_f_v, MATCH_VFWCVTBF16_F_F_V, MASK_VFWCVTBF16_F_F_V) DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) +DECLARE_INSN(vfwmaccbf16_vf, MATCH_VFWMACCBF16_VF, MASK_VFWMACCBF16_VF) +DECLARE_INSN(vfwmaccbf16_vv, MATCH_VFWMACCBF16_VV, MASK_VFWMACCBF16_VV) DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF) DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV) DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF) diff --git a/riscv/insns/fcvt_bf16_s.h b/riscv/insns/fcvt_bf16_s.h new file mode 100644 index 0000000..d625df8 --- /dev/null +++ b/riscv/insns/fcvt_bf16_s.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFBFMIN); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD_BF(f32_to_bf16(FRS1_F)); +set_fp_exceptions; diff --git a/riscv/insns/fcvt_s_bf16.h b/riscv/insns/fcvt_s_bf16.h new file mode 100644 index 0000000..59a55cb --- /dev/null +++ b/riscv/insns/fcvt_s_bf16.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZFBFMIN); +require_fp; +softfloat_roundingMode = RM; +WRITE_FRD_F(bf16_to_f32(FRS1_BF)); +set_fp_exceptions; diff --git a/riscv/insns/flh.h b/riscv/insns/flh.h index befff2c..67b538a 100644 --- a/riscv/insns/flh.h +++ b/riscv/insns/flh.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; 
WRITE_FRD(f16(MMU.load<uint16_t>(RS1 + insn.i_imm()))); diff --git a/riscv/insns/fmv_h_x.h b/riscv/insns/fmv_h_x.h index e55d607..bc2155c 100644 --- a/riscv/insns/fmv_h_x.h +++ b/riscv/insns/fmv_h_x.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; WRITE_FRD(f16(RS1)); diff --git a/riscv/insns/fmv_x_h.h b/riscv/insns/fmv_x_h.h index 7a2e5ff..ca823c1 100644 --- a/riscv/insns/fmv_x_h.h +++ b/riscv/insns/fmv_x_h.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; WRITE_RD(sext32((int16_t)(FRS1.v[0]))); diff --git a/riscv/insns/fsh.h b/riscv/insns/fsh.h index dfd6bc5..142d4d4 100644 --- a/riscv/insns/fsh.h +++ b/riscv/insns/fsh.h @@ -1,3 +1,3 @@ -require_extension(EXT_ZFHMIN); +require_extension(EXT_INTERNAL_ZFH_MOVE); require_fp; MMU.store<uint16_t>(RS1 + insn.s_imm(), FRS2.v[0]); diff --git a/riscv/insns/vfncvtbf16_f_f_w.h b/riscv/insns/vfncvtbf16_f_f_w.h new file mode 100644 index 0000000..4708802 --- /dev/null +++ b/riscv/insns/vfncvtbf16_f_f_w.h @@ -0,0 +1,5 @@ +// vfncvtbf16.f.f.w vd, vs2, vm +VI_VFP_NCVT_BF16_TO_FP( + { vd = f32_to_bf16(vs2); }, // BODY16 + { require_extension(EXT_ZVFBFMIN); } // CHECK16 +) diff --git a/riscv/insns/vfwcvtbf16_f_f_v.h b/riscv/insns/vfwcvtbf16_f_f_v.h new file mode 100644 index 0000000..ee9a59c --- /dev/null +++ b/riscv/insns/vfwcvtbf16_f_f_v.h @@ -0,0 +1,5 @@ +// vfwcvtbf16.f.f.v vd, vs2, vm +VI_VFP_WCVT_FP_TO_BF16( + { vd = bf16_to_f32(vs2); }, // BODY16 + { require_extension(EXT_ZVFBFMIN); } // CHECK16 +) diff --git a/riscv/insns/vfwmaccbf16_vf.h b/riscv/insns/vfwmaccbf16_vf.h new file mode 100644 index 0000000..2c77b3b --- /dev/null +++ b/riscv/insns/vfwmaccbf16_vf.h @@ -0,0 +1,5 @@ +// vfwmaccbf16.vf vd, vs2, rs1 +VI_VFP_BF16_VF_LOOP_WIDE +({ + vd = f32_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmaccbf16_vv.h b/riscv/insns/vfwmaccbf16_vv.h new file mode 100644 index 0000000..bd8f305 --- /dev/null +++ 
b/riscv/insns/vfwmaccbf16_vv.h @@ -0,0 +1,5 @@ +// vfwmaccbf16.vv vd, vs2, vs1 +VI_VFP_BF16_VV_LOOP_WIDE +({ + vd = f32_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/isa_parser.cc b/riscv/isa_parser.cc index 7335a14..bd73b0c 100644 --- a/riscv/isa_parser.cc +++ b/riscv/isa_parser.cc @@ -139,6 +139,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } else if (ext_str == "zdinx") { extension_table[EXT_ZFINX] = true; extension_table[EXT_ZDINX] = true; + } else if (ext_str == "zfbfmin") { + extension_table[EXT_ZFBFMIN] = true; } else if (ext_str == "zfinx") { extension_table[EXT_ZFINX] = true; } else if (ext_str == "zhinx") { @@ -232,6 +234,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZICOND] = true; } else if (ext_str == "zihpm") { extension_table[EXT_ZIHPM] = true; + } else if (ext_str == "zvfbfmin") { + extension_table[EXT_ZVFBFMIN] = true; + } else if (ext_str == "zvfbfwma") { + extension_table[EXT_ZVFBFWMA] = true; } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; } else if (ext_str[0] == 'x') { @@ -279,6 +285,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, ("can't parse: " + std::string(p)).c_str()); } + if (extension_table[EXT_ZFBFMIN] && !extension_table['F']) { + bad_isa_string(str, "'Zfbfmin' extension requires 'F' extension"); + } + + if ((extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZVFBFWMA]) && !extension_table['V']) { + bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension"); + } + + if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZFHMIN]) { + extension_table[EXT_INTERNAL_ZFH_MOVE] = true; + } + if (extension_table['C']) { extension_table[EXT_ZCA] = true; if (extension_table['F'] && max_xlen == 32) diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 9effd16..7558116 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -49,6 +49,7 @@ typedef enum { EXT_SVINVAL, 
EXT_ZDINX, EXT_ZFA, + EXT_ZFBFMIN, EXT_ZFINX, EXT_ZHINX, EXT_ZHINXMIN, @@ -57,6 +58,8 @@ typedef enum { EXT_ZICNTR, EXT_ZICOND, EXT_ZIHPM, + EXT_ZVFBFMIN, + EXT_ZVFBFWMA, EXT_XZBP, EXT_XZBS, EXT_XZBE, @@ -66,6 +69,7 @@ typedef enum { EXT_XZBR, EXT_XZBT, EXT_SSTC, + EXT_INTERNAL_ZFH_MOVE, NUM_ISA_EXTENSIONS } isa_extension_t; diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 55fadc0..1cfe627 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1356,8 +1356,25 @@ riscv_insn_ext_cmo = \ cbo_zero \ riscv_insn_ext_zicond = \ - czero_eqz \ - czero_nez \ + czero_eqz \ + czero_nez \ + +riscv_insn_ext_zfbfmin = \ + fcvt_bf16_s \ + fcvt_s_bf16 \ + +riscv_insn_ext_zvfbfmin = \ + vfncvtbf16_f_f_w \ + vfwcvtbf16_f_f_v \ + +riscv_insn_ext_zvfbfwma = \ + vfwmaccbf16_vv \ + vfwmaccbf16_vf \ + +riscv_insn_ext_bf16 = \ + $(riscv_insn_ext_zfbfmin) \ + $(riscv_insn_ext_zvfbfmin) \ + $(riscv_insn_ext_zvfbfwma) \ riscv_insn_list = \ $(riscv_insn_ext_a) \ @@ -1383,6 +1400,7 @@ riscv_insn_list = \ $(riscv_insn_smrnmi) \ $(riscv_insn_ext_cmo) \ $(riscv_insn_ext_zicond) \ + $(riscv_insn_ext_bf16) \ riscv_gen_srcs = $(addsuffix .cc,$(riscv_insn_list)) diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 8b0d0fd..41256c7 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -1488,11 +1488,27 @@ reg_t index[P.VU.vlmax]; \ reg_t UNUSED rs2_num = insn.rs2(); \ softfloat_roundingMode = STATE.frm->read(); +#define VI_VFP_BF16_COMMON \ + require_fp; \ + require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFBFWMA))); \ + require_vector(true); \ + require(STATE.frm->read() < 0x5); \ + reg_t UNUSED vl = P.VU.vl->read(); \ + reg_t UNUSED rd_num = insn.rd(); \ + reg_t UNUSED rs1_num = insn.rs1(); \ + reg_t UNUSED rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm->read(); + #define VI_VFP_LOOP_BASE \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); +#define VI_VFP_BF16_LOOP_BASE \ + VI_VFP_BF16_COMMON \ + for 
(reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ + VI_LOOP_ELEMENT_SKIP(); + #define VI_VFP_LOOP_CMP_BASE \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ @@ -1818,6 +1834,25 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END +#define VI_VFP_BF16_VF_LOOP_WIDE(BODY) \ + VI_CHECK_DSS(false); \ + VI_VFP_BF16_LOOP_BASE \ + switch (P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ + float32_t vs2 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)); \ + float32_t rs1 = bf16_to_f32(FRS1_BF); \ + BODY; \ + set_fp_exceptions; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + #define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(true); \ VI_VFP_LOOP_BASE \ @@ -1845,6 +1880,25 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END +#define VI_VFP_BF16_VV_LOOP_WIDE(BODY) \ + VI_CHECK_DSS(true); \ + VI_VFP_BF16_LOOP_BASE \ + switch (P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ + float32_t vs2 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)); \ + float32_t vs1 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs1_num, i)); \ + BODY; \ + set_fp_exceptions; \ + break; \ + } \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + #define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(false); \ VI_VFP_LOOP_BASE \ @@ -1980,6 +2034,17 @@ reg_t index[P.VU.vlmax]; \ break; \ } +#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \ + VI_CHECK_DSS(false); \ + switch (P.VU.vsew) { \ + case e16: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \ + break; \ + default: \ + require(0); \ + break; \ + } + #define VI_VFP_WCVT_INT_TO_FP(BODY8, BODY16, BODY32, \ CHECK8, CHECK16, CHECK32, \ sign) \ @@ -2030,6 +2095,17 @@ reg_t index[P.VU.vlmax]; \ break; \ } +#define VI_VFP_NCVT_BF16_TO_FP(BODY, CHECK) \ + VI_CHECK_SDS(false); \ + switch (P.VU.vsew) { \ + case e16: \ + { 
VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK, BODY); } \ + break; \ + default: \ + require(0); \ + break; \ + } + #define VI_VFP_NCVT_INT_TO_FP(BODY32, BODY64, \ CHECK32, CHECK64, \ sign) \ diff --git a/softfloat/bf16_to_f32.c b/softfloat/bf16_to_f32.c new file mode 100644 index 0000000..7e49002 --- /dev/null +++ b/softfloat/bf16_to_f32.c @@ -0,0 +1,80 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float32_t bf16_to_f32( bfloat16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + bool sign; + int_fast16_t exp; + uint_fast16_t frac; + struct commonNaN commonNaN; + uint_fast32_t uiZ; + union ui32_f32 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signBF16UI( uiA ); + exp = expBF16UI( uiA ); + frac = fracBF16UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_bf16UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToF32UI( &commonNaN ); + } else { + uiZ = packToF32UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToF32UI( sign, exp, (uint_fast32_t) frac<<16 ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} diff --git a/softfloat/f32_to_bf16.c b/softfloat/f32_to_bf16.c new file mode 100644 index 0000000..92a2e6d --- /dev/null +++ b/softfloat/f32_to_bf16.c @@ -0,0 +1,92 
@@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +bfloat16_t f32_to_bf16( float32_t a ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + struct commonNaN commonNaN; + struct exp16_sig32 normExpSig; + uint_fast16_t uiZ, frac16; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF32UI( uiA ); + exp = expF32UI( uiA ); + frac = fracF32UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_f32UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToBF16UI( &commonNaN ); + } else { + uiZ = packToBF16UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! 
(exp | frac) ) { + uiZ = packToBF16UI( sign, 0, 0 ); + goto uiZ; + } else if ( !exp ) { + normExpSig = softfloat_normSubnormalF32Sig( frac ); + exp = normExpSig.exp; + frac = normExpSig.sig; + } + frac16 = frac>>9 | ((frac & 0x1FF) != 0); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + return softfloat_roundPackToBF16( sign, exp - 1, frac16 | 0x4000 ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} diff --git a/softfloat/internals.h b/softfloat/internals.h index 55585e9..ae94427 100644 --- a/softfloat/internals.h +++ b/softfloat/internals.h @@ -89,6 +89,11 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool ); #define fracF16UI( a ) ((a) & 0x03FF) #define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig)) +#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15)) +#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF) +#define fracBF16UI( a ) ((a) & 0x07F) +#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig)) + #define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; }; @@ -103,6 +108,7 @@ float16_t softfloat_mulAddF16( uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t ); +bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define signF32UI( a ) ((bool) ((uint32_t) (a)>>31)) diff --git a/softfloat/s_roundPackToBF16.c b/softfloat/s_roundPackToBF16.c new file mode 100644 index 0000000..f3d0b75 --- /dev/null +++ b/softfloat/s_roundPackToBF16.c @@ -0,0 +1,113 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point 
Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "softfloat.h" + +bfloat16_t + softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig ) +{ + uint_fast8_t roundingMode; + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + bool isTiny; + uint_fast16_t uiZ; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundingMode = softfloat_roundingMode; + roundNearEven = (roundingMode == softfloat_round_near_even); + roundIncrement = 0x40; + if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { + roundIncrement = + (roundingMode + == (sign ? softfloat_round_min : softfloat_round_max)) + ? 0x7F + : 0; + } + roundBits = sig & 0x7F; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0xFD <= (unsigned int) exp ) { + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + isTiny = + (softfloat_detectTininess == softfloat_tininess_beforeRounding) + || (exp < -1) || (sig + roundIncrement < 0x8000); + sig = softfloat_shiftRightJam32( sig, -exp ); + exp = 0; + roundBits = sig & 0x7F; + if ( isTiny && roundBits ) { + softfloat_raiseFlags( softfloat_flag_underflow ); + } + } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + softfloat_raiseFlags( + softfloat_flag_overflow | softfloat_flag_inexact ); + uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! 
roundIncrement; + goto uiZ; + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>7; + if ( roundBits ) { + softfloat_exceptionFlags |= softfloat_flag_inexact; +#ifdef SOFTFLOAT_ROUND_ODD + if ( roundingMode == softfloat_round_odd ) { + sig |= 1; + goto packReturn; + } +#endif + } + sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven); + if ( ! sig ) exp = 0; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + packReturn: + uiZ = packToBF16UI( sign, exp, sig ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index bdac1be..eb78d74 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -154,6 +154,7 @@ uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool ); int_fast32_t f16_to_i32_r_minMag( float16_t, bool ); int_fast64_t f16_to_i64_r_minMag( float16_t, bool ); float32_t f16_to_f32( float16_t ); +float32_t bf16_to_f32( bfloat16_t ); float64_t f16_to_f64( float16_t ); #ifdef SOFTFLOAT_FAST_INT64 extFloat80_t f16_to_extF80( float16_t ); @@ -196,6 +197,7 @@ uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool ); int_fast32_t f32_to_i32_r_minMag( float32_t, bool ); int_fast64_t f32_to_i64_r_minMag( float32_t, bool ); float16_t f32_to_f16( float32_t ); +bfloat16_t f32_to_bf16( float32_t ); float64_t f32_to_f64( float32_t ); #ifdef SOFTFLOAT_FAST_INT64 extFloat80_t f32_to_extF80( float32_t ); diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index e7f4a3e..9c780ac 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -45,6 +45,7 @@ softfloat_c_srcs = \ f16_sqrt.c \ f16_sub.c \ f16_to_f128.c \ + bf16_to_f32.c \ f16_to_f32.c \ f16_to_f64.c \ f16_to_i8.c \ @@ -76,6 +77,7 @@ softfloat_c_srcs = \ f32_sqrt.c \ f32_sub.c \ 
f32_to_f128.c \ + f32_to_bf16.c \ f32_to_f16.c \ f32_to_f64.c \ f32_to_i16.c \ @@ -181,6 +183,7 @@ softfloat_c_srcs = \ s_roundMToUI64.c \ s_roundPackMToI64.c \ s_roundPackMToUI64.c \ + s_roundPackToBF16.c \ s_roundPackToF128.c \ s_roundPackToF16.c \ s_roundPackToF32.c \ diff --git a/softfloat/softfloat_types.h b/softfloat/softfloat_types.h index af1888f..34c518f 100644 --- a/softfloat/softfloat_types.h +++ b/softfloat/softfloat_types.h @@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | (typically 'float' and 'double', and possibly 'long double'). *----------------------------------------------------------------------------*/ typedef struct { uint16_t v; } float16_t; +typedef float16_t bfloat16_t; typedef struct { uint32_t v; } float32_t; typedef struct { uint64_t v; } float64_t; typedef struct { uint64_t v[2]; } float128_t; diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 19504b6..fb3761d 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -99,6 +99,11 @@ struct commonNaN { char _unused; }; #define defaultNaNF16UI 0x7E00 /*---------------------------------------------------------------------------- +| The bit pattern for a default generated BF16 (bfloat16) floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + +/*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. | Note: This macro evaluates its argument more than once. @@ -114,6 +119,20 @@ struct commonNaN { char _unused; }; #define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! 
((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- +| Assuming `uiA' has the bit pattern of a BF16 (bfloat16) floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by `aPtr' into a BF16 (bfloat16) floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + +/*---------------------------------------------------------------------------- | Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. *----------------------------------------------------------------------------*/ |