aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/apt-packages.txt2
-rw-r--r--.github/workflows/continuous-integration.yml7
-rw-r--r--.github/workflows/debug-smoke.yml7
-rw-r--r--README.md2
-rw-r--r--ci-tests/.gitignore3
-rw-r--r--ci-tests/atomics.c20
-rwxr-xr-xci-tests/build-spike24
-rwxr-xr-xci-tests/create-ci-binary-tarball29
-rw-r--r--ci-tests/custom-csr.cc2
-rwxr-xr-xci-tests/run-snippy-test.sh37
-rwxr-xr-xci-tests/run-snippy-tests.sh97
-rw-r--r--ci-tests/snippy-tests/boot-code-f.s41
-rw-r--r--ci-tests/snippy-tests/boot-code-vf.s45
-rw-r--r--ci-tests/snippy-tests/boot-code.s30
-rwxr-xr-xci-tests/snippy-tests/generate-snippy-test.sh137
-rw-r--r--ci-tests/snippy-tests/linker-entry.ld1
-rw-r--r--ci-tests/snippy-tests/sections.yaml22
-rw-r--r--ci-tests/test-customext.cc2
-rwxr-xr-xci-tests/test-spike56
-rw-r--r--customext/cflush.cc2
-rw-r--r--customext/dummy_rocc.cc2
-rwxr-xr-xdebug_rom/debug_rom.S58
-rw-r--r--debug_rom/debug_rom.h32
-rw-r--r--disasm/disasm.cc52
-rw-r--r--disasm/isa_parser.cc91
-rw-r--r--disasm/regnames.cc18
-rw-r--r--fesvr/term.cc39
-rw-r--r--riscv/abstract_device.h7
-rw-r--r--riscv/bloom_filter.h64
-rw-r--r--riscv/bulknormdot.h328
-rw-r--r--riscv/cfg.cc1
-rw-r--r--riscv/cfg.h1
-rw-r--r--riscv/clint.cc2
-rw-r--r--riscv/common.h12
-rw-r--r--riscv/csr_init.cc216
-rw-r--r--riscv/csrs.cc383
-rw-r--r--riscv/csrs.h101
-rw-r--r--riscv/debug_module.cc506
-rw-r--r--riscv/debug_module.h34
-rw-r--r--riscv/decode.h10
-rw-r--r--riscv/decode_macros.h24
-rw-r--r--riscv/devices.cc8
-rw-r--r--riscv/devices.h6
-rw-r--r--riscv/disasm.h1
-rw-r--r--riscv/encoding.h126
-rw-r--r--riscv/execute.cc34
-rw-r--r--riscv/insn_template.cc3
-rw-r--r--riscv/insns/amoadd_d.h2
-rw-r--r--riscv/insns/amoadd_w.h2
-rw-r--r--riscv/insns/amoand_d.h2
-rw-r--r--riscv/insns/amoand_w.h2
-rw-r--r--riscv/insns/amomax_d.h2
-rw-r--r--riscv/insns/amomax_w.h2
-rw-r--r--riscv/insns/amomaxu_d.h2
-rw-r--r--riscv/insns/amomaxu_w.h2
-rw-r--r--riscv/insns/amomin_d.h2
-rw-r--r--riscv/insns/amomin_w.h2
-rw-r--r--riscv/insns/amominu_d.h2
-rw-r--r--riscv/insns/amominu_w.h2
-rw-r--r--riscv/insns/amoor_d.h2
-rw-r--r--riscv/insns/amoor_w.h2
-rw-r--r--riscv/insns/amoswap_d.h2
-rw-r--r--riscv/insns/amoswap_w.h2
-rw-r--r--riscv/insns/amoxor_d.h2
-rw-r--r--riscv/insns/amoxor_w.h2
-rw-r--r--riscv/insns/beqi.h5
-rw-r--r--riscv/insns/bnei.h5
-rw-r--r--riscv/insns/c_add.h1
-rw-r--r--riscv/insns/c_jalr.h6
-rw-r--r--riscv/insns/c_jr.h5
-rw-r--r--riscv/insns/c_mv.h1
-rw-r--r--riscv/insns/fli_h.h2
-rw-r--r--riscv/insns/flq.h4
-rw-r--r--riscv/insns/fsq.h3
-rw-r--r--riscv/insns/jalr.h5
-rw-r--r--riscv/insns/lr_d.h2
-rw-r--r--riscv/insns/lr_w.h2
-rw-r--r--riscv/insns/sc_d.h2
-rw-r--r--riscv/insns/sc_w.h2
-rw-r--r--riscv/insns/sfence_inval_ir.h2
-rw-r--r--riscv/insns/sfence_vma.h2
-rw-r--r--riscv/insns/sret.h2
-rw-r--r--riscv/insns/ssamoswap_d.h2
-rw-r--r--riscv/insns/ssamoswap_w.h3
-rw-r--r--riscv/insns/vandn_vv.h2
-rw-r--r--riscv/insns/vandn_vx.h2
-rw-r--r--riscv/insns/vbrev8_v.h2
-rw-r--r--riscv/insns/vfadd_vf.h4
-rw-r--r--riscv/insns/vfadd_vv.h4
-rw-r--r--riscv/insns/vfbdot_vv.h16
-rw-r--r--riscv/insns/vfclass_v.h2
-rw-r--r--riscv/insns/vfdiv_vf.h2
-rw-r--r--riscv/insns/vfdiv_vv.h2
-rw-r--r--riscv/insns/vfext_vf2.h14
-rw-r--r--riscv/insns/vfmacc_vf.h5
-rw-r--r--riscv/insns/vfmacc_vv.h5
-rw-r--r--riscv/insns/vfmadd_vf.h4
-rw-r--r--riscv/insns/vfmadd_vv.h4
-rw-r--r--riscv/insns/vfmax_vf.h5
-rw-r--r--riscv/insns/vfmax_vv.h5
-rw-r--r--riscv/insns/vfmin_vf.h5
-rw-r--r--riscv/insns/vfmin_vv.h5
-rw-r--r--riscv/insns/vfmsac_vf.h6
-rw-r--r--riscv/insns/vfmsac_vv.h6
-rw-r--r--riscv/insns/vfmsub_vf.h6
-rw-r--r--riscv/insns/vfmsub_vv.h6
-rw-r--r--riscv/insns/vfmul_vf.h4
-rw-r--r--riscv/insns/vfmul_vv.h4
-rw-r--r--riscv/insns/vfmv_f_s.h8
-rw-r--r--riscv/insns/vfmv_s_f.h11
-rw-r--r--riscv/insns/vfncvt_f_f_q.h7
-rw-r--r--riscv/insns/vfncvt_f_f_w.h8
-rw-r--r--riscv/insns/vfncvt_f_x_w.h12
-rw-r--r--riscv/insns/vfncvt_f_xu_w.h12
-rw-r--r--riscv/insns/vfncvt_rod_f_f_w.h10
-rw-r--r--riscv/insns/vfncvt_rtz_x_f_w.h10
-rw-r--r--riscv/insns/vfncvt_rtz_xu_f_w.h10
-rw-r--r--riscv/insns/vfncvt_sat_f_f_q.h7
-rw-r--r--riscv/insns/vfncvt_x_f_w.h10
-rw-r--r--riscv/insns/vfncvt_xu_f_w.h10
-rw-r--r--riscv/insns/vfncvtbf16_f_f_w.h10
-rw-r--r--riscv/insns/vfncvtbf16_sat_f_f_w.h8
-rw-r--r--riscv/insns/vfnmacc_vf.h7
-rw-r--r--riscv/insns/vfnmacc_vv.h6
-rw-r--r--riscv/insns/vfnmadd_vf.h6
-rw-r--r--riscv/insns/vfnmadd_vv.h6
-rw-r--r--riscv/insns/vfnmsac_vf.h6
-rw-r--r--riscv/insns/vfnmsac_vv.h6
-rw-r--r--riscv/insns/vfnmsub_vf.h6
-rw-r--r--riscv/insns/vfnmsub_vv.h6
-rw-r--r--riscv/insns/vfqbdot_alt_vv.h17
-rw-r--r--riscv/insns/vfqbdot_vv.h17
-rw-r--r--riscv/insns/vfqldot_alt_vv.h17
-rw-r--r--riscv/insns/vfqldot_vv.h17
-rw-r--r--riscv/insns/vfrdiv_vf.h2
-rw-r--r--riscv/insns/vfrec7_v.h2
-rw-r--r--riscv/insns/vfrsqrt7_v.h2
-rw-r--r--riscv/insns/vfrsub_vf.h2
-rw-r--r--riscv/insns/vfsgnj_vf.h3
-rw-r--r--riscv/insns/vfsgnj_vv.h3
-rw-r--r--riscv/insns/vfsgnjn_vf.h3
-rw-r--r--riscv/insns/vfsgnjn_vv.h3
-rw-r--r--riscv/insns/vfsgnjx_vf.h3
-rw-r--r--riscv/insns/vfsgnjx_vv.h3
-rw-r--r--riscv/insns/vfslide1down_vf.h2
-rw-r--r--riscv/insns/vfslide1up_vf.h2
-rw-r--r--riscv/insns/vfsqrt_v.h2
-rw-r--r--riscv/insns/vfsub_vf.h4
-rw-r--r--riscv/insns/vfsub_vv.h4
-rw-r--r--riscv/insns/vfwadd_vv.h3
-rw-r--r--riscv/insns/vfwbdot_vv.h15
-rw-r--r--riscv/insns/vfwcvt_f_f_v.h8
-rw-r--r--riscv/insns/vfwcvt_f_x_v.h9
-rw-r--r--riscv/insns/vfwcvt_f_xu_v.h9
-rw-r--r--riscv/insns/vfwcvt_rtz_x_f_v.h4
-rw-r--r--riscv/insns/vfwcvt_rtz_xu_f_v.h4
-rw-r--r--riscv/insns/vfwcvt_x_f_v.h4
-rw-r--r--riscv/insns/vfwcvt_xu_f_v.h4
-rw-r--r--riscv/insns/vfwcvtbf16_f_f_v.h8
-rw-r--r--riscv/insns/vfwldot_vv.h15
-rw-r--r--riscv/insns/vghsh_vv.h4
-rw-r--r--riscv/insns/vgmul_vv.h4
-rw-r--r--riscv/insns/viota_m.h17
-rw-r--r--riscv/insns/vmandn_mm.h2
-rw-r--r--riscv/insns/vmfeq_vf.h2
-rw-r--r--riscv/insns/vmfeq_vv.h4
-rw-r--r--riscv/insns/vmfge_vf.h2
-rw-r--r--riscv/insns/vmfgt_vf.h2
-rw-r--r--riscv/insns/vmfle_vf.h2
-rw-r--r--riscv/insns/vmfle_vv.h4
-rw-r--r--riscv/insns/vmflt_vf.h2
-rw-r--r--riscv/insns/vmflt_vv.h4
-rw-r--r--riscv/insns/vmfne_vf.h2
-rw-r--r--riscv/insns/vmfne_vv.h4
-rw-r--r--riscv/insns/vmnand_mm.h2
-rw-r--r--riscv/insns/vmnor_mm.h2
-rw-r--r--riscv/insns/vmorn_mm.h2
-rw-r--r--riscv/insns/vmulh_vv.h2
-rw-r--r--riscv/insns/vmulh_vx.h2
-rw-r--r--riscv/insns/vmulhsu_vv.h2
-rw-r--r--riscv/insns/vmulhsu_vx.h2
-rw-r--r--riscv/insns/vmulhu_vv.h2
-rw-r--r--riscv/insns/vmulhu_vx.h2
-rw-r--r--riscv/insns/vmxnor_mm.h2
-rw-r--r--riscv/insns/vqbdots_vv.h23
-rw-r--r--riscv/insns/vqbdotu_vv.h23
-rw-r--r--riscv/insns/vqldots_vv.h23
-rw-r--r--riscv/insns/vqldotu_vv.h23
-rw-r--r--riscv/insns/vrev8_v.h2
-rw-r--r--riscv/insns/vrol_vv.h2
-rw-r--r--riscv/insns/vrol_vx.h2
-rw-r--r--riscv/insns/vror_vi.h2
-rw-r--r--riscv/insns/vror_vv.h2
-rw-r--r--riscv/insns/vror_vx.h2
-rw-r--r--riscv/insns/vsm3c_vi.h1
-rw-r--r--riscv/insns/vsm3me_vv.h1
-rw-r--r--riscv/insns/vsm4k_vi.h1
-rw-r--r--riscv/insns/vsm4r_vs.h4
-rw-r--r--riscv/insns/vsm4r_vv.h2
-rw-r--r--riscv/insns/vsmul_vv.h2
-rw-r--r--riscv/insns/vsmul_vx.h2
-rw-r--r--riscv/insns/vsra_vi.h2
-rw-r--r--riscv/insns/vssra_vi.h4
-rw-r--r--riscv/insns/vssrl_vi.h2
-rw-r--r--riscv/insns/vwsll_vi.h1
-rw-r--r--riscv/insns/vwsll_vv.h1
-rw-r--r--riscv/insns/vwsll_vx.h1
-rw-r--r--riscv/insns/wrs_nto.h2
-rw-r--r--riscv/insns/wrs_sto.h2
-rw-r--r--riscv/interactive.cc20
-rw-r--r--riscv/isa_parser.h26
-rw-r--r--riscv/jtag_dtm.cc2
-rw-r--r--riscv/mmu.cc171
-rw-r--r--riscv/mmu.h120
-rw-r--r--riscv/ns16550.cc10
-rw-r--r--riscv/opcodes.h10
-rw-r--r--riscv/platform.h2
-rw-r--r--riscv/plic.cc2
-rw-r--r--riscv/processor.cc229
-rw-r--r--riscv/processor.h93
-rw-r--r--riscv/riscv.mk.in37
-rw-r--r--riscv/sim.cc61
-rw-r--r--riscv/sim.h2
-rw-r--r--riscv/v_ext_macros.h284
-rw-r--r--riscv/vector_unit.cc116
-rw-r--r--riscv/vector_unit.h119
-rw-r--r--riscv/zvbdot.h59
-rw-r--r--riscv/zvk_ext_macros.h34
-rw-r--r--riscv/zvkned_ext_macros.h16
-rw-r--r--riscv/zvknh_ext_macros.h1
-rw-r--r--riscv/zvksed_ext_macros.h3
-rw-r--r--riscv/zvksh_ext_macros.h3
-rw-r--r--softfloat/bf16_to_e4m3.c48
-rw-r--r--softfloat/bf16_to_e5m2.c48
-rw-r--r--softfloat/e4m3_to_bf16.c92
-rw-r--r--softfloat/e4m3_to_f16.c92
-rw-r--r--softfloat/e5m2_to_bf16.c98
-rw-r--r--softfloat/e5m2_to_f16.c98
-rw-r--r--softfloat/f16_to_e4m3.c89
-rw-r--r--softfloat/f16_to_e5m2.c89
-rw-r--r--softfloat/f32_to_e4m3.c90
-rw-r--r--softfloat/f32_to_e5m2.c91
-rw-r--r--softfloat/f32_to_i8.c58
-rw-r--r--softfloat/f32_to_ui8.c54
-rw-r--r--softfloat/internals.h60
-rw-r--r--softfloat/s_normSubnormalE4M3Sig.c52
-rw-r--r--softfloat/s_normSubnormalE5M2Sig.c51
-rw-r--r--softfloat/s_roundPackToE4M3.c119
-rw-r--r--softfloat/s_roundPackToE5M2.c117
-rw-r--r--softfloat/softfloat.h10
-rw-r--r--softfloat/softfloat.mk.in16
-rw-r--r--softfloat/softfloat_types.h16
-rw-r--r--softfloat/specialize.h30
-rw-r--r--spike_main/spike.cc8
254 files changed, 5253 insertions, 1110 deletions
diff --git a/.github/workflows/apt-packages.txt b/.github/workflows/apt-packages.txt
index e153391..cef0337 100644
--- a/.github/workflows/apt-packages.txt
+++ b/.github/workflows/apt-packages.txt
@@ -1,2 +1,4 @@
build-essential
device-tree-compiler
+g++-riscv64-linux-gnu
+libc6-dev-riscv64-cross
diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml
index 517c74e..c6f73d5 100644
--- a/.github/workflows/continuous-integration.yml
+++ b/.github/workflows/continuous-integration.yml
@@ -18,7 +18,7 @@ on:
jobs:
test:
name: Test Spike build (Ubuntu)
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
with:
@@ -26,7 +26,9 @@ jobs:
fetch-depth: 0
- name: Install Dependencies
- run: sudo xargs apt-get install -y < .github/workflows/apt-packages.txt
+ run: |
+ sudo apt-get update
+ sudo xargs apt-get install -y < .github/workflows/apt-packages.txt
- run: |
for commit in $(git rev-list origin/master..HEAD | tac); do
@@ -53,5 +55,4 @@ jobs:
git checkout $commit
echo "Checking commit $commit"
ci-tests/build-spike
- ci-tests/test-spike
done
diff --git a/.github/workflows/debug-smoke.yml b/.github/workflows/debug-smoke.yml
index 7d6cc00..7559616 100644
--- a/.github/workflows/debug-smoke.yml
+++ b/.github/workflows/debug-smoke.yml
@@ -13,12 +13,13 @@ on:
jobs:
test:
name: Test debug (Ubuntu)
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- name: Install Dependencies
run: |
+ sudo apt-get update
sudo xargs apt-get install -y < .github/workflows/apt-packages.txt
- name: Download OpenOCD
@@ -55,10 +56,10 @@ jobs:
./gdbserver.py targets/RISC-V/spike32.py --print-failures \
--gcc $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gcc \
--gdb $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gdb \
- --sim_cmd $GITHUB_WORKSPACE/build/install/bin/spike \
+ --sim_cmd $GITHUB_WORKSPACE/install/bin/spike \
--server_cmd $GITHUB_WORKSPACE/riscv-openocd/src/openocd
./gdbserver.py targets/RISC-V/spike64-2.py --print-failures \
--gcc $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gcc \
--gdb $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gdb \
- --sim_cmd $GITHUB_WORKSPACE/build/install/bin/spike \
+ --sim_cmd $GITHUB_WORKSPACE/install/bin/spike \
--server_cmd $GITHUB_WORKSPACE/riscv-openocd/src/openocd
diff --git a/README.md b/README.md
index b930631..83d4503 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ Spike supports the following RISC-V ISA features:
- Zbc extension, v1.0
- Zbs extension, v1.0
- Zfh and Zfhmin half-precision floating-point extensions, v1.0
+ - Zfa extension, v1.0
- Zfinx extension, v1.0
- Zmmul integer multiplication extension, v1.0
- Zicbom, Zicbop, Zicboz cache-block maintenance extensions, v1.0
@@ -78,6 +79,7 @@ Spike supports the following RISC-V ISA features:
- Zicond extension, v1.0
- Zilsd extension, v1.0
- Zclsd extension, v1.0
+ - Zimop extension, v1.0
Versioning and APIs
-------------------
diff --git a/ci-tests/.gitignore b/ci-tests/.gitignore
new file mode 100644
index 0000000..b2b07d4
--- /dev/null
+++ b/ci-tests/.gitignore
@@ -0,0 +1,3 @@
+/install
+/build
+/run
diff --git a/ci-tests/atomics.c b/ci-tests/atomics.c
new file mode 100644
index 0000000..ece5a38
--- /dev/null
+++ b/ci-tests/atomics.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdatomic.h>
+
+atomic_int acnt = 0;
+atomic_int bcnt = 0;
+
+int foo() {
+ for(int n = 0; n < 1000; ++n) {
+ ++acnt;
+ if(acnt % 10 == 0)
+ ++bcnt;
+ }
+ return acnt;
+}
+
+int main(void) {
+ int acnt = foo();
+ printf("First atomic counter is %u, second is %u\n", acnt, bcnt);
+ return 0;
+}
diff --git a/ci-tests/build-spike b/ci-tests/build-spike
index 0a1b315..ed7de8f 100755
--- a/ci-tests/build-spike
+++ b/ci-tests/build-spike
@@ -1,17 +1,25 @@
#!/bin/bash
set -e
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT=`git rev-parse --show-toplevel`
+NPROCS="$(nproc 2> /dev/null || sysctl -n hw.ncpu)"
+HERE=`pwd`
+CI="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+INSTALL=$HERE/install
+BUILD=$HERE/build
-rm -rf build
+rm -rf $INSTALL $BUILD
+mkdir $INSTALL $BUILD
-mkdir build
-cd build
-mkdir install
-CXXFLAGS="-Wnon-virtual-dtor" CFLAGS="-Werror -Wall -Wextra -Wvla" $DIR/../configure --prefix=`pwd`/install
-make -j"$(nproc 2> /dev/null || sysctl -n hw.ncpu)"
+# build spike
+mkdir $BUILD/spike
+cd $BUILD/spike
+CFLAGS="-Werror -Wall -Wextra -Wvla"
+CXXFLAGS="-Wnon-virtual-dtor $CFLAGS"
+CXXFLAGS="$CXXFLAGS" CFLAGS="$CFLAGS" $ROOT/configure --prefix=$INSTALL
+make -j$NPROCS
make check
make install install-hdrs-list.h
# check that help message prints without error
-install/bin/spike -h
+$INSTALL/bin/spike -h
diff --git a/ci-tests/create-ci-binary-tarball b/ci-tests/create-ci-binary-tarball
deleted file mode 100755
index 73a549e..0000000
--- a/ci-tests/create-ci-binary-tarball
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-set -e
-
-rm -rf build
-
-mkdir -p build/pk && cd "$_"
-`git rev-parse --show-toplevel`/../riscv-pk/configure --host=riscv64-unknown-elf --with-arch=rv64gc_zifencei
-make -j4
-cd -
-
-mkdir -p build/hello && cd "$_"
-riscv64-unknown-elf-gcc -O2 -o hello `git rev-parse --show-toplevel`/ci-tests/hello.c
-cd -
-
-mkdir -p build/dummy-slliuw && cd "$_"
-riscv64-unknown-elf-gcc -O2 -o dummy-slliuw `git rev-parse --show-toplevel`/ci-tests/dummy-slliuw.c
-cd -
-
-mkdir -p build/dummycsr && cd "$_"
-riscv64-unknown-elf-gcc -O2 -o customcsr `git rev-parse --show-toplevel`/ci-tests/customcsr.c
-cd -
-
-mv build/pk/pk .
-mv build/hello/hello .
-mv build/dummy-slliuw/dummy-slliuw .
-mv build/dummycsr/customcsr .
-tar -cf spike-ci.tar pk hello dummy-slliuw customcsr
-
-rm pk hello dummy-slliuw customcsr
diff --git a/ci-tests/custom-csr.cc b/ci-tests/custom-csr.cc
index 857c9c3..89b0149 100644
--- a/ci-tests/custom-csr.cc
+++ b/ci-tests/custom-csr.cc
@@ -37,7 +37,7 @@ struct xdummycsr_t : public extension_t {
}
};
-REGISTER_EXTENSION(dummycsr, []() { return new xdummycsr_t; })
+REGISTER_EXTENSION(dummycsr, []() { static xdummycsr_t ext; return &ext; })
// Copied from spike main.
// TODO: This should really be provided in libriscv
diff --git a/ci-tests/run-snippy-test.sh b/ci-tests/run-snippy-test.sh
new file mode 100755
index 0000000..f6a3c90
--- /dev/null
+++ b/ci-tests/run-snippy-test.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+set -e -x
+set -o pipefail
+
+ROOT="$1"
+NUMINSTRS="$2"
+BOOTCODE="$3"
+TRIPLE="$4"
+ARCH="$5"
+EXTENSIONS="$6"
+ABI="$7"
+SPIKE_PATH="$8"
+
+CONFIGDIR="$ROOT"/ci-tests/snippy-tests
+
+CONFIG="test-$ARCH-$ABI.yaml"
+
+base=$(basename "$CONFIG" .yaml)
+testfile="$base".elf
+tmpelf="$base".tmp.elf
+# exclude C_JR and C_JALR and some other compressed opcodes as snippy has issues with them
+# exclude EBREAK/ECALL as we want non-privileged instructions
+# exclude lr.rl and sc.aq as they don't make sense
+"$CONFIGDIR"/generate-snippy-test.sh --march "$ARCH" --mtriple "$TRIPLE" --extensions "$EXTENSIONS" --num-instrs $NUMINSTRS --ignore-opcode-regex "C_JR|C_JALR|EBREAK|ECALL|C_.*(SP|HINT|UNIMP).*|LR_.*_RL|SC_.*_AQ" > "$CONFIG"
+llvm-snippy "$CONFIG" -o "$tmpelf" --seed 1 -riscv-disable-misaligned-access --layout-include-dir "$ROOT"/ci-tests/snippy-tests
+riscv64-linux-gnu-gcc -O0 -march="$ARCH" -mabi="$ABI" -T "$tmpelf".ld -T "$CONFIGDIR"/linker-entry.ld "$tmpelf" "$BOOTCODE" -nostdlib -static -o "$testfile" -Wl,--build-id=none
+
+error=0
+if ! timeout --foreground 60s "$SPIKE_PATH" -l --log-commits --isa "$ARCH" "$testfile"
+then
+ echo "TIMEOUT: $testfile"
+ error=1
+else
+ echo "SUCCESS: $testfile"
+fi
+exit $error
diff --git a/ci-tests/run-snippy-tests.sh b/ci-tests/run-snippy-tests.sh
new file mode 100755
index 0000000..8f67d3a
--- /dev/null
+++ b/ci-tests/run-snippy-tests.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+set -e -x
+
+WORKDIR="$1"
+CONFIGDIR="$2"
+SPIKE_PATH="$3"
+RESULTDIR="$WORKDIR"/snippy-tests
+
+mkdir -p "$WORKDIR"
+mkdir -p "$RESULTDIR"
+
+ROOT=`git rev-parse --show-toplevel`
+run_test_script="$ROOT"/ci-tests/run-snippy-test.sh
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei "i" lp64 "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei "i" ilp32 "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv64 rv64ic_zicsr_zifencei "c - d" lp64 "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv32 rv32ic_zicsr_zifencei "c - d" ilp32 "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifd_zicsr_zifencei "d - c - zfa - zvfh" lp64d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifd_zicsr_zifencei "d - c - zfa - zvfh" ilp32d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64if_zicsr_zifencei "f - c" lp64f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32if_zicsr_zifencei "f - c" ilp32f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifc_zicsr_zifencei "f" lp64f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifc_zicsr_zifencei "f" ilp32f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 2000 \
+ "$CONFIGDIR"/boot-code-vf.s riscv64 rv64gcv_zfa_zvfh "v" lp64d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifc_zicsr_zifencei_zfhmin "f + zfhmin - d" lp64f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifc_zicsr_zifencei_zfhmin "f + zfhmin - d" ilp32f "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei "d - zfa - zvfh" lp64d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei "d - zfa - zvfh" ilp32d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei_zfhmin "d - zfa + zfhmin" lp64d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei_zfhmin "d - zfa + zfhmin" ilp32d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei_zfh_zfa "d + zfh" lp64d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei_zfh_zfa "d + zfh" ilp32d "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei_zca "zca" lp64 "$SPIKE_PATH"
+
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei_zca "zca" ilp32 "$SPIKE_PATH"
+
+# rv32-only zcf
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32if_zicsr_zifencei_zca_zcf "zcf" ilp32f "$SPIKE_PATH"
+# zcd
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifd_zicsr_zifencei_zca_zcd "zca + zcd - zfa - zfh" lp64d "$SPIKE_PATH"
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifd_zicsr_zifencei_zca_zcd "zca + zcd - zfa - zfh" ilp32d "$SPIKE_PATH"
+# zcb
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei_zca_zcb_zmmul_zba_zbb "zca + zcb" lp64 "$SPIKE_PATH"
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei_zca_zcb_zmmul_zba_zbb "zca + zcb" ilp32 "$SPIKE_PATH"
+# zawrs
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv64 rv64ia_zicsr_zifencei_zawrs "zawrs + zalrsc" lp64 "$SPIKE_PATH"
+"$run_test_script" "$ROOT" 3000 \
+ "$CONFIGDIR"/boot-code.s riscv32 rv32ia_zicsr_zifencei_zawrs "zawrs + zalrsc" ilp32 "$SPIKE_PATH"
+
diff --git a/ci-tests/snippy-tests/boot-code-f.s b/ci-tests/snippy-tests/boot-code-f.s
new file mode 100644
index 0000000..b30fccc
--- /dev/null
+++ b/ci-tests/snippy-tests/boot-code-f.s
@@ -0,0 +1,41 @@
+.option norvc
+
+.global _entry
+.global fromhost
+.global tohost
+
+.text
+_entry:
+ la t0, exception_handler
+ csrw mtvec, t0
+ csrr t1, mstatus
+# Set bit 13, the low bit of the two-bit mstatus.FS field, so that FS is no longer Off
+ li t3, 1
+ slli t3, t3, 13
+ or t1, t1, t3
+ csrw mstatus, t1
+ la t0, SNIPPY_ENTRY
+ jalr t0
+
+exception_handler:
+ csrr x10, mcause
+# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize.
+# Otherwise it's not the expected behavior and we go into an infinite loop.
+ li x11, 3
+ beq x10, x11, exit
+ j infinite_loop
+
+exit:
+ li ra, 1
+ la sp, tohost
+ sw ra, 0(sp)
+
+infinite_loop:
+ j infinite_loop
+
+.balign 64
+tohost:
+.8byte 0x0
+.balign 64
+fromhost:
+.8byte 0x0
diff --git a/ci-tests/snippy-tests/boot-code-vf.s b/ci-tests/snippy-tests/boot-code-vf.s
new file mode 100644
index 0000000..8c32c0f
--- /dev/null
+++ b/ci-tests/snippy-tests/boot-code-vf.s
@@ -0,0 +1,45 @@
+.option norvc
+
+.global _entry
+.global fromhost
+.global tohost
+
+.text
+_entry:
+ la t0, exception_handler
+ csrw mtvec, t0
+ csrr t1, mstatus
+# Set bit 9, the low bit of the two-bit mstatus.VS field, so that VS is no longer Off
+ li t2, 1
+ slli t2, t2, 9
+ or t1, t1, t2
+# Set bit 13, the low bit of the two-bit mstatus.FS field, so that FS is no longer Off
+ li t3, 1
+ slli t3, t3, 13
+ or t1, t1, t3
+ csrw mstatus, t1
+ la t0, SNIPPY_ENTRY
+ jalr t0
+
+exception_handler:
+ csrr x10, mcause
+# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize.
+# Otherwise it's not the expected behavior and we go into an infinite loop.
+ li x11, 3
+ beq x10, x11, exit
+ j infinite_loop
+
+exit:
+ li ra, 1
+ la sp, tohost
+ sd ra, 0(sp)
+
+infinite_loop:
+ j infinite_loop
+
+.balign 64
+tohost:
+.8byte 0x0
+.balign 64
+fromhost:
+.8byte 0x0
diff --git a/ci-tests/snippy-tests/boot-code.s b/ci-tests/snippy-tests/boot-code.s
new file mode 100644
index 0000000..9dfae53
--- /dev/null
+++ b/ci-tests/snippy-tests/boot-code.s
@@ -0,0 +1,30 @@
+.option norvc
+.global _entry
+.global fromhost
+.global tohost
+.text
+_entry:
+ la t0, exception_handler
+ csrw mtvec, t0
+ la t0, SNIPPY_ENTRY
+ jalr t0
+ j exit
+exception_handler:
+ csrr x10, mcause
+# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize.
+# Otherwise it's not the expected behavior and we go into an infinite loop.
+ li x11, 3
+ beq x10, x11, exit
+ j infinite_loop
+exit:
+ li ra, 1
+ la sp, tohost
+ sw ra, 0(sp)
+infinite_loop:
+ j infinite_loop
+.balign 64
+tohost:
+.8byte 0x0
+fromhost:
+.8byte 0x0
+
diff --git a/ci-tests/snippy-tests/generate-snippy-test.sh b/ci-tests/snippy-tests/generate-snippy-test.sh
new file mode 100755
index 0000000..6e23768
--- /dev/null
+++ b/ci-tests/snippy-tests/generate-snippy-test.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+args=("$@")
+
+march=
+mtriple=
+extensions=
+ignore_regex='^$'
+includes=("./sections.yaml")
+num_instrs=1000
+
+usage() {
+ {
+ echo "isa-tests-gen.sh [options]"
+ echo " --march : Target architecture [required]"
+ echo " --mtriple : Target triple"
+ echo " --extensions : Target extensions string"
+ echo " --include : Additional include"
+ echo " --num-instrs : Number of instructions to generate (default: $num_instrs)"
+ echo " --ignore-opcode-regex : Regex to filter out opcodes"
+ echo " -h, --help : Print this help message"
+ } >&2
+}
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --march)
+ shift
+ march=$1
+ shift
+ ;;
+ --mtriple)
+ shift
+ mtriple=$1
+ shift
+ ;;
+ --extensions)
+ shift
+ extensions="$1"
+ shift
+ ;;
+ --include)
+ shift
+ includes+=("$1")
+ shift
+ ;;
+ --num-instrs)
+ shift
+ num_instrs="$1"
+ shift
+ ;;
+ --ignore-opcode-regex)
+ shift
+ ignore_regex="$1"
+ shift
+ ;;
+ -h | --help)
+ usage
+ exit 0
+ ;;
+ *)
+ usage
+ exit 1
+ ;;
+ esac
+done
+
+declare -a ie_args
+
+case $mtriple in
+riscv32)
+ ie_args+=("-arch=riscv" "--rv32" "-riscv-ext" "$extensions")
+ ;;
+riscv64)
+ ie_args+=("-arch=riscv" "--rv64" "-riscv-ext" "$extensions")
+ ;;
+*)
+ echo "error: Unrecognized --mtriple" >&2
+ usage
+ exit 1
+ ;;
+esac
+
+ie_args+=("--disable-pseudo")
+
+if [[ -z "$march" ]]; then
+ echo "error: --march hasn't been specified" >&2
+ exit 1
+fi
+
+if ! [ -x "$(command -v llvm-ie)" ]; then
+ echo "error: 'llvm-ie' is not in PATH" >&2
+ exit 1
+fi
+
+mapfile -t opcodes < <(llvm-ie "${ie_args[@]}")
+filtered_opcodes=()
+
+for opc in "${opcodes[@]}"; do
+ if [[ ! "$opc" =~ $ignore_regex ]]; then
+ filtered_opcodes+=("$opc")
+ fi
+done
+
+if [[ ${#filtered_opcodes[@]} -eq 0 ]]; then
+ echo "error: No opcodes matched" >&2
+ exit 1
+fi
+
+echo "# generated with" "$(basename "$0"), to regenerate run:"
+printf "# %s" "$(basename "$0")"
+for arg in "${args[@]}"; do
+ printf " %q" "$arg"
+done
+printf "\n"
+echo "include:"
+for inc in "${includes[@]}"; do
+ echo " - \"$inc\""
+done
+
+cat <<EOF
+options:
+ march: ${march}
+ mtriple: ${mtriple}
+ num-instrs: ${num_instrs}
+ model-plugin: None
+ entry-point: SNIPPY_ENTRY
+ riscv-init-fregs-from-memory: true
+histogram:
+EOF
+for opc in "${filtered_opcodes[@]}"; do
+ echo " - [$opc, 1.0]"
+done
+
+printf "\n"
diff --git a/ci-tests/snippy-tests/linker-entry.ld b/ci-tests/snippy-tests/linker-entry.ld
new file mode 100644
index 0000000..f94c957
--- /dev/null
+++ b/ci-tests/snippy-tests/linker-entry.ld
@@ -0,0 +1 @@
+ENTRY(_entry)
diff --git a/ci-tests/snippy-tests/sections.yaml b/ci-tests/snippy-tests/sections.yaml
new file mode 100644
index 0000000..bc5a399
--- /dev/null
+++ b/ci-tests/snippy-tests/sections.yaml
@@ -0,0 +1,22 @@
+sections:
+ - name: 0
+ VMA: 0x80000000
+ SIZE: 0x10000
+ LMA: 0x80000000
+ ACCESS: r
+ - name: 1
+ VMA: 0x80020000
+ SIZE: 0x20000
+ LMA: 0x80020000
+ ACCESS: rx
+ - name: 2
+ VMA: 0x80040000
+ SIZE: 0x10000
+ LMA: 0x80040000
+ ACCESS: rw
+ - name: stack
+ VMA: 0x80050000
+ SIZE: 0x10000
+ LMA: 0x80050000
+ ACCESS: rw
+
diff --git a/ci-tests/test-customext.cc b/ci-tests/test-customext.cc
index 77c739f..90cdb35 100644
--- a/ci-tests/test-customext.cc
+++ b/ci-tests/test-customext.cc
@@ -46,7 +46,7 @@ struct xslliuw_dummy_t : public extension_t {
}
};
-REGISTER_EXTENSION(dummyslliuw, []() { return new xslliuw_dummy_t; })
+REGISTER_EXTENSION(dummyslliuw, []() { static xslliuw_dummy_t ext; return &ext; })
// Copied from spike main.
// TODO: This should really be provided in libriscv
diff --git a/ci-tests/test-spike b/ci-tests/test-spike
index 36b748a..ebec4c6 100755
--- a/ci-tests/test-spike
+++ b/ci-tests/test-spike
@@ -1,25 +1,51 @@
#!/bin/bash
set -e
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+ROOT=`git rev-parse --show-toplevel`
+NPROCS="$(nproc 2> /dev/null || sysctl -n hw.ncpu)"
+HERE=`pwd`
+CI="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+INSTALL=$HERE/install
+BUILD=$HERE/build
+RUN=$HERE/run
-cd build
+# build pk
+rm -rf $BUILD/pk
+mkdir $BUILD/pk
+cd $BUILD/pk
+git clone https://github.com/riscv-software-src/riscv-pk.git
+riscv-pk/configure --host=riscv64-linux-gnu --prefix=$INSTALL
+make -j$NPROCS
+make install
-# run a program and check for correct output
-mkdir run
-cd run
-wget https://github.com/riscv-software-src/riscv-isa-sim/releases/download/dummy-tag-for-ci-storage/spike-ci.tar
-tar xf spike-ci.tar
-time ../install/bin/spike --isa=rv64gc pk hello | grep "Hello, world! Pi is approximately 3.141588."
+# build tests
+rm -rf $RUN
+mkdir -p $RUN
+cd $RUN
+riscv64-linux-gnu-gcc -static -O2 -o hello $CI/hello.c
+riscv64-linux-gnu-gcc -static -O2 -o dummy-slliuw $CI/dummy-slliuw.c
+riscv64-linux-gnu-gcc -static -O2 -o customcsr $CI/customcsr.c
+riscv64-linux-gnu-gcc -static -O2 -o atomics $CI/atomics.c
+
+# run snippy-based tests
+wget https://github.com/syntacore/snippy/releases/download/snippy-2.1/snippy-x86_64-linux.tar.xz
+tar xf snippy-x86_64-linux.tar.xz
+
+# test that snippy runs
+bin/llvm-snippy --version | grep "Snippy version: 2.1.0"
+PATH="$PATH:$RUN/bin" "$ROOT"/ci-tests/run-snippy-tests.sh "$RUN" "$ROOT"/ci-tests/snippy-tests "$INSTALL"/bin/spike
# check that including sim.h in an external project works
-g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o test-libriscv
-g++ -std=c++2a -I../install/include -L../install/lib $DIR/test-customext.cc -lriscv -o test-customext
-g++ -std=c++2a -I../install/include -L../install/lib $DIR/custom-csr.cc -lriscv -o test-custom-csr
+g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o test-libriscv
+g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/test-customext.cc -lriscv -o test-customext
+g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/custom-csr.cc -lriscv -o test-custom-csr
# check that all installed headers are functional
-g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o /dev/null -include ../install-hdrs-list.h
+g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o /dev/null -include $BUILD/spike/install-hdrs-list.h
-LD_LIBRARY_PATH=../install/lib ./test-libriscv pk hello| grep "Hello, world! Pi is approximately 3.141588."
-LD_LIBRARY_PATH=../install/lib ./test-customext pk dummy-slliuw | grep "Executed successfully"
-LD_LIBRARY_PATH=../install/lib ./test-custom-csr pk customcsr | grep "Executed successfully"
+# run tests
+time $INSTALL/bin/spike --isa=rv64gc $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
+$INSTALL/bin/spike --log-commits --isa=rv64gc $BUILD/pk/pk atomics 2> /dev/null | grep "First atomic counter is 1000, second is 100"
+LD_LIBRARY_PATH=$INSTALL/lib ./test-libriscv $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
+LD_LIBRARY_PATH=$INSTALL/lib ./test-customext $BUILD/pk/pk dummy-slliuw | grep "Executed successfully"
+LD_LIBRARY_PATH=$INSTALL/lib ./test-custom-csr $BUILD/pk/pk customcsr | grep "Executed successfully"
diff --git a/customext/cflush.cc b/customext/cflush.cc
index c090e88..5a9d279 100644
--- a/customext/cflush.cc
+++ b/customext/cflush.cc
@@ -40,4 +40,4 @@ class cflush_t : public extension_t
}
};
-REGISTER_EXTENSION(cflush, []() { return new cflush_t; })
+REGISTER_EXTENSION(cflush, []() { static cflush_t ext; return &ext; })
diff --git a/customext/dummy_rocc.cc b/customext/dummy_rocc.cc
index 6669887..bc23939 100644
--- a/customext/dummy_rocc.cc
+++ b/customext/dummy_rocc.cc
@@ -44,4 +44,4 @@ class dummy_rocc_t : public rocc_t
reg_t acc[num_acc];
};
-REGISTER_EXTENSION(dummy_rocc, []() { return new dummy_rocc_t; })
+REGISTER_EXTENSION(dummy_rocc, []() { static dummy_rocc_t ext; return &ext; })
diff --git a/debug_rom/debug_rom.S b/debug_rom/debug_rom.S
index 2d36139..378c568 100755
--- a/debug_rom/debug_rom.S
+++ b/debug_rom/debug_rom.S
@@ -7,6 +7,19 @@
.global entry
.global exception
+// Runs the given code with DCSR.MPRVEN cleared, then sets MPRVEN again if it was set on entry (clobbers s0).
+// Usage: MEMORY_ACCESS_WITH_MPRV(<your code>)
+#define MEMORY_ACCESS_WITH_MPRV(...) \
+ csrrci s0, CSR_DCSR, DCSR_MPRVEN; \
+ andi s0, s0, DCSR_MPRVEN; \
+ bnez s0, 1f; \
+ __VA_ARGS__; \
+ j 2f; \
+1: \
+ __VA_ARGS__; \
+ csrrsi zero, CSR_DCSR, DCSR_MPRVEN; \
+2:
+
// Entry location on ebreak, Halt, or Breakpoint
// It is the same for all harts. They branch when
// their GO or RESUME bit is set.
@@ -30,13 +43,22 @@ _entry:
// We keep checking both whether there is something the debugger wants
// us to do, or whether we should resume.
entry_loop:
- csrr s0, CSR_MHARTID
- sw s0, DEBUG_ROM_HALTED(zero)
- lbu s0, DEBUG_ROM_FLAGS(s0) // 1 byte flag per hart. Only one hart advances here.
+ // 1 byte flag per hart. Only one hart advances here.
+ MEMORY_ACCESS_WITH_MPRV(
+ csrr s0, CSR_MHARTID;
+ sw s0, DEBUG_ROM_HALTED(zero);
+ lbu s0, DEBUG_ROM_FLAGS(s0);
+ )
+
andi s0, s0, (1 << DEBUG_ROM_FLAG_GO)
bnez s0, going
- csrr s0, CSR_MHARTID
- lbu s0, DEBUG_ROM_FLAGS(s0) // multiple harts can resume here
+
+ // multiple harts can resume here
+ MEMORY_ACCESS_WITH_MPRV(
+ csrr s0, CSR_MHARTID;
+ lbu s0, DEBUG_ROM_FLAGS(s0);
+ )
+
andi s0, s0, (1 << DEBUG_ROM_FLAG_RESUME)
bnez s0, _resume
wfi
@@ -46,13 +68,23 @@ _exception:
// Restore S0, which we always save to dscratch.
// We need this in case the user tried an abstract write to a
// non-existent CSR.
- csrr s0, CSR_DSCRATCH0
- sw zero, DEBUG_ROM_EXCEPTION(zero) // Let debug module know you got an exception.
+
+
+ // Let debug module know you got an exception.
+ MEMORY_ACCESS_WITH_MPRV(
+ csrr s0, CSR_DSCRATCH0;
+ sw zero, DEBUG_ROM_EXCEPTION(zero);
+ )
+
ebreak
going:
- csrr s0, CSR_MHARTID
- sw s0, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset.
+ // When debug module sees this write, the GO flag is reset.
+ MEMORY_ACCESS_WITH_MPRV(
+ csrr s0, CSR_MHARTID;
+ sw s0, DEBUG_ROM_GOING(zero);
+ )
+
csrr s0, CSR_DSCRATCH0 // Restore s0 here
fence
fence.i
@@ -61,8 +93,12 @@ going:
// because jalr is special there)
_resume:
- csrr s0, CSR_MHARTID
- sw s0, DEBUG_ROM_RESUMING(zero) // When Debug Module sees this write, the RESUME flag is reset.
+ // When Debug Module sees this write, the RESUME flag is reset.
+ MEMORY_ACCESS_WITH_MPRV(
+ csrr s0, CSR_MHARTID;
+ sw s0, DEBUG_ROM_RESUMING(zero);
+ )
+
csrr s0, CSR_DSCRATCH0 // Restore s0
dret
diff --git a/debug_rom/debug_rom.h b/debug_rom/debug_rom.h
index 7edd5f6..d3d89a2 100644
--- a/debug_rom/debug_rom.h
+++ b/debug_rom/debug_rom.h
@@ -1,13 +1,25 @@
static const unsigned char debug_rom_raw[] = {
- 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x00, 0x06, 0x6f, 0x00, 0x80, 0x03,
- 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x40, 0xf1,
- 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x13, 0x74, 0x14, 0x00,
- 0x63, 0x14, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40,
- 0x13, 0x74, 0x24, 0x00, 0x63, 0x18, 0x04, 0x02, 0x73, 0x00, 0x50, 0x10,
- 0x6f, 0xf0, 0x9f, 0xfd, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10,
- 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x22, 0x80, 0x10,
- 0x73, 0x24, 0x20, 0x7b, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00,
- 0x67, 0x00, 0x00, 0x30, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10,
+ 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x40, 0x0d, 0x6f, 0x00, 0x40, 0x07,
+ 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x74, 0x08, 0x7b,
+ 0x13, 0x74, 0x04, 0x01, 0x63, 0x1a, 0x04, 0x00, 0x73, 0x24, 0x40, 0xf1,
+ 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x6f, 0x00, 0x40, 0x01,
+ 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40,
+ 0x73, 0x60, 0x08, 0x7b, 0x13, 0x74, 0x14, 0x00, 0x63, 0x10, 0x04, 0x06,
+ 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00,
+ 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, 0x6f, 0x00, 0x00, 0x01,
+ 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, 0x73, 0x60, 0x08, 0x7b,
+ 0x13, 0x74, 0x24, 0x00, 0x63, 0x14, 0x04, 0x06, 0x73, 0x00, 0x50, 0x10,
+ 0x6f, 0xf0, 0xdf, 0xf9, 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01,
+ 0x63, 0x18, 0x04, 0x00, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10,
+ 0x6f, 0x00, 0x00, 0x01, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10,
+ 0x73, 0x60, 0x08, 0x7b, 0x73, 0x00, 0x10, 0x00, 0x73, 0x74, 0x08, 0x7b,
+ 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00, 0x73, 0x24, 0x40, 0xf1,
+ 0x23, 0x22, 0x80, 0x10, 0x6f, 0x00, 0x00, 0x01, 0x73, 0x24, 0x40, 0xf1,
+ 0x23, 0x22, 0x80, 0x10, 0x73, 0x60, 0x08, 0x7b, 0x73, 0x24, 0x20, 0x7b,
+ 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, 0x67, 0x00, 0x00, 0x30,
+ 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00,
+ 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x6f, 0x00, 0x00, 0x01,
+ 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x73, 0x60, 0x08, 0x7b,
0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b
};
-static const unsigned int debug_rom_raw_len = 116;
+static const unsigned int debug_rom_raw_len = 260;
diff --git a/disasm/disasm.cc b/disasm/disasm.cc
index 49f2794..7b505b0 100644
--- a/disasm/disasm.cc
+++ b/disasm/disasm.cc
@@ -123,6 +123,12 @@ struct : public arg_t {
struct : public arg_t {
std::string to_string(insn_t insn) const {
+ return frm_name(insn.rm());
+ }
+} rm;
+
+struct : public arg_t {
+ std::string to_string(insn_t insn) const {
return fpr_name[insn.rd()];
}
} frd;
@@ -562,6 +568,12 @@ struct : public arg_t {
struct : public arg_t {
std::string to_string(insn_t insn) const {
+ return std::to_string((int)insn.b_imm5());
+ }
+} b_imm5;
+
+struct : public arg_t {
+ std::string to_string(insn_t insn) const {
return std::to_string((int)insn.bs());
}
} bs;
@@ -637,7 +649,17 @@ static void NOINLINE add_fstore_insn(disassembler_t* d, const char* name, uint32
static void NOINLINE add_xamo_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
{
- d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs2, &base_only_address}));
+ const char *suffix[] = {"", ".rl", ".aq", ".aqrl"}; // mnemonic suffix, indexed by the 2-bit value OR'ed in at bits 26:25
+ char new_name[128];
+ uint32_t new_mask = mask | (0x3 << 25); // additionally match on bits 26:25 so each variant gets its own entry
+ uint32_t new_match;
+
+ for (uint32_t idx = 0; idx < sizeof(suffix) / sizeof(suffix[0]); ++idx) {
+ snprintf(new_name, sizeof(new_name), "%s%s", name, suffix[idx]); // truncates silently if name + suffix exceeds 127 chars
+ new_match = match | (idx << 25);
+
+ d->add_insn(new disasm_insn_t(new_name, new_match, new_mask, {&xrd, &xrs2, &base_only_address})); // registers four entries per AMO: plain, .rl, .aq, .aqrl
+ }
}
static void NOINLINE add_xlr_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
@@ -655,6 +677,11 @@ static void NOINLINE add_btype_insn(disassembler_t* d, const char* name, uint32_
d->add_insn(new disasm_insn_t(name, match, mask, {&xrs1, &xrs2, &branch_target}));
}
+static void NOINLINE add_bimmtype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
+{ // Registers `name` for encodings selected by match/mask; operands print as xrs1, b_imm5, branch_target.
+ d->add_insn(new disasm_insn_t(name, match, mask, {&xrs1, &b_imm5, &branch_target}));
+}
+
static void NOINLINE add_b1type_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
{
const uint32_t mask_rs2 = 0x1fUL << 20;
@@ -696,6 +723,11 @@ static void NOINLINE add_fx2type_insn(disassembler_t* d, const char* name, uint3
d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &frs1, &frs2}));
}
+static void NOINLINE add_fxrtype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
+{ // Registers `name` for encodings selected by match/mask; operands print as xrd, frs1, rm (rounding-mode name).
+ d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &frs1, &rm}));
+}
+
static void NOINLINE add_flitype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
{
d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &fli_imm}));
@@ -833,6 +865,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
#define DEFINE_PREFETCH(code) DISASM_INSN(#code, code, 0, {&store_address})
#define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm})
#define DEFINE_BTYPE(code) add_btype_insn(this, #code, match_##code, mask_##code);
+ #define DEFINE_BIMMTYPE(code) add_bimmtype_insn(this, #code, match_##code, mask_##code);
#define DEFINE_B1TYPE(name, code) add_b1type_insn(this, name, match_##code, mask_##code);
#define DEFINE_XLOAD(code) add_xload_insn(this, #code, match_##code, mask_##code);
#define DEFINE_XSTORE(code) add_xstore_insn(this, #code, match_##code, mask_##code);
@@ -846,6 +879,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
#define DEFINE_FR3TYPE(code) add_fr3type_insn(this, #code, match_##code, mask_##code);
#define DEFINE_FXTYPE(code) add_fxtype_insn(this, #code, match_##code, mask_##code);
#define DEFINE_FX2TYPE(code) add_fx2type_insn(this, #code, match_##code, mask_##code);
+ #define DEFINE_FXRTYPE(code) add_fxrtype_insn(this, #code, match_##code, mask_##code);
#define DEFINE_FLITYPE(code) add_flitype_insn(this, #code, match_##code, mask_##code);
#define DEFINE_XFTYPE(code) add_xftype_insn(this, #code, match_##code, mask_##code);
#define DEFINE_XF2TYPE(code) add_xf2type_insn(this, #code, match_##code, mask_##code);
@@ -1260,6 +1294,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
DEFINE_FR1TYPE(froundnx_d);
DEFINE_FX2TYPE(fleq_d);
DEFINE_FX2TYPE(fltq_d);
+ DEFINE_FXRTYPE(fcvtmod_w_d);
if (xlen_eq(32)) {
DEFINE_XF2TYPE(fmvp_d_x);
@@ -1368,6 +1403,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
//DEFINE_R1TYPE(fcvt_q_h);
}
+ if (ext_enabled(EXT_ZIBI)) {
+ DEFINE_BIMMTYPE(beqi)
+ DEFINE_BIMMTYPE(bnei)
+ }
+
if (ext_enabled('Q')) {
DEFINE_FLOAD(flq)
DEFINE_FSTORE(fsq)
@@ -1946,6 +1986,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
#undef DISASM_VFUNARY0_INSN
}
+ if (ext_enabled(EXT_ZVFOFP4MIN)) {
+ DEFINE_VECTOR_V(vfext_vf2);
+ }
+
+ if (ext_enabled(EXT_ZVFOFP8MIN)) {
+ DEFINE_VECTOR_V(vfncvt_f_f_q);
+ DEFINE_VECTOR_V(vfncvt_sat_f_f_q);
+ DEFINE_VECTOR_V(vfncvtbf16_sat_f_f_w);
+ }
+
if (ext_enabled(EXT_ZVFBFMIN)) {
DEFINE_VECTOR_V(vfncvtbf16_f_f_w);
DEFINE_VECTOR_V(vfwcvtbf16_f_f_v);
diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc
index baedc3f..930ef47 100644
--- a/disasm/isa_parser.cc
+++ b/disasm/isa_parser.cc
@@ -130,6 +130,12 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
// Zvfh implies Zfhmin
extension_table[EXT_ZFHMIN] = true;
}
+ } else if (ext_str == "zvfbfa") {
+ extension_table[EXT_ZVFBFA] = true;
+ } else if (ext_str == "zvfofp4min") {
+ extension_table[EXT_ZVFOFP4MIN] = true;
+ } else if (ext_str == "zvfofp8min") {
+ extension_table[EXT_ZVFOFP8MIN] = true;
} else if (ext_str == "zicsr") {
// Spike necessarily has Zicsr, because
// Zicsr is implied by the privileged architecture
@@ -140,6 +146,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
// HINTs encoded in base-ISA instructions are always present.
} else if (ext_str == "zihintntl") {
// HINTs encoded in base-ISA instructions are always present.
+ } else if (ext_str == "ziccid") {
+ extension_table[EXT_ZICCID] = true;
+ } else if (ext_str == "ziccif") {
+ // aligned instruction fetch is always atomic in Spike
} else if (ext_str == "zaamo") {
extension_table[EXT_ZAAMO] = true;
} else if (ext_str == "zalrsc") {
@@ -201,6 +211,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_ZCMP] = true;
} else if (ext_str == "zcmt") {
extension_table[EXT_ZCMT] = true;
+ } else if (ext_str == "zibi") {
+ extension_table[EXT_ZIBI] = true;
} else if (ext_str == "zk") {
extension_table[EXT_ZBKB] = true;
extension_table[EXT_ZBKC] = true;
@@ -239,6 +251,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_SMEPMP] = true;
} else if (ext_str == "smstateen") {
extension_table[EXT_SMSTATEEN] = true;
+ } else if (ext_str == "smpmpmt") {
+ extension_table[EXT_SMPMPMT] = true;
} else if (ext_str == "smrnmi") {
extension_table[EXT_SMRNMI] = true;
} else if (ext_str == "sscofpmf") {
@@ -253,6 +267,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_SVPBMT] = true;
} else if (ext_str == "svinval") {
extension_table[EXT_SVINVAL] = true;
+ } else if (ext_str == "svukte") {
+ if (max_xlen != 64)
+ bad_isa_string(str, "'svukte' requires RV64");
+ extension_table[EXT_SVUKTE] = true;
} else if (ext_str == "zfa") {
extension_table[EXT_ZFA] = true;
} else if (ext_str == "zicbom") {
@@ -272,7 +290,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_ZILSD] = true;
} else if (ext_str == "zclsd") {
extension_table[EXT_ZCLSD] = true;
+ } else if (ext_str == "zvkb") {
+ extension_table[EXT_ZVKB] = true;
} else if (ext_str == "zvbb") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
} else if (ext_str == "zvbc") {
extension_table[EXT_ZVBC] = true;
@@ -283,15 +304,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
} else if (ext_str == "zvkg") {
extension_table[EXT_ZVKG] = true;
} else if (ext_str == "zvkn") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKNED] = true;
extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvknc") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVBC] = true;
extension_table[EXT_ZVKNED] = true;
extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvkng") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKG] = true;
extension_table[EXT_ZVKNED] = true;
@@ -303,15 +327,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
} else if (ext_str == "zvknhb") {
extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvks") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKSED] = true;
extension_table[EXT_ZVKSH] = true;
} else if (ext_str == "zvksc") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVBC] = true;
extension_table[EXT_ZVKSED] = true;
extension_table[EXT_ZVKSH] = true;
} else if (ext_str == "zvksg") {
+ extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKG] = true;
extension_table[EXT_ZVKSED] = true;
@@ -322,6 +349,24 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_ZVKSH] = true;
} else if (ext_str == "zvqdotq") {
extension_table[EXT_ZVQDOTQ] = true;
+ } else if (ext_str == "zvqbdot8i") {
+ extension_table[EXT_ZVQBDOT8I] = true;
+ } else if (ext_str == "zvqbdot16i") {
+ extension_table[EXT_ZVQBDOT16I] = true;
+ } else if (ext_str == "zvfqbdot8f") {
+ extension_table[EXT_ZVFQBDOT8F] = true;
+ } else if (ext_str == "zvfwbdot16bf") {
+ extension_table[EXT_ZVFWBDOT16BF] = true;
+ } else if (ext_str == "zvfbdot32f") {
+ extension_table[EXT_ZVFBDOT32F] = true;
+ } else if (ext_str == "zvqldot8i") {
+ extension_table[EXT_ZVQLDOT8I] = true;
+ } else if (ext_str == "zvqldot16i") {
+ extension_table[EXT_ZVQLDOT16I] = true;
+ } else if (ext_str == "zvfqldot8f") {
+ extension_table[EXT_ZVFQLDOT8F] = true;
+ } else if (ext_str == "zvfwldot16bf") {
+ extension_table[EXT_ZVFWLDOT16BF] = true;
} else if (ext_str == "zvkt") {
} else if (ext_str == "sstc") {
extension_table[EXT_SSTC] = true;
@@ -364,6 +409,9 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
bad_isa_string(str, ("Invalid Zvl string: " + ext_str).c_str());
vlen = std::max(vlen, new_vlen);
} else if (ext_str.substr(0, 3) == "zve") {
+ if (ext_str.size() != 6) {
+ bad_isa_string(str, ("Invalid Zve string: " + ext_str).c_str());
+ }
reg_t new_elen;
try {
new_elen = safe_stoul(ext_str.substr(3, ext_str.size() - 4));
@@ -382,10 +430,19 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
if (new_elen != 32 && new_elen != 64)
bad_isa_string(str, ("Invalid Zve string: " + ext_str).c_str());
elen = std::max(elen, new_elen);
+ vlen = std::max(vlen, new_elen);
} else if (ext_str == "ssdbltrp") {
extension_table[EXT_SSDBLTRP] = true;
} else if (ext_str == "smdbltrp") {
extension_table[EXT_SMDBLTRP] = true;
+ } else if (ext_str == "smaia") {
+ extension_table[EXT_SMAIA] = true;
+ extension_table[EXT_SSAIA] = true;
+ extension_table[EXT_SMCSRIND] = true;
+ extension_table[EXT_SSCSRIND] = true;
+ } else if (ext_str == "ssaia") {
+ extension_table[EXT_SSAIA] = true;
+ extension_table[EXT_SSCSRIND] = true;
} else if (ext_str[0] == 'x') {
extension_table['X'] = true;
if (ext_str.size() == 1) {
@@ -433,16 +490,32 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
bad_isa_string(str, "'Zclsd' extension requires 'Zca' and 'Zilsd' extensions");
}
- if (extension_table[EXT_ZFBFMIN] && !extension_table['F']) {
+ if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZFHMIN]) {
+ extension_table[EXT_INTERNAL_ZFH_MOVE] = true;
+ }
+
+ if (extension_table[EXT_ZFBFMIN] && (!extension_table['F'])) {
bad_isa_string(str, "'Zfbfmin' extension requires 'F' extension");
}
- if ((extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZVFBFWMA]) && !extension_table['V']) {
- bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension");
+ if (extension_table[EXT_ZVFBFMIN] && (vlen == 0 || !zvf)) {
+ bad_isa_string(str, "'Zvfbfmin' extension requires 'Zve32f' extension");
}
- if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZFHMIN]) {
- extension_table[EXT_INTERNAL_ZFH_MOVE] = true;
+ if (extension_table[EXT_ZVFBFA] && (!has_any_vector() || !extension_table[EXT_ZFBFMIN] || !get_zvf())) {
+ bad_isa_string(str, "'zvfbfa' extension requires at least 'Zve32f', and 'Zfbfmin'");
+ }
+
+ if (extension_table[EXT_ZVFBFWMA] && (!extension_table[EXT_ZFBFMIN] || !extension_table[EXT_ZVFBFMIN])) {
+ bad_isa_string(str, "'Zvfbfwma' extension requires 'Zfbfmin' and 'Zvfbfmin' extensions");
+ }
+
+ if (extension_table[EXT_ZVFOFP4MIN] && (!has_any_vector() || !get_zvf())) {
+ bad_isa_string(str, "'Zvfofp4min' extension requires either 'V' or 'Zve32f' extension");
+ }
+
+ if (extension_table[EXT_ZVFOFP8MIN] && (!has_any_vector() || !get_zvf())) {
+ bad_isa_string(str, "'Zvfofp8min' extension requires either 'V' or 'Zve32f' extension");
}
if (extension_table[EXT_ZFINX] && extension_table['F']) {
@@ -475,7 +548,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
}
if (extension_table[EXT_ZAWRS] && !extension_table[EXT_ZALRSC]) {
- bad_isa_string(str, "'Zabha' extension requires either the 'A' or the 'Zalrsc' extension");
+ bad_isa_string(str, "'Zawrs' extension requires either the 'A' or the 'Zalrsc' extension");
}
// When SSE is 0, Zicfiss behavior is defined by Zicmop
@@ -498,11 +571,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
#endif
if (vlen > 4096) {
- bad_isa_string(str, "Spike does not currently support VLEN > 4096b");
- }
-
- if ((vlen != 0) ^ (elen != 0)) {
- bad_isa_string(str, "Invalid Zvl/Zve configuration");
+ bad_isa_string(str, "Spike does not support VLEN > 4096");
}
if (extension_table[EXT_ZVFHMIN] && (vlen == 0 || elen == 0 || !zvf)) {
diff --git a/disasm/regnames.cc b/disasm/regnames.cc
index 0a7fd4d..42b1328 100644
--- a/disasm/regnames.cc
+++ b/disasm/regnames.cc
@@ -31,3 +31,21 @@ const char* csr_name(int which) {
}
return "unknown-csr";
}
+
+const char* frm_name(int which) { // Returns the mnemonic for rounding-mode value `which`; the strings are string literals, not owned by the caller.
+ switch (which) {
+ case 0:
+ return "rne";
+ case 1:
+ return "rtz";
+ case 2:
+ return "rdn";
+ case 3:
+ return "rup";
+ case 4:
+ return "rmm";
+ case 7:
+ return "dyn";
+ } // 5, 6 and any out-of-range value have no case and fall through to the default below
+ return "unknown-frm";
+}
diff --git a/fesvr/term.cc b/fesvr/term.cc
index c4cba0c..e0acff6 100644
--- a/fesvr/term.cc
+++ b/fesvr/term.cc
@@ -1,9 +1,13 @@
#include "term.h"
+#include "common.h"
#include <termios.h>
#include <unistd.h>
#include <poll.h>
#include <signal.h>
#include <stdlib.h>
+#include <string.h>
+
+static int tcsetattr_ttou(int fd, int optional_actions, const struct termios *p);
class canonical_termios_t
{
@@ -15,7 +19,7 @@ class canonical_termios_t
{
struct termios new_tios = old_tios;
new_tios.c_lflag &= ~(ICANON | ECHO);
- if (tcsetattr(0, TCSANOW, &new_tios) == 0)
+ if (tcsetattr_ttou(0, TCSANOW, &new_tios) == 0)
restore_tios = true;
}
}
@@ -23,7 +27,7 @@ class canonical_termios_t
~canonical_termios_t()
{
if (restore_tios)
- tcsetattr(0, TCSANOW, &old_tios);
+ tcsetattr_ttou(0, TCSANOW, &old_tios);
}
private:
struct termios old_tios;
@@ -51,3 +55,34 @@ void canonical_terminal_t::write(char ch)
if (::write(1, &ch, 1) != 1)
abort();
}
+
+static volatile sig_atomic_t sigttou_caught; // set to 1 by sigttou_handler; cleared by tcsetattr_ttou before it calls tcsetattr
+
+static void sigttou_handler(int UNUSED signum) { // only records that the signal arrived; does nothing else
+ sigttou_caught = 1;
+}
+
+static int tcsetattr_ttou(int fd, int optional_actions, const struct termios *p)
+{ // tcsetattr() wrapper: catches SIGTTOU for the duration of the call and returns -1 if it was delivered, else tcsetattr's result.
+ struct sigaction sa, old_sa;
+ memset(&sa, 0, sizeof(sa)); // sa_flags stays 0, so SA_RESTART is not requested
+ sa.sa_handler = sigttou_handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGTTOU, &sa, &old_sa)) // NOTE(review): presumably replaces the default SIGTTOU action so a background process is not stopped -- confirm
+ abort();
+
+ sigttou_caught = 0;
+
+ int result = tcsetattr(fd, optional_actions, p);
+
+ if (sigttou_caught) {
+ sigaction(SIGTTOU, &old_sa, NULL); // best-effort restore: unlike the path below, a failure here is ignored
+ return -1;
+ }
+
+ if (sigaction(SIGTTOU, &old_sa, NULL)) // restore the previous disposition; failing to do so is treated as fatal
+ abort();
+
+ return result;
+}
diff --git a/riscv/abstract_device.h b/riscv/abstract_device.h
index d8ddbab..41f5c3f 100644
--- a/riscv/abstract_device.h
+++ b/riscv/abstract_device.h
@@ -46,4 +46,11 @@ mmio_device_map_t& mmio_device_map();
std::string generate_dts(const sim_t* sim, const std::vector<std::string>& sargs) const override { return generate(sim, sargs); } \
}; device_factory_t *name##_factory = new name##_factory_t();
+#define REGISTER_BUILTIN_DEVICE(name, parse, generate) /* declares name##_factory_t, whose methods forward to parse/generate, and heap-allocates the global name##_factory */ \
+ class name##_factory_t : public device_factory_t { \
+ public: \
+ name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& sargs) const override { return parse(fdt, sim, base, sargs); } \
+ std::string generate_dts(const sim_t* sim, const std::vector<std::string>& sargs) const override { return generate(sim, sargs); } \
+ }; device_factory_t *name##_factory = new name##_factory_t();
+
#endif
diff --git a/riscv/bloom_filter.h b/riscv/bloom_filter.h
new file mode 100644
index 0000000..a3285bd
--- /dev/null
+++ b/riscv/bloom_filter.h
@@ -0,0 +1,64 @@
+// See LICENSE for license details.
+
+#ifndef _RISCV_BLOOM_FILTER_H
+#define _RISCV_BLOOM_FILTER_H
+
+#include <bitset>
+#include <cstdint>
+
+struct simple_hash1 { // stateless 64-bit mixer; shifts and multipliers are those of MurmurHash3's fmix64 finalizer
+ uint64_t operator()(uint64_t x) const
+ {
+ x = (x ^ (x >> 33)) * 0xff51afd7ed558ccd;
+ x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53;
+ return x ^ (x >> 33);
+ }
+};
+
+struct simple_hash2 { // stateless 64-bit mixer; shifts and multipliers are those of the splitmix64 output mixing step
+ uint64_t operator()(uint64_t x) const
+ {
+ x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
+ x = (x ^ (x >> 27)) * 0x94d049b13c66a8ed;
+ return x ^ (x >> 31);
+ }
+};
+
+template <typename T, typename H1, typename H2, size_t M, size_t K> // M: bit array size, K: number of hash functions
+class bloom_filter_t {
+ public:
+ void clear() // forgets every inserted value
+ {
+ bits.reset();
+ }
+
+ void insert(T value) // sets the K bits selected for `value`
+ {
+ uint64_t h1 = H1()(value);
+ uint64_t h2 = H2()(value);
+
+ for (size_t i = 0; i < K; i++) {
+ size_t idx = (h1 + i * h2) % M; // i-th probe is derived from the two hashes; the sum wraps modulo 2^64 before % M
+ bits[idx] = true;
+ }
+ }
+
+ bool contains(T value) const // true if all K bits for `value` are set: may be a false positive, never a false negative since the last clear()
+ {
+ uint64_t h1 = H1()(value);
+ uint64_t h2 = H2()(value);
+
+ for (size_t i = 0; i < K; i++) {
+ size_t idx = (h1 + i * h2) % M; // must use the same probe sequence as insert()
+ if (!bits[idx])
+ return false;
+ }
+
+ return true;
+ }
+
+ private:
+ std::bitset<M> bits;
+};
+
+#endif
diff --git a/riscv/bulknormdot.h b/riscv/bulknormdot.h
new file mode 100644
index 0000000..37981ae
--- /dev/null
+++ b/riscv/bulknormdot.h
@@ -0,0 +1,328 @@
+#ifndef _RISCV_BULKNORMDOT_H
+#define _RISCV_BULKNORMDOT_H
+
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+#include "softfloat.h"
+
+struct bulk_norm_out_t { // result of a bulk-normalized dot product
+ uint32_t out; // result encoded as a binary32 bit pattern (sign | exponent | mantissa)
+ uint8_t flags; // OR of softfloat_flag_* bits raised by the operation
+};
+
+template<typename T>
+static int int_log2(T n) // floor(log2(n)) for n >= 1; returns 0 for n == 0 as well
+{
+ int res = 0;
+ while (n >>= 1) // NOTE(review): would not terminate for a negative signed n with arithmetic shift; callers in this file pass uint64_t
+ res++;
+ return res;
+}
+
+template<typename T>
+static T shift_right_jam(T n, int amt) // n >> amt, with the LSB forced to 1 if any shifted-out bit was 1 (sticky bit)
+{
+ int width = 8 * sizeof(T);
+ T shifted = amt >= width ? 0 : n >> amt; // shifting by >= width is undefined in C++, so that case is handled explicitly
+ T jam_mask = amt >= width ? T(-1) : (T(1) << amt) - 1; // selects the bits that fall off; amt is assumed non-negative
+ bool jam = (n & jam_mask) != 0;
+ return shifted | jam;
+}
+
+/** Configuration description for dot product */
+class DotConfig {
+ public:
+ int n; // number of products
+ int guardBits; // number of guard bits
+ bool flushSub; // flush subnormal (input/output) to zero
+ DotConfig(int numProd, int numGuardBits) : n(numProd), guardBits(numGuardBits), flushSub(false) {} // flushSub defaults to false; set the member afterwards to enable flushing
+};
+
+const static int f32_exp_bits = 8;
+const static int f32_exp_bias = (1 << (f32_exp_bits - 1)) - 1;
+const static int f32_mant_bits = 23; // number of mantissa bits (excluding implicit one)
+const static int f32_exp_mask = (uint32_t(1) << f32_exp_bits) - 1;
+const static uint32_t f32_mant_mask = (uint32_t(1) << f32_mant_bits) - 1;
+
+/** Template for a floating-point format class */
+template <typename U, typename M, typename E> class FloatFormat { // abstract interface; U is not used by any member declared here
+ virtual M mant() const = 0; // these pure virtuals are private here (class default access); derived classes re-declare them
+ virtual M sig() const = 0;
+ virtual E exp() const = 0;
+
+ virtual bool subOrZero() const = 0;
+
+ virtual bool inf() const = 0;
+ virtual bool nan() const = 0;
+ virtual bool sigNan() const = 0;
+ virtual bool special() const = 0;
+
+public:
+ virtual ~FloatFormat() = default;
+};
+
+/** Template for an IEEE-754 floating-point format class */
+template <typename U, typename M, typename E, unsigned expWidth, unsigned mantWidth> class IEEEFloatFormat : FloatFormat<U, M, E> { // U: storage type of the encoding, M: mantissa/significand type, E: exponent type
+public:
+ U n; // raw encoding: sign at bit expWidth+mantWidth, then exponent field, then mantissa field
+ IEEEFloatFormat(U _n) : n(_n) {}
+ IEEEFloatFormat() : n(0) {} // zero-initialize so a default-constructed value is a well-defined +0 rather than an indeterminate read
+
+ int bias = (1 << (expWidth - 1)) - 1; // exponent bias of the format
+ int sigBits = mantWidth + 1; // significand width including the implicit leading bit
+ int mant_bits = mantWidth; // mantissa width excluding the implicit leading bit
+public:
+ /* raw exponent field */
+ E exp() const { return (n >> mantWidth) & ((1 << expWidth) - 1); }
+
+ /* raw exponent field with correction for subnormal */
+ E expSubFixed() const { return exp() + subOrZero(); } // a zero exponent field is treated as exponent 1
+
+ /** number sign */
+ bool sign() const { return n >> (expWidth + mantWidth); }
+
+ /** bit mask for mantissa */
+ M mantMask() const { return (1 << mantWidth) - 1; }
+
+ /** Number mantissa */
+ M mant() const { return n & mantMask(); }
+
+ /** Number significand */
+ M sig() const { return mant() ^ (!subOrZero() << mantWidth);} // sets the implicit leading one unless the exponent field is zero
+
+ /** bit mask for exponent */
+ E expMask() const { return (1 << expWidth) - 1; }
+
+ /* predicate: is the value a subnormal number or a zero */
+ bool subOrZero() const { return exp() == 0; }
+
+ /** predicate: is the value a special value (infinity or NaN) */
+ virtual bool special() const { return exp() == expMask(); }
+
+ /** predicate: is the value an infinity */
+ virtual bool inf() const { return special() && mant() == 0; }
+
+ /** predicate: is the value a NaN (Not A Number) */
+ virtual bool nan() const { return special() && mant() != 0; }
+
+ virtual bool sigNan() const { return nan() && !inf() && ( ( mant() >> (mantWidth - 1)) == 0); } // signaling NaN: a NaN whose top mantissa bit is clear
+
+ bool isZero() const { return exp() == 0 && mant() == 0; } // true for both +0 and -0
+};
+
+class bf16_t final : public IEEEFloatFormat<uint16_t, uint8_t, uint8_t, 8, 7> { // 16-bit encoding: 1 sign, 8 exponent, 7 mantissa bits
+ public:
+ operator uint16_t() const { return n; } // implicit conversion back to the raw encoding
+
+ bf16_t() {}
+ bf16_t(uint16_t _n) : IEEEFloatFormat(_n) {}
+
+ bf16_t flushed() const // returns a same-signed zero when the exponent field is 0 (zero or subnormal), otherwise a copy
+ {
+ if (exp() == 0)
+ return bf16_t(uint16_t(sign() << 15));
+ return *this;
+ }
+};
+
+/** OpenCompute 8-bit Floating-point E5M2 (5-bit exponent, 2-bit mantissa) */
+class ofp8_e5m2 final : public IEEEFloatFormat<uint8_t, uint8_t, uint8_t, 5, 2> { // inherits the base class's inf/nan/special rules unchanged
+ public:
+ operator uint8_t() const { return n; } // implicit conversion back to the raw encoding
+ ofp8_e5m2() {}
+ ofp8_e5m2(uint8_t _n) : IEEEFloatFormat(_n) {}
+
+ // OFP8 does not have signaling NaNs
+ bool sigNan() const { return false; }
+
+ ofp8_e5m2 flushed() const // returns a same-signed zero when the exponent field is 0 (zero or subnormal), otherwise a copy
+ {
+ if (exp() == 0)
+ return ofp8_e5m2(uint8_t(sign() << 7));
+ return *this;
+ }
+};
+
+/** OpenCompute 8-bit Floating-point E4M3 (4-bit exponent, 3-bit mantissa) */
+class ofp8_e4m3 final : public IEEEFloatFormat<uint8_t, uint8_t, uint8_t, 4, 3> {
+ public:
+ operator uint8_t() const { return n; } // implicit conversion back to the raw encoding
+ ofp8_e4m3() {}
+ ofp8_e4m3(uint8_t _n) : IEEEFloatFormat(_n) {}
+
+ // E4M3 does not have infinities
+ bool inf() const { return false; }
+
+ bool nan() const { return exp() == expMask() && mant() == mantMask(); } // only all-ones exponent AND all-ones mantissa is NaN
+
+ bool special() const { return nan(); } // other all-ones-exponent encodings are therefore treated as ordinary values
+
+ // OFP8 does not have signaling NaNs
+ bool sigNan() const { return false; }
+
+ ofp8_e4m3 flushed() const // returns a same-signed zero when the exponent field is 0 (zero or subnormal), otherwise a copy
+ {
+ if (exp() == 0)
+ return ofp8_e4m3(uint8_t(sign() << 7));
+ return *this;
+ }
+};
+
+/** bulk-normalization dot product (without accumulation) with binary32 result
+ *
+ * The actual products of significands is provided as an argument such that the model can be used
+ * to match against RTL implementations with external product implementation.
+ *
+ * @param cfg dot-product configuration
+ * @param a left-hand-side operand array
+ * @param b right-hand-side operand array
+ * @param prod_sigs array of products of significands
+ *
+ */
+template<typename ValueTypeLHS, typename ValueTypeRHS, typename SigProdType> bulk_norm_out_t bulk_norm_dot_no_mult(const DotConfig cfg, const ValueTypeLHS* a, const ValueTypeRHS* b, const SigProdType* prod_sigs)
+{
+ std::vector<int> approx_prod_exp(cfg.n); // per-product exponent estimate, expressed with the binary32 bias
+ std::vector<int> flushed_prods(cfg.n); // nonzero when product i is dropped because of input-subnormal flushing
+
+ bool any_pos_inf = false;
+ bool any_neg_inf = false;
+ bool any_nan = false;
+ bool any_invalid_nan = false;
+ bool any_sigNan = false;
+
+ // extracting format parameters from the first element of each input array
+ int lhs_bias = a[0].bias; // NOTE(review): reads element 0 unconditionally, so cfg.n >= 1 is assumed -- confirm with callers
+ int rhs_bias = b[0].bias;
+
+ int lhs_mant_bits = a[0].mant_bits;
+ int rhs_mant_bits = b[0].mant_bits;
+
+ for (int i = 0; i < cfg.n; i++) {
+ flushed_prods[i] = (cfg.flushSub && (a[i].subOrZero() || b[i].subOrZero()));
+ approx_prod_exp[i] = flushed_prods[i] ? 0 : // flush input subnormals
+ a[i].isZero() || b[i].isZero() ? (f32_exp_bias - (lhs_bias + rhs_bias)) : // minimize exp of zero product
+ a[i].expSubFixed() + b[i].expSubFixed() + (f32_exp_bias - (lhs_bias + rhs_bias));
+
+ bool either_inf = a[i].inf() || b[i].inf();
+ any_pos_inf |= either_inf && a[i].sign() == b[i].sign();
+ any_neg_inf |= either_inf && a[i].sign() != b[i].sign();
+
+ any_invalid_nan |= // infinity times zero (or times a flushed subnormal)
+ (a[i].inf() && ((b[i].subOrZero() && cfg.flushSub) || b[i].isZero())) ||
+ (b[i].inf() && ((a[i].subOrZero() && cfg.flushSub) || a[i].isZero()));
+
+ any_nan |= any_invalid_nan || a[i].nan() || b[i].nan();
+
+ any_sigNan |= a[i].sigNan() || b[i].sigNan();
+ }
+
+ // find largest exponent
+ int max_approx_prod_exp = approx_prod_exp[0];
+ for (int i = 1; i < cfg.n; i++) {
+ max_approx_prod_exp = std::max(max_approx_prod_exp, approx_prod_exp[i]);
+ }
+
+ bool acc_sign = false; // assuming the accumulator is positive
+
+ int64_t acc = 0; // signed fixed-point sum of all products aligned to max_approx_prod_exp
+
+ // compute products, normalize to largest exponent, accumulate
+ for (int i = 0; i < cfg.n; i++) {
+ int prod_sign = a[i].sign() ^ b[i].sign();
+ uint64_t prod_sig = uint64_t(prod_sigs[i]); // 16 to 64-bit zero extension
+ // align the product so the width of its fractional part is: f32_mant_bits(23) + guardBits
+ prod_sig <<= f32_mant_bits - lhs_mant_bits - rhs_mant_bits + cfg.guardBits;
+
+ int shiftAmt = max_approx_prod_exp - approx_prod_exp[i]; // non-negative because max_approx_prod_exp is the maximum
+ uint64_t shifted_sig = shift_right_jam(prod_sig, shiftAmt);
+ acc += flushed_prods[i]? 0 : // flush input subnormals
+ (prod_sign != acc_sign ? -shifted_sig : shifted_sig);
+ }
+
+ // normalize result to f32
+ bool sign = (acc < 0) != acc_sign;
+ uint64_t mag = acc < 0 ? -acc : acc; // absolute magnitude
+ int norm_dist = int_log2(mag); // position of the leading one (0 when mag is 0)
+ int exp = max_approx_prod_exp - f32_mant_bits - cfg.guardBits + norm_dist;
+
+ // fixing normalization distance for subnormal results
+ int sig_bits = (!cfg.flushSub && exp <= 0) ? f32_mant_bits - (1-exp) : f32_mant_bits;
+ sig_bits = std::max(sig_bits, 0);
+ uint32_t rounded_sig = shift_right_jam(uint64_t(mag) << sig_bits, norm_dist); // LSB carries the sticky bit; no further rounding step is applied in this function
+
+ bool any_inf = any_pos_inf || any_neg_inf;
+ bool overflow = (exp >= f32_exp_mask && mag != 0) || any_inf;
+ bool op_sign_inf = (any_pos_inf && any_neg_inf); // +inf and -inf products in the same sum
+ bool nan_out = any_nan || op_sign_inf;
+ bool overflowflag = (exp >= f32_exp_mask && mag != 0) && !any_inf && !nan_out; // overflow is only flagged when it comes from finite inputs
+
+ if (nan_out) {
+ sign = 0;
+ exp = f32_exp_mask;
+ rounded_sig = uint32_t(1) << (f32_mant_bits - 1); // positive NaN with only the top mantissa bit set
+ } else if (overflow) {
+ exp = f32_exp_mask;
+ rounded_sig = 0;
+ if (any_inf)
+ sign = any_neg_inf;
+ } else if (mag == 0) {
+ // exact zero result
+ exp = 0;
+ } else if (exp <= 0) {
+ if (cfg.flushSub) {
+ // flush output subnormals
+ exp = 0;
+ rounded_sig = 0;
+ } else {
+ exp = 0;
+ // rounded_sig should have been properly denormalized previously
+ }
+ }
+
+ bulk_norm_out_t su;
+ su.flags = 0; // only the invalid and overflow flags are ever raised below
+ su.out = (rounded_sig & f32_mant_mask)
+ | (exp << f32_mant_bits)
+ | (uint32_t(sign) << (f32_exp_bits + f32_mant_bits));
+
+ if (any_sigNan) {
+ su.flags |= softfloat_flag_invalid;
+ }
+ if (any_invalid_nan || op_sign_inf) {
+ su.flags |= softfloat_flag_invalid;
+ }
+ if (overflowflag) {
+ su.flags |= softfloat_flag_overflow;
+ }
+
+ return su;
+}
+
+/** bf16_t dot product (without accumulation) */
+static inline bulk_norm_out_t bulk_norm_dot_bf16(const DotConfig cfg, const bf16_t* a, const bf16_t* b)
+{
+ // products are extracted so that the no-mult version can be more easily matched against the RTL implementation
+ std::vector<uint16_t> prod_sigs(cfg.n);
+
+ // compute the significand products only; alignment and accumulation happen in bulk_norm_dot_no_mult
+ for (int i = 0; i < cfg.n; i++) {
+ prod_sigs[i] = a[i].sig() * (uint16_t) b[i].sig(); // sig() returns uint8_t here, so the product fits in 16 bits
+ }
+
+ return bulk_norm_dot_no_mult<bf16_t, bf16_t, uint16_t>(cfg, a, b, &prod_sigs[0]); // NOTE(review): assumes cfg.n >= 1 -- confirm with callers
+}
+
+template <typename L, typename R>
+bulk_norm_out_t bulk_norm_dot_ofp8(const DotConfig cfg, const L* a, const R* b) // L and R may be different operand formats
+{
+ // products are extracted so that the no-mult version can be more easily matched against the RTL implementation
+ std::vector<uint16_t> prod_sigs(cfg.n);
+
+ // compute the significand products only; alignment and accumulation happen in bulk_norm_dot_no_mult
+ for (int i = 0; i < cfg.n; i++) {
+ prod_sigs[i] = a[i].sig() * (uint16_t) b[i].sig();
+ }
+ return bulk_norm_dot_no_mult<L, R, uint16_t>(cfg, a, b, &prod_sigs[0]); // NOTE(review): assumes cfg.n >= 1 -- confirm with callers
+}
+
+#endif
diff --git a/riscv/cfg.cc b/riscv/cfg.cc
index 2f9a229..cc39a54 100644
--- a/riscv/cfg.cc
+++ b/riscv/cfg.cc
@@ -47,4 +47,5 @@ cfg_t::cfg_t()
explicit_hartids = false;
real_time_clint = false;
trigger_count = 4;
+ cache_blocksz = 64;
}
diff --git a/riscv/cfg.h b/riscv/cfg.h
index 388030b..8032856 100644
--- a/riscv/cfg.h
+++ b/riscv/cfg.h
@@ -78,6 +78,7 @@ public:
bool explicit_hartids;
bool real_time_clint;
reg_t trigger_count;
+ reg_t cache_blocksz;
std::optional<abstract_sim_if_t*> external_simulator;
size_t nprocs() const { return hartids.size(); }
diff --git a/riscv/clint.cc b/riscv/clint.cc
index 3d5c984..e16ebdd 100644
--- a/riscv/clint.cc
+++ b/riscv/clint.cc
@@ -145,4 +145,4 @@ std::string clint_generate_dts(const sim_t* sim, const std::vector<std::string>&
return s.str();
}
-REGISTER_DEVICE(clint, clint_parse_from_fdt, clint_generate_dts)
+REGISTER_BUILTIN_DEVICE(clint, clint_parse_from_fdt, clint_generate_dts)
diff --git a/riscv/common.h b/riscv/common.h
index a354ced..b55657b 100644
--- a/riscv/common.h
+++ b/riscv/common.h
@@ -19,4 +19,16 @@
# define UNUSED
#endif
+#ifndef __has_builtin // compilers without __has_builtin: provide a fallback so the #elif below still parses
+# define __has_builtin(x) 0 // fallback reports every builtin as unavailable
+#endif
+
+#if __has_cpp_attribute(assume) // assume(x): prefer the [[assume]] attribute when the compiler reports it
+# define assume(x) [[assume(x)]]
+#elif __has_builtin(__builtin_assume) // otherwise use the __builtin_assume builtin if it is available
+# define assume(x) __builtin_assume(x)
+#else // neither is available: expand to a no-op that does not evaluate x
+# define assume(x) ((void) 0)
+#endif
+
#endif
diff --git a/riscv/csr_init.cc b/riscv/csr_init.cc
index cabb7c2..4a05a9c 100644
--- a/riscv/csr_init.cc
+++ b/riscv/csr_init.cc
@@ -12,6 +12,24 @@ void state_t::add_csr(reg_t addr, const csr_t_p& csr)
#define add_supervisor_csr(addr, csr) add_const_ext_csr('S', addr, csr)
#define add_hypervisor_csr(addr, csr) add_ext_csr('H', addr, csr)
+void state_t::add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg) // registers proxies on ireg for select values in the range 0x30..0x3f
+{
+ // This assumes xlen is always max_xlen, which is true today (see
+ // mstatus_csr_t::unlogged_write()):
+ auto xlen = proc->get_isa().get_max_xlen();
+
+ const reg_t iprio0_addr = 0x30; // first select value handled here
+ for (int i=0; i<16; i+=2) { // eight backing registers, one per even offset 0, 2, ..., 14
+ csr_t_p iprio = std::make_shared<aia_csr_t>(proc, iprio0_addr + i, 0, 0);
+ if (xlen == 32) { // xlen 32: the same backing register is reached through a low/high wrapper pair at offsets i and i + 1
+ ireg->add_ireg_proxy(iprio0_addr + i, std::make_shared<rv32_low_csr_t>(proc, iprio0_addr + i, iprio));
+ ireg->add_ireg_proxy(iprio0_addr + i + 1, std::make_shared<rv32_high_csr_t>(proc, iprio0_addr + i + 1, iprio));
+ } else { // otherwise only the even select value is registered, mapped directly to the backing register
+ ireg->add_ireg_proxy(iprio0_addr + i, iprio);
+ }
+ }
+}
+
void state_t::csr_init(processor_t* const proc, reg_t max_isa)
{
// This assumes xlen is always max_xlen, which is true today (see
@@ -87,8 +105,17 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
}
}
add_const_ext_csr(EXT_SSCOFPMF, CSR_SCOUNTOVF, std::make_shared<scountovf_csr_t>(proc, CSR_SCOUNTOVF));
- add_csr(CSR_MIE, mie = std::make_shared<mie_csr_t>(proc, CSR_MIE));
- add_csr(CSR_MIP, mip = std::make_shared<mip_csr_t>(proc, CSR_MIP));
+ mie = std::make_shared<mie_csr_t>(proc, CSR_MIE);
+ mip = std::make_shared<mip_csr_t>(proc, CSR_MIP);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+ add_csr(CSR_MIE, std::make_shared<rv32_low_csr_t>(proc, CSR_MIE, mie));
+ add_csr(CSR_MIEH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIEH, mie));
+ add_csr(CSR_MIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MIP, mip));
+ add_csr(CSR_MIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIPH, mip));
+ } else {
+ add_csr(CSR_MIE, mie);
+ add_csr(CSR_MIP, mip);
+ }
auto sip_sie_accr = std::make_shared<generic_int_accessor_t>(
this,
~MIP_HS_MASK, // read_mask
@@ -116,21 +143,49 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
1 // shiftamt
);
- auto nonvirtual_sip = std::make_shared<mip_proxy_csr_t>(proc, CSR_SIP, sip_sie_accr);
+ nonvirtual_sip = std::make_shared<sip_csr_t>(proc, CSR_SIP, sip_sie_accr);
auto vsip = std::make_shared<mip_proxy_csr_t>(proc, CSR_VSIP, vsip_vsie_accr);
- add_hypervisor_csr(CSR_VSIP, vsip);
- add_supervisor_csr(CSR_SIP, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip));
+ auto sip = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+ add_hypervisor_csr(CSR_VSIP, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIP, vsip));
+ add_hypervisor_csr(CSR_VSIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIPH, vsip));
+ add_supervisor_csr(CSR_SIP, std::make_shared<rv32_low_csr_t>(proc, CSR_SIP, sip));
+ add_supervisor_csr(CSR_SIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIPH, sip));
+ } else {
+ add_hypervisor_csr(CSR_VSIP, vsip);
+ add_supervisor_csr(CSR_SIP, sip);
+ }
add_hypervisor_csr(CSR_HIP, std::make_shared<mip_proxy_csr_t>(proc, CSR_HIP, hip_hie_accr));
- add_hypervisor_csr(CSR_HVIP, hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0));
+ hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+ add_hypervisor_csr(CSR_HVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIP, hvip));
+ add_hypervisor_csr(CSR_HVIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HVIPH, hvip));
+ } else {
+ add_hypervisor_csr(CSR_HVIP, hvip);
+ }
- auto nonvirtual_sie = std::make_shared<mie_proxy_csr_t>(proc, CSR_SIE, sip_sie_accr);
+ nonvirtual_sie = std::make_shared<sie_csr_t>(proc, CSR_SIE, sip_sie_accr);
auto vsie = std::make_shared<mie_proxy_csr_t>(proc, CSR_VSIE, vsip_vsie_accr);
- add_hypervisor_csr(CSR_VSIE, vsie);
- add_supervisor_csr(CSR_SIE, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie));
+ auto sie = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+ add_hypervisor_csr(CSR_VSIE, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIE, vsie));
+ add_hypervisor_csr(CSR_VSIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIEH, vsie));
+ add_supervisor_csr(CSR_SIE, std::make_shared<rv32_low_csr_t>(proc, CSR_SIE, sie));
+ add_supervisor_csr(CSR_SIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIEH, sie));
+ } else {
+ add_hypervisor_csr(CSR_VSIE, vsie);
+ add_supervisor_csr(CSR_SIE, sie);
+ }
add_hypervisor_csr(CSR_HIE, std::make_shared<mie_proxy_csr_t>(proc, CSR_HIE, hip_hie_accr));
add_supervisor_csr(CSR_MEDELEG, medeleg = std::make_shared<medeleg_csr_t>(proc, CSR_MEDELEG));
- add_supervisor_csr(CSR_MIDELEG, mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG));
+ mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+ add_supervisor_csr(CSR_MIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_MIDELEG, mideleg));
+ add_supervisor_csr(CSR_MIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_MIDELEGH, mideleg));
+ } else {
+ add_supervisor_csr(CSR_MIDELEG, mideleg);
+ }
const reg_t counteren_mask = (proc->extension_enabled_const(EXT_ZICNTR) ? 0x7UL : 0x0) | (proc->extension_enabled_const(EXT_ZIHPM) ? 0xfffffff8ULL : 0x0);
add_user_csr(CSR_MCOUNTEREN, mcounteren = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTEREN, counteren_mask, 0));
add_csr(CSR_MCOUNTINHIBIT, mcountinhibit = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTINHIBIT, counteren_mask & (~MCOUNTEREN_TIME), 0));
@@ -162,7 +217,32 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
add_hypervisor_csr(CSR_HSTATUS, hstatus = std::make_shared<hstatus_csr_t>(proc, CSR_HSTATUS));
add_hypervisor_csr(CSR_HGEIE, std::make_shared<const_csr_t>(proc, CSR_HGEIE, 0));
add_hypervisor_csr(CSR_HGEIP, std::make_shared<const_csr_t>(proc, CSR_HGEIP, 0));
- add_hypervisor_csr(CSR_HIDELEG, hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg));
+ hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg);
+ if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+ add_hypervisor_csr(CSR_HIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_HIDELEG, hideleg));
+ add_hypervisor_csr(CSR_HIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HIDELEGH, hideleg));
+ } else {
+ add_hypervisor_csr(CSR_HIDELEG, hideleg);
+ }
+
+ const reg_t menvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? MENVCFG_CBCFE | MENVCFG_CBIE : 0) |
+ (proc->extension_enabled(EXT_ZICBOZ) ? MENVCFG_CBZE : 0) |
+ (proc->extension_enabled(EXT_SMNPM) ? MENVCFG_PMM : 0) |
+ (proc->extension_enabled(EXT_SVADU) ? MENVCFG_ADUE: 0) |
+ (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0) |
+ (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0) |
+ (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) |
+ (proc->extension_enabled(EXT_ZICFISS) ? MENVCFG_SSE : 0) |
+ (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0)|
+ (proc->extension_enabled(EXT_SMCDELEG) ? MENVCFG_CDE : 0);
+ menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0);
+ if (xlen == 32) {
+ add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg));
+ add_user_csr(CSR_MENVCFGH, std::make_shared<rv32_high_csr_t>(proc, CSR_MENVCFGH, menvcfg));
+ } else {
+ add_user_csr(CSR_MENVCFG, menvcfg);
+ }
+
const reg_t hedeleg_mask =
(1 << CAUSE_MISALIGNED_FETCH) |
(1 << CAUSE_FETCH_ACCESS) |
@@ -220,7 +300,14 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
auto hcontext = std::make_shared<masked_csr_t>(proc, CSR_HCONTEXT, (reg_t(1) << hcontext_length) - 1, 0);
add_hypervisor_csr(CSR_HCONTEXT, hcontext);
add_csr(CSR_MCONTEXT, mcontext = std::make_shared<proxy_csr_t>(proc, CSR_MCONTEXT, hcontext));
- add_csr(CSR_MSECCFG, mseccfg = std::make_shared<mseccfg_csr_t>(proc, CSR_MSECCFG));
+
+ mseccfg = std::make_shared<mseccfg_csr_t>(proc, CSR_MSECCFG);
+ if (xlen == 32) {
+ add_csr(CSR_MSECCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MSECCFG, mseccfg));
+ add_csr(CSR_MSECCFGH, mseccfgh = std::make_shared<rv32_high_csr_t>(proc, CSR_MSECCFGH, mseccfg));
+ } else {
+ add_csr(CSR_MSECCFG, mseccfg);
+ }
for (int i = 0; i < max_pmp; ++i) {
add_csr(CSR_PMPADDR0 + i, pmpaddr[i] = std::make_shared<pmpaddr_csr_t>(proc, CSR_PMPADDR0 + i));
@@ -242,25 +329,9 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
add_csr(CSR_MVENDORID, std::make_shared<const_csr_t>(proc, CSR_MVENDORID, 0));
add_csr(CSR_MHARTID, std::make_shared<const_csr_t>(proc, CSR_MHARTID, proc->get_id()));
add_csr(CSR_MCONFIGPTR, std::make_shared<const_csr_t>(proc, CSR_MCONFIGPTR, 0));
- const reg_t menvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? MENVCFG_CBCFE | MENVCFG_CBIE : 0) |
- (proc->extension_enabled(EXT_ZICBOZ) ? MENVCFG_CBZE : 0) |
- (proc->extension_enabled(EXT_SMNPM) ? MENVCFG_PMM : 0) |
- (proc->extension_enabled(EXT_SVADU) ? MENVCFG_ADUE: 0) |
- (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0) |
- (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0) |
- (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) |
- (proc->extension_enabled(EXT_ZICFISS) ? MENVCFG_SSE : 0) |
- (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0)|
- (proc->extension_enabled(EXT_SMCSRIND) ? MENVCFG_CDE : 0);
- menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0);
- if (xlen == 32) {
- add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg));
- add_user_csr(CSR_MENVCFGH, std::make_shared<rv32_high_csr_t>(proc, CSR_MENVCFGH, menvcfg));
- } else {
- add_user_csr(CSR_MENVCFG, menvcfg);
- }
const reg_t senvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? SENVCFG_CBCFE | SENVCFG_CBIE : 0) |
(proc->extension_enabled(EXT_ZICBOZ) ? SENVCFG_CBZE : 0) |
+ (proc->extension_enabled(EXT_SVUKTE) ? SENVCFG_UKTE : 0) |
(proc->extension_enabled(EXT_SSNPM) ? SENVCFG_PMM : 0) |
(proc->extension_enabled(EXT_ZICFILP) ? SENVCFG_LPE : 0) |
(proc->extension_enabled(EXT_ZICFISS) ? SENVCFG_SSE : 0);
@@ -285,7 +356,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
const reg_t sstateen0_mask = (proc->extension_enabled(EXT_ZFINX) ? SSTATEEN0_FCSR : 0) |
(proc->extension_enabled(EXT_ZCMT) ? SSTATEEN0_JVT : 0) |
SSTATEEN0_CS;
- const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
+ const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_CSRIND | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
const reg_t mstateen0_mask = hstateen0_mask | (proc->extension_enabled(EXT_SSQOSID) ? MSTATEEN0_PRIV114 : 0);
for (int i = 0; i < 4; i++) {
const reg_t mstateen_mask = i == 0 ? mstateen0_mask : MSTATEEN_HSTATEEN;
@@ -321,7 +392,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
if (proc->extension_enabled_const(EXT_SSTC)) {
stimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_STIMECMP, MIP_STIP);
vstimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_VSTIMECMP, MIP_VSTIP);
- auto virtualized_stimecmp = std::make_shared<virtualized_stimecmp_csr_t>(proc, stimecmp, vstimecmp);
+ auto virtualized_stimecmp = std::make_shared<virtualized_with_special_permission_csr_t>(proc, stimecmp, vstimecmp);
if (xlen == 32) {
add_supervisor_csr(CSR_STIMECMP, std::make_shared<rv32_low_csr_t>(proc, CSR_STIMECMP, virtualized_stimecmp));
add_supervisor_csr(CSR_STIMECMPH, std::make_shared<rv32_high_csr_t>(proc, CSR_STIMECMPH, virtualized_stimecmp));
@@ -348,20 +419,41 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
csr_t_p miselect = std::make_shared<basic_csr_t>(proc, CSR_MISELECT, 0);
add_csr(CSR_MISELECT, miselect);
- const reg_t mireg_csrs[] = { CSR_MIREG, CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
+ sscsrind_reg_csr_t::sscsrind_reg_csr_t_p mireg;
+ add_csr(CSR_MIREG, mireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_MIREG, miselect));
+ add_ireg_proxy(proc, mireg);
+ const reg_t mireg_csrs[] = { CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
for (auto csr : mireg_csrs)
add_csr(csr, std::make_shared<sscsrind_reg_csr_t>(proc, csr, miselect));
}
if (proc->extension_enabled_const(EXT_SSCSRIND)) {
- csr_t_p vsiselect = std::make_shared<basic_csr_t>(proc, CSR_VSISELECT, 0);
+ csr_t_p vsiselect = std::make_shared<siselect_csr_t>(proc, CSR_VSISELECT, 0);
add_hypervisor_csr(CSR_VSISELECT, vsiselect);
- csr_t_p siselect = std::make_shared<basic_csr_t>(proc, CSR_SISELECT, 0);
- add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_csr_t>(proc, siselect, vsiselect));
+ csr_t_p siselect = std::make_shared<siselect_csr_t>(proc, CSR_SISELECT, 0);
+ add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_with_special_permission_csr_t>(proc, siselect, vsiselect));
+
+ auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_VSIREG, vsiselect);
+ add_hypervisor_csr(CSR_VSIREG, vsireg);
- const reg_t vsireg_csrs[] = { CSR_VSIREG, CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
- const reg_t sireg_csrs[] = { CSR_SIREG, CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
+ auto sireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_SIREG, siselect);
+ add_ireg_proxy(proc, sireg);
+ add_supervisor_csr(CSR_SIREG, std::make_shared<virtualized_indirect_csr_t>(proc, sireg, vsireg));
+ if (proc->extension_enabled(EXT_SSCCFG) || proc->extension_enabled(EXT_SMCDELEG)) {
+ // case CSR_SIREG
+ if (proc->extension_enabled_const(EXT_ZICNTR)) {
+ sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle);
+ sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret);
+ }
+ if (proc->extension_enabled_const(EXT_ZIHPM)) {
+ for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++)
+ sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]);
+ }
+ }
+
+ const reg_t vsireg_csrs[] = { CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
+ const reg_t sireg_csrs[] = { CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
for (size_t i = 0; i < std::size(vsireg_csrs); i++) {
auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, vsireg_csrs[i], vsiselect);
add_hypervisor_csr(vsireg_csrs[i], vsireg);
@@ -372,16 +464,6 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
// Smcdeleg
if (proc->extension_enabled(EXT_SSCCFG) || proc->extension_enabled(EXT_SMCDELEG)) {
switch (sireg_csrs[i]) {
- case CSR_SIREG:
- if (proc->extension_enabled_const(EXT_ZICNTR)) {
- sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle);
- sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret);
- }
- if (proc->extension_enabled_const(EXT_ZIHPM)) {
- for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++)
- sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]);
- }
- break;
case CSR_SIREG4:
if (xlen == 32) {
if (proc->extension_enabled_const(EXT_ZICNTR)) {
@@ -438,4 +520,44 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
const reg_t srmcfg_mask = SRMCFG_MCID | SRMCFG_RCID;
add_const_ext_csr(EXT_SSQOSID, CSR_SRMCFG, std::make_shared<srmcfg_csr_t>(proc, CSR_SRMCFG, srmcfg_mask, 0));
+
+ mvien = std::make_shared<masked_csr_t>(proc, CSR_MVIEN, MIP_SEIP | MIP_SSIP, 0);
+ mvip = std::make_shared<mvip_csr_t>(proc, CSR_MVIP, 0);
+ if (proc->extension_enabled_const(EXT_SMAIA)) {
+ add_csr(CSR_MTOPI, std::make_shared<mtopi_csr_t>(proc, CSR_MTOPI));
+ if (xlen == 32) {
+ add_supervisor_csr(CSR_MVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIEN, mvien));
+ add_supervisor_csr(CSR_MVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIENH, mvien));
+ add_supervisor_csr(CSR_MVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIP, mvip));
+ add_supervisor_csr(CSR_MVIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIPH, mvip));
+ } else {
+ add_supervisor_csr(CSR_MVIEN, mvien);
+ add_supervisor_csr(CSR_MVIP, mvip);
+ }
+ }
+
+ hvictl = std::make_shared<aia_csr_t>(proc, CSR_HVICTL, HVICTL_VTI | HVICTL_IID | HVICTL_DPR | HVICTL_IPRIOM | HVICTL_IPRIO, 0);
+ vstopi = std::make_shared<vstopi_csr_t>(proc, CSR_VSTOPI);
+ if (proc->extension_enabled_const(EXT_SSAIA)) { // Included by EXT_SMAIA
+ csr_t_p nonvirtual_stopi = std::make_shared<nonvirtual_stopi_csr_t>(proc, CSR_STOPI);
+ add_supervisor_csr(CSR_STOPI, std::make_shared<virtualized_with_special_permission_csr_t>(proc, nonvirtual_stopi, vstopi));
+ add_supervisor_csr(CSR_STOPEI, std::make_shared<inaccessible_csr_t>(proc, CSR_STOPEI));
+ auto hvien = std::make_shared<aia_csr_t>(proc, CSR_HVIEN, 0, 0);
+ auto hviprio1 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO1, 0, 0);
+ auto hviprio2 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO2, 0, 0);
+ if (xlen == 32) {
+ add_hypervisor_csr(CSR_HVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIEN, hvien));
+ add_hypervisor_csr(CSR_HVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIENH, hvien));
+ add_hypervisor_csr(CSR_HVIPRIO1, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO1, hviprio1));
+ add_hypervisor_csr(CSR_HVIPRIO1H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO1H, hviprio1));
+ add_hypervisor_csr(CSR_HVIPRIO2, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO2, hviprio2));
+ add_hypervisor_csr(CSR_HVIPRIO2H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO2H, hviprio2));
+ } else {
+ add_hypervisor_csr(CSR_HVIEN, hvien);
+ add_hypervisor_csr(CSR_HVIPRIO1, hviprio1);
+ add_hypervisor_csr(CSR_HVIPRIO2, hviprio2);
+ }
+ add_hypervisor_csr(CSR_HVICTL, hvictl);
+ add_hypervisor_csr(CSR_VSTOPI, vstopi);
+ }
}
diff --git a/riscv/csrs.cc b/riscv/csrs.cc
index 1873f7e..914662a 100644
--- a/riscv/csrs.cc
+++ b/riscv/csrs.cc
@@ -15,6 +15,8 @@
#include "insn_macros.h"
// For CSR_DCSR_V:
#include "debug_defines.h"
+// For ctz:
+#include "arith.h"
// STATE macro used by require_privilege() macro:
#undef STATE
@@ -119,7 +121,7 @@ bool pmpaddr_csr_t::unlogged_write(const reg_t val) noexcept {
const bool locked = !lock_bypass && (cfg & PMP_L);
if (pmpidx < proc->n_pmp && !locked && !next_locked_and_tor()) {
- this->val = val & ((reg_t(1) << (MAX_PADDR_BITS - PMP_SHIFT)) - 1);
+ this->val = val & ((reg_t(1) << (proc->paddr_bits() - PMP_SHIFT)) - 1);
}
else
return false;
@@ -247,7 +249,10 @@ bool pmpcfg_csr_t::unlogged_write(const reg_t val) noexcept {
if (i < proc->n_pmp) {
const bool locked = (state->pmpaddr[i]->cfg & PMP_L);
if (rlb || !locked) {
- uint8_t cfg = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L);
+ uint8_t all_cfg_fields = (PMP_R | PMP_W | PMP_X | PMP_A |
+ (proc->extension_enabled(EXT_SMPMPMT) ? PMP_MT : 0) |
+ PMP_L);
+ uint8_t cfg = (val >> (8 * (i - i0))) & all_cfg_fields;
// Drop R=0 W=1 when MML = 0
// Remove the restriction when MML = 1
if (!mml) {
@@ -256,6 +261,9 @@ bool pmpcfg_csr_t::unlogged_write(const reg_t val) noexcept {
// Disallow A=NA4 when granularity > 4
if (proc->lg_pmp_granularity != PMP_SHIFT && (cfg & PMP_A) == PMP_NA4)
cfg |= PMP_NAPOT;
+ // MT value 0x3 is reserved
+ if (get_field(cfg, PMP_MT) == 0x3)
+ cfg = set_field(cfg, PMP_MT, 0);
/*
* Adding a rule with executable privileges that either is M-mode-only or a locked Shared-Region
* is not possible and such pmpcfg writes are ignored, leaving pmpcfg unchanged.
@@ -313,31 +321,31 @@ bool mseccfg_csr_t::get_sseed() const noexcept {
}
bool mseccfg_csr_t::unlogged_write(const reg_t val) noexcept {
- if (proc->n_pmp == 0)
- return false;
-
- // pmpcfg.L is 1 in any rule or entry (including disabled entries)
- const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
- [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
reg_t new_val = read();
- // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
- // Otherwise set the RLB bit according val
- if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
- new_val &= ~MSECCFG_RLB;
- new_val |= (val & MSECCFG_RLB);
- }
+ if (proc->n_pmp != 0) {
+ // pmpcfg.L is 1 in any rule or entry (including disabled entries)
+ const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
+ [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
+
+ // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
+ // Otherwise set the RLB bit according to val
+ if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
+ new_val &= ~MSECCFG_RLB;
+ new_val |= (val & MSECCFG_RLB);
+ }
- new_val |= (val & MSECCFG_MMWP); //MMWP is sticky
- new_val |= (val & MSECCFG_MML); //MML is sticky
+ new_val |= (val & MSECCFG_MMWP); //MMWP is sticky
+ new_val |= (val & MSECCFG_MML); //MML is sticky
+
+ proc->get_mmu()->flush_tlb();
+ }
if (proc->extension_enabled(EXT_ZKR)) {
uint64_t mask = MSECCFG_USEED | MSECCFG_SSEED;
new_val = (new_val & ~mask) | (val & mask);
}
- proc->get_mmu()->flush_tlb();
-
if (proc->extension_enabled(EXT_ZICFILP)) {
new_val &= ~MSECCFG_MLPE;
new_val |= (val & MSECCFG_MLPE);
@@ -423,7 +431,7 @@ reg_t cause_csr_t::read() const noexcept {
// implement class base_status_csr_t
base_status_csr_t::base_status_csr_t(processor_t* const proc, const reg_t addr):
csr_t(proc, addr),
- has_page(proc->extension_enabled_const('S') && proc->supports_impl(IMPL_MMU)),
+ has_page(proc->extension_enabled_const('S') && proc->has_mmu()),
sstatus_write_mask(compute_sstatus_write_mask()),
sstatus_read_mask(sstatus_write_mask | SSTATUS_UBE | SSTATUS_UXL
| (proc->get_const_xlen() == 32 ? SSTATUS32_SD : SSTATUS64_SD)) {
@@ -441,7 +449,7 @@ reg_t base_status_csr_t::compute_sstatus_write_mask() const noexcept {
| (has_fs ? SSTATUS_FS : 0)
| (proc->any_custom_extensions() ? SSTATUS_XS : 0)
| (has_vs ? SSTATUS_VS : 0)
- | (proc->extension_enabled(EXT_ZICFILP) ? SSTATUS_SPELP : 0)
+ | (proc->extension_enabled('S') && proc->extension_enabled(EXT_ZICFILP) ? SSTATUS_SPELP : 0)
| (proc->extension_enabled(EXT_SSDBLTRP) ? SSTATUS_SDT : 0)
;
}
@@ -536,11 +544,16 @@ mstatus_csr_t::mstatus_csr_t(processor_t* const proc, const reg_t addr):
val(compute_mstatus_initial_value()) {
}
+reg_t mstatus_csr_t::read() const noexcept { // stored value, with MSTATUS_SDT masked out unless menvcfg has MENVCFG_DTE set
+ return val & ~reg_t(state->menvcfg->read() & MENVCFG_DTE ? 0 : MSTATUS_SDT);
+}
+
bool mstatus_csr_t::unlogged_write(const reg_t val) noexcept {
const bool has_mpv = proc->extension_enabled('H');
const bool has_gva = has_mpv;
+ const reg_t adj_write_mask = sstatus_write_mask & ~reg_t(state->menvcfg->read() & MENVCFG_DTE ? 0 : SSTATUS_SDT);
- const reg_t mask = sstatus_write_mask
+ const reg_t mask = adj_write_mask
| MSTATUS_MIE | MSTATUS_MPIE
| (proc->extension_enabled('U') ? MSTATUS_MPRV : 0)
| MSTATUS_MPP | MSTATUS_TW
@@ -549,13 +562,12 @@ bool mstatus_csr_t::unlogged_write(const reg_t val) noexcept {
| (has_gva ? MSTATUS_GVA : 0)
| (has_mpv ? MSTATUS_MPV : 0)
| (proc->extension_enabled(EXT_SMDBLTRP) ? MSTATUS_MDT : 0)
- | (proc->extension_enabled(EXT_ZICFILP) ? (MSTATUS_SPELP | MSTATUS_MPELP) : 0)
- | (proc->extension_enabled(EXT_SSDBLTRP) ? SSTATUS_SDT : 0)
+ | (proc->extension_enabled(EXT_ZICFILP) ? (MSTATUS_MPELP | (proc->extension_enabled('S') ? MSTATUS_SPELP : 0)) : 0)
;
const reg_t requested_mpp = proc->legalize_privilege(get_field(val, MSTATUS_MPP));
const reg_t adjusted_val = set_field(val, MSTATUS_MPP, requested_mpp);
- reg_t new_mstatus = (read() & ~mask) | (adjusted_val & mask);
+ reg_t new_mstatus = (this->val & ~mask) | (adjusted_val & mask);
new_mstatus = (new_mstatus & MSTATUS_MDT) ? (new_mstatus & ~MSTATUS_MIE) : new_mstatus;
new_mstatus = (new_mstatus & MSTATUS_SDT) ? (new_mstatus & ~MSTATUS_SIE) : new_mstatus;
maybe_flush_tlb(new_mstatus);
@@ -639,6 +651,22 @@ reg_t rv32_high_csr_t::written_value() const noexcept {
return (orig->written_value() >> 32) & 0xffffffffU;
}
+aia_rv32_high_csr_t::aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig): // forwards all arguments unchanged to rv32_high_csr_t
+ rv32_high_csr_t(proc, addr, orig) {
+}
+
+void aia_rv32_high_csr_t::verify_permissions(insn_t insn, bool write) const { // state-enable checks first, then the rv32_high_csr_t checks
+ if (proc->extension_enabled(EXT_SMSTATEEN)) { // the mstateen0/hstateen0 checks apply only when EXT_SMSTATEEN is enabled
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) // privilege below M with the mstateen0 AIA bit clear
+ throw trap_illegal_instruction(insn.bits());
+
+ if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) // state->v set with the hstateen0 AIA bit clear
+ throw trap_virtual_instruction(insn.bits());
+ }
+
+ rv32_high_csr_t::verify_permissions(insn, write); // finally run the inherited checks
+}
+
// implement class sstatus_csr_t
sstatus_csr_t::sstatus_csr_t(processor_t* const proc, sstatus_proxy_csr_t_p orig, vsstatus_csr_t_p virt):
virtualized_csr_t(proc, orig, virt),
@@ -708,17 +736,18 @@ bool misa_csr_t::unlogged_write(const reg_t val) noexcept {
const bool prev_h = old_misa & (1L << ('H' - 'A'));
const reg_t new_misa = (adjusted_val & write_mask) | (old_misa & ~write_mask);
const bool new_h = new_misa & (1L << ('H' - 'A'));
+ const bool new_v = proc->get_isa().has_any_vector();
proc->set_extension_enable(EXT_ZCA, (new_misa & (1L << ('C' - 'A'))) || !proc->get_isa().extension_enabled('C'));
- proc->set_extension_enable(EXT_ZCF, (new_misa & (1L << ('F' - 'A'))) && proc->extension_enabled(EXT_ZCA));
+ proc->set_extension_enable(EXT_ZCF, (new_misa & (1L << ('F' - 'A'))) && proc->extension_enabled(EXT_ZCA) && proc->get_xlen() == 32);
proc->set_extension_enable(EXT_ZCD, (new_misa & (1L << ('D' - 'A'))) && proc->extension_enabled(EXT_ZCA));
proc->set_extension_enable(EXT_ZCB, proc->extension_enabled(EXT_ZCA));
proc->set_extension_enable(EXT_ZCMP, proc->extension_enabled(EXT_ZCA));
proc->set_extension_enable(EXT_ZCMT, proc->extension_enabled(EXT_ZCA));
proc->set_extension_enable(EXT_ZFH, new_misa & (1L << ('F' - 'A')));
proc->set_extension_enable(EXT_ZFHMIN, new_misa & (1L << ('F' - 'A')));
- proc->set_extension_enable(EXT_ZVFH, (new_misa & (1L << ('V' - 'A'))) && proc->extension_enabled(EXT_ZFHMIN));
- proc->set_extension_enable(EXT_ZVFHMIN, new_misa & (1L << ('V' - 'A')));
+ proc->set_extension_enable(EXT_ZVFH, new_v && proc->get_isa().get_zvf() && proc->extension_enabled(EXT_ZFHMIN));
+ proc->set_extension_enable(EXT_ZVFHMIN, new_v && proc->get_isa().get_zvf());
proc->set_extension_enable(EXT_ZAAMO, (new_misa & (1L << ('A' - 'A'))) || !proc->get_isa().extension_enabled('A'));
proc->set_extension_enable(EXT_ZALRSC, (new_misa & (1L << ('A' - 'A'))) || !proc->get_isa().extension_enabled('A'));
proc->set_extension_enable(EXT_ZBA, (new_misa & (1L << ('B' - 'A'))) || !proc->get_isa().extension_enabled('B'));
@@ -749,6 +778,9 @@ bool misa_csr_t::unlogged_write(const reg_t val) noexcept {
}
}
+ proc->get_mmu()->flush_tlb();
+ proc->build_opcode_map();
+
return basic_csr_t::unlogged_write(new_misa);
}
@@ -781,8 +813,14 @@ mip_csr_t::mip_csr_t(processor_t* const proc, const reg_t addr):
mip_or_mie_csr_t(proc, addr) {
}
+void mip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { // MIP_SEIP is always stripped from the mask handed to the base class
+ if (!(state->mvien->read() & MIP_SEIP) && (mask & MIP_SEIP)) // SEIP requested while mvien.SEIP is clear: redirect that bit to mvip
+ state->mvip->write_with_mask(MIP_SEIP, val); // mvip.SEIP is an alias of mip.SEIP when mvien.SEIP=0
+ mip_or_mie_csr_t::write_with_mask(mask & ~MIP_SEIP, val); // all other requested bits go through the base-class write
+}
+
reg_t mip_csr_t::read() const noexcept {
- return val | state->hvip->basic_csr_t::read();
+ return val | state->hvip->basic_csr_t::read() | ((state->mvien->read() & MIP_SEIP) ? 0 : (state->mvip->basic_csr_t::read() & MIP_SEIP));
}
void mip_csr_t::backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept {
@@ -864,6 +902,15 @@ mip_proxy_csr_t::mip_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
accr(accr) {
}
+void mip_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { // base csr_t checks, then the hvictl.VTI restriction
+ csr_t::verify_permissions(insn, write);
+ if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) { // extra check only when EXT_SSAIA and 'H' are both enabled
+ if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) &&
+ proc->extension_enabled('S') && state->v)
+ throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sip when hvictl.VTI=1
+ }
+}
+
reg_t mip_proxy_csr_t::read() const noexcept {
return accr->ip_read();
}
@@ -879,6 +926,15 @@ mie_proxy_csr_t::mie_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
accr(accr) {
}
+void mie_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { // base csr_t checks, then the hvictl.VTI restriction
+ csr_t::verify_permissions(insn, write);
+ if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) { // extra check only when EXT_SSAIA and 'H' are both enabled
+ if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) &&
+ proc->extension_enabled('S') && state->v)
+ throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sie when hvictl.VTI=1
+ }
+}
+
reg_t mie_proxy_csr_t::read() const noexcept {
return accr->ie_read();
}
@@ -924,8 +980,11 @@ medeleg_csr_t::medeleg_csr_t(processor_t* const proc, const reg_t addr):
| (1 << CAUSE_FETCH_GUEST_PAGE_FAULT)
| (1 << CAUSE_LOAD_GUEST_PAGE_FAULT)
| (1 << CAUSE_VIRTUAL_INSTRUCTION)
- | (1 << CAUSE_STORE_GUEST_PAGE_FAULT)
- ) {
+ | (1 << CAUSE_STORE_GUEST_PAGE_FAULT)),
+ mmu_exceptions(0
+ | (1 << CAUSE_FETCH_PAGE_FAULT)
+ | (1 << CAUSE_LOAD_PAGE_FAULT)
+ | (1 << CAUSE_STORE_PAGE_FAULT)) {
}
void medeleg_csr_t::verify_permissions(insn_t insn, bool write) const {
@@ -946,9 +1005,7 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept {
| (1 << CAUSE_STORE_ACCESS)
| (1 << CAUSE_USER_ECALL)
| (1 << CAUSE_SUPERVISOR_ECALL)
- | (1 << CAUSE_FETCH_PAGE_FAULT)
- | (1 << CAUSE_LOAD_PAGE_FAULT)
- | (1 << CAUSE_STORE_PAGE_FAULT)
+ | (proc->has_mmu() ? mmu_exceptions : 0)
| (proc->extension_enabled('H') ? hypervisor_exceptions : 0)
| (1 << CAUSE_SOFTWARE_CHECK_FAULT)
| (1 << CAUSE_HARDWARE_ERROR_FAULT)
@@ -956,6 +1013,38 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept {
return basic_csr_t::unlogged_write((read() & ~mask) | (val & mask));
}
+sip_csr_t::sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr): // forwards all arguments unchanged to mip_proxy_csr_t
+ mip_proxy_csr_t(proc, addr, accr) {
+}
+
+reg_t sip_csr_t::read() const noexcept { // merges the proxied value with bits taken from mvip
+ const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits set in mvien and clear in mideleg
+ return (mip_proxy_csr_t::read() & ~mask) | (state->mvip->read() & mask); // masked bits come from mvip, the rest from the proxy read
+}
+
+bool sip_csr_t::unlogged_write(const reg_t val) noexcept { // routes each written bit either to mvip or to the proxy write
+ const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits set in mvien and clear in mideleg
+ state->mvip->write_with_mask(mask & accr->get_ip_write_mask(), val); // those bits go to mvip, limited to the accessor's ip write mask
+ return mip_proxy_csr_t::unlogged_write(val & ~mask); // remaining bits take the proxy path; its result is returned
+}
+
+sie_csr_t::sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr): // builds the mie_proxy_csr_t base with the same arguments
+ mie_proxy_csr_t(proc, addr, accr),
+ val(0) { // locally stored bits start out cleared
+}
+
+reg_t sie_csr_t::read() const noexcept { // merges the proxied value with the locally stored bits
+ const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits set in mvien and clear in mideleg
+ return (mie_proxy_csr_t::read() & ~mask) | (val & mask); // masked bits come from the local val, the rest from the proxy read
+}
+
+bool sie_csr_t::unlogged_write(const reg_t val) noexcept { // splits the write between local storage and the proxy write
+ const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits set in mvien and clear in mideleg
+ this->val = (this->val & ~mask) | (val & mask); // masked bits are kept in the local val; its other bits are left unchanged
+ mie_proxy_csr_t::unlogged_write(val & ~mask); // remaining bits take the proxy path; its return value is ignored
+ return true; // always reports true
+}
+
// implement class masked_csr_t
masked_csr_t::masked_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init):
basic_csr_t(proc, addr, init),
@@ -999,7 +1088,7 @@ base_atp_csr_t::base_atp_csr_t(processor_t* const proc, const reg_t addr):
}
bool base_atp_csr_t::unlogged_write(const reg_t val) noexcept {
- const reg_t newval = proc->supports_impl(IMPL_MMU) ? compute_new_satp(val) : 0;
+ const reg_t newval = proc->has_mmu() ? compute_new_satp(val) : 0;
if (newval != read())
proc->get_mmu()->flush_tlb();
return basic_csr_t::unlogged_write(newval);
@@ -1008,23 +1097,23 @@ bool base_atp_csr_t::unlogged_write(const reg_t val) noexcept {
bool base_atp_csr_t::satp_valid(reg_t val) const noexcept {
if (proc->get_xlen() == 32) {
switch (get_field(val, SATP32_MODE)) {
- case SATP_MODE_SV32: return proc->supports_impl(IMPL_MMU_SV32);
case SATP_MODE_OFF: return true;
+ case SATP_MODE_SV32: return proc->get_max_vaddr_bits() >= 32;
default: return false;
}
} else {
switch (get_field(val, SATP64_MODE)) {
- case SATP_MODE_SV39: return proc->supports_impl(IMPL_MMU_SV39);
- case SATP_MODE_SV48: return proc->supports_impl(IMPL_MMU_SV48);
- case SATP_MODE_SV57: return proc->supports_impl(IMPL_MMU_SV57);
case SATP_MODE_OFF: return true;
+ case SATP_MODE_SV39: return proc->get_max_vaddr_bits() >= 39;
+ case SATP_MODE_SV48: return proc->get_max_vaddr_bits() >= 48;
+ case SATP_MODE_SV57: return proc->get_max_vaddr_bits() >= 57;
default: return false;
}
}
}
reg_t base_atp_csr_t::compute_new_satp(reg_t val) const noexcept {
- reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1;
+ reg_t rv64_ppn_mask = (reg_t(1) << (proc->paddr_bits() - PGSHIFT)) - 1;
reg_t mode_mask = proc->get_xlen() == 32 ? SATP32_MODE : SATP64_MODE;
reg_t asid_mask_if_enabled = proc->get_xlen() == 32 ? SATP32_ASID : SATP64_ASID;
@@ -1252,13 +1341,13 @@ bool hgatp_csr_t::unlogged_write(const reg_t val) noexcept {
HGATP32_MODE |
(proc->supports_impl(IMPL_MMU_VMID) ? HGATP32_VMID : 0);
} else {
- mask = (HGATP64_PPN & ((reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1)) |
+ mask = (HGATP64_PPN & ((reg_t(1) << (proc->paddr_bits() - PGSHIFT)) - 1)) |
(proc->supports_impl(IMPL_MMU_VMID) ? HGATP64_VMID : 0);
if (get_field(val, HGATP64_MODE) == HGATP_MODE_OFF ||
- (proc->supports_impl(IMPL_MMU_SV39) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4) ||
- (proc->supports_impl(IMPL_MMU_SV48) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4) ||
- (proc->supports_impl(IMPL_MMU_SV57) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV57X4))
+ (proc->get_max_vaddr_bits() >= 39 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4) ||
+ (proc->get_max_vaddr_bits() >= 48 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4) ||
+ (proc->get_max_vaddr_bits() >= 57 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV57X4))
mask |= HGATP64_MODE;
}
mask &= ~(reg_t)3;
@@ -1347,6 +1436,7 @@ dcsr_csr_t::dcsr_csr_t(processor_t* const proc, const reg_t addr):
ebreakvs(false),
ebreakvu(false),
v(false),
+ mprven(false),
cause(0),
ext_cause(0),
cetrig(0),
@@ -1376,6 +1466,7 @@ reg_t dcsr_csr_t::read() const noexcept {
result = set_field(result, DCSR_STEP, step);
result = set_field(result, DCSR_PRV, prv);
result = set_field(result, CSR_DCSR_V, v);
+ result = set_field(result, DCSR_MPRVEN, mprven);
result = set_field(result, DCSR_PELP, pelp);
return result;
}
@@ -1390,6 +1481,7 @@ bool dcsr_csr_t::unlogged_write(const reg_t val) noexcept {
ebreakvs = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_EBREAKVS) : false;
ebreakvu = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_EBREAKVU) : false;
v = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_V) : false;
+ mprven = get_field(val, CSR_DCSR_MPRVEN);
pelp = proc->extension_enabled(EXT_ZICFILP) ?
static_cast<elp_t>(get_field(val, DCSR_PELP)) : elp_t::NO_LP_EXPECTED;
cetrig = proc->extension_enabled(EXT_SMDBLTRP) ? get_field(val, DCSR_CETRIG) : false;
@@ -1645,10 +1737,6 @@ bool stimecmp_csr_t::unlogged_write(const reg_t val) noexcept {
return basic_csr_t::unlogged_write(val);
}
-virtualized_stimecmp_csr_t::virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
- virtualized_csr_t(proc, orig, virt) {
-}
-
void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
if (!(state->menvcfg->read() & MENVCFG_STCE)) {
// access to (v)stimecmp with MENVCFG.STCE = 0
@@ -1664,9 +1752,18 @@ void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
}
basic_csr_t::verify_permissions(insn, write);
+
+ if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+ if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && state->v && write)
+ throw trap_virtual_instruction(insn.bits());
+ }
+}
+
+virtualized_with_special_permission_csr_t::virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
+ virtualized_csr_t(proc, orig, virt) {
}
-void virtualized_stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
+void virtualized_with_special_permission_csr_t::verify_permissions(insn_t insn, bool write) const {
orig_csr->verify_permissions(insn, write);
}
@@ -1754,20 +1851,22 @@ sscsrind_reg_csr_t::sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr
}
void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const {
+ if (proc->extension_enabled(EXT_SMSTATEEN)) {
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
+ throw trap_illegal_instruction(insn.bits());
+ }
+
// Don't call base verify_permission for VS registers remapped to S-mode
if (insn.csr() == address)
csr_t::verify_permissions(insn, write);
if (proc->extension_enabled(EXT_SMSTATEEN)) {
- if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
- throw trap_illegal_instruction(insn.bits());
-
if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND))
throw trap_virtual_instruction(insn.bits());
}
if (proc->extension_enabled(EXT_SMCDELEG)) {
- if (insn.csr() >= CSR_VSIREG && insn.csr() <= CSR_VSIREG6) {
+ if (address >= CSR_VSIREG && address <= CSR_VSIREG6) {
if (!state->v) {
// An attempt to access any vsireg* from M or S mode raises an illegal instruction exception.
throw trap_illegal_instruction(insn.bits());
@@ -1785,7 +1884,7 @@ void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const {
}
}
}
- if (insn.csr() >= CSR_SIREG && insn.csr() <= CSR_SIREG6) {
+ if (address >= CSR_SIREG && address <= CSR_SIREG6) {
// attempts to access any sireg* when menvcfg.CDE = 0;
if ((state->menvcfg->read() & MENVCFG_CDE) != MENVCFG_CDE) {
if (!state->v) {
@@ -1936,8 +2035,9 @@ hstatus_csr_t::hstatus_csr_t(processor_t* const proc, const reg_t addr):
}
bool hstatus_csr_t::unlogged_write(const reg_t val) noexcept {
- const reg_t mask = HSTATUS_VTSR | HSTATUS_VTW
- | (proc->supports_impl(IMPL_MMU) ? HSTATUS_VTVM : 0)
+ const reg_t mask = (proc->extension_enabled(EXT_SVUKTE) ? HSTATUS_HUKTE : 0)
+ | HSTATUS_VTSR | HSTATUS_VTW
+ | (proc->has_mmu() ? HSTATUS_VTVM : 0)
| (proc->extension_enabled(EXT_SSNPM) ? HSTATUS_HUPMM : 0)
| HSTATUS_HU | HSTATUS_SPVP | HSTATUS_SPV | HSTATUS_GVA;
@@ -1973,3 +2073,176 @@ bool scntinhibit_csr_t::unlogged_write(const reg_t val) noexcept {
reg_t scntinhibit_csr_t::read() const noexcept {
return state->mcounteren->read() & state->mcountinhibit->read();
}
+
+mtopi_csr_t::mtopi_csr_t(processor_t* const proc, const reg_t addr):
+ csr_t(proc, addr) {
+}
+// Read: reports one interrupt that is pending in mip, enabled in mie and not delegated by mideleg, or 0 if there is none.
+reg_t mtopi_csr_t::read() const noexcept {
+ reg_t enabled_interrupts = state->mip->read() & state->mie->read() & ~state->mideleg->read();
+ if (!enabled_interrupts)
+ return 0; // no enabled pending interrupt to M-mode
+ // The processor picks the winner; ctz presumably yields the bit index of that single selected bit -- confirm.
+ reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+ reg_t identity = ctz(selected_interrupt);
+ return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0
+}
+// Write: the value is ignored and false is returned.
+bool mtopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+ return false;
+}
+
+mvip_csr_t::mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init):
+ basic_csr_t(proc, addr, init) {
+}
+// Read: composes SEIP, STIP and SSIP from the stored value and from mip, depending on menvcfg.STCE and mvien.SSIP.
+reg_t mvip_csr_t::read() const noexcept {
+ const reg_t val = basic_csr_t::read(); // value stored in this CSR
+ const reg_t mvien = state->mvien->read();
+ const reg_t mip = state->mip->read();
+ const reg_t menvcfg = state->menvcfg->read();
+ return 0
+ | (val & MIP_SEIP) // SEIP always comes from the stored value
+ | ((menvcfg & MENVCFG_STCE) ? 0 : (mip & MIP_STIP)) // STIP mirrors mip.STIP unless STCE is set, then it reads 0
+ | (((mvien & MIP_SSIP) ? val : mip) & MIP_SSIP) // SSIP: stored value when mvien.SSIP=1, otherwise mip.SSIP
+ ;
+}
+// Write: forwards aliased bits to mip first, then stores SEIP (and SSIP when mvien.SSIP=1) locally.
+bool mvip_csr_t::unlogged_write(const reg_t val) noexcept {
+ if (!(state->menvcfg->read() & MENVCFG_STCE))
+ state->mip->write_with_mask(MIP_STIP, val); // mvip.STIP is an alias of mip.STIP when mip.STIP is writable
+ if (!(state->mvien->read() & MIP_SSIP))
+ state->mip->write_with_mask(MIP_SSIP, val); // mvip.SSIP is an alias of mip.SSIP when mvien.SSIP=0
+ // When mvien.SSIP=0 the previously stored SSIP bit is kept unchanged.
+ const reg_t new_val = (val & MIP_SEIP) | (((state->mvien->read() & MIP_SSIP) ? val : basic_csr_t::read()) & MIP_SSIP);
+ return basic_csr_t::unlogged_write(new_val);
+}
+// Masked update of the stored value only: bypasses the mip aliasing in unlogged_write above, then logs the write.
+void mvip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept {
+ basic_csr_t::unlogged_write((basic_csr_t::read() & ~mask) | (val & mask));
+ log_write();
+}
+
+nonvirtual_stopi_csr_t::nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr):
+ csr_t(proc, addr) {
+}
+// Access check: with Smstateen enabled, the AIA bits of mstateen0/hstateen0 gate access before the generic csr_t checks run.
+void nonvirtual_stopi_csr_t::verify_permissions(insn_t insn, bool write) const {
+ if (proc->extension_enabled(EXT_SMSTATEEN)) {
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) // below M-mode and mstateen0.AIA clear
+ throw trap_illegal_instruction(insn.bits());
+ // Virtualized (state->v set) and hstateen0.AIA clear: virtual-instruction trap instead.
+ if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+ throw trap_virtual_instruction(insn.bits());
+ }
+
+ csr_t::verify_permissions(insn, write);
+}
+// Read: reports one interrupt pending in nonvirtual_sip, enabled in nonvirtual_sie and not delegated by hideleg, or 0.
+reg_t nonvirtual_stopi_csr_t::read() const noexcept {
+ reg_t enabled_interrupts = state->nonvirtual_sip->read() & state->nonvirtual_sie->read() & ~state->hideleg->read();
+ if (!enabled_interrupts)
+ return 0; // no enabled pending interrupt to S-mode
+ // Same encoding as mtopi: the MTOPI_IID field macro is reused for the identity.
+ reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+ reg_t identity = ctz(selected_interrupt);
+ return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0
+}
+// Write: the value is ignored and false is returned.
+bool nonvirtual_stopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+ return false;
+}
+
+inaccessible_csr_t::inaccessible_csr_t(processor_t* const proc, const reg_t addr):
+ csr_t(proc, addr) {
+}
+// Access check that never succeeds: every access traps, for reads and writes alike (`write` is unused).
+void inaccessible_csr_t::verify_permissions(insn_t insn, bool UNUSED write) const {
+ if (state->v) // virtualized: virtual-instruction trap
+ throw trap_virtual_instruction(insn.bits());
+ else // not virtualized: illegal-instruction trap
+ throw trap_illegal_instruction(insn.bits());
+}
+
+vstopi_csr_t::vstopi_csr_t(processor_t* const proc, const reg_t addr):
+ csr_t(proc, addr) {
+}
+// Access check: with Smstateen enabled, the AIA bits of mstateen0/hstateen0 gate access before the generic csr_t checks run.
+void vstopi_csr_t::verify_permissions(insn_t insn, bool write) const {
+ if (proc->extension_enabled(EXT_SMSTATEEN)) {
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) // below M-mode and mstateen0.AIA clear
+ throw trap_illegal_instruction(insn.bits());
+ // Virtualized (state->v set) and hstateen0.AIA clear: virtual-instruction trap instead.
+ if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+ throw trap_virtual_instruction(insn.bits());
+ }
+
+ csr_t::verify_permissions(insn, write);
+}
+// Read: computes the reported interrupt identity and priority from hvictl, hstatus.VGEIN and the hideleg-delegated pending interrupts.
+reg_t vstopi_csr_t::read() const noexcept {
+ reg_t hvictl = state->hvictl->read(); // the fields below are all decoded from this single hvictl snapshot
+ bool vti = hvictl & HVICTL_VTI;
+ reg_t iid = get_field(hvictl, HVICTL_IID);
+ bool dpr = hvictl & HVICTL_DPR;
+ bool ipriom = hvictl & HVICTL_IPRIOM;
+ reg_t iprio = get_field(hvictl, HVICTL_IPRIO);
+ // Candidates: pending in mip, enabled in mie and delegated by hideleg.
+ reg_t enabled_interrupts = state->mip->read() & state->mie->read() & state->hideleg->read();
+ enabled_interrupts >>= 1; // VSSIP -> SSIP, etc
+ reg_t vgein = get_field(state->hstatus->read(), HSTATUS_VGEIN);
+ reg_t virtual_sei_priority = (vgein == 0 && iid == IRQ_S_EXT && iprio != 0) ? iprio : 255; // vstopi.IPRIO is 255 for priority number 256
+ // Two paths: hvictl.VTI set (identity/priority driven by hvictl) or clear (default-priority selection).
+ reg_t identity, priority;
+ if (vti) {
+ if (!(enabled_interrupts & MIP_SEIP) && iid == IRQ_S_EXT) // hvictl.IID names S_EXT but no such interrupt is pending+enabled
+ return 0;
+ // A pending external interrupt wins when hvictl.IID is S_EXT or hvictl.DPR is set; otherwise hvictl.IID is reported.
+ identity = ((enabled_interrupts & MIP_SEIP) && (iid == IRQ_S_EXT || dpr)) ? IRQ_S_EXT : iid;
+ priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : ((iprio != 0 || !dpr) ? iprio : 255);
+ } else {
+ if (!enabled_interrupts)
+ return 0; // no enabled pending interrupt to VS-mode
+ // ctz presumably yields the bit index of the single selected interrupt bit -- confirm.
+ reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+ identity = ctz(selected_interrupt);
+ priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : 255; // vstopi.IPRIO is 255 for interrupt with default priority lower than VSEI
+ }
+ return set_field((reg_t)(ipriom ? priority : 1), MTOPI_IID, identity); // low bits carry priority only when hvictl.IPRIOM is set, else 1
+}
+// Write: the value is ignored and false is returned.
+bool vstopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+ return false;
+}
+
+siselect_csr_t::siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init):
+ basic_csr_t(proc, addr, init) {
+}
+// Access check: with Smstateen enabled, the CSRIND bits of mstateen0/hstateen0 gate access before the basic_csr_t checks run.
+void siselect_csr_t::verify_permissions(insn_t insn, bool write) const {
+ if (proc->extension_enabled(EXT_SMSTATEEN)) {
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND)) // below M-mode and mstateen0.CSRIND clear
+ throw trap_illegal_instruction(insn.bits());
+ // Virtualized (state->v set) and hstateen0.CSRIND clear: virtual-instruction trap instead.
+ if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND))
+ throw trap_virtual_instruction(insn.bits());
+ }
+
+ basic_csr_t::verify_permissions(insn, write);
+}
+
+aia_csr_t::aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init):
+ masked_csr_t(proc, addr, mask, init) {
+}
+// Access check: with Smstateen enabled, the AIA bits of mstateen0/hstateen0 gate access before the basic_csr_t checks run.
+void aia_csr_t::verify_permissions(insn_t insn, bool write) const {
+ if (proc->extension_enabled(EXT_SMSTATEEN)) {
+ if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) // below M-mode and mstateen0.AIA clear
+ throw trap_illegal_instruction(insn.bits());
+ // Virtualized (state->v set) and hstateen0.AIA clear: virtual-instruction trap instead.
+ if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+ throw trap_virtual_instruction(insn.bits());
+ }
+
+ basic_csr_t::verify_permissions(insn, write);
+}
diff --git a/riscv/csrs.h b/riscv/csrs.h
index 33ac33e..b1d5a3b 100644
--- a/riscv/csrs.h
+++ b/riscv/csrs.h
@@ -255,9 +255,7 @@ class mstatus_csr_t final: public base_status_csr_t {
public:
mstatus_csr_t(processor_t* const proc, const reg_t addr);
- reg_t read() const noexcept override {
- return val;
- }
+ reg_t read() const noexcept override;
protected:
virtual bool unlogged_write(const reg_t val) noexcept override;
@@ -301,6 +299,12 @@ class rv32_high_csr_t: public csr_t {
csr_t_p orig;
};
+class aia_rv32_high_csr_t: public rv32_high_csr_t { // rv32_high_csr_t variant that supplies its own permission check
+ public:
+ aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // NOTE(review): presumably the AIA state-enable gate used by aia_csr_t -- confirm in csrs.cc
+};
+
// sstatus.sdt is read_only 0 when menvcfg.dte = 0
class sstatus_proxy_csr_t final: public base_status_csr_t {
public:
@@ -356,7 +360,7 @@ class mip_or_mie_csr_t: public csr_t {
mip_or_mie_csr_t(processor_t* const proc, const reg_t addr);
virtual reg_t read() const noexcept override;
- void write_with_mask(const reg_t mask, const reg_t val) noexcept;
+ virtual void write_with_mask(const reg_t mask, const reg_t val) noexcept;
protected:
virtual bool unlogged_write(const reg_t val) noexcept override final;
@@ -371,6 +375,8 @@ class mip_csr_t: public mip_or_mie_csr_t {
mip_csr_t(processor_t* const proc, const reg_t addr);
virtual reg_t read() const noexcept override final;
+ void write_with_mask(const reg_t mask, const reg_t val) noexcept override;
+
// Does not log. Used by external things (clint) that wiggle bits in mip.
void backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept;
private:
@@ -406,6 +412,7 @@ class generic_int_accessor_t {
void ip_write(const reg_t val) noexcept;
reg_t ie_read() const noexcept;
void ie_write(const reg_t val) noexcept;
+ reg_t get_ip_write_mask() { return ip_write_mask; }
private:
state_t* const state;
const reg_t read_mask;
@@ -423,10 +430,10 @@ typedef std::shared_ptr<generic_int_accessor_t> generic_int_accessor_t_p;
class mip_proxy_csr_t: public csr_t {
public:
mip_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+ virtual void verify_permissions(insn_t insn, bool write) const override;
virtual reg_t read() const noexcept override;
protected:
virtual bool unlogged_write(const reg_t val) noexcept override;
- private:
generic_int_accessor_t_p accr;
};
@@ -434,6 +441,7 @@ class mip_proxy_csr_t: public csr_t {
class mie_proxy_csr_t: public csr_t {
public:
mie_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+ virtual void verify_permissions(insn_t insn, bool write) const override;
virtual reg_t read() const noexcept override;
protected:
virtual bool unlogged_write(const reg_t val) noexcept override;
@@ -458,6 +466,25 @@ class medeleg_csr_t: public basic_csr_t {
virtual bool unlogged_write(const reg_t val) noexcept override;
private:
const reg_t hypervisor_exceptions;
+ const reg_t mmu_exceptions;
+};
+
+class sip_csr_t: public mip_proxy_csr_t { // sip: mip proxy whose mvien-selected, non-delegated bits are served by mvip
+ public:
+ sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+ virtual reg_t read() const noexcept override; // merges mip-proxy bits with mvip bits
+ protected:
+ virtual bool unlogged_write(const reg_t val) noexcept override; // splits the value between mvip and the mip proxy
+};
+
+class sie_csr_t: public mie_proxy_csr_t { // sie: mie proxy whose mvien-selected, non-delegated bits are stored locally
+ public:
+ sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+ virtual reg_t read() const noexcept override; // merges mie-proxy bits with the local `val`
+ protected:
+ virtual bool unlogged_write(const reg_t val) noexcept override; // splits the value between `val` and the mie proxy
+ private:
+ reg_t val; // backing storage for the bits not forwarded to the mie proxy; initialised to 0 by the constructor
};
// For CSRs with certain bits hardwired
@@ -697,6 +724,7 @@ class dcsr_csr_t: public csr_t {
bool ebreakvs;
bool ebreakvu;
bool v;
+ bool mprven;
uint8_t cause;
uint8_t ext_cause;
bool cetrig;
@@ -805,9 +833,9 @@ class stimecmp_csr_t: public basic_csr_t {
reg_t intr_mask;
};
-class virtualized_stimecmp_csr_t: public virtualized_csr_t {
+class virtualized_with_special_permission_csr_t: public virtualized_csr_t {
public:
- virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt);
+ virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt);
virtual void verify_permissions(insn_t insn, bool write) const override;
};
@@ -909,4 +937,63 @@ class scntinhibit_csr_t: public basic_csr_t {
virtual bool unlogged_write(const reg_t val) noexcept override;
};
+class mtopi_csr_t: public csr_t { // mtopi: computed on read from mip/mie/mideleg; holds no storage of its own
+ public:
+ mtopi_csr_t(processor_t* const proc, const reg_t addr);
+ virtual reg_t read() const noexcept override; // 0 when no M-level interrupt is pending and enabled
+ protected:
+ bool unlogged_write(const reg_t val) noexcept override; // ignores the value and returns false
+};
+
+class mvip_csr_t : public basic_csr_t { // mvip: SEIP/SSIP storage with STIP and SSIP optionally aliased to mip
+ public:
+ mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init);
+ reg_t read() const noexcept override; // composed from the stored value, mip, mvien and menvcfg
+ // Masked update of the stored value that skips the mip aliasing done by unlogged_write, then logs the write.
+ void write_with_mask(const reg_t mask, const reg_t val) noexcept;
+
+ protected:
+ virtual bool unlogged_write(const reg_t val) noexcept override; // forwards aliased bits to mip, stores the rest
+};
+
+typedef std::shared_ptr<mvip_csr_t> mvip_csr_t_p;
+
+class nonvirtual_stopi_csr_t: public csr_t { // stopi (non-virtual view): computed on read from nonvirtual_sip/sie and hideleg
+ public:
+ nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // adds the Smstateen AIA gate before csr_t's checks
+ virtual reg_t read() const noexcept override; // 0 when no S-level interrupt is pending and enabled
+ protected:
+ bool unlogged_write(const reg_t val) noexcept override; // ignores the value and returns false
+};
+
+class inaccessible_csr_t: public csr_t { // placeholder CSR whose permission check always traps
+ public:
+ inaccessible_csr_t(processor_t* const proc, const reg_t addr);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // throws virtual- or illegal-instruction depending on state->v
+ reg_t read() const noexcept override { return 0; } // constant 0 if called directly
+ protected:
+ bool unlogged_write(const reg_t UNUSED val) noexcept override { return false; } // ignores the value and returns false
+};
+
+class vstopi_csr_t: public csr_t { // vstopi: computed on read from hvictl, hstatus.VGEIN and hideleg-delegated interrupts
+ public:
+ vstopi_csr_t(processor_t* const proc, const reg_t addr);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // adds the Smstateen AIA gate before csr_t's checks
+ virtual reg_t read() const noexcept override; // 0 when nothing is to be reported
+ protected:
+ bool unlogged_write(const reg_t val) noexcept override; // ignores the value and returns false
+};
+
+class siselect_csr_t: public basic_csr_t { // siselect: plain storage CSR with an extra Smstateen CSRIND access gate
+ public:
+ siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // checks mstateen0/hstateen0 CSRIND, then basic_csr_t's checks
+};
+
+class aia_csr_t: public masked_csr_t { // masked CSR with an extra Smstateen AIA access gate
+ public:
+ aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init);
+ virtual void verify_permissions(insn_t insn, bool write) const override; // checks mstateen0/hstateen0 AIA, then basic_csr_t's checks
+};
#endif
diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc
index a89a4ff..410e0b3 100644
--- a/riscv/debug_module.cc
+++ b/riscv/debug_module.cc
@@ -1,4 +1,8 @@
+#include <algorithm>
+#include <array>
#include <cassert>
+#include <iterator>
+#include <limits>
#include "simif.h"
#include "devices.h"
@@ -32,6 +36,25 @@ static unsigned field_width(unsigned n)
///////////////////////// debug_module_t
+static bool region_descriptor_comparator(const region_descriptor &lhs,
+                                         const region_descriptor &rhs) {
+  return rhs.addr > lhs.addr; // orders regions by ascending start address
+}
+
+template <typename It>
+static bool has_intersection(It begin, It end) {
+  // Precondition: the regions are already ordered by start address.
+  assert(std::is_sorted(begin, end, region_descriptor_comparator));
+  // Two neighbours overlap when the earlier one ends past the start of the later one.
+  const auto overlaps = [](const auto &cur, const auto &next) {
+    // The end address of `cur` must be computable without wrapping around.
+    assert(std::numeric_limits<reg_t>::max() - cur.addr >= cur.len);
+    const auto cur_end = cur.addr + cur.len;
+    return cur_end > next.addr;
+  };
+  return std::adjacent_find(begin, end, overlaps) != end;
+}
+
debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config) :
config(config),
program_buffer_bytes((config.support_impebreak ? 4 : 0) + 4*config.progbufsize),
@@ -57,11 +80,18 @@ debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config
exit(1);
}
+ constexpr unsigned max_data_reg = 12;
+ constexpr unsigned min_data_reg = 1;
+ if (config.datacount < min_data_reg || config.datacount > max_data_reg) {
+ fprintf(stderr, "dm-datacount must be between 1 and 12 (got %u)\n", config.datacount);
+ exit(1);
+ }
+
+ dmdata.resize(config.datacount * dmdata_reg_size);
program_buffer = new uint8_t[program_buffer_bytes];
memset(debug_rom_flags, 0, sizeof(debug_rom_flags));
memset(program_buffer, 0, program_buffer_bytes);
- memset(dmdata, 0, sizeof(dmdata));
if (config.support_impebreak) {
program_buffer[4*config.progbufsize] = ebreak();
@@ -78,6 +108,20 @@ debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config
hart_available_state[i] = true;
}
+ debug_memory_regions = {
+ region_descriptor{DEBUG_ROM_ENTRY, debug_rom_raw_len, debug_rom_raw},
+ region_descriptor{DEBUG_ROM_WHERETO, sizeof(debug_rom_whereto), debug_rom_whereto},
+ region_descriptor{DEBUG_ROM_FLAGS, sizeof(debug_rom_flags), debug_rom_flags},
+ region_descriptor{debug_data_start, dmdata.size(), dmdata.data()},
+ region_descriptor{debug_abstract_start, sizeof(debug_abstract), debug_abstract},
+ region_descriptor{debug_progbuf_start, program_buffer_bytes, program_buffer},
+ };
+
+ std::sort(debug_memory_regions.begin(), debug_memory_regions.end(),
+ region_descriptor_comparator);
+ assert(!has_intersection(debug_memory_regions.begin(),
+ debug_memory_regions.end()));
+
reset();
}
@@ -100,7 +144,7 @@ void debug_module_t::reset()
dmstatus.version = 2;
memset(&abstractcs, 0, sizeof(abstractcs));
- abstractcs.datacount = sizeof(dmdata) / 4;
+ abstractcs.datacount = config.datacount;
abstractcs.progbufsize = config.progbufsize;
memset(&abstractauto, 0, sizeof(abstractauto));
@@ -122,38 +166,27 @@ void debug_module_t::reset()
challenge = random();
}
+static bool belongs_to_range(reg_t access_addr, size_t access_len,
+                             reg_t range_addr, size_t range_len)
+{ // True iff the access starts inside [range_addr, range_addr + range_len) and does not run past its end.
+  assert(std::numeric_limits<reg_t>::max() - access_addr >= access_len); // access end must not wrap
+  assert(std::numeric_limits<reg_t>::max() - range_addr >= range_len);   // range end must not wrap
+  const auto range_end = range_addr + range_len, access_end = access_addr + access_len;
+  return !(access_addr < range_addr || access_addr >= range_end || access_end > range_end);
+}
+
bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes)
{
addr = DEBUG_START + addr;
- if (addr >= DEBUG_ROM_ENTRY &&
- (addr + len) <= (DEBUG_ROM_ENTRY + debug_rom_raw_len)) {
- memcpy(bytes, debug_rom_raw + addr - DEBUG_ROM_ENTRY, len);
- return true;
- }
-
- if (addr >= DEBUG_ROM_WHERETO && (addr + len) <= (DEBUG_ROM_WHERETO + 4)) {
- memcpy(bytes, debug_rom_whereto + addr - DEBUG_ROM_WHERETO, len);
- return true;
- }
-
- if (addr >= DEBUG_ROM_FLAGS && ((addr + len) <= DEBUG_ROM_FLAGS + 1024)) {
- memcpy(bytes, debug_rom_flags + addr - DEBUG_ROM_FLAGS, len);
- return true;
- }
-
- if (addr >= debug_abstract_start && ((addr + len) <= (debug_abstract_start + sizeof(debug_abstract)))) {
- memcpy(bytes, debug_abstract + addr - debug_abstract_start, len);
- return true;
- }
+ const auto interval_ptr =
+ std::find_if(debug_memory_regions.begin(), debug_memory_regions.end(),
+ [addr, len](const auto &range) {
+ return belongs_to_range(addr, len, range.addr, range.len);
+ });
- if (addr >= debug_data_start && (addr + len) <= (debug_data_start + sizeof(dmdata))) {
- memcpy(bytes, dmdata + addr - debug_data_start, len);
- return true;
- }
-
- if (addr >= debug_progbuf_start && ((addr + len) <= (debug_progbuf_start + program_buffer_bytes))) {
- memcpy(bytes, program_buffer + addr - debug_progbuf_start, len);
+ if (interval_ptr != debug_memory_regions.end()) {
+ std::copy_n(std::next(interval_ptr->bytes, addr - interval_ptr->addr), len, bytes);
return true;
}
@@ -163,6 +196,15 @@ bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes)
return false;
}
+static bool handle_range_store(reg_t input_addr, size_t input_len, const uint8_t *bytes,
+                               reg_t range_addr, size_t range_len, uint8_t *data)
+{ // Copies `bytes` into `data` at the matching offset when the store fits the range; reports whether it did.
+  const bool fits = belongs_to_range(input_addr, input_len, range_addr, range_len);
+  if (fits) // the offset into the backing buffer is the distance from the range start
+    std::copy_n(bytes, input_len, data + (input_addr - range_addr));
+  return fits;
+}
+
bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes)
{
D(
@@ -188,16 +230,11 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes)
addr = DEBUG_START + addr;
- if (addr >= debug_data_start && (addr + len) <= (debug_data_start + sizeof(dmdata))) {
- memcpy(dmdata + addr - debug_data_start, bytes, len);
+ if (handle_range_store(addr, len, bytes, debug_data_start, dmdata.size(), dmdata.data()))
return true;
- }
-
- if (addr >= debug_progbuf_start && ((addr + len) <= (debug_progbuf_start + program_buffer_bytes))) {
- memcpy(program_buffer + addr - debug_progbuf_start, bytes, len);
+ if (handle_range_store(addr, len, bytes, debug_progbuf_start, program_buffer_bytes, program_buffer))
return true;
- }
if (addr == DEBUG_ROM_HALTED) {
assert (len == 4);
@@ -283,6 +320,16 @@ unsigned debug_module_t::sb_access_bits()
return 8 << sbcs.sbaccess;
}
+uint8_t *debug_module_t::get_dmdata_checked(size_t required_size)
+{ // Returns the dmdata buffer, or terminates the simulator when it holds fewer than required_size bytes.
+  if (dmdata.size() < required_size) { // NOTE(review): size() counts bytes, yet dmi_read passes a word count (i + 1) before read32(..., i) -- confirm units
+    fprintf(stderr, "dmdata size (%zu) less than required (%zu)\n", // %zu is the conversion for size_t; %ld was not portable
+            dmdata.size(), required_size);
+    exit(1);
+  }
+  return dmdata.data();
+}
+
void debug_module_t::sb_autoincrement()
{
if (!sbcs.autoincrement || !config.max_sba_data_width)
@@ -392,7 +439,8 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value)
D(fprintf(stderr, "dmi_read(0x%x) -> ", address));
if (address >= DM_DATA0 && address < DM_DATA0 + abstractcs.datacount) {
unsigned i = address - DM_DATA0;
- result = read32(dmdata, i);
+ assert(dmdata.size() >= 4);
+ result = read32(get_dmdata_checked(i + 1), i);
if (abstractcs.busy) {
result = -1;
D(fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i));
@@ -649,130 +697,152 @@ bool debug_module_t::perform_abstract_command()
return true;
}
- if ((command >> 24) == 0) {
- // register access
- unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE);
- bool write = get_field(command, AC_ACCESS_REGISTER_WRITE);
- unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO);
+ auto cmdtype = get_field(command, DM_COMMAND_CMDTYPE);
+ constexpr decltype(cmdtype) CMDTYPE_ACCESS_REGISTER = 0ULL;
+ constexpr decltype(cmdtype) CMDTYPE_ACCESS_MEMORY = 2ULL;
+
+ if (cmdtype == CMDTYPE_ACCESS_REGISTER)
+ return perform_abstract_register_access();
+
+ if (cmdtype == CMDTYPE_ACCESS_MEMORY)
+ return perform_abstract_memory_access();
+
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
+}
+
+bool debug_module_t::perform_abstract_register_access()
+{
+ // register access
+ unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE);
+ bool write = get_field(command, AC_ACCESS_REGISTER_WRITE);
+ unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO);
if (!selected_hart_state().halted) {
abstractcs.cmderr = CMDERR_HALTRESUME;
return true;
}
- unsigned i = 0;
- if (get_field(command, AC_ACCESS_REGISTER_TRANSFER)) {
+ assert(size < 8);
+ // Check if register fit in dmdata
+ if ((1U << size) > dmdata.size()) {
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
+ }
+
+ unsigned i = 0;
+ if (get_field(command, AC_ACCESS_REGISTER_TRANSFER)) {
+
+ if (is_fpu_reg(regno)) {
+ // Save S0
+ write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0));
+ // Save mstatus
+ write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS));
+ write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1));
+ // Set mstatus.fs
+ assert((MSTATUS_FS & 0xfff) == 0);
+ write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12));
+ write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS));
+ }
- if (is_fpu_reg(regno)) {
- // Save S0
+ if (regno < 0x1000 && config.support_abstract_csr_access) {
+ if (!is_fpu_reg(regno)) {
write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0));
- // Save mstatus
- write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS));
- write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1));
- // Set mstatus.fs
- assert((MSTATUS_FS & 0xfff) == 0);
- write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12));
- write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS));
}
- if (regno < 0x1000 && config.support_abstract_csr_access) {
- if (!is_fpu_reg(regno)) {
- write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0));
- }
-
- if (write) {
- switch (size) {
- case 2:
- write32(debug_abstract, i++, lw(S0, ZERO, debug_data_start));
- break;
- case 3:
- write32(debug_abstract, i++, ld(S0, ZERO, debug_data_start));
- break;
- default:
- abstractcs.cmderr = CMDERR_NOTSUP;
- return true;
- }
- write32(debug_abstract, i++, csrw(S0, regno));
-
- } else {
- write32(debug_abstract, i++, csrr(S0, regno));
- switch (size) {
- case 2:
- write32(debug_abstract, i++, sw(S0, ZERO, debug_data_start));
- break;
- case 3:
- write32(debug_abstract, i++, sd(S0, ZERO, debug_data_start));
- break;
- default:
- abstractcs.cmderr = CMDERR_NOTSUP;
- return true;
- }
- }
- if (!is_fpu_reg(regno)) {
- write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0));
+ if (write) {
+ switch (size) {
+ case 2:
+ write32(debug_abstract, i++, lw(S0, ZERO, debug_data_start));
+ break;
+ case 3:
+ write32(debug_abstract, i++, ld(S0, ZERO, debug_data_start));
+ break;
+ default:
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
}
+ write32(debug_abstract, i++, csrw(S0, regno));
- } else if (regno >= 0x1000 && regno < 0x1020) {
- unsigned regnum = regno - 0x1000;
-
+ } else {
+ write32(debug_abstract, i++, csrr(S0, regno));
switch (size) {
case 2:
- if (write)
- write32(debug_abstract, i++, lw(regnum, ZERO, debug_data_start));
- else
- write32(debug_abstract, i++, sw(regnum, ZERO, debug_data_start));
+ write32(debug_abstract, i++, sw(S0, ZERO, debug_data_start));
break;
case 3:
- if (write)
- write32(debug_abstract, i++, ld(regnum, ZERO, debug_data_start));
- else
- write32(debug_abstract, i++, sd(regnum, ZERO, debug_data_start));
+ write32(debug_abstract, i++, sd(S0, ZERO, debug_data_start));
break;
default:
abstractcs.cmderr = CMDERR_NOTSUP;
return true;
}
+ }
+ if (!is_fpu_reg(regno)) {
+ write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0));
+ }
- if (regno == 0x1000 + S0 && write) {
- /*
- * The exception handler starts out be restoring dscratch to s0,
- * which was saved before executing the abstract memory region. Since
- * we just wrote s0, also make sure to write that same value to
- * dscratch in case an exception occurs in a program buffer that
- * might be executed later.
- */
- write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0));
- }
+ } else if (regno >= 0x1000 && regno < 0x1020) {
+ unsigned regnum = regno - 0x1000;
- } else if (regno >= 0x1020 && regno < 0x1040 && config.support_abstract_fpr_access) {
- unsigned fprnum = regno - 0x1020;
+ switch (size) {
+ case 2:
+ if (write)
+ write32(debug_abstract, i++, lw(regnum, ZERO, debug_data_start));
+ else
+ write32(debug_abstract, i++, sw(regnum, ZERO, debug_data_start));
+ break;
+ case 3:
+ if (write)
+ write32(debug_abstract, i++, ld(regnum, ZERO, debug_data_start));
+ else
+ write32(debug_abstract, i++, sd(regnum, ZERO, debug_data_start));
+ break;
+ default:
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
+ }
- if (write) {
- switch (size) {
- case 2:
- write32(debug_abstract, i++, flw(fprnum, ZERO, debug_data_start));
- break;
- case 3:
- write32(debug_abstract, i++, fld(fprnum, ZERO, debug_data_start));
- break;
- default:
- abstractcs.cmderr = CMDERR_NOTSUP;
- return true;
- }
+ if (regno == 0x1000 + S0 && write) {
+ /*
+ * The exception handler starts out by restoring dscratch to s0,
+ * which was saved before executing the abstract memory region. Since
+ * we just wrote s0, also make sure to write that same value to
+ * dscratch in case an exception occurs in a program buffer that
+ * might be executed later.
+ */
+ write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0));
+ }
- } else {
- switch (size) {
- case 2:
- write32(debug_abstract, i++, fsw(fprnum, ZERO, debug_data_start));
- break;
- case 3:
- write32(debug_abstract, i++, fsd(fprnum, ZERO, debug_data_start));
- break;
- default:
- abstractcs.cmderr = CMDERR_NOTSUP;
- return true;
- }
+ } else if (regno >= 0x1020 && regno < 0x1040 && config.support_abstract_fpr_access) {
+ unsigned fprnum = regno - 0x1020;
+
+ if (write) {
+ switch (size) {
+ case 2:
+ write32(debug_abstract, i++, flw(fprnum, ZERO, debug_data_start));
+ break;
+ case 3:
+ write32(debug_abstract, i++, fld(fprnum, ZERO, debug_data_start));
+ break;
+ default:
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
+ }
+
+ } else {
+ switch (size) {
+ case 2:
+ write32(debug_abstract, i++, fsw(fprnum, ZERO, debug_data_start));
+ break;
+ case 3:
+ write32(debug_abstract, i++, fsd(fprnum, ZERO, debug_data_start));
+ break;
+ default:
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
}
+ }
} else if (regno >= 0xc000 && (regno & 1) == 1) {
// Support odd-numbered custom registers, to allow for debugger testing.
@@ -781,46 +851,146 @@ bool debug_module_t::perform_abstract_command()
if (write) {
// Writing V to custom register N will cause future reads of N to
// return V, reads of N-1 will return V-1, etc.
- custom_base = read32(dmdata, 0) - custom_number;
+ assert(dmdata.size() >= 4);
+ custom_base = read32(get_dmdata_checked(1), 0) - custom_number;
} else {
- write32(dmdata, 0, custom_number + custom_base);
- write32(dmdata, 1, 0);
+ write32(get_dmdata_checked(1), 0, custom_number + custom_base);
+ write32(get_dmdata_checked(2), 1, 0);
}
return true;
- } else {
- abstractcs.cmderr = CMDERR_NOTSUP;
- return true;
- }
-
- if (is_fpu_reg(regno)) {
- // restore mstatus
- write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1));
- write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS));
- // restore s0
- write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0));
- }
- }
-
- if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) {
- write32(debug_abstract, i,
- jal(ZERO, debug_progbuf_start - debug_abstract_start - 4 * i));
- i++;
} else {
- write32(debug_abstract, i++, ebreak());
+ abstractcs.cmderr = CMDERR_NOTSUP;
+ return true;
}
- debug_rom_flags[selected_hart_id()] |= 1 << DEBUG_ROM_FLAG_GO;
- rti_remaining = config.abstract_rti;
- abstract_command_completed = false;
+ if (is_fpu_reg(regno)) {
+ // restore mstatus
+ write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1));
+ write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS));
+ // restore s0
+ write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0));
+ }
+ }
- abstractcs.busy = true;
+ if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) {
+ write32(debug_abstract, i,
+ jal(ZERO, debug_progbuf_start - debug_abstract_start - 4 * i));
+ i++;
} else {
+ write32(debug_abstract, i++, ebreak());
+ }
+
+ debug_rom_flags[selected_hart_id()] |= 1 << DEBUG_ROM_FLAG_GO;
+ rti_remaining = config.abstract_rti;
+ abstract_command_completed = false;
+
+ abstractcs.busy = true;
+ return true;
+}
+
+// Converts a hart's XLEN into the index that selects the XLEN-wide entry of
+// the lx/sx load/store encoder tables (entry 2 is lw/sw, entry 3 is ld/sd).
+// NOTE(review): assumes field_width(32) == 5 and field_width(64) == 6;
+// field_width is defined elsewhere -- confirm.
+static unsigned idx(unsigned xlen)
+{
+  const auto width = field_width(xlen);
+  return width - 3U;
+}
+
+// Handles an abstract Access Memory command for the selected hart.
+//
+// Decodes the aamsize / aampostincrement / aamvirtual / write fields from
+// `command`. The request is rejected with CMDERR_HALTRESUME when the selected
+// hart is not halted and with CMDERR_NOTSUP when aamsize exceeds idx(xlen).
+// Otherwise the access program is assembled into debug_abstract, execution is
+// started and cmderr is set to CMDERR_NONE. Always returns true.
+bool debug_module_t::perform_abstract_memory_access() {
+  const unsigned aamsize = get_field(command, AC_ACCESS_MEMORY_AAMSIZE);
+  const bool aampostincrement = get_field(command, AC_ACCESS_MEMORY_AAMPOSTINCREMENT);
+  const bool aamvirtual = get_field(command, AC_ACCESS_MEMORY_AAMVIRTUAL);
+  const bool is_write = get_field(command, AC_ACCESS_MEMORY_WRITE);
+  const auto xlen = sim->get_harts().at(selected_hart_id())->get_xlen();
+
+  if (!selected_hart_state().halted) {
+    abstractcs.cmderr = CMDERR_HALTRESUME;
+    return true;
+  }
+
+  if (aamsize > idx(xlen)) {
     abstractcs.cmderr = CMDERR_NOTSUP;
+    return true;
   }
+
+  // Assemble the program: prologue, the access itself, the optional address
+  // post-increment, then the epilogue. `offset` counts 32-bit words written.
+  unsigned offset = 0;
+  generate_initial_sequence(aamvirtual, offset);
+  if (is_write)
+    handle_memory_write(xlen, aamsize, offset);
+  else
+    handle_memory_read(xlen, aamsize, offset);
+
+  if (aampostincrement)
+    handle_post_increment(xlen, aamsize, offset);
+
+  generate_termination_sequence(offset);
+  start_command_execution();
+
+  abstractcs.cmderr = CMDERR_NONE;
   return true;
 }
+// Signature shared by the load/store instruction encoders stored in lx/sx.
+using handle_memory_func = uint32_t (*)(unsigned rd_src, unsigned base, uint16_t offset);
+// Signature shared by the CSR read-and-clear / read-and-set encoders.
+using handle_mstatus_func = uint32_t (*)(unsigned rd, unsigned rs1, unsigned csr);
+
+// Load encoders: [0] = lb, [1] = lh, [2] = lw, [3] = ld.
+static constexpr std::array<handle_memory_func, 4> lx = {lb, lh, lw, ld};
+// Store encoders, in the same order as lx: [0] = sb, [1] = sh, [2] = sw, [3] = sd.
+static constexpr std::array<handle_memory_func, 4> sx = {sb, sh, sw, sd};
+// [0] = csrrc, [1] = csrrs, so the table can be indexed with a bool.
+static constexpr std::array<handle_mstatus_func, 2> csrrx = {csrrc, csrrs};
+
+// Returns the debug-memory address of abstract-command argument `i`
+// (0 = arg0, 1 = arg1) for a hart of the given XLEN. Arguments are laid out
+// back to back starting at debug_data_start, xlen / 8 bytes apart.
+//
+// The parameter is named `i`, matching the declaration in debug_module.h, so
+// that it no longer shadows the file-local idx() helper.
+unsigned debug_module_t::arg(unsigned xlen, unsigned i)
+{
+  return debug_data_start + i * xlen / 8;
+}
+
+// Emits the read half of an abstract memory access into debug_abstract:
+//   1. S1 <- Arg1       XLEN-wide load; Arg1 supplies the address
+//   2. S1 <- mem[S1]    load of the width selected by aamsize
+//   3. Arg0 <- S1       XLEN-wide store of the value that was read
+// `offset` is the next free 32-bit word in debug_abstract; it is advanced by
+// three.
+void debug_module_t::handle_memory_read(size_t xlen, unsigned aamsize, unsigned &offset)
+{
+  const unsigned xlen_idx = idx(xlen);
+  const uint32_t fetch_address = lx[xlen_idx](S1, ZERO, arg(xlen, 1));
+  const uint32_t do_load = lx[aamsize](S1, S1, 0);
+  const uint32_t publish_result = sx[xlen_idx](S1, ZERO, arg(xlen, 0));
+
+  write32(debug_abstract, offset++, fetch_address);
+  write32(debug_abstract, offset++, do_load);
+  write32(debug_abstract, offset++, publish_result);
+}
+
+// Emits the write half of an abstract memory access into debug_abstract.
+//
+// On entry S0 holds the old mstatus value saved by the prologue
+// (generate_initial_sequence), and the epilogue expects it there again. S0 is
+// also needed to carry the data, so the old mstatus is parked in Arg1 while
+// the store runs.
+//
+// Once the store is done the address (still live in S1) is written back to
+// Arg1. Without that final step Arg1 would be left holding the parked mstatus
+// value, and a following handle_post_increment() would increment mstatus
+// instead of the address.
+//
+// `offset` is the next free 32-bit word in debug_abstract; it is advanced by
+// six.
+void debug_module_t::handle_memory_write(size_t xlen, unsigned aamsize, unsigned &offset)
+{
+  const unsigned xlen_idx = idx(xlen);
+
+  // Use Arg1 as temporary storage for old mstatus value
+  write32(debug_abstract, offset++, lx[xlen_idx](S1, ZERO, arg(xlen, 1))); // Arg1 (address) -> S1
+  write32(debug_abstract, offset++, sx[xlen_idx](S0, ZERO, arg(xlen, 1))); // S0 (old mstatus) -> Arg1
+  write32(debug_abstract, offset++, lx[xlen_idx](S0, ZERO, arg(xlen, 0))); // Arg0 (data) -> S0
+
+  write32(debug_abstract, offset++, sx[aamsize](S0, S1, 0));               // S0 -> mem[S1]
+
+  write32(debug_abstract, offset++, lx[xlen_idx](S0, ZERO, arg(xlen, 1))); // Restore S0 (old mstatus)
+  write32(debug_abstract, offset++, sx[xlen_idx](S1, ZERO, arg(xlen, 1))); // Restore Arg1 (address)
+}
+
+// Emits Arg1 += (1 << aamsize) into debug_abstract: Arg1 is reloaded into S1,
+// the access size in bytes is added, and the sum is stored back to Arg1.
+// `offset` is the next free 32-bit word in debug_abstract; it is advanced by
+// three.
+void debug_module_t::handle_post_increment(size_t xlen, unsigned aamsize, unsigned &offset)
+{
+  const unsigned xlen_idx = idx(xlen);
+  const unsigned arg1_addr = arg(xlen, 1);
+  const unsigned step = 1U << aamsize;
+
+  write32(debug_abstract, offset++, lx[xlen_idx](S1, ZERO, arg1_addr));
+  write32(debug_abstract, offset++, addi(S1, S1, step));
+  write32(debug_abstract, offset++, sx[xlen_idx](S1, ZERO, arg1_addr));
+}
+
+// Emits the prologue of an abstract memory access into debug_abstract:
+// S0 and S1 are stashed in dscratch0 / dscratch1, then mstatus.MPRV is set
+// (aamvirtual) or cleared (!aamvirtual) with the previous mstatus value left
+// in S0. `offset` is the next free 32-bit word; it is advanced by four.
+void debug_module_t::generate_initial_sequence(bool aamvirtual, unsigned &offset)
+{
+  // Free up the two scratch registers used by the access sequence.
+  write32(debug_abstract, offset++, csrw(S0, CSR_DSCRATCH0));
+  write32(debug_abstract, offset++, csrw(S1, CSR_DSCRATCH1));
+
+  // csrrx[0] is csrrc and csrrx[1] is csrrs, so a virtual access sets MPRV
+  // and a physical one clears it; either way rd (S0) receives old mstatus.
+  const handle_mstatus_func update_mprv = csrrx[aamvirtual ? 1 : 0];
+
+  // Modify mstatus.mprv and save old mstatus
+  write32(debug_abstract, offset++, lui(S0, MSTATUS_MPRV >> 12));
+  write32(debug_abstract, offset++, update_mprv(S0, S0, CSR_MSTATUS));
+}
+
+// Emits the epilogue of an abstract memory access into debug_abstract:
+// mstatus is written back from S0, S0 and S1 are reloaded from dscratch0 /
+// dscratch1, and an ebreak ends the sequence. `offset` is the next free
+// 32-bit word; it is advanced by four.
+void debug_module_t::generate_termination_sequence(unsigned &offset)
+{
+  const uint32_t epilogue[] = {
+    csrw(S0, CSR_MSTATUS),    // Restore mstatus
+    csrr(S0, CSR_DSCRATCH0),  // Restore s0
+    csrr(S1, CSR_DSCRATCH1),  // Restore s1
+    ebreak(),
+  };
+
+  for (const uint32_t insn : epilogue)
+    write32(debug_abstract, offset++, insn);
+}
+
+// Marks an abstract command as started: raises DEBUG_ROM_FLAG_GO in the
+// selected hart's debug_rom_flags entry, flags the command as in progress
+// (busy set, completed cleared) and reloads rti_remaining from
+// config.abstract_rti.
+void debug_module_t::start_command_execution()
+{
+  const auto hart = selected_hart_id();
+  debug_rom_flags[hart] |= 1 << DEBUG_ROM_FLAG_GO;
+
+  abstractcs.busy = true;
+  abstract_command_completed = false;
+  rti_remaining = config.abstract_rti;
+}
+
bool debug_module_t::dmi_write(unsigned address, uint32_t value)
{
D(fprintf(stderr, "dmi_write(0x%x, 0x%x)\n", address, value));
@@ -832,7 +1002,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value)
if (address >= DM_DATA0 && address < DM_DATA0 + abstractcs.datacount) {
unsigned i = address - DM_DATA0;
if (!abstractcs.busy)
- write32(dmdata, address - DM_DATA0, value);
+ write32(get_dmdata_checked(address - DM_DATA0), address - DM_DATA0, value);
if (abstractcs.busy && abstractcs.cmderr == CMDERR_NONE) {
abstractcs.cmderr = CMDERR_BUSY;
@@ -870,8 +1040,6 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value)
dmcontrol.ndmreset = get_field(value, DM_DMCONTROL_NDMRESET);
if (config.support_hasel)
dmcontrol.hasel = get_field(value, DM_DMCONTROL_HASEL);
- else
- dmcontrol.hasel = 0;
dmcontrol.hartsel = get_field(value, DM_DMCONTROL_HARTSELHI) <<
DM_DMCONTROL_HARTSELLO_LENGTH;
dmcontrol.hartsel |= get_field(value, DM_DMCONTROL_HARTSELLO);
@@ -931,10 +1099,12 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value)
return true;
case DM_ABSTRACTAUTO:
- abstractauto.autoexecprogbuf = get_field(value,
- DM_ABSTRACTAUTO_AUTOEXECPROGBUF);
- abstractauto.autoexecdata = get_field(value,
- DM_ABSTRACTAUTO_AUTOEXECDATA);
+ if (config.support_abstractauto) {
+ abstractauto.autoexecprogbuf = get_field(value,
+ DM_ABSTRACTAUTO_AUTOEXECPROGBUF);
+ abstractauto.autoexecdata = get_field(value,
+ DM_ABSTRACTAUTO_AUTOEXECDATA);
+ }
return true;
case DM_SBCS:
sbcs.readonaddr = get_field(value, DM_SBCS_SBREADONADDR);
diff --git a/riscv/debug_module.h b/riscv/debug_module.h
index 904f03e..831df10 100644
--- a/riscv/debug_module.h
+++ b/riscv/debug_module.h
@@ -2,7 +2,7 @@
#ifndef _RISCV_DEBUG_MODULE_H
#define _RISCV_DEBUG_MODULE_H
-#include <set>
+#include <array>
#include <vector>
#include "abstract_device.h"
@@ -15,6 +15,7 @@ struct debug_module_config_t {
// Size of program_buffer in 32-bit words, as exposed to the rest of the
// world.
unsigned progbufsize = 2;
+ unsigned datacount = 2;
unsigned max_sba_data_width = 0;
bool require_authentication = false;
unsigned abstract_rti = 0;
@@ -23,6 +24,7 @@ struct debug_module_config_t {
bool support_abstract_fpr_access = true;
bool support_haltgroups = true;
bool support_impebreak = true;
+ bool support_abstractauto = true;
};
struct dmcontrol_t {
@@ -99,6 +101,13 @@ struct hart_debug_state_t {
uint8_t haltgroup;
};
+// Describes one memory-mapped (MMIO) region: where it starts, how large it
+// is, and the data associated with it.
+struct region_descriptor {
+  // First address in the range.
+  reg_t addr;
+  // Size of the range.
+  size_t len;
+  // Data for the range.
+  // NOTE(review): presumably points at `len` bytes owned elsewhere -- confirm.
+  const uint8_t *bytes;
+};
+
+
class debug_module_t : public abstract_device_t
{
public:
@@ -131,7 +140,6 @@ class debug_module_t : public abstract_device_t
void proc_reset(unsigned id);
private:
- static const unsigned datasize = 2;
debug_module_config_t config;
// Actual size of the program buffer, which is 1 word bigger than we let on
// to implement the implicit ebreak at the end.
@@ -139,7 +147,7 @@ class debug_module_t : public abstract_device_t
static const unsigned debug_data_start = 0x380;
unsigned debug_progbuf_start;
- static const unsigned debug_abstract_size = 12;
+ static const unsigned debug_abstract_size = 24;
unsigned debug_abstract_start;
// R/W this through custom registers, to allow debuggers to test that
// functionality.
@@ -150,7 +158,8 @@ class debug_module_t : public abstract_device_t
uint8_t debug_rom_whereto[4];
uint8_t debug_abstract[debug_abstract_size * 4];
uint8_t *program_buffer;
- uint8_t dmdata[datasize * 4];
+ static constexpr unsigned dmdata_reg_size = 4;
+ std::vector<uint8_t> dmdata;
std::vector<hart_debug_state_t> hart_state;
uint8_t debug_rom_flags[1024];
@@ -174,6 +183,8 @@ class debug_module_t : public abstract_device_t
unsigned sb_access_bits();
+ uint8_t *get_dmdata_checked(size_t required_size);
+
dmcontrol_t dmcontrol;
dmstatus_t dmstatus;
abstractcs_t abstractcs;
@@ -191,7 +202,20 @@ class debug_module_t : public abstract_device_t
bool hart_selected(unsigned hartid) const;
void reset();
+
bool perform_abstract_command();
+ bool perform_abstract_register_access();
+ bool perform_abstract_memory_access();
+
+ unsigned arg(unsigned xlen, unsigned i);
+
+ void handle_post_increment(size_t xlen, unsigned aamsize, unsigned &offset);
+ void handle_memory_read(size_t xlen, unsigned aamsize, unsigned &offset);
+ void handle_memory_write(size_t xlen, unsigned aamsize, unsigned &offset);
+
+ void generate_initial_sequence(bool aamvirtual, unsigned &offset);
+ void generate_termination_sequence(unsigned &offset);
+ void start_command_execution();
bool abstract_command_completed;
unsigned rti_remaining;
@@ -206,6 +230,8 @@ class debug_module_t : public abstract_device_t
bool hart_available(unsigned hart_id) const;
unsigned sb_read_wait, sb_write_wait;
+
+ std::array<region_descriptor, 6> debug_memory_regions;
};
#endif
diff --git a/riscv/decode.h b/riscv/decode.h
index d17cb6b..0c13528 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -79,6 +79,10 @@ public:
insn_t(insn_bits_t bits) : b(bits) {}
insn_bits_t bits() { return b; }
int length() { return insn_length(b); }
+ [[maybe_unused]] int64_t opcode() { return x(0, 7); }
+ [[maybe_unused]] int64_t funct7() { return x(25, 7); }
+ [[maybe_unused]] int64_t funct3() { return x(12, 3); }
+ [[maybe_unused]] int64_t funct2() { return x(25, 2); }
int64_t i_imm() { return xs(20, 12); }
int64_t shamt() { return x(20, 6); }
int64_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); }
@@ -95,6 +99,7 @@ public:
uint64_t bs() { return x(30, 2); } // Crypto ISE - SM4/AES32 byte select.
uint64_t rcon() { return x(20, 4); } // Crypto ISE - AES64 round const.
+ [[maybe_unused]] int64_t rvc_opcode() { return x(0, 2); }
int64_t rvc_imm() { return x(2, 5) + (xs(12, 1) << 5); }
int64_t rvc_zimm() { return x(2, 5) + (x(12, 1) << 5); }
int64_t rvc_addi4spn_imm() { return (x(6, 1) << 2) + (x(5, 1) << 3) + (x(11, 2) << 4) + (x(7, 4) << 6); }
@@ -149,6 +154,8 @@ public:
uint64_t p_imm5() { return x(20, 5); }
uint64_t p_imm6() { return x(20, 6); }
+  // 5-bit immediate taken from x(20, 5); an encoded value of 0 yields the
+  // all-ones 64-bit value (i.e. -1) instead.
+  // NOTE(review): presumably the beqi/bnei compare immediate -- confirm.
+  // ~uint64_t(0) is used rather than -1ul so the result is all-ones in 64
+  // bits even on hosts where unsigned long is only 32 bits wide.
+  uint64_t b_imm5() {
+    const uint64_t imm = x(20, 5);
+    return imm == 0 ? ~uint64_t(0) : imm;
+  }
+
uint64_t zcmp_regmask() {
unsigned mask = 0;
uint64_t rlist = rvc_rlist();
@@ -240,7 +247,4 @@ private:
#define set_field(reg, mask, val) \
(((reg) & ~(std::remove_cv<decltype(reg)>::type)(mask)) | (((std::remove_cv<decltype(reg)>::type)(val) * ((mask) & ~((mask) << 1))) & (std::remove_cv<decltype(reg)>::type)(mask)))
-#define DEBUG_START 0x0
-#define DEBUG_END (0x1000 - 1)
-
#endif
diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h
index 892515f..6f24799 100644
--- a/riscv/decode_macros.h
+++ b/riscv/decode_macros.h
@@ -146,10 +146,9 @@ do { \
#define SHAMT (insn.i_imm() & 0x3F)
#define BRANCH_TARGET (pc + insn.sb_imm())
#define JUMP_TARGET (pc + insn.uj_imm())
-#define RM ({ int rm = insn.rm(); \
- if (rm == 7) rm = STATE.frm->read(); \
- if (rm > 4) throw trap_illegal_instruction(insn.bits()); \
- rm; })
+// Yields `rm` after checking through require() that it is below 5.
+// The argument is captured in a local first, so it is parenthesized and
+// evaluated exactly once even when it is an expression with a cost or side
+// effect (e.g. a CSR read).
+#define validate_rm(rm) ({ auto validated_rm_ = (rm); require(validated_rm_ < 5); validated_rm_; })
+// Validated rounding mode read from the frm CSR.
+#define VFP_RM validate_rm(STATE.frm->read())
+// Validated rounding mode for the current instruction: an rm field of 7
+// selects the value held in frm, anything else is used directly.
+#define RM (insn.rm() == 7 ? VFP_RM : validate_rm(insn.rm()))
static inline bool is_aligned(const unsigned val, const unsigned pos)
{
@@ -164,7 +163,6 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
#define require_rv32 require(xlen == 32)
#define require_extension(s) require(p->extension_enabled(s))
#define require_either_extension(A,B) require(p->extension_enabled(A) || p->extension_enabled(B));
-#define require_impl(s) require(p->supports_impl(s))
#define require_fp STATE.fflags->verify_permissions(insn, false)
#define require_accelerator require(STATE.sstatus->enabled(SSTATUS_XS))
#define require_vector_vs require(p->any_vector_extensions() && STATE.sstatus->enabled(SSTATUS_VS))
@@ -226,7 +224,8 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
#define zext_xlen(x) zext(x, xlen)
#define set_pc(x) \
- do { p->check_pc_alignment(x); \
+ do { if (unlikely((x) & ~p->pc_alignment_mask())) \
+ return p->throw_instruction_address_misaligned(x); \
npc = sext_xlen(x); \
} while (0)
@@ -268,15 +267,21 @@ inline bfloat16_t bf16(freg_t r) { return bf16(unboxBF16(r)); }
inline float32_t f32(freg_t r) { return f32(unboxF32(r)); }
inline float64_t f64(freg_t r) { return f64(unboxF64(r)); }
inline float128_t f128(freg_t r) { return r; }
+inline float16_t f16(freg_t r, reg_t altfmt) { return altfmt ? bf16(r) : f16(r); }
+inline float32_t f32(freg_t r, UNUSED reg_t altfmt) { return f32(r); }
+inline float64_t f64(freg_t r, UNUSED reg_t altfmt) { return f64(r); }
inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; }
inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; }
inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; }
inline freg_t freg(float128_t f) { return f; }
#define F16_SIGN ((uint16_t)1 << 15)
+#define BF16_SIGN F16_SIGN
#define F32_SIGN ((uint32_t)1 << 31)
#define F64_SIGN ((uint64_t)1 << 63)
#define fsgnj16(a, b, n, x) \
f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN))
+#define bfsgnj16(a, b, n, x) \
+ bf16((bf16(a).v & ~BF16_SIGN) | ((((x) ? bf16(a).v : (n) ? BF16_SIGN : 0) ^ bf16(b).v) & BF16_SIGN))
#define fsgnj32(a, b, n, x) \
f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? F32_SIGN : 0) ^ f32(b).v) & F32_SIGN))
#define fsgnj64(a, b, n, x) \
@@ -369,3 +374,10 @@ inline long double to_f(float128_t f) { long double r; memcpy(&r, &f, sizeof(r))
#define ZICFILP_IS_LP_EXPECTED(reg_num) \
(((reg_num) != 1 && (reg_num) != 5 && (reg_num) != 7) ? \
elp_t::LP_EXPECTED : elp_t::NO_LP_EXPECTED)
+#define maybe_set_elp(reg_num) \
+ if (unlikely(p->extension_enabled(EXT_ZICFILP))) { \
+ if (unlikely(ZICFILP_IS_LP_EXPECTED(reg_num) == elp_t::LP_EXPECTED)) { \
+ serialize(); \
+ return p->set_lpad_expected(npc); \
+ } \
+ }
diff --git a/riscv/devices.cc b/riscv/devices.cc
index fb5bb5a..b816ca1 100644
--- a/riscv/devices.cc
+++ b/riscv/devices.cc
@@ -156,21 +156,21 @@ void mem_t::dump(std::ostream& o) {
}
}
-external_sim_device_t::external_sim_device_t(void* sim)
+external_sim_device_t::external_sim_device_t(abstract_sim_if_t* sim)
: external_simulator(sim) {}
-void external_sim_device_t::set_simulator(void* sim) {
+void external_sim_device_t::set_simulator(abstract_sim_if_t* sim) {
external_simulator = sim;
}
bool external_sim_device_t::load(reg_t addr, size_t len, uint8_t* bytes) {
if (unlikely(external_simulator == nullptr)) return false;
- return static_cast<abstract_sim_if_t*>(external_simulator)->load(addr, len, bytes);
+ return external_simulator->load(addr, len, bytes);
}
bool external_sim_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) {
if (unlikely(external_simulator == nullptr)) return false;
- return static_cast<abstract_sim_if_t*>(external_simulator)->store(addr, len, bytes);
+ return external_simulator->store(addr, len, bytes);
}
reg_t external_sim_device_t::size() {
diff --git a/riscv/devices.h b/riscv/devices.h
index e7b80ad..ccb5c9b 100644
--- a/riscv/devices.h
+++ b/riscv/devices.h
@@ -80,14 +80,14 @@ public:
class external_sim_device_t : public abstract_device_t {
public:
- external_sim_device_t(void* sim);
- void set_simulator(void* sim);
+ external_sim_device_t(abstract_sim_if_t* sim);
+ void set_simulator(abstract_sim_if_t* sim);
bool load(reg_t addr, size_t len, uint8_t* bytes) override;
bool store(reg_t addr, size_t len, const uint8_t* bytes) override;
reg_t size() override;
private:
- void* external_simulator;
+ abstract_sim_if_t* external_simulator;
};
class clint_t : public abstract_device_t {
diff --git a/riscv/disasm.h b/riscv/disasm.h
index 4a1ea42..64cfd2e 100644
--- a/riscv/disasm.h
+++ b/riscv/disasm.h
@@ -15,6 +15,7 @@ extern const char* xpr_name[NXPR];
extern const char* fpr_name[NFPR];
extern const char* vr_name[NVPR];
extern const char* csr_name(int which);
+extern const char* frm_name(int which);
class arg_t
{
diff --git a/riscv/encoding.h b/riscv/encoding.h
index bcc1ace..776a2ae 100644
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@@ -4,7 +4,7 @@
/*
* This file is auto-generated by running 'make' in
- * https://github.com/riscv/riscv-opcodes (8899b32)
+ * https://github.com/riscv/riscv-opcodes (3deaa8c)
*/
#ifndef RISCV_CSR_ENCODING_H
@@ -65,16 +65,17 @@
#define SSTATUS_UXL 0x0000000300000000
#define SSTATUS64_SD 0x8000000000000000
-#define HSTATUS_VSXL 0x300000000
-#define HSTATUS_VTSR 0x00400000
-#define HSTATUS_VTW 0x00200000
-#define HSTATUS_VTVM 0x00100000
-#define HSTATUS_VGEIN 0x0003f000
-#define HSTATUS_HU 0x00000200
-#define HSTATUS_SPVP 0x00000100
-#define HSTATUS_SPV 0x00000080
-#define HSTATUS_GVA 0x00000040
#define HSTATUS_VSBE 0x00000020
+#define HSTATUS_GVA 0x00000040
+#define HSTATUS_SPV 0x00000080
+#define HSTATUS_SPVP 0x00000100
+#define HSTATUS_HU 0x00000200
+#define HSTATUS_VGEIN 0x0003f000
+#define HSTATUS_VTVM 0x00100000
+#define HSTATUS_VTW 0x00200000
+#define HSTATUS_VTSR 0x00400000
+#define HSTATUS_HUKTE 0x01000000
+#define HSTATUS_VSXL 0x0000000300000000
#define HSTATUS_HUPMM 0x0003000000000000
#define USTATUS_UIE 0x00000001
@@ -197,18 +198,20 @@
#define MSTATEEN0_FCSR 0x00000002
#define MSTATEEN0_JVT 0x00000004
#define MSTATEEN0_CTR 0x0040000000000000
-#define MSTATEEN0_PRIV113 0x0100000000000000
#define MSTATEEN0_PRIV114 0x0080000000000000
+#define MSTATEEN0_PRIV113 0x0100000000000000
#define MSTATEEN0_HCONTEXT 0x0200000000000000
+#define MSTATEEN0_IMSIC 0x0400000000000000
#define MSTATEEN0_AIA 0x0800000000000000
#define MSTATEEN0_CSRIND 0x1000000000000000
#define MSTATEEN0_HENVCFG 0x4000000000000000
#define MSTATEEN_HSTATEEN 0x8000000000000000
#define MSTATEEN0H_CTR 0x00400000
-#define MSTATEEN0H_PRIV113 0x01000000
#define MSTATEEN0H_PRIV114 0x00800000
+#define MSTATEEN0H_PRIV113 0x01000000
#define MSTATEEN0H_HCONTEXT 0x02000000
+#define MSTATEEN0H_IMSIC 0x04000000
#define MSTATEEN0H_AIA 0x08000000
#define MSTATEEN0H_CSRIND 0x10000000
#define MSTATEEN0H_HENVCFG 0x40000000
@@ -268,11 +271,25 @@
#define SISELECT_SMCDELEG_HPMEVENT_3 0x43
#define SISELECT_SMCDELEG_END 0x5f
+#define MISELECT_IPRIO 0x30
+#define MISELECT_IPRIO_TOP 0x3f
+#define MISELECT_IMSIC 0x70
+#define MISELECT_IMSIC_TOP 0xff
+
+#define SISELECT_IPRIO 0x30
+#define SISELECT_IPRIO_TOP 0x3f
+#define SISELECT_IMSIC 0x70
+#define SISELECT_IMSIC_TOP 0xff
+
+#define VSISELECT_IMSIC 0x70
+#define VSISELECT_IMSIC_TOP 0xff
+
#define HSTATEEN0_CS 0x00000001
#define HSTATEEN0_FCSR 0x00000002
#define HSTATEEN0_JVT 0x00000004
#define HSTATEEN0_CTR 0x0040000000000000
#define HSTATEEN0_SCONTEXT 0x0200000000000000
+#define HSTATEEN0_IMSIC 0x0400000000000000
#define HSTATEEN0_AIA 0x0800000000000000
#define HSTATEEN0_CSRIND 0x1000000000000000
#define HSTATEEN0_SENVCFG 0x4000000000000000
@@ -280,6 +297,7 @@
#define HSTATEEN0H_CTR 0x00400000
#define HSTATEEN0H_SCONTEXT 0x02000000
+#define HSTATEEN0H_IMSIC 0x04000000
#define HSTATEEN0H_AIA 0x08000000
#define HSTATEEN0H_CSRIND 0x10000000
#define HSTATEEN0H_SENVCFG 0x40000000
@@ -291,6 +309,7 @@
#define SENVCFG_CBIE 0x00000030
#define SENVCFG_CBCFE 0x00000040
#define SENVCFG_CBZE 0x00000080
+#define SENVCFG_UKTE 0x00000100
#define SENVCFG_PMM 0x0000000300000000
#define SSTATEEN0_CS 0x00000001
@@ -356,6 +375,7 @@
#define PMP_W 0x02
#define PMP_X 0x04
#define PMP_A 0x18
+#define PMP_MT 0x60
#define PMP_L 0x80
#define PMP_SHIFT 2
@@ -363,6 +383,9 @@
#define PMP_NA4 0x10
#define PMP_NAPOT 0x18
+#define SPMP_U 0x100
+#define SPMP_SHARED 0x200
+
#define MCTRCTL_U 0x0000000000000001
#define MCTRCTL_S 0x0000000000000002
#define MCTRCTL_M 0x0000000000000004
@@ -384,6 +407,7 @@
#define MCTRCTL_RETINH 0x0000200000000000
#define MCTRCTL_INDLJMPINH 0x0000400000000000
#define MCTRCTL_DIRLJMPINH 0x0000800000000000
+#define MCTRCTL_CUSTOM 0xF000000000000000
#define SCTRCTL_U 0x0000000000000001
#define SCTRCTL_S 0x0000000000000002
@@ -424,12 +448,25 @@
#define VSCTRCTL_RETINH 0x0000200000000000
#define VSCTRCTL_INDLJMPINH 0x0000400000000000
#define VSCTRCTL_DIRLJMPINH 0x0000800000000000
+#define VSCTRCTL_CUSTOM 0xF000000000000000
#define SCTRDEPTH_DEPTH 0x00000007
#define SCTRSTATUS_WRPTR 0x000000FF
#define SCTRSTATUS_FROZEN 0x80000000
+#define SCTR_ENTRY_BASE 0x200
+
+#define SCTR_SOURCE_V 0x0000000000000001
+#define SCTR_SOURCE_PC 0xFFFFFFFFFFFFFFFE
+
+#define SCTR_TARGET_MISP 0x0000000000000001
+#define SCTR_TARGET_PC 0xFFFFFFFFFFFFFFFE
+
+#define SCTR_DATA_TYPE 0x000000000000000F
+#define SCTR_DATA_CCV 0x0000000000008000
+#define SCTR_DATA_CC 0x00000000FFFF0000
+
#define IRQ_U_SOFT 0
#define IRQ_S_SOFT 1
#define IRQ_VS_SOFT 2
@@ -458,7 +495,8 @@
#define PTE_A 0x040 /* Accessed */
#define PTE_D 0x080 /* Dirty */
#define PTE_SOFT 0x300 /* Reserved for Software */
-#define PTE_RSVD 0x1FC0000000000000 /* Reserved for future standard use */
+#define PTE_SVRSW60T59B 0x1800000000000000 /* Svrsw60t59b: Reserved for software use */
+#define PTE_RSVD 0x07C0000000000000 /* Reserved for future standard use */
#define PTE_PBMT 0x6000000000000000 /* Svpbmt: Page-based memory types */
#define PTE_N 0x8000000000000000 /* Svnapot: NAPOT translation contiguity */
#define PTE_ATTR 0xFFC0000000000000 /* All attributes and reserved bits */
@@ -657,6 +695,8 @@
#define MASK_BCLRI 0xfc00707f
#define MATCH_BEQ 0x63
#define MASK_BEQ 0x707f
+#define MATCH_BEQI 0x2063
+#define MASK_BEQI 0x707f
#define MATCH_BEXT 0x48005033
#define MASK_BEXT 0xfe00707f
#define MATCH_BEXTI 0x48005013
@@ -675,6 +715,8 @@
#define MASK_BLTU 0x707f
#define MATCH_BNE 0x1063
#define MASK_BNE 0x707f
+#define MATCH_BNEI 0x3063
+#define MASK_BNEI 0x707f
#define MATCH_BSET 0x28001033
#define MASK_BSET 0xfe00707f
#define MATCH_BSETI 0x28001013
@@ -1671,6 +1713,8 @@
#define MASK_VFADD_VF 0xfc00707f
#define MATCH_VFADD_VV 0x1057
#define MASK_VFADD_VV 0xfc00707f
+#define MATCH_VFBDOT_VV 0xac001077
+#define MASK_VFBDOT_VV 0xfc00707f
#define MATCH_VFCLASS_V 0x4c081057
#define MASK_VFCLASS_V 0xfc0ff07f
#define MATCH_VFCVT_F_X_V 0x48019057
@@ -1689,6 +1733,8 @@
#define MASK_VFDIV_VF 0xfc00707f
#define MATCH_VFDIV_VV 0x80001057
#define MASK_VFDIV_VV 0xfc00707f
+#define MATCH_VFEXT_VF2 0x480b2057
+#define MASK_VFEXT_VF2 0xfc0ff07f
#define MATCH_VFIRST_M 0x4008a057
#define MASK_VFIRST_M 0xfc0ff07f
#define MATCH_VFMACC_VF 0xb0005057
@@ -1727,6 +1773,8 @@
#define MASK_VFMV_S_F 0xfff0707f
#define MATCH_VFMV_V_F 0x5e005057
#define MASK_VFMV_V_F 0xfff0707f
+#define MATCH_VFNCVT_F_F_Q 0x480c9057
+#define MASK_VFNCVT_F_F_Q 0xfc0ff07f
#define MATCH_VFNCVT_F_F_W 0x480a1057
#define MASK_VFNCVT_F_F_W 0xfc0ff07f
#define MATCH_VFNCVT_F_X_W 0x48099057
@@ -1739,12 +1787,16 @@
#define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f
#define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057
#define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f
+#define MATCH_VFNCVT_SAT_F_F_Q 0x480d9057
+#define MASK_VFNCVT_SAT_F_F_Q 0xfc0ff07f
#define MATCH_VFNCVT_X_F_W 0x48089057
#define MASK_VFNCVT_X_F_W 0xfc0ff07f
#define MATCH_VFNCVT_XU_F_W 0x48081057
#define MASK_VFNCVT_XU_F_W 0xfc0ff07f
#define MATCH_VFNCVTBF16_F_F_W 0x480e9057
#define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f
+#define MATCH_VFNCVTBF16_SAT_F_F_W 0x480f9057
+#define MASK_VFNCVTBF16_SAT_F_F_W 0xfc0ff07f
#define MATCH_VFNMACC_VF 0xb4005057
#define MASK_VFNMACC_VF 0xfc00707f
#define MATCH_VFNMACC_VV 0xb4001057
@@ -1761,6 +1813,14 @@
#define MASK_VFNMSUB_VF 0xfc00707f
#define MATCH_VFNMSUB_VV 0xac001057
#define MASK_VFNMSUB_VV 0xfc00707f
+#define MATCH_VFQBDOT_ALT_VV 0xbc001077
+#define MASK_VFQBDOT_ALT_VV 0xfc00707f
+#define MATCH_VFQBDOT_VV 0xb8001077
+#define MASK_VFQBDOT_VV 0xfc00707f
+#define MATCH_VFQLDOT_ALT_VV 0x9c001077
+#define MASK_VFQLDOT_ALT_VV 0xfc00707f
+#define MATCH_VFQLDOT_VV 0x98001077
+#define MASK_VFQLDOT_VV 0xfc00707f
#define MATCH_VFRDIV_VF 0x84005057
#define MASK_VFRDIV_VF 0xfc00707f
#define MATCH_VFREC7_V 0x4c029057
@@ -1807,6 +1867,8 @@
#define MASK_VFWADD_WF 0xfc00707f
#define MATCH_VFWADD_WV 0xd0001057
#define MASK_VFWADD_WV 0xfc00707f
+#define MATCH_VFWBDOT_VV 0xb0001077
+#define MASK_VFWBDOT_VV 0xfc00707f
#define MATCH_VFWCVT_F_F_V 0x48061057
#define MASK_VFWCVT_F_F_V 0xfc0ff07f
#define MATCH_VFWCVT_F_X_V 0x48059057
@@ -1823,6 +1885,8 @@
#define MASK_VFWCVT_XU_F_V 0xfc0ff07f
#define MATCH_VFWCVTBF16_F_F_V 0x48069057
#define MASK_VFWCVTBF16_F_F_V 0xfc0ff07f
+#define MATCH_VFWLDOT_VV 0x90001077
+#define MASK_VFWLDOT_VV 0xfc00707f
#define MATCH_VFWMACC_VF 0xf0005057
#define MASK_VFWMACC_VF 0xfc00707f
#define MATCH_VFWMACC_VV 0xf0001057
@@ -2145,6 +2209,10 @@
#define MASK_VOR_VV 0xfc00707f
#define MATCH_VOR_VX 0x28004057
#define MASK_VOR_VX 0xfc00707f
+#define MATCH_VQBDOTS_VV 0xbc000077
+#define MASK_VQBDOTS_VV 0xfc00707f
+#define MATCH_VQBDOTU_VV 0xb8000077
+#define MASK_VQBDOTU_VV 0xfc00707f
#define MATCH_VQDOT_VV 0xb0002057
#define MASK_VQDOT_VV 0xfc00707f
#define MATCH_VQDOT_VX 0xb0006057
@@ -2159,6 +2227,10 @@
#define MASK_VQDOTU_VX 0xfc00707f
#define MATCH_VQDOTUS_VX 0xb8006057
#define MASK_VQDOTUS_VX 0xfc00707f
+#define MATCH_VQLDOTS_VV 0x9c000077
+#define MASK_VQLDOTS_VV 0xfc00707f
+#define MATCH_VQLDOTU_VV 0x98000077
+#define MASK_VQLDOTU_VV 0xfc00707f
#define MATCH_VREDAND_VS 0x4002057
#define MASK_VREDAND_VS 0xfc00707f
#define MATCH_VREDMAX_VS 0x1c002057
@@ -2498,8 +2570,6 @@
#define CSR_VTYPE 0xc21
#define CSR_VLENB 0xc22
#define CSR_SSTATUS 0x100
-#define CSR_SEDELEG 0x102
-#define CSR_SIDELEG 0x103
#define CSR_SIE 0x104
#define CSR_STVEC 0x105
#define CSR_SCOUNTEREN 0x106
@@ -2967,7 +3037,6 @@
#define INSN_FIELD_IMM4 0xf00000
#define INSN_FIELD_IMM5 0x1f00000
#define INSN_FIELD_IMM6 0x3f00000
-#define INSN_FIELD_ZIMM 0xf8000
#define INSN_FIELD_OPCODE 0x7f
#define INSN_FIELD_FUNCT7 0xfe000000
#define INSN_FIELD_VD 0xf80
@@ -3033,6 +3102,12 @@
#define INSN_FIELD_C_RS2 0x7c
#define INSN_FIELD_C_SREG1 0x380
#define INSN_FIELD_C_SREG2 0x1c
+#define INSN_FIELD_RD_P_E 0x18
+#define INSN_FIELD_RS2_P_E 0x18
+#define INSN_FIELD_RD_N0_E 0xf00
+#define INSN_FIELD_C_RS2_E 0x78
+#define INSN_FIELD_RD_E 0xf00
+#define INSN_FIELD_RS2_E 0x1e00000
#define INSN_FIELD_MOP_R_T_30 0x40000000
#define INSN_FIELD_MOP_R_T_27_26 0xc000000
#define INSN_FIELD_MOP_R_T_21_20 0x300000
@@ -3106,6 +3181,7 @@ DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
DECLARE_INSN(bclr, MATCH_BCLR, MASK_BCLR)
DECLARE_INSN(bclri, MATCH_BCLRI, MASK_BCLRI)
DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ)
+DECLARE_INSN(beqi, MATCH_BEQI, MASK_BEQI)
DECLARE_INSN(bext, MATCH_BEXT, MASK_BEXT)
DECLARE_INSN(bexti, MATCH_BEXTI, MASK_BEXTI)
DECLARE_INSN(bge, MATCH_BGE, MASK_BGE)
@@ -3115,6 +3191,7 @@ DECLARE_INSN(binvi, MATCH_BINVI, MASK_BINVI)
DECLARE_INSN(blt, MATCH_BLT, MASK_BLT)
DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU)
DECLARE_INSN(bne, MATCH_BNE, MASK_BNE)
+DECLARE_INSN(bnei, MATCH_BNEI, MASK_BNEI)
DECLARE_INSN(bset, MATCH_BSET, MASK_BSET)
DECLARE_INSN(bseti, MATCH_BSETI, MASK_BSETI)
DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD)
@@ -3613,6 +3690,7 @@ DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV)
DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX)
DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF)
DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV)
+DECLARE_INSN(vfbdot_vv, MATCH_VFBDOT_VV, MASK_VFBDOT_VV)
DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V)
DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V)
DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V)
@@ -3622,6 +3700,7 @@ DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V)
DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V)
DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF)
DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV)
+DECLARE_INSN(vfext_vf2, MATCH_VFEXT_VF2, MASK_VFEXT_VF2)
DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M)
DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF)
DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV)
@@ -3641,15 +3720,18 @@ DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV)
DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S)
DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F)
DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F)
+DECLARE_INSN(vfncvt_f_f_q, MATCH_VFNCVT_F_F_Q, MASK_VFNCVT_F_F_Q)
DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W)
DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W)
DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W)
DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W)
DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W)
DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W)
+DECLARE_INSN(vfncvt_sat_f_f_q, MATCH_VFNCVT_SAT_F_F_Q, MASK_VFNCVT_SAT_F_F_Q)
DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W)
DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W)
DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W)
+DECLARE_INSN(vfncvtbf16_sat_f_f_w, MATCH_VFNCVTBF16_SAT_F_F_W, MASK_VFNCVTBF16_SAT_F_F_W)
DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF)
DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV)
DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF)
@@ -3658,6 +3740,10 @@ DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF)
DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV)
DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF)
DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV)
+DECLARE_INSN(vfqbdot_alt_vv, MATCH_VFQBDOT_ALT_VV, MASK_VFQBDOT_ALT_VV)
+DECLARE_INSN(vfqbdot_vv, MATCH_VFQBDOT_VV, MASK_VFQBDOT_VV)
+DECLARE_INSN(vfqldot_alt_vv, MATCH_VFQLDOT_ALT_VV, MASK_VFQLDOT_ALT_VV)
+DECLARE_INSN(vfqldot_vv, MATCH_VFQLDOT_VV, MASK_VFQLDOT_VV)
DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF)
DECLARE_INSN(vfrec7_v, MATCH_VFREC7_V, MASK_VFREC7_V)
DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS)
@@ -3681,6 +3767,7 @@ DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF)
DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV)
DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF)
DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV)
+DECLARE_INSN(vfwbdot_vv, MATCH_VFWBDOT_VV, MASK_VFWBDOT_VV)
DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V)
DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V)
DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V)
@@ -3689,6 +3776,7 @@ DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V)
DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V)
DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V)
DECLARE_INSN(vfwcvtbf16_f_f_v, MATCH_VFWCVTBF16_F_F_V, MASK_VFWCVTBF16_F_F_V)
+DECLARE_INSN(vfwldot_vv, MATCH_VFWLDOT_VV, MASK_VFWLDOT_VV)
DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF)
DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV)
DECLARE_INSN(vfwmaccbf16_vf, MATCH_VFWMACCBF16_VF, MASK_VFWMACCBF16_VF)
@@ -3850,6 +3938,8 @@ DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX)
DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI)
DECLARE_INSN(vor_vv, MATCH_VOR_VV, MASK_VOR_VV)
DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX)
+DECLARE_INSN(vqbdots_vv, MATCH_VQBDOTS_VV, MASK_VQBDOTS_VV)
+DECLARE_INSN(vqbdotu_vv, MATCH_VQBDOTU_VV, MASK_VQBDOTU_VV)
DECLARE_INSN(vqdot_vv, MATCH_VQDOT_VV, MASK_VQDOT_VV)
DECLARE_INSN(vqdot_vx, MATCH_VQDOT_VX, MASK_VQDOT_VX)
DECLARE_INSN(vqdotsu_vv, MATCH_VQDOTSU_VV, MASK_VQDOTSU_VV)
@@ -3857,6 +3947,8 @@ DECLARE_INSN(vqdotsu_vx, MATCH_VQDOTSU_VX, MASK_VQDOTSU_VX)
DECLARE_INSN(vqdotu_vv, MATCH_VQDOTU_VV, MASK_VQDOTU_VV)
DECLARE_INSN(vqdotu_vx, MATCH_VQDOTU_VX, MASK_VQDOTU_VX)
DECLARE_INSN(vqdotus_vx, MATCH_VQDOTUS_VX, MASK_VQDOTUS_VX)
+DECLARE_INSN(vqldots_vv, MATCH_VQLDOTS_VV, MASK_VQLDOTS_VV)
+DECLARE_INSN(vqldotu_vv, MATCH_VQLDOTU_VV, MASK_VQLDOTU_VV)
DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, MASK_VREDAND_VS)
DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS)
DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS)
@@ -4051,8 +4143,6 @@ DECLARE_CSR(vl, CSR_VL)
DECLARE_CSR(vtype, CSR_VTYPE)
DECLARE_CSR(vlenb, CSR_VLENB)
DECLARE_CSR(sstatus, CSR_SSTATUS)
-DECLARE_CSR(sedeleg, CSR_SEDELEG)
-DECLARE_CSR(sideleg, CSR_SIDELEG)
DECLARE_CSR(sie, CSR_SIE)
DECLARE_CSR(stvec, CSR_STVEC)
DECLARE_CSR(scounteren, CSR_SCOUNTEREN)
diff --git a/riscv/execute.cc b/riscv/execute.cc
index 39d5ca4..97c90de 100644
--- a/riscv/execute.cc
+++ b/riscv/execute.cc
@@ -201,7 +201,7 @@ static inline reg_t execute_insn_logged(processor_t* p, reg_t pc, insn_fetch_t f
return npc;
}
-bool processor_t::slow_path()
+bool processor_t::slow_path() const
{
return debug || state.single_step != state.STEP_NONE || state.debug_mode ||
log_commits_enabled || histogram_enabled || in_wfi || check_triggers_icount;
@@ -210,6 +210,8 @@ bool processor_t::slow_path()
// fetch/decode/execute loop
void processor_t::step(size_t n)
{
+ mmu_t* _mmu = mmu;
+
if (!state.debug_mode) {
if (halt_request == HR_REGULAR) {
enter_debug_mode(DCSR_CAUSE_DEBUGINT, 0);
@@ -224,11 +226,10 @@ void processor_t::step(size_t n)
while (n > 0) {
size_t instret = 0;
reg_t pc = state.pc;
- mmu_t* _mmu = mmu;
state.prv_changed = false;
state.v_changed = false;
- #define advance_pc() \
+ #define advance_pc() { \
if (unlikely(invalid_pc(pc))) { \
switch (pc) { \
case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
@@ -236,11 +237,11 @@ void processor_t::step(size_t n)
default: abort(); \
} \
pc = state.pc; \
- break; \
+ goto serialize; \
} else { \
state.pc = pc; \
instret++; \
- }
+ }}
try
{
@@ -301,19 +302,21 @@ void processor_t::step(size_t n)
else while (instret < n)
{
// Main simulation loop, fast path.
- for (auto ic_entry = _mmu->access_icache(pc); ; ) {
+ for (auto ic_entry = _mmu->access_icache(pc); instret < n; instret++) {
auto fetch = ic_entry->data;
- pc = execute_insn_fast(this, pc, fetch);
ic_entry = ic_entry->next;
- if (unlikely(ic_entry->tag != pc))
- break;
- if (unlikely(instret + 1 == n))
- break;
- instret++;
- state.pc = pc;
+ auto new_pc = execute_insn_fast(this, pc, fetch);
+ if (unlikely(ic_entry->tag != new_pc)) {
+ ic_entry = &_mmu->icache[_mmu->icache_index(new_pc)];
+ _mmu->icache[_mmu->icache_index(pc)].next = ic_entry;
+ if (ic_entry->tag != new_pc) {
+ pc = new_pc;
+ advance_pc();
+ break;
+ }
+ }
+ state.pc = pc = ic_entry->tag;
}
-
- advance_pc();
}
}
catch(trap_t& t)
@@ -360,6 +363,7 @@ void processor_t::step(size_t n)
in_wfi = true;
}
+serialize:
state.minstret->bump((state.mcountinhibit->read() & MCOUNTINHIBIT_IR) ? 0 : instret);
// Model a hart whose CPI is 1.
diff --git a/riscv/insn_template.cc b/riscv/insn_template.cc
index 168e2dc..12d564b 100644
--- a/riscv/insn_template.cc
+++ b/riscv/insn_template.cc
@@ -6,7 +6,8 @@
#define DECODE_MACRO_USAGE_LOGGED 0
#define PROLOGUE \
- reg_t npc = sext_xlen(pc + insn_length(OPCODE))
+ reg_t npc = sext_xlen(pc + insn_length(OPCODE)); \
+ if (!p->extension_enabled(EXT_ZCA)) assume(insn_length(OPCODE) % 4 == 0)
#define EPILOGUE \
trace_opcode(p, OPCODE, insn); \
diff --git a/riscv/insns/amoadd_d.h b/riscv/insns/amoadd_d.h
index 8573aa5..f9ccd89 100644
--- a/riscv/insns/amoadd_d.h
+++ b/riscv/insns/amoadd_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs + RS2; }));
diff --git a/riscv/insns/amoadd_w.h b/riscv/insns/amoadd_w.h
index c288b3b..8f1265b 100644
--- a/riscv/insns/amoadd_w.h
+++ b/riscv/insns/amoadd_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs + RS2; })));
diff --git a/riscv/insns/amoand_d.h b/riscv/insns/amoand_d.h
index 2df7ce2..e44cd21 100644
--- a/riscv/insns/amoand_d.h
+++ b/riscv/insns/amoand_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs & RS2; }));
diff --git a/riscv/insns/amoand_w.h b/riscv/insns/amoand_w.h
index 962165f..05ff2db 100644
--- a/riscv/insns/amoand_w.h
+++ b/riscv/insns/amoand_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs & RS2; })));
diff --git a/riscv/insns/amomax_d.h b/riscv/insns/amomax_d.h
index ab95da0..7445fe3 100644
--- a/riscv/insns/amomax_d.h
+++ b/riscv/insns/amomax_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](int64_t lhs) { return std::max(lhs, int64_t(RS2)); }));
diff --git a/riscv/insns/amomax_w.h b/riscv/insns/amomax_w.h
index 132c2e0..a2b65fd 100644
--- a/riscv/insns/amomax_w.h
+++ b/riscv/insns/amomax_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](int32_t lhs) { return std::max(lhs, int32_t(RS2)); })));
diff --git a/riscv/insns/amomaxu_d.h b/riscv/insns/amomaxu_d.h
index e2371aa..32c6d95 100644
--- a/riscv/insns/amomaxu_d.h
+++ b/riscv/insns/amomaxu_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return std::max(lhs, RS2); }));
diff --git a/riscv/insns/amomaxu_w.h b/riscv/insns/amomaxu_w.h
index ebbdd41..030ce30 100644
--- a/riscv/insns/amomaxu_w.h
+++ b/riscv/insns/amomaxu_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return std::max(lhs, uint32_t(RS2)); })));
diff --git a/riscv/insns/amomin_d.h b/riscv/insns/amomin_d.h
index 419e42e..97f5173 100644
--- a/riscv/insns/amomin_d.h
+++ b/riscv/insns/amomin_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](int64_t lhs) { return std::min(lhs, int64_t(RS2)); }));
diff --git a/riscv/insns/amomin_w.h b/riscv/insns/amomin_w.h
index 749149c..266f574 100644
--- a/riscv/insns/amomin_w.h
+++ b/riscv/insns/amomin_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](int32_t lhs) { return std::min(lhs, int32_t(RS2)); })));
diff --git a/riscv/insns/amominu_d.h b/riscv/insns/amominu_d.h
index b4bab47..9f67295 100644
--- a/riscv/insns/amominu_d.h
+++ b/riscv/insns/amominu_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return std::min(lhs, RS2); }));
diff --git a/riscv/insns/amominu_w.h b/riscv/insns/amominu_w.h
index 680eef2..34475d7 100644
--- a/riscv/insns/amominu_w.h
+++ b/riscv/insns/amominu_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return std::min(lhs, uint32_t(RS2)); })));
diff --git a/riscv/insns/amoor_d.h b/riscv/insns/amoor_d.h
index c201d88..0b255d3 100644
--- a/riscv/insns/amoor_d.h
+++ b/riscv/insns/amoor_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs | RS2; }));
diff --git a/riscv/insns/amoor_w.h b/riscv/insns/amoor_w.h
index 0adac5b..6dc2e33 100644
--- a/riscv/insns/amoor_w.h
+++ b/riscv/insns/amoor_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs | RS2; })));
diff --git a/riscv/insns/amoswap_d.h b/riscv/insns/amoswap_d.h
index 62a95b0..2fb1398 100644
--- a/riscv/insns/amoswap_d.h
+++ b/riscv/insns/amoswap_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t UNUSED lhs) { return RS2; }));
diff --git a/riscv/insns/amoswap_w.h b/riscv/insns/amoswap_w.h
index 819579c..3ca7513 100644
--- a/riscv/insns/amoswap_w.h
+++ b/riscv/insns/amoswap_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t UNUSED lhs) { return RS2; })));
diff --git a/riscv/insns/amoxor_d.h b/riscv/insns/amoxor_d.h
index a40050f..4f257df 100644
--- a/riscv/insns/amoxor_d.h
+++ b/riscv/insns/amoxor_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs ^ RS2; }));
diff --git a/riscv/insns/amoxor_w.h b/riscv/insns/amoxor_w.h
index af025d6..6eb7a20 100644
--- a/riscv/insns/amoxor_w.h
+++ b/riscv/insns/amoxor_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZAAMO);
WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs ^ RS2; })));
diff --git a/riscv/insns/beqi.h b/riscv/insns/beqi.h
new file mode 100644
index 0000000..14555fe
--- /dev/null
+++ b/riscv/insns/beqi.h
@@ -0,0 +1,5 @@
+require_extension(EXT_ZIBI);
+
+if (RS1 == insn.b_imm5()) {
+ set_pc(BRANCH_TARGET);
+}
diff --git a/riscv/insns/bnei.h b/riscv/insns/bnei.h
new file mode 100644
index 0000000..ac557ed
--- /dev/null
+++ b/riscv/insns/bnei.h
@@ -0,0 +1,5 @@
+require_extension(EXT_ZIBI);
+
+if (RS1 != insn.b_imm5()) {
+ set_pc(BRANCH_TARGET);
+}
diff --git a/riscv/insns/c_add.h b/riscv/insns/c_add.h
index 796e634..0c97e3e 100644
--- a/riscv/insns/c_add.h
+++ b/riscv/insns/c_add.h
@@ -1,3 +1,2 @@
require_extension(EXT_ZCA);
-require(insn.rvc_rs2() != 0);
WRITE_RD(sext_xlen(RVC_RS1 + RVC_RS2));
diff --git a/riscv/insns/c_jalr.h b/riscv/insns/c_jalr.h
index 694f183..df91254 100644
--- a/riscv/insns/c_jalr.h
+++ b/riscv/insns/c_jalr.h
@@ -1,10 +1,6 @@
require_extension(EXT_ZCA);
-require(insn.rvc_rs1() != 0);
reg_t tmp = npc;
set_pc(RVC_RS1 & ~reg_t(1));
WRITE_REG(X_RA, tmp);
-if (ZICFILP_xLPE(STATE.v, STATE.prv)) {
- STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rvc_rs1());
- serialize();
-}
+maybe_set_elp(insn.rvc_rs1());
diff --git a/riscv/insns/c_jr.h b/riscv/insns/c_jr.h
index af43dd3..1a1d14e 100644
--- a/riscv/insns/c_jr.h
+++ b/riscv/insns/c_jr.h
@@ -2,7 +2,4 @@ require_extension(EXT_ZCA);
require(insn.rvc_rs1() != 0);
set_pc(RVC_RS1 & ~reg_t(1));
-if (ZICFILP_xLPE(STATE.v, STATE.prv)) {
- STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rvc_rs1());
- serialize();
-}
+maybe_set_elp(insn.rvc_rs1());
diff --git a/riscv/insns/c_mv.h b/riscv/insns/c_mv.h
index b227005..8edefc5 100644
--- a/riscv/insns/c_mv.h
+++ b/riscv/insns/c_mv.h
@@ -1,3 +1,2 @@
require_extension(EXT_ZCA);
-require(insn.rvc_rs2() != 0);
WRITE_RD(RVC_RS2);
diff --git a/riscv/insns/fli_h.h b/riscv/insns/fli_h.h
index ddf41a9..71fd64d 100644
--- a/riscv/insns/fli_h.h
+++ b/riscv/insns/fli_h.h
@@ -1,4 +1,4 @@
-require_extension(EXT_ZFH);
+require_either_extension(EXT_ZFH, EXT_ZVFH);
require_extension(EXT_ZFA);
require_fp;
{
diff --git a/riscv/insns/flq.h b/riscv/insns/flq.h
index 81d225c..6a60c0c 100644
--- a/riscv/insns/flq.h
+++ b/riscv/insns/flq.h
@@ -1,3 +1,5 @@
require_extension('Q');
require_fp;
-WRITE_FRD(MMU.load_float128(RS1 + insn.i_imm()));
+uint128_t v = MMU.load<uint128_t>(RS1 + insn.i_imm());
+float128_t f = { uint64_t(v), uint64_t(v >> 64) };
+WRITE_FRD(f);
diff --git a/riscv/insns/fsq.h b/riscv/insns/fsq.h
index 610960e..7a4bdd8 100644
--- a/riscv/insns/fsq.h
+++ b/riscv/insns/fsq.h
@@ -1,3 +1,4 @@
require_extension('Q');
require_fp;
-MMU.store_float128(RS1 + insn.s_imm(), FRS2);
+uint128_t v = FRS2.v[0] | (uint128_t(FRS2.v[1]) << 64);
+MMU.store<uint128_t>(RS1 + insn.s_imm(), v);
diff --git a/riscv/insns/jalr.h b/riscv/insns/jalr.h
index de84e89..abff855 100644
--- a/riscv/insns/jalr.h
+++ b/riscv/insns/jalr.h
@@ -3,7 +3,4 @@ reg_t tmp = npc;
set_pc((RS1 + insn.i_imm()) & ~reg_t(1));
WRITE_RD(tmp);
-if (ZICFILP_xLPE(STATE.v, STATE.prv)) {
- STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rs1());
- serialize();
-}
+maybe_set_elp(insn.rs1());
diff --git a/riscv/insns/lr_d.h b/riscv/insns/lr_d.h
index 214daff..32a16e5 100644
--- a/riscv/insns/lr_d.h
+++ b/riscv/insns/lr_d.h
@@ -1,3 +1,3 @@
-require_extension('A');
+require_extension(EXT_ZALRSC);
require_rv64;
WRITE_RD(MMU.load_reserved<int64_t>(RS1));
diff --git a/riscv/insns/lr_w.h b/riscv/insns/lr_w.h
index 354590f..fb0005c 100644
--- a/riscv/insns/lr_w.h
+++ b/riscv/insns/lr_w.h
@@ -1,2 +1,2 @@
-require_extension('A');
+require_extension(EXT_ZALRSC);
WRITE_RD(MMU.load_reserved<int32_t>(RS1));
diff --git a/riscv/insns/sc_d.h b/riscv/insns/sc_d.h
index ac82c3e..1b6880b 100644
--- a/riscv/insns/sc_d.h
+++ b/riscv/insns/sc_d.h
@@ -1,4 +1,4 @@
-require_extension('A');
+require_extension(EXT_ZALRSC);
require_rv64;
bool have_reservation = MMU.store_conditional<uint64_t>(RS1, RS2);
diff --git a/riscv/insns/sc_w.h b/riscv/insns/sc_w.h
index 48fea4b..6df6a67 100644
--- a/riscv/insns/sc_w.h
+++ b/riscv/insns/sc_w.h
@@ -1,4 +1,4 @@
-require_extension('A');
+require_extension(EXT_ZALRSC);
bool have_reservation = MMU.store_conditional<uint32_t>(RS1, RS2);
diff --git a/riscv/insns/sfence_inval_ir.h b/riscv/insns/sfence_inval_ir.h
index 6f76a3f..42fb177 100644
--- a/riscv/insns/sfence_inval_ir.h
+++ b/riscv/insns/sfence_inval_ir.h
@@ -1,4 +1,4 @@
require_extension('S');
require_extension(EXT_SVINVAL);
-require_impl(IMPL_MMU);
+require(p->has_mmu());
require_privilege_hs_qualified(PRV_S);
diff --git a/riscv/insns/sfence_vma.h b/riscv/insns/sfence_vma.h
index 7d6c01a..156331d 100644
--- a/riscv/insns/sfence_vma.h
+++ b/riscv/insns/sfence_vma.h
@@ -1,5 +1,5 @@
require_extension('S');
-require_impl(IMPL_MMU);
+require(p->has_mmu());
if (STATE.v) {
if (STATE.prv == PRV_U || get_field(STATE.hstatus->read(), HSTATUS_VTVM))
require_novirt();
diff --git a/riscv/insns/sret.h b/riscv/insns/sret.h
index efb4fa6..3bbdb82 100644
--- a/riscv/insns/sret.h
+++ b/riscv/insns/sret.h
@@ -30,7 +30,7 @@ if (ZICFILP_xLPE(prev_virt, prev_prv)) {
if (STATE.prv == PRV_M) {
STATE.mstatus->write(STATE.mstatus->read() & ~MSTATUS_MDT);
if (prev_prv == PRV_U || prev_virt)
- STATE.mstatus->write(STATE.mstatus->read() & ~MSTATUS_SDT);
+ s = set_field(s, SSTATUS_SDT, 0);
if (prev_virt && prev_prv == PRV_U)
STATE.vsstatus->write(STATE.vsstatus->read() & ~SSTATUS_SDT);
}
diff --git a/riscv/insns/ssamoswap_d.h b/riscv/insns/ssamoswap_d.h
index 10ea5ef..4169ac3 100644
--- a/riscv/insns/ssamoswap_d.h
+++ b/riscv/insns/ssamoswap_d.h
@@ -1,5 +1,5 @@
require_extension(EXT_ZICFISS);
-require_extension('A');
+require_extension(EXT_ZAAMO);
require_rv64;
DECLARE_XENVCFG_VARS(SSE);
diff --git a/riscv/insns/ssamoswap_w.h b/riscv/insns/ssamoswap_w.h
index 3cdefc7..d971ebe 100644
--- a/riscv/insns/ssamoswap_w.h
+++ b/riscv/insns/ssamoswap_w.h
@@ -1,7 +1,6 @@
require_extension(EXT_ZICFISS);
-require_extension('A');
+require_extension(EXT_ZAAMO);
DECLARE_XENVCFG_VARS(SSE);
require_envcfg(SSE);
WRITE_RD(sext32(MMU.ssamoswap<uint32_t>(RS1, RS2)));
-
diff --git a/riscv/insns/vandn_vv.h b/riscv/insns/vandn_vv.h
index d85e47d..411c97d 100644
--- a/riscv/insns/vandn_vv.h
+++ b/riscv/insns/vandn_vv.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
VI_VV_LOOP
({
diff --git a/riscv/insns/vandn_vx.h b/riscv/insns/vandn_vx.h
index 1c66a40..417b8d2 100644
--- a/riscv/insns/vandn_vx.h
+++ b/riscv/insns/vandn_vx.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
VI_VX_LOOP
({
diff --git a/riscv/insns/vbrev8_v.h b/riscv/insns/vbrev8_v.h
index a6d3cda..19fa723 100644
--- a/riscv/insns/vbrev8_v.h
+++ b/riscv/insns/vbrev8_v.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
VI_V_ULOOP
({
diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h
index 2b808e0..be3bd9a 100644
--- a/riscv/insns/vfadd_vf.h
+++ b/riscv/insns/vfadd_vf.h
@@ -1,7 +1,9 @@
// vfadd.vf vd, vs2, rs1
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_add(rs1, vs2);
+ vd = VFP_OP_16(add, vs2, rs1);
},
{
vd = f32_add(rs1, vs2);
diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h
index ce94921..67ddd5c 100644
--- a/riscv/insns/vfadd_vv.h
+++ b/riscv/insns/vfadd_vv.h
@@ -1,7 +1,9 @@
// vfadd.vv vd, vs2, vs1
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_add(vs1, vs2);
+ vd = VFP_OP_16(add, vs2, vs1);
},
{
vd = f32_add(vs1, vs2);
diff --git a/riscv/insns/vfbdot_vv.h b/riscv/insns/vfbdot_vv.h
new file mode 100644
index 0000000..8d4c792
--- /dev/null
+++ b/riscv/insns/vfbdot_vv.h
@@ -0,0 +1,16 @@
+VI_VFP_BASE;
+ZVBDOT_INIT(1);
+
+switch (P.VU.vsew) {
+ case 32: {
+ // This implementation rounds intermediate products to FP32 then sums them
+ // sequentially; other implementations are also valid. If a more
+ // realistic scheme (e.g. binary reduction tree, plus final accumulation)
+ // becomes popular, we might change this implementation accordingly.
+ require_extension(EXT_ZVFBDOT32F);
+ auto macc = [](auto a, auto b, auto c) { return f32_add(c, f32_mul(a, b)); };
+ ZVBDOT_GENERIC_LOOP(float32_t, float32_t, float32_t, macc);
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h
index a307d2d..4378bd4 100644
--- a/riscv/insns/vfclass_v.h
+++ b/riscv/insns/vfclass_v.h
@@ -1,7 +1,7 @@
// vfclass.v vd, vs2, vm
VI_VFP_V_LOOP
({
- vd = f16(f16_classify(vs2));
+ vd = P.VU.altfmt ? bf16(bf16_classify(vs2)) : f16(f16_classify(vs2));
},
{
vd = f32(f32_classify(vs2));
diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h
index a703ef0..2707daa 100644
--- a/riscv/insns/vfdiv_vf.h
+++ b/riscv/insns/vfdiv_vf.h
@@ -1,4 +1,6 @@
// vfdiv.vf vd, vs2, rs1
+VI_NON_ALTFMT_INSN
+
VI_VFP_VF_LOOP
({
vd = f16_div(vs2, rs1);
diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h
index c66d751..5f5ed74 100644
--- a/riscv/insns/vfdiv_vv.h
+++ b/riscv/insns/vfdiv_vv.h
@@ -1,4 +1,6 @@
// vfdiv.vv vd, vs2, vs1
+VI_NON_ALTFMT_INSN
+
VI_VFP_VV_LOOP
({
vd = f16_div(vs2, vs1);
diff --git a/riscv/insns/vfext_vf2.h b/riscv/insns/vfext_vf2.h
new file mode 100644
index 0000000..523bb28
--- /dev/null
+++ b/riscv/insns/vfext_vf2.h
@@ -0,0 +1,14 @@
+static const uint8_t ofp4_to_e4m3[16] = {
+ 0x00, 0x30, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, // positive values (sign bit 0)
+ 0x80, 0xb0, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc // negative values (sign bit 1)
+};
+
+require_extension(EXT_ZVFOFP4MIN);
+VI_NON_ALTFMT_INSN
+VI_VF_EXT(2,
+ {
+ uint_fast8_t packed_ofp4_reg = P.VU.elt<uint8_t>(rs2_num, i / 2);
+ uint_fast8_t data = ((packed_ofp4_reg >> ((i & 1UL)*4)) & 0xF);
+ P.VU.elt<uint8_t>(rd_num, i, true) = ofp4_to_e4m3[data];
+ }
+)
diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h
index 61578d3..293a06b 100644
--- a/riscv/insns/vfmacc_vf.h
+++ b/riscv/insns/vfmacc_vf.h
@@ -1,7 +1,10 @@
// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(rs1, vs2, vd);
+ vd = VFP_MULADD_16(rs1, vs2, vd);
},
{
vd = f32_mulAdd(rs1, vs2, vd);
diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h
index 499b1d4..3fadb66 100644
--- a/riscv/insns/vfmacc_vv.h
+++ b/riscv/insns/vfmacc_vv.h
@@ -1,7 +1,10 @@
// vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(vs1, vs2, vd);
+ vd = VFP_MULADD_16(vs1, vs2, vd);
},
{
vd = f32_mulAdd(vs1, vs2, vd);
diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h
index 2a01429..258e620 100644
--- a/riscv/insns/vfmadd_vf.h
+++ b/riscv/insns/vfmadd_vf.h
@@ -1,7 +1,9 @@
// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i]
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(vd, rs1, vs2);
+ vd = VFP_MULADD_16(vd, rs1, vs2);
},
{
vd = f32_mulAdd(vd, rs1, vs2);
diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h
index 7ef734f..42ece0a 100644
--- a/riscv/insns/vfmadd_vv.h
+++ b/riscv/insns/vfmadd_vv.h
@@ -1,7 +1,9 @@
// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i]
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(vd, vs1, vs2);
+ vd = VFP_MULADD_16(vd, vs1, vs2);
},
{
vd = f32_mulAdd(vd, vs1, vs2);
diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h
index c4b74cb..3bd8013 100644
--- a/riscv/insns/vfmax_vf.h
+++ b/riscv/insns/vfmax_vf.h
@@ -1,7 +1,10 @@
// vfmax
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_max(vs2, rs1);
+ vd = VFP_OP_16(max, vs2, rs1);
},
{
vd = f32_max(vs2, rs1);
diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h
index 6439c89..4801b86 100644
--- a/riscv/insns/vfmax_vv.h
+++ b/riscv/insns/vfmax_vv.h
@@ -1,7 +1,10 @@
// vfmax
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_max(vs2, vs1);
+ vd = VFP_OP_16(max, vs2, vs1);
},
{
vd = f32_max(vs2, vs1);
diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h
index 1560cdf7..efae5a1 100644
--- a/riscv/insns/vfmin_vf.h
+++ b/riscv/insns/vfmin_vf.h
@@ -1,7 +1,10 @@
// vfmin vd, vs2, rs1
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_min(vs2, rs1);
+ vd = VFP_OP_16(min, vs2, rs1);
},
{
vd = f32_min(vs2, rs1);
diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h
index 882a774..9e2bee2 100644
--- a/riscv/insns/vfmin_vv.h
+++ b/riscv/insns/vfmin_vv.h
@@ -1,7 +1,10 @@
// vfmin vd, vs2, vs1
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_min(vs2, vs1);
+ vd = VFP_OP_16(min, vs2, vs1);
},
{
vd = f32_min(vs2, vs1);
diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h
index 8af397b..2251e93 100644
--- a/riscv/insns/vfmsac_vf.h
+++ b/riscv/insns/vfmsac_vf.h
@@ -1,7 +1,11 @@
// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(rs1, vs2, bf16(vd.v ^ BF16_SIGN))
+ : f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h
index 3bb50e5..3b9cf12 100644
--- a/riscv/insns/vfmsac_vv.h
+++ b/riscv/insns/vfmsac_vv.h
@@ -1,7 +1,11 @@
// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(vs1, vs2, bf16(vd.v ^ BF16_SIGN))
+ : f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h
index ab77b4c..5ce6a44 100644
--- a/riscv/insns/vfmsub_vf.h
+++ b/riscv/insns/vfmsub_vf.h
@@ -1,7 +1,11 @@
// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(vd, rs1, bf16(vs2.v ^ BF16_SIGN))
+ : f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h
index 3cac937..bedc934 100644
--- a/riscv/insns/vfmsub_vv.h
+++ b/riscv/insns/vfmsub_vv.h
@@ -1,7 +1,11 @@
// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(vd, vs1, bf16(vs2.v ^ BF16_SIGN))
+ : f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h
index f5f63e4..93ca216 100644
--- a/riscv/insns/vfmul_vf.h
+++ b/riscv/insns/vfmul_vf.h
@@ -1,7 +1,9 @@
// vfmul.vf vd, vs2, rs1, vm
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mul(vs2, rs1);
+ vd = VFP_OP_16(mul, vs2, rs1);
},
{
vd = f32_mul(vs2, rs1);
diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h
index 7930fd0..75daad6 100644
--- a/riscv/insns/vfmul_vv.h
+++ b/riscv/insns/vfmul_vv.h
@@ -1,7 +1,9 @@
// vfmul.vv vd, vs1, vs2, vm
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mul(vs1, vs2);
+ vd = VFP_OP_16(mul, vs1, vs2);
},
{
vd = f32_mul(vs1, vs2);
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 1ad6bc6..65a3cff 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -1,12 +1,6 @@
// vfmv_f_s: rd = vs2[0] (rs1=0)
-require_vector(true);
-require_fp;
-require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) ||
- (P.VU.vsew == e32 && p->extension_enabled('F')) ||
- (P.VU.vsew == e64 && p->extension_enabled('D')));
-require(STATE.frm->read() < 0x5);
+VI_VFP_COMMON;
-reg_t rs2_num = insn.rs2();
uint64_t vs2_0 = 0;
const reg_t sew = P.VU.vsew;
switch (sew) {
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 4b1b955..917948d 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -1,19 +1,14 @@
// vfmv_s_f: vd[0] = rs1 (vs2=0)
-require_vector(true);
-require_fp;
-require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) ||
- (P.VU.vsew == e32 && p->extension_enabled('F')) ||
- (P.VU.vsew == e64 && p->extension_enabled('D')));
-require(STATE.frm->read() < 0x5);
+require_zvfbfa
-reg_t vl = P.VU.vl->read();
+VI_VFP_COMMON;
if (vl > 0 && P.VU.vstart->read() < vl) {
reg_t rd_num = insn.rd();
switch (P.VU.vsew) {
case e16:
- P.VU.elt<uint16_t>(rd_num, 0, true) = f16(FRS1).v;
+ P.VU.elt<uint16_t>(rd_num, 0, true) = P.VU.altfmt ? bf16(FRS1).v : f16(FRS1).v;
break;
case e32:
P.VU.elt<uint32_t>(rd_num, 0, true) = f32(FRS1).v;
diff --git a/riscv/insns/vfncvt_f_f_q.h b/riscv/insns/vfncvt_f_f_q.h
new file mode 100644
index 0000000..abbe5e8
--- /dev/null
+++ b/riscv/insns/vfncvt_f_f_q.h
@@ -0,0 +1,7 @@
+// vfncvt.f.f.q vd, vs2, vm
+VI_VFP_NCVT_FP_TO_OFP8(
+ {
+ vd = P.VU.altfmt ? f32_to_e5m2(vs2, false) : f32_to_e4m3(vs2, false);
+ }, // BODY
+ { require_extension(EXT_ZVFOFP8MIN); } // CHECK
+)
diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h
index 97de40e..ede707d 100644
--- a/riscv/insns/vfncvt_f_f_w.h
+++ b/riscv/insns/vfncvt_f_f_w.h
@@ -1,7 +1,7 @@
// vfncvt.f.f.w vd, vs2, vm
VI_VFP_NCVT_FP_TO_FP(
- { vd = f32_to_f16(vs2); }, // BODY32
- { vd = f64_to_f32(vs2); }, // BODY64
- { require_extension(EXT_ZVFHMIN); }, // CHECK32
- { require_extension('D'); } // CHECK64
+ { vd = P.VU.altfmt ? f32_to_bf16(vs2) : f32_to_f16(vs2); }, // BODY32
+ { vd = f64_to_f32(vs2); }, // BODY64
+ { require_zvfbfa_or_zvfhmin }, // CHECK32
+ { require(p->get_isa().get_zvd()); } // CHECK64
)
diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h
index 46f2d92..676cd3a 100644
--- a/riscv/insns/vfncvt_f_x_w.h
+++ b/riscv/insns/vfncvt_f_x_w.h
@@ -1,8 +1,10 @@
// vfncvt.f.x.w vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_NCVT_INT_TO_FP(
- { vd = i32_to_f16(vs2); }, // BODY32
- { vd = i64_to_f32(vs2); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK32
- { require_extension('F'); }, // CHECK64
- int // sign
+ { vd = i32_to_f16(vs2); }, // BODY32
+ { vd = i64_to_f32(vs2); }, // BODY64
+ { require_extension(EXT_ZVFH); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK64
+ int // sign
)
diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h
index 729fb52..f90d3d7 100644
--- a/riscv/insns/vfncvt_f_xu_w.h
+++ b/riscv/insns/vfncvt_f_xu_w.h
@@ -1,8 +1,10 @@
// vfncvt.f.xu.w vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_NCVT_INT_TO_FP(
- { vd = ui32_to_f16(vs2); }, // BODY32
- { vd = ui64_to_f32(vs2); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK32
- { require_extension('F'); }, // CHECK64
- uint // sign
+ { vd = ui32_to_f16(vs2); }, // BODY32
+ { vd = ui64_to_f32(vs2); }, // BODY64
+ { require_extension(EXT_ZVFH); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK64
+ uint // sign
)
diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h
index 93002dc..020a4df 100644
--- a/riscv/insns/vfncvt_rod_f_f_w.h
+++ b/riscv/insns/vfncvt_rod_f_f_w.h
@@ -1,13 +1,13 @@
// vfncvt.rod.f.f.w vd, vs2, vm
VI_VFP_NCVT_FP_TO_FP(
- { // BODY32
+ { // BODY32
softfloat_roundingMode = softfloat_round_odd;
- vd = f32_to_f16(vs2);
+ vd = P.VU.altfmt ? f32_to_bf16(vs2) : f32_to_f16(vs2);
},
- { // BODY64
+ { // BODY64
softfloat_roundingMode = softfloat_round_odd;
vd = f64_to_f32(vs2);
},
- { require_extension(EXT_ZVFH); }, // CHECK32
- { require_extension('F'); } // CHECK64
+ { require_zvfbfa_or_zvfh; }, // CHECK32
+ { require(p->get_isa().get_zvd()); } // CHECK64
)
diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h
index ee47e22..ad9ce21 100644
--- a/riscv/insns/vfncvt_rtz_x_f_w.h
+++ b/riscv/insns/vfncvt_rtz_x_f_w.h
@@ -1,10 +1,12 @@
// vfncvt.rtz.x.f.w vd, vs2, vm
+
VI_VFP_NCVT_FP_TO_INT(
- { vd = f16_to_i8(vs2, softfloat_round_minMag, true); }, // BODY16
+ { vd = P.VU.altfmt ? bf16_to_i8(vs2, softfloat_round_minMag, true)
+ : f16_to_i8(vs2, softfloat_round_minMag, true); }, // BODY16
{ vd = f32_to_i16(vs2, softfloat_round_minMag, true); }, // BODY32
{ vd = f64_to_i32(vs2, softfloat_round_minMag, true); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK16
- { require(p->extension_enabled('F')); }, // CHECK32
- { require(p->extension_enabled('D')); }, // CHECK64
+ { require_zvfbfa_or_zvfh; }, // CHECK16
+ { require(p->get_isa().get_zvf()); }, // CHECK32
+ { require(p->get_isa().get_zvd()); }, // CHECK64
int // sign
)
diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h
index 3d029f3..d258aea 100644
--- a/riscv/insns/vfncvt_rtz_xu_f_w.h
+++ b/riscv/insns/vfncvt_rtz_xu_f_w.h
@@ -1,10 +1,12 @@
// vfncvt.rtz.xu.f.w vd, vs2, vm
+
VI_VFP_NCVT_FP_TO_INT(
- { vd = f16_to_ui8(vs2, softfloat_round_minMag, true); }, // BODY16
+ { vd = P.VU.altfmt ? bf16_to_ui8(vs2, softfloat_round_minMag, true)
+ : f16_to_ui8(vs2, softfloat_round_minMag, true); }, // BODY16
{ vd = f32_to_ui16(vs2, softfloat_round_minMag, true); }, // BODY32
{ vd = f64_to_ui32(vs2, softfloat_round_minMag, true); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK16
- { require(p->extension_enabled('F')); }, // CHECK32
- { require(p->extension_enabled('D')); }, // CHECK64
+ { require_zvfbfa_or_zvfh; }, // CHECK16
+ { require(p->get_isa().get_zvf()); }, // CHECK32
+ { require(p->get_isa().get_zvd()); }, // CHECK64
uint // sign
)
diff --git a/riscv/insns/vfncvt_sat_f_f_q.h b/riscv/insns/vfncvt_sat_f_f_q.h
new file mode 100644
index 0000000..3d545a9
--- /dev/null
+++ b/riscv/insns/vfncvt_sat_f_f_q.h
@@ -0,0 +1,7 @@
+// vfncvt.sat.f.f.q vd, vs2, vm
+VI_VFP_NCVT_FP_TO_OFP8(
+ {
+ vd = P.VU.altfmt ? f32_to_e5m2(vs2, true) : f32_to_e4m3(vs2, true);
+ }, // BODY
+ { require_extension(EXT_ZVFOFP8MIN); } // CHECK
+)
diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h
index 0da5a75..929ae62 100644
--- a/riscv/insns/vfncvt_x_f_w.h
+++ b/riscv/insns/vfncvt_x_f_w.h
@@ -1,10 +1,12 @@
// vfncvt.x.f.w vd, vs2, vm
+
VI_VFP_NCVT_FP_TO_INT(
- { vd = f16_to_i8(vs2, softfloat_roundingMode, true); }, // BODY16
+ { vd = P.VU.altfmt ? bf16_to_i8(vs2, softfloat_roundingMode, true)
+ : f16_to_i8(vs2, softfloat_roundingMode, true); }, // BODY16
{ vd = f32_to_i16(vs2, softfloat_roundingMode, true); }, // BODY32
{ vd = f64_to_i32(vs2, softfloat_roundingMode, true); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK16
- { require(p->extension_enabled('F')); }, // CHECK32
- { require(p->extension_enabled('D')); }, // CHECK64
+ { require_zvfbfa_or_zvfh; }, // CHECK16
+ { require(p->get_isa().get_zvf()); }, // CHECK32
+ { require(p->get_isa().get_zvd()); }, // CHECK64
int // sign
)
diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h
index da5a52d..c56bb53 100644
--- a/riscv/insns/vfncvt_xu_f_w.h
+++ b/riscv/insns/vfncvt_xu_f_w.h
@@ -1,10 +1,12 @@
// vfncvt.xu.f.w vd, vs2, vm
+
VI_VFP_NCVT_FP_TO_INT(
- { vd = f16_to_ui8(vs2, softfloat_roundingMode, true); }, // BODY16
+ { vd = P.VU.altfmt ? bf16_to_ui8(vs2, softfloat_roundingMode, true)
+ : f16_to_ui8(vs2, softfloat_roundingMode, true); }, // BODY16
{ vd = f32_to_ui16(vs2, softfloat_roundingMode, true); }, // BODY32
{ vd = f64_to_ui32(vs2, softfloat_roundingMode, true); }, // BODY64
- { require_extension(EXT_ZVFH); }, // CHECK16
- { require(p->extension_enabled('F')); }, // CHECK32
- { require(p->extension_enabled('D')); }, // CHECK64
+ { require_zvfbfa_or_zvfh; }, // CHECK16
+ { require(p->get_isa().get_zvf()); }, // CHECK32
+ { require(p->get_isa().get_zvd()); }, // CHECK64
uint // sign
)
diff --git a/riscv/insns/vfncvtbf16_f_f_w.h b/riscv/insns/vfncvtbf16_f_f_w.h
index 4708802..6248328 100644
--- a/riscv/insns/vfncvtbf16_f_f_w.h
+++ b/riscv/insns/vfncvtbf16_f_f_w.h
@@ -1,5 +1,9 @@
// vfncvtbf16.f.f.w vd, vs2, vm
-VI_VFP_NCVT_BF16_TO_FP(
- { vd = f32_to_bf16(vs2); }, // BODY16
- { require_extension(EXT_ZVFBFMIN); } // CHECK16
+VI_VFP_NCVT_FP_BF16_OFP8(
+ {
+ vd = P.VU.altfmt ? bf16_to_e5m2(vs2, false) : bf16_to_e4m3(vs2, false);
+ }, // BODY16
+ { vd = f32_to_bf16(vs2); }, // BODY32
+ { require(p->extension_enabled(EXT_ZVFOFP8MIN)); }, // CHECK16
+ { require_extension(EXT_ZVFBFMIN); } // CHECK32
)
diff --git a/riscv/insns/vfncvtbf16_sat_f_f_w.h b/riscv/insns/vfncvtbf16_sat_f_f_w.h
new file mode 100644
index 0000000..238e415
--- /dev/null
+++ b/riscv/insns/vfncvtbf16_sat_f_f_w.h
@@ -0,0 +1,8 @@
+// vfncvtbf16.sat.f.f.w vd, vs2, vm
+VI_VFP_NCVT_SAT_BF16_TO_OFP8(
+ {
+ vd = P.VU.altfmt ? bf16_to_e5m2(vs2, true) : bf16_to_e4m3(vs2, true);
+ }, // BODY16
+ { require_extension(EXT_ZVFOFP8MIN); } // CHECK16
+ )
+
diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h
index 1b99302..e9688ea 100644
--- a/riscv/insns/vfnmacc_vf.h
+++ b/riscv/insns/vfnmacc_vf.h
@@ -1,7 +1,12 @@
// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i]
+
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(rs1, bf16(vs2.v ^ BF16_SIGN), bf16(vd.v ^ BF16_SIGN))
+ : f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h
index 7200e06..c8f22be 100644
--- a/riscv/insns/vfnmacc_vv.h
+++ b/riscv/insns/vfnmacc_vv.h
@@ -1,7 +1,11 @@
// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vs2.v ^ BF16_SIGN), vs1, bf16(vd.v ^ BF16_SIGN))
+ : f16_mulAdd( f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h
index cb9c217..32754c3 100644
--- a/riscv/insns/vfnmadd_vf.h
+++ b/riscv/insns/vfnmadd_vf.h
@@ -1,7 +1,11 @@
// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), rs1, bf16(vs2.v ^ BF16_SIGN))
+ : f16_mulAdd( f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h
index 7160ed7..d71f9ed 100644
--- a/riscv/insns/vfnmadd_vv.h
+++ b/riscv/insns/vfnmadd_vv.h
@@ -1,7 +1,11 @@
// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN));
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), vs1, bf16(vs2.v ^ BF16_SIGN))
+ : f16_mulAdd( f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN));
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h
index aa6baa3..a69603f 100644
--- a/riscv/insns/vfnmsac_vf.h
+++ b/riscv/insns/vfnmsac_vf.h
@@ -1,7 +1,11 @@
// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd);
+ vd = P.VU.altfmt ? bf16_mulAdd(rs1, bf16(vs2.v ^ BF16_SIGN), vd)
+ : f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd);
},
{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h
index 47db61d..f150641 100644
--- a/riscv/insns/vfnmsac_vv.h
+++ b/riscv/insns/vfnmsac_vv.h
@@ -1,7 +1,11 @@
// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd);
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vs1.v ^ BF16_SIGN), vs2, vd)
+ : f16_mulAdd( f16(vs1.v ^ F16_SIGN), vs2, vd);
},
{
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h
index 43aa9e2..55be4d1 100644
--- a/riscv/insns/vfnmsub_vf.h
+++ b/riscv/insns/vfnmsub_vf.h
@@ -1,7 +1,11 @@
// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i]
+
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2);
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), rs1, vs2)
+ : f16_mulAdd( f16(vd.v ^ F16_SIGN), rs1, vs2);
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h
index 2a45c8f..a9ff58c 100644
--- a/riscv/insns/vfnmsub_vv.h
+++ b/riscv/insns/vfnmsub_vv.h
@@ -1,7 +1,11 @@
// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
+
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2);
+ vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), vs1, vs2)
+ : f16_mulAdd( f16(vd.v ^ F16_SIGN), vs1, vs2);
},
{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
diff --git a/riscv/insns/vfqbdot_alt_vv.h b/riscv/insns/vfqbdot_alt_vv.h
new file mode 100644
index 0000000..f1df781
--- /dev/null
+++ b/riscv/insns/vfqbdot_alt_vv.h
@@ -0,0 +1,17 @@
+VI_VFP_BASE;
+ZVBDOT_INIT(4);
+
+#define COMMA ,
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVFQBDOT8F);
+ if (P.VU.altfmt) {
+ ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e5m2>);
+ } else {
+ ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e5m2>);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfqbdot_vv.h b/riscv/insns/vfqbdot_vv.h
new file mode 100644
index 0000000..fe3e652
--- /dev/null
+++ b/riscv/insns/vfqbdot_vv.h
@@ -0,0 +1,17 @@
+VI_VFP_BASE;
+ZVBDOT_INIT(4);
+
+#define COMMA ,
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVFQBDOT8F);
+ if (P.VU.altfmt) {
+ ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e4m3>);
+ } else {
+ ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e4m3>);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfqldot_alt_vv.h b/riscv/insns/vfqldot_alt_vv.h
new file mode 100644
index 0000000..ea18828
--- /dev/null
+++ b/riscv/insns/vfqldot_alt_vv.h
@@ -0,0 +1,17 @@
+VI_VFP_BASE;
+ZVLDOT_INIT(4);
+
+#define COMMA ,
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVFQLDOT8F);
+ if (P.VU.altfmt) {
+ ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e5m2>);
+ } else {
+ ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e5m2>);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfqldot_vv.h b/riscv/insns/vfqldot_vv.h
new file mode 100644
index 0000000..b03ec29
--- /dev/null
+++ b/riscv/insns/vfqldot_vv.h
@@ -0,0 +1,17 @@
+VI_VFP_BASE;
+ZVLDOT_INIT(4);
+
+#define COMMA ,
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVFQLDOT8F);
+ if (P.VU.altfmt) {
+ ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e4m3>);
+ } else {
+ ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e4m3>);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h
index b283343..399a496 100644
--- a/riscv/insns/vfrdiv_vf.h
+++ b/riscv/insns/vfrdiv_vf.h
@@ -1,4 +1,6 @@
// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
+VI_NON_ALTFMT_INSN
+
VI_VFP_VF_LOOP
({
vd = f16_div(rs1, vs2);
diff --git a/riscv/insns/vfrec7_v.h b/riscv/insns/vfrec7_v.h
index 69c026b..40bc33c 100644
--- a/riscv/insns/vfrec7_v.h
+++ b/riscv/insns/vfrec7_v.h
@@ -1,7 +1,7 @@
// vfclass.v vd, vs2, vm
VI_VFP_V_LOOP
({
- vd = f16_recip7(vs2);
+ vd = P.VU.altfmt ? bf16_recip7(vs2) : f16_recip7(vs2);
},
{
vd = f32_recip7(vs2);
diff --git a/riscv/insns/vfrsqrt7_v.h b/riscv/insns/vfrsqrt7_v.h
index 2505639..8b3290e 100644
--- a/riscv/insns/vfrsqrt7_v.h
+++ b/riscv/insns/vfrsqrt7_v.h
@@ -1,7 +1,7 @@
// vfrsqrt7.v vd, vs2, vm
VI_VFP_V_LOOP
({
- vd = f16_rsqrte7(vs2);
+ vd = P.VU.altfmt ? bf16_rsqrte7(vs2) : f16_rsqrte7(vs2);
},
{
vd = f32_rsqrte7(vs2);
diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h
index 7fb26a5..2ae5f7c 100644
--- a/riscv/insns/vfrsub_vf.h
+++ b/riscv/insns/vfrsub_vf.h
@@ -1,7 +1,7 @@
// vfsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
- vd = f16_sub(rs1, vs2);
+ vd = VFP_OP_16(sub, rs1, vs2);
},
{
vd = f32_sub(rs1, vs2);
diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h
index ce06185..c84361a 100644
--- a/riscv/insns/vfsgnj_vf.h
+++ b/riscv/insns/vfsgnj_vf.h
@@ -1,7 +1,8 @@
// vfsgnj vd, vs2, vs1
VI_VFP_VF_LOOP
({
- vd = fsgnj16(vs2.v, rs1.v, false, false);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, rs1.v, false, false)
+ : fsgnj16(vs2.v, rs1.v, false, false);
},
{
vd = fsgnj32(vs2.v, rs1.v, false, false);
diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h
index 722cb29..a31c8ba 100644
--- a/riscv/insns/vfsgnj_vv.h
+++ b/riscv/insns/vfsgnj_vv.h
@@ -1,7 +1,8 @@
// vfsgnj
VI_VFP_VV_LOOP
({
- vd = fsgnj16(vs2.v, vs1.v, false, false);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, vs1.v, false, false)
+ : fsgnj16(vs2.v, vs1.v, false, false);
},
{
vd = fsgnj32(vs2.v, vs1.v, false, false);
diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h
index e489412..9313cf3 100644
--- a/riscv/insns/vfsgnjn_vf.h
+++ b/riscv/insns/vfsgnjn_vf.h
@@ -1,7 +1,8 @@
// vfsgnn
VI_VFP_VF_LOOP
({
- vd = fsgnj16(vs2.v, rs1.v, true, false);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, rs1.v, true, false)
+ : fsgnj16(vs2.v, rs1.v, true, false);
},
{
vd = fsgnj32(vs2.v, rs1.v, true, false);
diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h
index 1d91f69..3b6b270 100644
--- a/riscv/insns/vfsgnjn_vv.h
+++ b/riscv/insns/vfsgnjn_vv.h
@@ -1,7 +1,8 @@
// vfsgnn
VI_VFP_VV_LOOP
({
- vd = fsgnj16(vs2.v, vs1.v, true, false);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, vs1.v, true, false)
+ : fsgnj16(vs2.v, vs1.v, true, false);
},
{
vd = fsgnj32(vs2.v, vs1.v, true, false);
diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h
index 7be164c..081f797 100644
--- a/riscv/insns/vfsgnjx_vf.h
+++ b/riscv/insns/vfsgnjx_vf.h
@@ -1,7 +1,8 @@
// vfsgnx
VI_VFP_VF_LOOP
({
- vd = fsgnj16(vs2.v, rs1.v, false, true);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, rs1.v, false, true)
+ : fsgnj16(vs2.v, rs1.v, false, true);
},
{
vd = fsgnj32(vs2.v, rs1.v, false, true);
diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h
index b04b845..db05439 100644
--- a/riscv/insns/vfsgnjx_vv.h
+++ b/riscv/insns/vfsgnjx_vv.h
@@ -1,7 +1,8 @@
// vfsgnx
VI_VFP_VV_LOOP
({
- vd = fsgnj16(vs2.v, vs1.v, false, true);
+ vd = P.VU.altfmt ? bfsgnj16(vs2.v, vs1.v, false, true)
+ : fsgnj16(vs2.v, vs1.v, false, true);
},
{
vd = fsgnj32(vs2.v, vs1.v, false, true);
diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h
index 40f3c18..e8374f4 100644
--- a/riscv/insns/vfslide1down_vf.h
+++ b/riscv/insns/vfslide1down_vf.h
@@ -23,7 +23,7 @@ if (i != vl - 1) {
} else {
switch (P.VU.vsew) {
case e16:
- P.VU.elt<float16_t>(rd_num, vl - 1, true) = FRS1_H;
+ P.VU.elt<float16_t>(rd_num, vl - 1, true) = P.VU.altfmt ? FRS1_BF : FRS1_H;
break;
case e32:
P.VU.elt<float32_t>(rd_num, vl - 1, true) = FRS1_F;
diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h
index 4e4e499..7ca6bf3 100644
--- a/riscv/insns/vfslide1up_vf.h
+++ b/riscv/insns/vfslide1up_vf.h
@@ -23,7 +23,7 @@ if (i != 0) {
} else {
switch (P.VU.vsew) {
case e16:
- P.VU.elt<float16_t>(rd_num, 0, true) = FRS1_H;
+ P.VU.elt<float16_t>(rd_num, 0, true) = P.VU.altfmt ? FRS1_BF : FRS1_H;
break;
case e32:
P.VU.elt<float32_t>(rd_num, 0, true) = FRS1_F;
diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h
index 86f0148..5a866e1 100644
--- a/riscv/insns/vfsqrt_v.h
+++ b/riscv/insns/vfsqrt_v.h
@@ -1,4 +1,6 @@
// vsqrt.v vd, vd2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_V_LOOP
({
vd = f16_sqrt(vs2);
diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h
index fc6877c..a011ae8 100644
--- a/riscv/insns/vfsub_vf.h
+++ b/riscv/insns/vfsub_vf.h
@@ -1,7 +1,9 @@
// vfsub.vf vd, vs2, rs1
+require_zvfbfa
+
VI_VFP_VF_LOOP
({
- vd = f16_sub(vs2, rs1);
+ vd = VFP_OP_16(sub, vs2, rs1);
},
{
vd = f32_sub(vs2, rs1);
diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h
index b0403f1..db4cd5e 100644
--- a/riscv/insns/vfsub_vv.h
+++ b/riscv/insns/vfsub_vv.h
@@ -1,7 +1,9 @@
// vfsub.vv vd, vs2, vs1
+require_zvfbfa
+
VI_VFP_VV_LOOP
({
- vd = f16_sub(vs2, vs1);
+ vd = VFP_OP_16(sub, vs2, vs1);
},
{
vd = f32_sub(vs2, vs1);
diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h
index 7255a50..9dd56a2 100644
--- a/riscv/insns/vfwadd_vv.h
+++ b/riscv/insns/vfwadd_vv.h
@@ -1,4 +1,7 @@
// vfwadd.vv vd, vs2, vs1
+
+require_zvfbfa
+
VI_VFP_VV_LOOP_WIDE
({
vd = f32_add(vs2, vs1);
diff --git a/riscv/insns/vfwbdot_vv.h b/riscv/insns/vfwbdot_vv.h
new file mode 100644
index 0000000..b8d35a7
--- /dev/null
+++ b/riscv/insns/vfwbdot_vv.h
@@ -0,0 +1,15 @@
+VI_VFP_BASE;
+ZVBDOT_INIT(2);
+
+switch (P.VU.vsew) {
+ case 16: {
+ if (P.VU.altfmt) {
+ require_extension(EXT_ZVFWBDOT16BF);
+ ZVBDOT_LOOP(uint16_t, uint16_t, float32_t, zvfwbdot16bf_dot_acc);
+ } else {
+ require(false);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h
index 111a231..253b92d 100644
--- a/riscv/insns/vfwcvt_f_f_v.h
+++ b/riscv/insns/vfwcvt_f_f_v.h
@@ -1,7 +1,7 @@
// vfwcvt.f.f.v vd, vs2, vm
VI_VFP_WCVT_FP_TO_FP(
- { vd = f16_to_f32(vs2); }, // BODY16
- { vd = f32_to_f64(vs2); }, // BODY32
- { require_extension(EXT_ZVFHMIN); }, // CHECK16
- { require_extension('D'); } // CHECK32
+ { vd = P.VU.altfmt ? bf16_to_f32(vs2) : f16_to_f32(vs2); }, // BODY16
+ { vd = f32_to_f64(vs2); }, // BODY32
+ { require_zvfbfa_or_zvfhmin }, // CHECK16
+ { require(p->get_isa().get_zvd()); } // CHECK32
)
diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h
index c7678dc..76a0696 100644
--- a/riscv/insns/vfwcvt_f_x_v.h
+++ b/riscv/insns/vfwcvt_f_x_v.h
@@ -1,10 +1,11 @@
// vfwcvt.f.x.v vd, vs2, vm
+
VI_VFP_WCVT_INT_TO_FP(
- { vd = i32_to_f16(vs2); }, // BODY8
+ { vd = P.VU.altfmt ? i32_to_bf16(vs2) : i32_to_f16(vs2); }, // BODY8
{ vd = i32_to_f32(vs2); }, // BODY16
{ vd = i32_to_f64(vs2); }, // BODY32
- { require(p->extension_enabled(EXT_ZVFH)); }, // CHECK8
- { require_extension('F'); }, // CHECK16
- { require_extension('D'); }, // CHECK32
+ { require_zvfbfa_or_zvfh; }, // CHECK8
+ { require(p->get_isa().get_zvf()); }, // CHECK16
+ { require(p->get_isa().get_zvd()); }, // CHECK32
int // sign
)
diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h
index e3b7e9f..f322068 100644
--- a/riscv/insns/vfwcvt_f_xu_v.h
+++ b/riscv/insns/vfwcvt_f_xu_v.h
@@ -1,10 +1,11 @@
// vfwcvt.f.xu.v vd, vs2, vm
+
VI_VFP_WCVT_INT_TO_FP(
- { vd = ui32_to_f16(vs2); }, // BODY8
+ { vd = P.VU.altfmt ? ui32_to_bf16(vs2) : ui32_to_f16(vs2); }, // BODY8
{ vd = ui32_to_f32(vs2); }, // BODY16
{ vd = ui32_to_f64(vs2); }, // BODY32
- { require(p->extension_enabled(EXT_ZVFH)); }, // CHECK8
- { require_extension('F'); }, // CHECK16
- { require_extension('D'); }, // CHECK32
+ { require_zvfbfa_or_zvfh; }, // CHECK8
+ { require(p->get_isa().get_zvf()); }, // CHECK16
+ { require(p->get_isa().get_zvd()); }, // CHECK32
uint // sign
)
diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h
index 9caf617..3ed454b 100644
--- a/riscv/insns/vfwcvt_rtz_x_f_v.h
+++ b/riscv/insns/vfwcvt_rtz_x_f_v.h
@@ -1,8 +1,10 @@
// vfwcvt.rtz.x.f.v vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_WCVT_FP_TO_INT(
{ vd = f16_to_i32(vs2, softfloat_round_minMag, true); }, // BODY16
{ vd = f32_to_i64(vs2, softfloat_round_minMag, true); }, // BODY32
{ require_extension(EXT_ZVFH); }, // CHECK16
- { require_extension('F'); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK32
int // sign
)
diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h
index a25d847..e0c737c 100644
--- a/riscv/insns/vfwcvt_rtz_xu_f_v.h
+++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h
@@ -1,8 +1,10 @@
// vfwcvt.rtz,xu.f.v vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_WCVT_FP_TO_INT(
{ vd = f16_to_ui32(vs2, softfloat_round_minMag, true); }, // BODY16
{ vd = f32_to_ui64(vs2, softfloat_round_minMag, true); }, // BODY32
{ require_extension(EXT_ZVFH); }, // CHECK16
- { require_extension('F'); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK32
uint // sign
)
diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h
index 2d536ad..b974c86 100644
--- a/riscv/insns/vfwcvt_x_f_v.h
+++ b/riscv/insns/vfwcvt_x_f_v.h
@@ -1,8 +1,10 @@
// vfwcvt.x.f.v vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_WCVT_FP_TO_INT(
{ vd = f16_to_i32(vs2, softfloat_roundingMode, true); }, // BODY16
{ vd = f32_to_i64(vs2, softfloat_roundingMode, true); }, // BODY32
{ require_extension(EXT_ZVFH); }, // CHECK16
- { require_extension('F'); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK32
int // sign
)
diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h
index 37201f5..7cf0dab 100644
--- a/riscv/insns/vfwcvt_xu_f_v.h
+++ b/riscv/insns/vfwcvt_xu_f_v.h
@@ -1,8 +1,10 @@
// vfwcvt.xu.f.v vd, vs2, vm
+VI_NON_ALTFMT_INSN
+
VI_VFP_WCVT_FP_TO_INT(
{ vd = f16_to_ui32(vs2, softfloat_roundingMode, true); }, // BODY16
{ vd = f32_to_ui64(vs2, softfloat_roundingMode, true); }, // BODY32
{ require_extension(EXT_ZVFH); }, // CHECK16
- { require_extension('F'); }, // CHECK32
+ { require(p->get_isa().get_zvf()); }, // CHECK32
uint // sign
)
diff --git a/riscv/insns/vfwcvtbf16_f_f_v.h b/riscv/insns/vfwcvtbf16_f_f_v.h
index ee9a59c..7a4dca4 100644
--- a/riscv/insns/vfwcvtbf16_f_f_v.h
+++ b/riscv/insns/vfwcvtbf16_f_f_v.h
@@ -1,5 +1,7 @@
// vfwcvtbf16.f.f.v vd, vs2, vm
-VI_VFP_WCVT_FP_TO_BF16(
- { vd = bf16_to_f32(vs2); }, // BODY16
- { require_extension(EXT_ZVFBFMIN); } // CHECK16
+VI_VFP_WCVT_OFP8_BF16_FP(
+ { vd = P.VU.altfmt ? e5m2_to_bf16(vs2) : e4m3_to_bf16(vs2); }, // BODY8
+ { vd = bf16_to_f32(vs2); }, // BODY16
+ { require(p->extension_enabled(EXT_ZVFOFP8MIN)); }, // CHECK8
+ { require_extension(EXT_ZVFBFMIN); } // CHECK16
)
diff --git a/riscv/insns/vfwldot_vv.h b/riscv/insns/vfwldot_vv.h
new file mode 100644
index 0000000..63a4e47
--- /dev/null
+++ b/riscv/insns/vfwldot_vv.h
@@ -0,0 +1,15 @@
+VI_VFP_BASE;
+ZVLDOT_INIT(2);
+
+switch (P.VU.vsew) {
+ case 16: {
+ if (P.VU.altfmt) {
+ require_extension(EXT_ZVFWLDOT16BF);
+ ZVLDOT_LOOP(uint16_t, uint16_t, float32_t, zvfwbdot16bf_dot_acc);
+ } else {
+ require(false);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h
index bcbfe74..728678c 100644
--- a/riscv/insns/vghsh_vv.h
+++ b/riscv/insns/vghsh_vv.h
@@ -2,9 +2,13 @@
#include "zvk_ext_macros.h"
+const uint32_t EGS = 4;
+
require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);
+require(P.VU.vl->read() % EGS == 0);
+VI_CHECK_SSS(true)
VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
{},
diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h
index 820b396..0d223e8 100644
--- a/riscv/insns/vgmul_vv.h
+++ b/riscv/insns/vgmul_vv.h
@@ -2,9 +2,13 @@
#include "zvk_ext_macros.h"
+const uint32_t EGS = 4;
+
require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);
+require(P.VU.vl->read() % EGS == 0);
+VI_CHECK_SSS(false)
VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index 49c804c..00155db 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -21,23 +21,22 @@ for (reg_t i = 0; i < vl; ++i) {
}
}
- bool use_ori = (insn.v_vm() == 0) && !do_mask;
+ // Bypass masked-off elements
+ if ((insn.v_vm() == 0) && !do_mask)
+ continue;
+
switch (sew) {
case e8:
- P.VU.elt<uint8_t>(rd_num, i, true) = use_ori ?
- P.VU.elt<uint8_t>(rd_num, i) : cnt;
+ P.VU.elt<uint8_t>(rd_num, i, true) = cnt;
break;
case e16:
- P.VU.elt<uint16_t>(rd_num, i, true) = use_ori ?
- P.VU.elt<uint16_t>(rd_num, i) : cnt;
+ P.VU.elt<uint16_t>(rd_num, i, true) = cnt;
break;
case e32:
- P.VU.elt<uint32_t>(rd_num, i, true) = use_ori ?
- P.VU.elt<uint32_t>(rd_num, i) : cnt;
+ P.VU.elt<uint32_t>(rd_num, i, true) = cnt;
break;
default:
- P.VU.elt<uint64_t>(rd_num, i, true) = use_ori ?
- P.VU.elt<uint64_t>(rd_num, i) : cnt;
+ P.VU.elt<uint64_t>(rd_num, i, true) = cnt;
break;
}
diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h
index e9a87cf..49129f7 100644
--- a/riscv/insns/vmandn_mm.h
+++ b/riscv/insns/vmandn_mm.h
@@ -1,2 +1,2 @@
// vmandn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 & ~vs1);
+VI_LOOP_MASK(vs2 & !vs1);
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index a4d7c50..adb5d0a 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -1,7 +1,7 @@
// vmfeq.vf vd, vs2, fs1
VI_VFP_VF_LOOP_CMP
({
- res = f16_eq(vs2, rs1);
+ res = VFP_OP_16(eq, vs2, rs1);
},
{
res = f32_eq(vs2, rs1);
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index b08ce98..97b7a0d 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -1,7 +1,9 @@
// vmfeq.vv vd, vs2, vs1
+require_zvfbfa
+
VI_VFP_VV_LOOP_CMP
({
- res = f16_eq(vs2, vs1);
+ res = VFP_OP_16(eq, vs2, vs1);
},
{
res = f32_eq(vs2, vs1);
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index ab4df5c..eb4bdce 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,7 +1,7 @@
// vmfge.vf vd, vs2, rs1
VI_VFP_VF_LOOP_CMP
({
- res = f16_le(rs1, vs2);
+ res = VFP_OP_16(le, rs1, vs2);
},
{
res = f32_le(rs1, vs2);
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index dcc3ea3..bfcf251 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,7 +1,7 @@
// vmfgt.vf vd, vs2, rs1
VI_VFP_VF_LOOP_CMP
({
- res = f16_lt(rs1, vs2);
+ res = VFP_OP_16(lt, rs1, vs2);
},
{
res = f32_lt(rs1, vs2);
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index a942705..9415516 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -1,7 +1,7 @@
// vmfle.vf vd, vs2, rs1
VI_VFP_VF_LOOP_CMP
({
- res = f16_le(vs2, rs1);
+ res = VFP_OP_16(le, vs2, rs1);
},
{
res = f32_le(vs2, rs1);
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index dd6f81d..5fe2d44 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,7 +1,9 @@
// vmfle.vv vd, vs2, rs1
+require_zvfbfa
+
VI_VFP_VV_LOOP_CMP
({
- res = f16_le(vs2, vs1);
+ res = VFP_OP_16(le, vs2, vs1);
},
{
res = f32_le(vs2, vs1);
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index 110dbd1..62173bb 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,7 +1,7 @@
// vmflt.vf vd, vs2, rs1
VI_VFP_VF_LOOP_CMP
({
- res = f16_lt(vs2, rs1);
+ res = VFP_OP_16(lt, vs2, rs1);
},
{
res = f32_lt(vs2, rs1);
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index 35f8d70..b019b9c 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,7 +1,9 @@
// vmflt.vv vd, vs2, vs1
+require_zvfbfa
+
VI_VFP_VV_LOOP_CMP
({
- res = f16_lt(vs2, vs1);
+ res = VFP_OP_16(lt, vs2, vs1);
},
{
res = f32_lt(vs2, vs1);
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index 1b61d57..74b788f 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -1,7 +1,7 @@
// vmfne.vf vd, vs2, rs1
VI_VFP_VF_LOOP_CMP
({
- res = !f16_eq(vs2, rs1);
+ res = !VFP_OP_16(eq, vs2, rs1);
},
{
res = !f32_eq(vs2, rs1);
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 4447c3c..017206c 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -1,7 +1,9 @@
// vmfne.vv vd, vs2, rs1
+require_zvfbfa
+
VI_VFP_VV_LOOP_CMP
({
- res = !f16_eq(vs2, vs1);
+ res = !VFP_OP_16(eq, vs2, vs1);
},
{
res = !f32_eq(vs2, vs1);
diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h
index 5a3ab09..4659e2f 100644
--- a/riscv/insns/vmnand_mm.h
+++ b/riscv/insns/vmnand_mm.h
@@ -1,2 +1,2 @@
// vmnand.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 & vs1));
+VI_LOOP_MASK(!(vs2 & vs1));
diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h
index ab93378..37327c0 100644
--- a/riscv/insns/vmnor_mm.h
+++ b/riscv/insns/vmnor_mm.h
@@ -1,2 +1,2 @@
// vmnor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 | vs1));
+VI_LOOP_MASK(!(vs2 | vs1));
diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h
index 23026f5..71acc05 100644
--- a/riscv/insns/vmorn_mm.h
+++ b/riscv/insns/vmorn_mm.h
@@ -1,2 +1,2 @@
// vmorn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 | ~vs1);
+VI_LOOP_MASK(vs2 | !vs1);
diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h
index e861a33..273d3e8 100644
--- a/riscv/insns/vmulh_vv.h
+++ b/riscv/insns/vmulh_vv.h
@@ -1,4 +1,6 @@
// vmulh vd, vs2, vs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VV_LOOP
({
vd = ((int128_t)vs2 * vs1) >> sew;
diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h
index b6b5503..aaf591c 100644
--- a/riscv/insns/vmulh_vx.h
+++ b/riscv/insns/vmulh_vx.h
@@ -1,4 +1,6 @@
// vmulh vd, vs2, rs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VX_LOOP
({
vd = ((int128_t)vs2 * rs1) >> sew;
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
index e1c0ba6..3903d52 100644
--- a/riscv/insns/vmulhsu_vv.h
+++ b/riscv/insns/vmulhsu_vv.h
@@ -1,4 +1,6 @@
// vmulhsu.vv vd, vs2, vs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VV_SU_LOOP({
vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew;
})
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
index 4619ea8..b8210bc 100644
--- a/riscv/insns/vmulhsu_vx.h
+++ b/riscv/insns/vmulhsu_vx.h
@@ -1,4 +1,6 @@
// vmulhsu.vx vd, vs2, rs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VX_SU_LOOP({
vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew;
})
diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h
index 0ff488c..5e44aec 100644
--- a/riscv/insns/vmulhu_vv.h
+++ b/riscv/insns/vmulhu_vv.h
@@ -1,4 +1,6 @@
// vmulhu vd, vs2, vs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VV_ULOOP
({
vd = ((uint128_t)vs2 * vs1) >> sew;
diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h
index 672ad32..35e6ed6 100644
--- a/riscv/insns/vmulhu_vx.h
+++ b/riscv/insns/vmulhu_vx.h
@@ -1,4 +1,6 @@
// vmulhu vd ,vs2, rs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VX_ULOOP
({
vd = ((uint128_t)vs2 * rs1) >> sew;
diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h
index 0736d5b..8db61c2 100644
--- a/riscv/insns/vmxnor_mm.h
+++ b/riscv/insns/vmxnor_mm.h
@@ -1,2 +1,2 @@
// vmnxor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 ^ vs1));
+VI_LOOP_MASK(!(vs2 ^ vs1));
diff --git a/riscv/insns/vqbdots_vv.h b/riscv/insns/vqbdots_vv.h
new file mode 100644
index 0000000..55c3dd2
--- /dev/null
+++ b/riscv/insns/vqbdots_vv.h
@@ -0,0 +1,23 @@
+ZVBDOT_INIT(4);
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVQBDOT8I);
+ if (P.VU.altfmt) {
+ ZVBDOT_SIMPLE_LOOP(int8_t, int8_t, uint32_t);
+ } else {
+ ZVBDOT_SIMPLE_LOOP(uint8_t, int8_t, uint32_t);
+ }
+ break;
+ }
+ case 16: {
+ require_extension(EXT_ZVQBDOT16I);
+ if (P.VU.altfmt) {
+ ZVBDOT_SIMPLE_LOOP(int16_t, int16_t, uint64_t);
+ } else {
+ ZVBDOT_SIMPLE_LOOP(uint16_t, int16_t, uint64_t);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vqbdotu_vv.h b/riscv/insns/vqbdotu_vv.h
new file mode 100644
index 0000000..a73d568
--- /dev/null
+++ b/riscv/insns/vqbdotu_vv.h
@@ -0,0 +1,23 @@
+ZVBDOT_INIT(4);
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVQBDOT8I);
+ if (P.VU.altfmt) {
+ ZVBDOT_SIMPLE_LOOP(int8_t, uint8_t, uint32_t);
+ } else {
+ ZVBDOT_SIMPLE_LOOP(uint8_t, uint8_t, uint32_t);
+ }
+ break;
+ }
+ case 16: {
+ require_extension(EXT_ZVQBDOT16I);
+ if (P.VU.altfmt) {
+ ZVBDOT_SIMPLE_LOOP(int16_t, uint16_t, uint64_t);
+ } else {
+ ZVBDOT_SIMPLE_LOOP(uint16_t, uint16_t, uint64_t);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vqldots_vv.h b/riscv/insns/vqldots_vv.h
new file mode 100644
index 0000000..ce6376a
--- /dev/null
+++ b/riscv/insns/vqldots_vv.h
@@ -0,0 +1,23 @@
+ZVLDOT_INIT(4);
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVQLDOT8I);
+ if (P.VU.altfmt) {
+ ZVLDOT_SIMPLE_LOOP(int8_t, int8_t, uint32_t);
+ } else {
+ ZVLDOT_SIMPLE_LOOP(uint8_t, int8_t, uint32_t);
+ }
+ break;
+ }
+ case 16: {
+ require_extension(EXT_ZVQLDOT16I);
+ if (P.VU.altfmt) {
+ ZVLDOT_SIMPLE_LOOP(int16_t, int16_t, uint64_t);
+ } else {
+ ZVLDOT_SIMPLE_LOOP(uint16_t, int16_t, uint64_t);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vqldotu_vv.h b/riscv/insns/vqldotu_vv.h
new file mode 100644
index 0000000..2b674b1
--- /dev/null
+++ b/riscv/insns/vqldotu_vv.h
@@ -0,0 +1,23 @@
+ZVLDOT_INIT(4);
+
+switch (P.VU.vsew) {
+ case 8: {
+ require_extension(EXT_ZVQLDOT8I);
+ if (P.VU.altfmt) {
+ ZVLDOT_SIMPLE_LOOP(int8_t, uint8_t, uint32_t);
+ } else {
+ ZVLDOT_SIMPLE_LOOP(uint8_t, uint8_t, uint32_t);
+ }
+ break;
+ }
+ case 16: {
+ require_extension(EXT_ZVQLDOT16I);
+ if (P.VU.altfmt) {
+ ZVLDOT_SIMPLE_LOOP(int16_t, uint16_t, uint64_t);
+ } else {
+ ZVLDOT_SIMPLE_LOOP(uint16_t, uint16_t, uint64_t);
+ }
+ break;
+ }
+ default: require(false);
+}
diff --git a/riscv/insns/vrev8_v.h b/riscv/insns/vrev8_v.h
index f26c5a0..e39c5c0 100644
--- a/riscv/insns/vrev8_v.h
+++ b/riscv/insns/vrev8_v.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
VI_V_ULOOP
({
diff --git a/riscv/insns/vrol_vv.h b/riscv/insns/vrol_vv.h
index fb2e483..a2ac832 100644
--- a/riscv/insns/vrol_vv.h
+++ b/riscv/insns/vrol_vv.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
// 'mask' selects the low log2(vsew) bits of the shift amount,
// to limit the maximum shift to "vsew - 1" bits.
diff --git a/riscv/insns/vrol_vx.h b/riscv/insns/vrol_vx.h
index b0c89a2..8e4b41b 100644
--- a/riscv/insns/vrol_vx.h
+++ b/riscv/insns/vrol_vx.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
// 'mask' selects the low log2(vsew) bits of the shift amount,
// to limit the maximum shift to "vsew - 1" bits.
diff --git a/riscv/insns/vror_vi.h b/riscv/insns/vror_vi.h
index 1269c3d..6ae9fcd 100644
--- a/riscv/insns/vror_vi.h
+++ b/riscv/insns/vror_vi.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
// 'mask' selects the low log2(vsew) bits of the shift amount,
// to limit the maximum shift to "vsew - 1" bits.
diff --git a/riscv/insns/vror_vv.h b/riscv/insns/vror_vv.h
index c649c6d..276d7ec 100644
--- a/riscv/insns/vror_vv.h
+++ b/riscv/insns/vror_vv.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
// 'mask' selects the low log2(vsew) bits of the shift amount,
// to limit the maximum shift to "vsew - 1" bits.
diff --git a/riscv/insns/vror_vx.h b/riscv/insns/vror_vx.h
index 50c8e5c..98e1248 100644
--- a/riscv/insns/vror_vx.h
+++ b/riscv/insns/vror_vx.h
@@ -2,7 +2,7 @@
#include "zvk_ext_macros.h"
-require_zvbb;
+require_zvkb;
// 'mask' selects the low log2(vsew) bits of the shift amount,
// to limit the maximum shift to "vsew - 1" bits.
diff --git a/riscv/insns/vsm3c_vi.h b/riscv/insns/vsm3c_vi.h
index b3e8121..f9375a5 100644
--- a/riscv/insns/vsm3c_vi.h
+++ b/riscv/insns/vsm3c_vi.h
@@ -3,6 +3,7 @@
#include "zvksh_ext_macros.h"
require_vsm3_constraints;
+VI_CHECK_SSS(false)
VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP(
{},
diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h
index dd6cb52..388b79f 100644
--- a/riscv/insns/vsm3me_vv.h
+++ b/riscv/insns/vsm3me_vv.h
@@ -13,6 +13,7 @@
(ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))
require_vsm3_constraints;
+VI_CHECK_SSS(true)
VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(
{},
diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h
index 8f52e68..dd6f67d 100644
--- a/riscv/insns/vsm4k_vi.h
+++ b/riscv/insns/vsm4k_vi.h
@@ -15,6 +15,7 @@ static constexpr uint32_t zvksed_ck[32] = {
};
require_vsm4_constraints;
+VI_CHECK_SSS(false)
VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
{},
diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h
index 44011eb..8db1050 100644
--- a/riscv/insns/vsm4r_vs.h
+++ b/riscv/insns/vsm4r_vs.h
@@ -3,8 +3,10 @@
#include "zvksed_ext_macros.h"
require_vsm4_constraints;
+require_align(insn.rd(), P.VU.vflmul);
+require_vs2_align_eglmul(128);
// No overlap of vd and vs2.
-require(insn.rd() != insn.rs2());
+require_noover_eglmul(insn.rd(), insn.rs2());
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h
index 9a18cec..18afee6 100644
--- a/riscv/insns/vsm4r_vv.h
+++ b/riscv/insns/vsm4r_vv.h
@@ -2,7 +2,9 @@
#include "zvksed_ext_macros.h"
+
require_vsm4_constraints;
+VI_CHECK_SSS(false)
VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
index c1d0a57..bacd757 100644
--- a/riscv/insns/vsmul_vv.h
+++ b/riscv/insns/vsmul_vv.h
@@ -1,4 +1,6 @@
// vsmul.vv vd, vs2, vs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VV_LOOP
({
VRM xrm = P.VU.get_vround_mode();
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
index c2e531c..62dfa7c 100644
--- a/riscv/insns/vsmul_vx.h
+++ b/riscv/insns/vsmul_vx.h
@@ -1,4 +1,6 @@
// vsmul.vx vd, vs2, rs1
+require(p->extension_enabled('V') || P.VU.vsew < e64);
+
VI_VX_LOOP
({
VRM xrm = P.VU.get_vround_mode();
diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h
index 5c58927..4cf616d 100644
--- a/riscv/insns/vsra_vi.h
+++ b/riscv/insns/vsra_vi.h
@@ -1,5 +1,5 @@
// vsra.vi vd, vs2, zimm5
VI_VI_LOOP
({
- vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+ vd = vs2 >> (insn.v_zimm5() & (sew - 1));
})
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index cbdf47a..12f1240 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -1,8 +1,8 @@
-// vssra.vi vd, vs2, simm5
+// vssra.vi vd, vs2, zimm5
VI_VI_LOOP
({
VRM xrm = P.VU.get_vround_mode();
- int sh = simm5 & (sew - 1);
+ int sh = insn.v_zimm5() & (sew - 1);
int128_t val = vs2;
INT_ROUNDING(val, xrm, sh);
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 74fa37c..a2de49e 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -1,4 +1,4 @@
-// vssra.vi vd, vs2, simm5
+// vssrl.vi vd, vs2, zimm5
VI_VI_ULOOP
({
VRM xrm = P.VU.get_vround_mode();
diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h
index 13b5eb4..866cd78 100644
--- a/riscv/insns/vwsll_vi.h
+++ b/riscv/insns/vwsll_vi.h
@@ -3,6 +3,7 @@
#include "zvk_ext_macros.h"
require_zvbb;
+VI_CHECK_DSS(false);
VI_ZVK_VI_WIDENING_ULOOP({
const reg_t shift = zimm5 & ((2 * sew) - 1);
diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h
index 5a64c6c..180fe97 100644
--- a/riscv/insns/vwsll_vv.h
+++ b/riscv/insns/vwsll_vv.h
@@ -3,6 +3,7 @@
#include "zvk_ext_macros.h"
require_zvbb;
+VI_CHECK_DSS(true);
VI_ZVK_VV_WIDENING_ULOOP({
const reg_t shift = (vs1 & ((2 * sew) - 1));
diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h
index 5264e80..4137d39 100644
--- a/riscv/insns/vwsll_vx.h
+++ b/riscv/insns/vwsll_vx.h
@@ -3,6 +3,7 @@
#include "zvk_ext_macros.h"
require_zvbb;
+VI_CHECK_DSS(false);
VI_ZVK_VX_WIDENING_ULOOP({
const reg_t shift = (rs1 & ((2 * sew) - 1));
diff --git a/riscv/insns/wrs_nto.h b/riscv/insns/wrs_nto.h
index 710e670..7a4fe67 100644
--- a/riscv/insns/wrs_nto.h
+++ b/riscv/insns/wrs_nto.h
@@ -1,3 +1,5 @@
+require_extension(EXT_ZAWRS);
+
if (get_field(STATE.mstatus->read(), MSTATUS_TW)) {
require_privilege(PRV_M);
} else if (STATE.v) {
diff --git a/riscv/insns/wrs_sto.h b/riscv/insns/wrs_sto.h
index 4e71aa0..24d37a7 100644
--- a/riscv/insns/wrs_sto.h
+++ b/riscv/insns/wrs_sto.h
@@ -1 +1,3 @@
+require_extension(EXT_ZAWRS);
+
// WRS.STO stalls for a short duration
diff --git a/riscv/interactive.cc b/riscv/interactive.cc
index 9afc718..55406b8 100644
--- a/riscv/interactive.cc
+++ b/riscv/interactive.cc
@@ -72,6 +72,12 @@ processor_t *sim_t::get_core(const std::string& i)
return get_core(p);
}
+static void do_write(int fd, const void* buf, size_t n)
+{
+ auto res = write(fd, buf, n);
+ (void) res;
+}
+
static void clear_str(bool noncanonical, int fd, std::string target_str)
{
if (noncanonical)
@@ -83,7 +89,7 @@ static void clear_str(bool noncanonical, int fd, std::string target_str)
clear_motion += ' ';
}
clear_motion += '\r';
- (void) write(fd, clear_motion.c_str(), clear_motion.size() + 1);
+ do_write(fd, clear_motion.c_str(), clear_motion.size() + 1);
}
}
@@ -96,7 +102,7 @@ static void send_key(bool noncanonical, int fd, keybuffer_t key_code, const int
{
key_motion += (char) ((key_code >> (i * BITS_PER_CHAR)) & 0xff);
}
- (void) write(fd, key_motion.c_str(), len);
+ do_write(fd, key_motion.c_str(), len);
}
}
@@ -144,7 +150,7 @@ static std::string readline(int fd)
cursor_pos--;
s.erase(cursor_pos, 1);
if (noncanonical)
- (void) write(fd, s.c_str(), s.size() + 1);
+ do_write(fd, s.c_str(), s.size() + 1);
// move cursor by left arrow key
for (unsigned i = 0; i < s.size() - cursor_pos; i++) {
send_key(noncanonical, fd, KEYCODE_LEFT, 3);
@@ -176,7 +182,7 @@ static std::string readline(int fd)
history_index = std::min(history_commands.size(), history_index + 1);
s = history_commands[history_commands.size() - history_index];
if (noncanonical)
- (void) write(fd, s.c_str(), s.size() + 1);
+ do_write(fd, s.c_str(), s.size() + 1);
cursor_pos = s.size();
}
key_buffer = 0;
@@ -192,7 +198,7 @@ static std::string readline(int fd)
s = history_commands[history_commands.size() - history_index];
}
if (noncanonical)
- (void) write(fd, s.c_str(), s.size() + 1);
+ do_write(fd, s.c_str(), s.size() + 1);
cursor_pos = s.size();
}
key_buffer = 0;
@@ -221,7 +227,7 @@ static std::string readline(int fd)
break;
case KEYCODE_ENTER:
if (noncanonical)
- (void) write(fd, &ch, 1);
+ do_write(fd, &ch, 1);
if (s.size() > initial_s_len && (history_commands.size() == 0 || s != history_commands[history_commands.size() - 1])) {
history_commands.push_back(s);
}
@@ -236,7 +242,7 @@ static std::string readline(int fd)
s.insert(cursor_pos, 1, ch);
cursor_pos++;
if (noncanonical)
- (void) write(fd, s.c_str(), s.size() + 1);
+ do_write(fd, s.c_str(), s.size() + 1);
// send left arrow key to move cursor
for (unsigned i = 0; i < s.size() - cursor_pos; i++) {
send_key(noncanonical, fd, KEYCODE_LEFT, 3);
diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h
index ea64660..ff2867a 100644
--- a/riscv/isa_parser.h
+++ b/riscv/isa_parser.h
@@ -34,9 +34,13 @@ typedef enum {
EXT_ZKR,
EXT_ZMMUL,
EXT_ZVFH,
+ EXT_ZVFBFA,
EXT_ZVFHMIN,
+ EXT_ZVFOFP4MIN,
+ EXT_ZVFOFP8MIN,
EXT_SMEPMP,
EXT_SMSTATEEN,
+ EXT_SMPMPMT,
EXT_SMRNMI,
EXT_SSCOFPMF,
EXT_SVADU,
@@ -44,12 +48,16 @@ typedef enum {
EXT_SVNAPOT,
EXT_SVPBMT,
EXT_SVINVAL,
+ EXT_SVUKTE,
+ EXT_SVRSW60T59B,
EXT_ZDINX,
EXT_ZFA,
EXT_ZFBFMIN,
EXT_ZFINX,
EXT_ZHINX,
EXT_ZHINXMIN,
+ EXT_ZIBI,
+ EXT_ZICCID,
EXT_ZICBOM,
EXT_ZICBOZ,
EXT_ZICNTR,
@@ -57,6 +65,7 @@ typedef enum {
EXT_ZIHPM,
EXT_ZILSD,
EXT_ZVBB,
+ EXT_ZVKB,
EXT_ZVBC,
EXT_ZVFBFMIN,
EXT_ZVFBFWMA,
@@ -67,6 +76,15 @@ typedef enum {
EXT_ZVKSED,
EXT_ZVKSH,
EXT_ZVQDOTQ,
+ EXT_ZVQBDOT8I,
+ EXT_ZVQBDOT16I,
+ EXT_ZVFQBDOT8F,
+ EXT_ZVFWBDOT16BF,
+ EXT_ZVFBDOT32F,
+ EXT_ZVQLDOT8I,
+ EXT_ZVQLDOT16I,
+ EXT_ZVFQLDOT8F,
+ EXT_ZVFWLDOT16BF,
EXT_SSTC,
EXT_ZAAMO,
EXT_ZALRSC,
@@ -90,16 +108,12 @@ typedef enum {
EXT_SMMPM,
EXT_SMNPM,
EXT_SSNPM,
+ EXT_SMAIA,
+ EXT_SSAIA,
NUM_ISA_EXTENSIONS
} isa_extension_t;
typedef enum {
- IMPL_MMU_SV32,
- IMPL_MMU_SV39,
- IMPL_MMU_SV48,
- IMPL_MMU_SV57,
- IMPL_MMU_SBARE,
- IMPL_MMU,
IMPL_MMU_VMID,
IMPL_MMU_ASID,
} impl_extension_t;
diff --git a/riscv/jtag_dtm.cc b/riscv/jtag_dtm.cc
index 9ca38af..7f9cfe4 100644
--- a/riscv/jtag_dtm.cc
+++ b/riscv/jtag_dtm.cc
@@ -199,6 +199,6 @@ void jtag_dtm_t::update_dr()
}
D(fprintf(stderr, "dmi=0x%lx\n", dmi));
- rti_remaining = required_rti_cycles;
+ rti_remaining = op == DMI_OP_NOP ? 0 : required_rti_cycles;
}
}
diff --git a/riscv/mmu.cc b/riscv/mmu.cc
index 01017f6..6b4a571 100644
--- a/riscv/mmu.cc
+++ b/riscv/mmu.cc
@@ -6,9 +6,10 @@
#include "simif.h"
#include "processor.h"
#include "decode_macros.h"
+#include "platform.h"
-mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc)
- : sim(sim), proc(proc),
+mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz)
+ : sim(sim), proc(proc), blocksz(cache_blocksz),
#ifdef RISCV_ENABLE_DUAL_ENDIAN
target_big_endian(endianness == endianness_big),
#endif
@@ -38,6 +39,7 @@ void mmu_t::flush_tlb()
memset(tlb_insn, -1, sizeof(tlb_insn));
memset(tlb_load, -1, sizeof(tlb_load));
memset(tlb_store, -1, sizeof(tlb_store));
+ memset(pte_cache, -1, sizeof(pte_cache));
flush_icache();
}
@@ -52,6 +54,16 @@ void throw_access_exception(bool virt, reg_t addr, access_type type)
}
}
+[[noreturn]] void throw_page_fault_exception(bool virt, reg_t addr, access_type type)
+{
+ switch (type) {
+ case FETCH: throw trap_instruction_page_fault(virt, addr, 0, 0);
+ case LOAD: throw trap_load_page_fault(virt, addr, 0, 0);
+ case STORE: throw trap_store_page_fault(virt, addr, 0, 0);
+ default: abort();
+ }
+}
+
reg_t mmu_t::translate(mem_access_info_t access_info, reg_t len)
{
reg_t addr = access_info.transformed_vaddr;
@@ -95,18 +107,29 @@ mmu_t::insn_parcel_t mmu_t::fetch_slow_path(reg_t vaddr)
auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_insn, vaddr, TLB_FLAGS);
auto access_info = generate_access_info(vaddr, FETCH, {});
- check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt);
+
+ if (check_triggers_fetch)
+ check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt);
if (!tlb_hit) {
paddr = translate(access_info, sizeof(insn_parcel_t));
host_addr = (uintptr_t)sim->addr_to_mem(paddr);
+ if (proc->extension_enabled(EXT_ZICCID)) {
+ // Maintain exclusion with all store TLBs
+ for (auto [_, p2] : sim->get_harts())
+ p2->mmu->flush_stlb_ppn(paddr >> PGSHIFT);
+
+ tlb_insn_reverse_tags.insert(paddr >> PGSHIFT);
+ }
+
refill_tlb(vaddr, paddr, (char*)host_addr, FETCH);
}
auto res = perform_intrapage_fetch(vaddr, host_addr, paddr);
- check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(res));
+ if (check_triggers_fetch)
+ check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(res));
return res;
}
@@ -140,8 +163,8 @@ reg_t reg_from_bytes(size_t len, const uint8_t* bytes)
bool mmu_t::mmio_ok(reg_t paddr, access_type UNUSED type)
{
// Disallow access to debug region when not in debug mode
- static_assert(DEBUG_START == 0);
- if (/* paddr >= DEBUG_START && */ paddr <= DEBUG_END && proc && !proc->state.debug_mode)
+ reg_t debug_start = DEBUG_START; // suppress -Wtype-limits
+ if (paddr >= debug_start && paddr - debug_start < DEBUG_SIZE && proc && !proc->state.debug_mode)
return false;
return true;
@@ -229,16 +252,17 @@ void mmu_t::load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_
{
reg_t vaddr = access_info.vaddr;
auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_load, vaddr, TLB_FLAGS);
- if (!tlb_hit || access_info.flags.is_special_access()) {
+ bool special = access_info.flags.is_special_access() && !access_info.flags.lr;
+ if (!tlb_hit || special) {
paddr = translate(access_info, len);
host_addr = (uintptr_t)sim->addr_to_mem(paddr);
- if (!access_info.flags.is_special_access())
+ if (!special)
refill_tlb(vaddr, paddr, (char*)host_addr, LOAD);
+ }
- if (access_info.flags.lr && !sim->reservable(paddr)) {
- throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0);
- }
+ if (access_info.flags.lr && !sim->reservable(paddr)) {
+ throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0);
}
perform_intrapage_load(vaddr, host_addr, paddr, len, bytes, access_info.flags);
@@ -263,7 +287,9 @@ void mmu_t::load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate
auto access_info = generate_access_info(original_addr, LOAD, xlate_flags);
reg_t transformed_addr = access_info.transformed_vaddr;
- check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt);
+
+ if (check_triggers_load)
+ check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt);
if ((transformed_addr & (len - 1)) == 0) {
load_slow_path_intrapage(len, bytes, access_info);
@@ -283,12 +309,14 @@ void mmu_t::load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate
}
}
- while (len > sizeof(reg_t)) {
- check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), bytes));
- len -= sizeof(reg_t);
- bytes += sizeof(reg_t);
+ if (check_triggers_load) {
+ while (len > sizeof(reg_t)) {
+ check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), bytes));
+ len -= sizeof(reg_t);
+ bytes += sizeof(reg_t);
+ }
+ check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(len, bytes));
}
- check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(len, bytes));
if (proc && unlikely(proc->get_log_commits_enabled()))
proc->state.log_mem_read.push_back(std::make_tuple(original_addr, 0, len));
@@ -315,6 +343,14 @@ void mmu_t::store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_acces
paddr = translate(access_info, len);
host_addr = (uintptr_t)sim->addr_to_mem(paddr);
+ if (proc && proc->extension_enabled(EXT_ZICCID)) {
+ // Maintain exclusion with all instruction TLBs
+ for (auto [_, p2] : sim->get_harts())
+ p2->mmu->flush_itlb_ppn(paddr >> PGSHIFT);
+
+ tlb_store_reverse_tags.insert(paddr >> PGSHIFT);
+ }
+
if (!access_info.flags.is_special_access())
refill_tlb(vaddr, paddr, (char*)host_addr, STORE);
}
@@ -340,7 +376,8 @@ void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes
auto access_info = generate_access_info(original_addr, STORE, xlate_flags);
reg_t transformed_addr = access_info.transformed_vaddr;
- if (actually_store) {
+
+ if (actually_store && check_triggers_store) {
reg_t trig_len = len;
const uint8_t* trig_bytes = bytes;
while (trig_len > sizeof(reg_t)) {
@@ -369,8 +406,47 @@ void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes
store_slow_path_intrapage(len, bytes, access_info, actually_store);
}
- if (proc && unlikely(proc->get_log_commits_enabled()))
- proc->state.log_mem_write.push_back(std::make_tuple(original_addr, reg_from_bytes(len, bytes), len));
+ if (actually_store && proc && unlikely(proc->get_log_commits_enabled())) {
+ // amocas.q sends len == 16, but reg_from_bytes only supports up to 8
+ // bytes per conversion, so make multiple entries in the log.
+ reg_t offset = 0;
+ const auto reg_size = sizeof(reg_t);
+ while (unlikely(len > reg_size)) {
+ proc->state.log_mem_write.push_back(std::make_tuple(original_addr + offset, reg_from_bytes(reg_size, bytes + offset), reg_size));
+ offset += reg_size;
+ len -= reg_size;
+ }
+ proc->state.log_mem_write.push_back(std::make_tuple(original_addr + offset, reg_from_bytes(len, bytes + offset), len));
+ }
+}
+
+bool mmu_t::flush_tlb_ppn(reg_t ppn, dtlb_entry_t* tlb, reverse_tags_t& filter)
+{
+ if (!filter.contains(ppn))
+ return false;
+
+ filter.clear();
+
+ for (size_t i = 0; i < TLB_ENTRIES; i++) {
+ auto entry_ppn = tlb[i].data.target_addr >> PGSHIFT;
+ if (entry_ppn == ppn)
+ tlb[i].tag = -1;
+ else if (tlb[i].tag != (reg_t)-1)
+ filter.insert(entry_ppn);
+ }
+
+ return true;
+}
+
+void mmu_t::flush_stlb_ppn(reg_t ppn)
+{
+ flush_tlb_ppn(ppn, tlb_store, tlb_store_reverse_tags);
+}
+
+void mmu_t::flush_itlb_ppn(reg_t ppn)
+{
+ if (flush_tlb_ppn(ppn, tlb_insn, tlb_insn_reverse_tags))
+ flush_icache();
}
tlb_entry_t mmu_t::refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type)
@@ -414,12 +490,15 @@ bool mmu_t::pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode, bool hlv
if (!proc || proc->n_pmp == 0)
return true;
+ reg_t gran = reg_t(1) << proc->lg_pmp_granularity;
+ auto first_addr_aligned = addr & -gran;
+ auto last_addr_aligned = (addr + len - 1) & -gran;
+
for (size_t i = 0; i < proc->n_pmp; i++) {
- // Check each 4-byte sector of the access
+ // Check each PMP-granularity sector of the access
bool any_match = false;
bool all_match = true;
- for (reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) {
- reg_t cur_addr = addr + offset;
+ for (reg_t cur_addr = first_addr_aligned; cur_addr <= last_addr_aligned; cur_addr += gran) {
bool match = proc->state.pmpaddr[i]->match4(cur_addr);
any_match |= match;
all_match &= match;
@@ -495,6 +574,8 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty
if (pte & PTE_RSVD) {
break;
+ } else if (!proc->extension_enabled(EXT_SVRSW60T59B) && (pte & PTE_SVRSW60T59B)) {
+ break;
} else if (!proc->extension_enabled(EXT_SVNAPOT) && (pte & PTE_N)) {
break;
} else if (!pbmte && (pte & PTE_PBMT)) {
@@ -523,7 +604,7 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty
if ((pte & ad) != ad) {
if (hade) {
// set accessed and possibly dirty bits
- pte_store(pte_paddr, pte | ad, gva, virt, type, vm.ptesize);
+ pte_store(pte_paddr, pte | ad, gva, virt, trap_type, vm.ptesize);
} else {
// take exception if access or possibly dirty bit is not set.
break;
@@ -549,6 +630,35 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty
}
}
+bool mmu_t::svukte_qualified(mem_access_info_t access_info)
+{
+ state_t* state = proc->get_state();
+
+ if (access_info.effective_priv != PRV_U)
+ return false;
+
+ bool ukte = get_field(state->senvcfg->read(), SENVCFG_UKTE);
+ if (access_info.flags.forced_virt && state->prv == PRV_U)
+ ukte = get_field(state->hstatus->read(), HSTATUS_HUKTE);
+
+ if (!ukte)
+ return false;
+
+ reg_t mode_mask = proc->get_xlen() == 32 ? SATP32_MODE : SATP64_MODE;
+ if (get_field(proc->get_state()->satp->readvirt(access_info.effective_virt), mode_mask) == 0)
+ return false;
+
+ return true;
+}
+
+bool mmu_t::svukte_fault(reg_t addr, mem_access_info_t access_info)
+{
+ if (!svukte_qualified(access_info))
+ return false;
+
+ return addr >> (proc->get_xlen() - 1);
+}
+
reg_t mmu_t::walk(mem_access_info_t access_info)
{
access_type type = access_info.type;
@@ -571,6 +681,10 @@ reg_t mmu_t::walk(mem_access_info_t access_info)
if (vm.levels == 0)
return s2xlate(addr, addr & ((reg_t(2) << (proc->xlen-1))-1), type, type, virt, hlvx, false) & ~page_mask; // zero-extend from xlen
+ if (svukte_fault(addr, access_info)) {
+ throw_page_fault_exception(virt, addr, type);
+ }
+
bool s_mode = mode == PRV_S;
bool sum = proc->state.sstatus->readvirt(virt) & MSTATUS_SUM;
bool mxr = (proc->state.sstatus->readvirt(false) | proc->state.sstatus->readvirt(virt)) & MSTATUS_MXR;
@@ -599,6 +713,8 @@ reg_t mmu_t::walk(mem_access_info_t access_info)
if (pte & PTE_RSVD) {
break;
+ } else if (!proc->extension_enabled(EXT_SVRSW60T59B) && (pte & PTE_SVRSW60T59B)) {
+ break;
} else if (!proc->extension_enabled(EXT_SVNAPOT) && (pte & PTE_N)) {
break;
} else if (!pbmte && (pte & PTE_PBMT)) {
@@ -661,12 +777,7 @@ reg_t mmu_t::walk(mem_access_info_t access_info)
}
}
- switch (type) {
- case FETCH: throw trap_instruction_page_fault(virt, addr, 0, 0);
- case LOAD: throw trap_load_page_fault(virt, addr, 0, 0);
- case STORE: throw trap_store_page_fault(virt, addr, 0, 0);
- default: abort();
- }
+ throw_page_fault_exception(virt, addr, type);
}
void mmu_t::register_memtracer(memtracer_t* t)
diff --git a/riscv/mmu.h b/riscv/mmu.h
index 86f06ab..bd8bfd3 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -3,6 +3,7 @@
#ifndef _RISCV_MMU_H
#define _RISCV_MMU_H
+#include "bloom_filter.h"
#include "decode.h"
#include "trap.h"
#include "common.h"
@@ -18,7 +19,6 @@
// virtual memory configuration
#define PGSHIFT 12
const reg_t PGSIZE = 1 << PGSHIFT;
-#define MAX_PADDR_BITS 64
// observability hooks for load, store and fetch
// intentionally empty not to cause runtime overhead
@@ -43,7 +43,7 @@ struct insn_fetch_t
struct icache_entry_t {
reg_t tag;
- struct icache_entry_t* next;
+ icache_entry_t* next;
insn_fetch_t data;
};
@@ -57,6 +57,11 @@ struct dtlb_entry_t {
reg_t tag;
};
+struct pte_cache_entry_t {
+ reg_t paddr;
+ reg_t pte;
+};
+
struct xlate_flags_t {
const bool forced_virt : 1 {false};
const bool hlvx : 1 {false};
@@ -79,6 +84,7 @@ struct mem_access_info_t {
};
void throw_access_exception(bool virt, reg_t addr, access_type type);
+[[noreturn]] void throw_page_fault_exception(bool virt, reg_t addr, access_type type);
// this class implements a processor's port into the virtual memory system.
// an MMU and instruction cache are maintained for simulator performance.
@@ -89,7 +95,7 @@ private:
mem_access_info_t generate_access_info(reg_t addr, access_type type, xlate_flags_t xlate_flags);
public:
- mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc);
+ mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz);
~mmu_t();
template<typename T>
@@ -129,7 +135,7 @@ public:
T ss_load(reg_t addr) {
if ((addr & (sizeof(T) - 1)) != 0)
throw trap_store_access_fault((proc) ? proc->state.v : false, addr, 0, 0);
- return load<T>(addr, {.forced_virt=false, .hlvx=false, .lr=false, .ss_access=true});
+ return load<T>(addr, {.ss_access=true});
}
template<typename T>
@@ -156,7 +162,7 @@ public:
void ss_store(reg_t addr, T val) {
if ((addr & (sizeof(T) - 1)) != 0)
throw trap_store_access_fault((proc) ? proc->state.v : false, addr, 0, 0);
- store<T>(addr, val, {.forced_virt=false, .hlvx=false, .lr=false, .ss_access=true});
+ store<T>(addr, val, {.ss_access=true});
}
// AMO/Zicbom faults should be reported as store faults
@@ -188,13 +194,9 @@ public:
// for shadow stack amoswap
template<typename T>
T ssamoswap(reg_t addr, reg_t value) {
- bool forced_virt = false;
- bool hlvx = false;
- bool lr = false;
- bool ss_access = true;
- store_slow_path(addr, sizeof(T), nullptr, {forced_virt, hlvx, lr, ss_access}, false, true);
- auto data = load<T>(addr, {forced_virt, hlvx, lr, ss_access});
- store<T>(addr, value, {forced_virt, hlvx, lr, ss_access});
+ store_slow_path(addr, sizeof(T), nullptr, {.ss_access=true}, false, true);
+ auto data = load<T>(addr, {.ss_access=true});
+ store<T>(addr, value, {.ss_access=true});
return data;
}
@@ -209,28 +211,6 @@ public:
})
}
- void store_float128(reg_t addr, float128_t val)
- {
- if (unlikely(addr & (sizeof(float128_t)-1)) && !is_misaligned_enabled()) {
- throw trap_store_address_misaligned((proc) ? proc->state.v : false, addr, 0, 0);
- }
-
- store<uint64_t>(addr, val.v[0]);
- store<uint64_t>(addr + 8, val.v[1]);
- }
-
- float128_t load_float128(reg_t addr)
- {
- if (unlikely(addr & (sizeof(float128_t)-1)) && !is_misaligned_enabled()) {
- throw trap_load_address_misaligned((proc) ? proc->state.v : false, addr, 0, 0);
- }
-
- float128_t res;
- res.v[0] = load<uint64_t>(addr);
- res.v[1] = load<uint64_t>(addr + 8);
- return res;
- }
-
void cbo_zero(reg_t addr) {
auto access_info = generate_access_info(addr, STORE, {});
reg_t transformed_addr = access_info.transformed_vaddr;
@@ -272,7 +252,10 @@ public:
store_slow_path(vaddr, size, nullptr, {}, false, true);
}
- reg_t paddr = translate(generate_access_info(vaddr, STORE, {}), 1);
+ auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_store, vaddr);
+ if (!tlb_hit)
+ paddr = translate(generate_access_info(vaddr, STORE, {}), 1);
+
if (sim->reservable(paddr))
return load_reservation_address == paddr;
else
@@ -292,7 +275,7 @@ public:
return have_reservation;
}
- static const reg_t ICACHE_ENTRIES = 1024;
+ static const reg_t ICACHE_ENTRIES = 4096;
inline size_t icache_index(reg_t addr)
{
@@ -303,7 +286,7 @@ public:
T ALWAYS_INLINE fetch_jump_table(reg_t addr) {
T res = 0;
for (size_t i = 0; i < sizeof(T) / sizeof(insn_parcel_t); i++)
- res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t));
+ res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t) * 8);
// table accesses use data endianness, not instruction (little) endianness
return target_big_endian ? to_be(res) : res;
@@ -312,21 +295,11 @@ public:
inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry)
{
insn_bits_t insn = fetch_insn_parcel(addr);
+ unsigned length = insn_length(insn);
- int length = insn_length(insn);
-
- if (likely(length == 4)) {
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16;
- } else if (length == 2) {
- // entire instruction already fetched
- } else if (length == 6) {
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16;
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32;
- } else {
- static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t");
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16;
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32;
- insn |= (insn_bits_t)fetch_insn_parcel(addr + 6) << 48;
+ for (unsigned pos = sizeof(insn_parcel_t); pos < length; pos += sizeof(insn_parcel_t)) {
+ insn |= (insn_bits_t)fetch_insn_parcel(addr + pos) << (8 * pos);
+ length = insn_length(insn);
}
insn_fetch_t fetch = {proc->decode_insn(insn), insn};
@@ -357,8 +330,7 @@ public:
inline insn_fetch_t load_insn(reg_t addr)
{
- icache_entry_t entry;
- return refill_icache(addr, &entry)->data;
+ return refill_icache(addr, &icache[icache_index(addr)])->data;
}
std::tuple<bool, uintptr_t, reg_t> ALWAYS_INLINE access_tlb(const dtlb_entry_t* tlb, reg_t vaddr, reg_t allowed_flags = 0, reg_t required_flags = 0)
@@ -397,11 +369,6 @@ public:
return target_big_endian? target_endian<T>::to_be(n) : target_endian<T>::to_le(n);
}
- void set_cache_blocksz(reg_t size)
- {
- blocksz = size;
- }
-
private:
simif_t* sim;
processor_t* proc;
@@ -424,6 +391,17 @@ private:
dtlb_entry_t tlb_store[TLB_ENTRIES];
dtlb_entry_t tlb_insn[TLB_ENTRIES];
+ static const reg_t PTE_CACHE_ENTRIES = 251;
+ pte_cache_entry_t pte_cache[PTE_CACHE_ENTRIES];
+
+ typedef bloom_filter_t<reg_t, simple_hash1, simple_hash2, TLB_ENTRIES * 16, 3> reverse_tags_t;
+ reverse_tags_t tlb_store_reverse_tags;
+ reverse_tags_t tlb_insn_reverse_tags;
+
+ bool flush_tlb_ppn(reg_t ppn, dtlb_entry_t* tlb, reverse_tags_t& filter);
+ void flush_itlb_ppn(reg_t ppn);
+ void flush_stlb_ppn(reg_t ppn);
+
// finish translation on a TLB miss and update the TLB
tlb_entry_t refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type);
const char* fill_from_mmio(reg_t vaddr, reg_t paddr);
@@ -453,6 +431,8 @@ private:
check_triggers(operation, address, virt, address, data);
}
void check_triggers(triggers::operation_t operation, reg_t address, bool virt, reg_t tval, std::optional<reg_t> data);
+ bool svukte_qualified(mem_access_info_t access_info);
+ bool svukte_fault(reg_t addr, mem_access_info_t access_info);
reg_t translate(mem_access_info_t access_info, reg_t len);
reg_t pte_load(reg_t pte_paddr, reg_t addr, bool virt, access_type trap_type, size_t ptesize) {
@@ -471,6 +451,9 @@ private:
template<typename T> inline reg_t pte_load(reg_t pte_paddr, reg_t addr, bool virt, access_type trap_type)
{
+ if (auto [hit, pte] = pte_cache_access(pte_paddr); hit)
+ return pte;
+
const size_t ptesize = sizeof(T);
if (!pmp_ok(pte_paddr, ptesize, LOAD, PRV_S, false))
@@ -483,7 +466,10 @@ private:
} else if (!mmio_load(pte_paddr, ptesize, (uint8_t*)&target_pte)) {
throw_access_exception(virt, addr, trap_type);
}
- return from_target(target_pte);
+
+ auto res = from_target(target_pte);
+ pte_cache_insert(pte_paddr, res);
+ return res;
}
template<typename T> inline void pte_store(reg_t pte_paddr, reg_t new_pte, reg_t addr, bool virt, access_type trap_type)
@@ -500,6 +486,20 @@ private:
} else if (!mmio_store(pte_paddr, ptesize, (uint8_t*)&target_pte)) {
throw_access_exception(virt, addr, trap_type);
}
+
+ pte_cache_insert(pte_paddr, new_pte);
+ }
+
+ std::tuple<bool, reg_t> pte_cache_access(reg_t key)
+ {
+ auto e = pte_cache[key % PTE_CACHE_ENTRIES];
+ return std::make_tuple(e.paddr == key, e.pte);
+ }
+
+ void pte_cache_insert(reg_t key, reg_t value)
+ {
+ if (value & PTE_V)
+ pte_cache[key % PTE_CACHE_ENTRIES] = {key, value};
}
inline insn_parcel_t fetch_insn_parcel(reg_t addr) {
@@ -513,7 +513,7 @@ private:
{
return proc != nullptr
&& !(proc->state.mnstatus && !get_field(proc->state.mnstatus->read(), MNSTATUS_NMIE))
- && !proc->state.debug_mode
+ && (!proc->state.debug_mode || get_field(proc->state.dcsr->read(), DCSR_MPRVEN))
&& get_field(proc->state.mstatus->read(), MSTATUS_MPRV);
}
diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc
index 15e0873..4ae9dbe 100644
--- a/riscv/ns16550.cc
+++ b/riscv/ns16550.cc
@@ -93,8 +93,8 @@ void ns16550_t::update_interrupt(void)
uint8_t interrupts = 0;
/* Handle clear rx */
- if (lcr & UART_FCR_CLEAR_RCVR) {
- lcr &= ~UART_FCR_CLEAR_RCVR;
+ if (fcr & UART_FCR_CLEAR_RCVR) {
+ fcr &= ~UART_FCR_CLEAR_RCVR;
while (!rx_queue.empty()) {
rx_queue.pop();
}
@@ -102,8 +102,8 @@ void ns16550_t::update_interrupt(void)
}
/* Handle clear tx */
- if (lcr & UART_FCR_CLEAR_XMIT) {
- lcr &= ~UART_FCR_CLEAR_XMIT;
+ if (fcr & UART_FCR_CLEAR_XMIT) {
+ fcr &= ~UART_FCR_CLEAR_XMIT;
lsr |= UART_LSR_TEMT | UART_LSR_THRE;
}
@@ -361,4 +361,4 @@ ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base
}
}
-REGISTER_DEVICE(ns16550, ns16550_parse_from_fdt, ns16550_generate_dts)
+REGISTER_BUILTIN_DEVICE(ns16550, ns16550_parse_from_fdt, ns16550_generate_dts)
diff --git a/riscv/opcodes.h b/riscv/opcodes.h
index 065934a..2ca7332 100644
--- a/riscv/opcodes.h
+++ b/riscv/opcodes.h
@@ -130,6 +130,16 @@ static uint32_t csrrs(unsigned int rd, unsigned int rs1, unsigned int csr) {
return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRS;
}
+static uint32_t csrrc(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused));
+static uint32_t csrrc(unsigned int rd, unsigned int rs1, unsigned int csr) {
+ return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRC;
+}
+
+static uint32_t csrrw(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused));
+static uint32_t csrrw(unsigned int rd, unsigned int rs1, unsigned int csr) {
+ return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRW;
+}
+
static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) __attribute__ ((unused));
static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset)
{
diff --git a/riscv/platform.h b/riscv/platform.h
index c8a5bf4..5b794da 100644
--- a/riscv/platform.h
+++ b/riscv/platform.h
@@ -19,5 +19,7 @@
#define NS16550_INTERRUPT_ID 1
#define EXT_IO_BASE 0x40000000
#define DRAM_BASE 0x80000000
+#define DEBUG_START 0x0
+#define DEBUG_SIZE 0x1000
#endif
diff --git a/riscv/plic.cc b/riscv/plic.cc
index b6d204b..0310538 100644
--- a/riscv/plic.cc
+++ b/riscv/plic.cc
@@ -436,4 +436,4 @@ plic_t* plic_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, cons
return nullptr;
}
-REGISTER_DEVICE(plic, plic_parse_from_fdt, plic_generate_dts)
+REGISTER_BUILTIN_DEVICE(plic, plic_parse_from_fdt, plic_generate_dts)
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 7f2603a..80a47d9 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -34,7 +34,8 @@ processor_t::processor_t(const char* isa_str, const char* priv_str,
const cfg_t *cfg,
simif_t* sim, uint32_t id, bool halt_on_reset,
FILE* log_file, std::ostream& sout_)
-: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), sim(sim), id(id), xlen(0),
+: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg),
+ sim(sim), id(id), xlen(isa.get_max_xlen()),
histogram_enabled(false), log_commits_enabled(false),
log_file(log_file), sout_(sout_.rdbuf()), halt_on_reset(halt_on_reset),
in_wfi(false), check_triggers_icount(false),
@@ -61,25 +62,22 @@ processor_t::processor_t(const char* isa_str, const char* priv_str,
VU.vlenb = isa.get_vlen() / 8;
VU.vstart_alu = 0;
- register_base_instructions();
- mmu = new mmu_t(sim, cfg->endianness, this);
-
- disassembler = new disassembler_t(&isa);
- for (auto e : isa.get_extensions())
- register_extension(find_extension(e.c_str())());
+ mmu = new mmu_t(sim, cfg->endianness, this, cfg->cache_blocksz);
set_pmp_granularity(cfg->pmpgranularity);
set_pmp_num(cfg->pmpregions);
- if (isa.get_max_xlen() == 32)
- set_mmu_capability(IMPL_MMU_SV32);
- else if (isa.get_max_xlen() == 64)
- set_mmu_capability(IMPL_MMU_SV57);
-
+ set_max_vaddr_bits(0);
set_impl(IMPL_MMU_ASID, true);
set_impl(IMPL_MMU_VMID, true);
reset();
+
+ register_base_instructions();
+
+ disassembler = new disassembler_t(&isa);
+ for (auto e : isa.get_extensions())
+ register_extension(find_extension(e.c_str())());
}
processor_t::~processor_t()
@@ -145,6 +143,7 @@ void processor_t::enable_log_commits()
{
log_commits_enabled = true;
mmu->flush_tlb(); // the TLB caches this setting
+ build_opcode_map();
}
void processor_t::reset()
@@ -214,37 +213,75 @@ void processor_t::set_pmp_granularity(reg_t gran)
lg_pmp_granularity = ctz(gran);
}
-void processor_t::set_mmu_capability(int cap)
+void processor_t::set_max_vaddr_bits(unsigned n) // validates n against the ISA's max XLEN, then records it in max_vaddr_bits
{
- switch (cap) {
- case IMPL_MMU_SV32:
- set_impl(IMPL_MMU_SV32, true);
- set_impl(IMPL_MMU, true);
+ switch (n) {
+ case 0: // always accepted; has_mmu() reports false when max_vaddr_bits is 0
break;
- case IMPL_MMU_SV57:
- set_impl(IMPL_MMU_SV57, true);
- [[fallthrough]];
- case IMPL_MMU_SV48:
- set_impl(IMPL_MMU_SV48, true);
- [[fallthrough]];
- case IMPL_MMU_SV39:
- set_impl(IMPL_MMU_SV39, true);
- set_impl(IMPL_MMU, true);
+ case 32: // accepted only when the ISA's max XLEN is 32
+ if (isa.get_max_xlen() != 32)
+ abort();
break;
- default:
- set_impl(IMPL_MMU_SV32, false);
- set_impl(IMPL_MMU_SV39, false);
- set_impl(IMPL_MMU_SV48, false);
- set_impl(IMPL_MMU_SV57, false);
- set_impl(IMPL_MMU, false);
+ case 39:
+ case 48:
+ case 57: // 39, 48 and 57 are accepted only when the ISA's max XLEN is 64
+ if (isa.get_max_xlen() != 64)
+ abort();
break;
+ default: // any other width aborts the process
+ abort();
}
+
+ max_vaddr_bits = n; // reached only for a width that passed the checks above
+}
+
+reg_t processor_t::select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const // narrows a bitmask of enabled interrupts according to a fixed priority order
+{
+ // nonstandard interrupts (any bit above IRQ_LCOF) have highest priority: keep all of those bits and clear the rest
+ if (enabled_interrupts >> (IRQ_LCOF + 1))
+ enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1);
+ // otherwise a single bit is chosen, in the order MEI, MSI, MTI, SEI, SSI, STI, LCOFI, VSEI, VSSI, VSTI
+ else if (enabled_interrupts & MIP_MEIP)
+ enabled_interrupts = MIP_MEIP;
+ else if (enabled_interrupts & MIP_MSIP)
+ enabled_interrupts = MIP_MSIP;
+ else if (enabled_interrupts & MIP_MTIP)
+ enabled_interrupts = MIP_MTIP;
+ else if (enabled_interrupts & MIP_SEIP)
+ enabled_interrupts = MIP_SEIP;
+ else if (enabled_interrupts & MIP_SSIP)
+ enabled_interrupts = MIP_SSIP;
+ else if (enabled_interrupts & MIP_STIP)
+ enabled_interrupts = MIP_STIP;
+ else if (enabled_interrupts & MIP_LCOFIP)
+ enabled_interrupts = MIP_LCOFIP;
+ else if (enabled_interrupts & MIP_VSEIP)
+ enabled_interrupts = MIP_VSEIP;
+ else if (enabled_interrupts & MIP_VSSIP)
+ enabled_interrupts = MIP_VSSIP;
+ else if (enabled_interrupts & MIP_VSTIP)
+ enabled_interrupts = MIP_VSTIP;
+
+ return enabled_interrupts; // left unchanged when none of the branches above matched
}
void processor_t::take_interrupt(reg_t pending_interrupts)
{
+ reg_t s_pending_interrupts = 0;
+ reg_t vstopi = 0;
+ reg_t vs_pending_interrupt = 0;
+
+ if (extension_enabled_const(EXT_SSAIA)) {
+ s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read();
+ vstopi = state.vstopi->read();
+ // Legacy VS interrupts (VSEIP/VSTIP/VSSIP) come in through pending_interrupts but are shifted
+ // down 1 in vstopi. AIA-extended and VTI are not shifted. Clear S bits (VS shifted down by 1).
+ vs_pending_interrupt = vstopi ? (reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0;
+ vs_pending_interrupt &= ~MIP_S_MASK;
+ }
+
// Do nothing if no pending interrupts
- if (!pending_interrupts) {
+ if (!pending_interrupts && !s_pending_interrupts && !vs_pending_interrupt) {
return;
}
@@ -260,46 +297,20 @@ void processor_t::take_interrupt(reg_t pending_interrupts)
const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read();
const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE);
const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie);
- enabled_interrupts = pending_interrupts & deleg_to_hs & -hs_enabled;
+ enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled;
if (state.v && enabled_interrupts == 0) {
// VS-ints have least priority and can only be taken with virt enabled
const reg_t deleg_to_vs = state.hideleg->read();
const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie);
- enabled_interrupts = pending_interrupts & deleg_to_vs & -vs_enabled;
+ enabled_interrupts = ((pending_interrupts & deleg_to_vs) | vs_pending_interrupt) & -vs_enabled;
}
}
const bool nmie = !(state.mnstatus && !get_field(state.mnstatus->read(), MNSTATUS_NMIE));
if (!state.debug_mode && nmie && enabled_interrupts) {
- // nonstandard interrupts have highest priority
- if (enabled_interrupts >> (IRQ_LCOF + 1))
- enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1);
- // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI
- else if (enabled_interrupts & MIP_MEIP)
- enabled_interrupts = MIP_MEIP;
- else if (enabled_interrupts & MIP_MSIP)
- enabled_interrupts = MIP_MSIP;
- else if (enabled_interrupts & MIP_MTIP)
- enabled_interrupts = MIP_MTIP;
- else if (enabled_interrupts & MIP_SEIP)
- enabled_interrupts = MIP_SEIP;
- else if (enabled_interrupts & MIP_SSIP)
- enabled_interrupts = MIP_SSIP;
- else if (enabled_interrupts & MIP_STIP)
- enabled_interrupts = MIP_STIP;
- else if (enabled_interrupts & MIP_LCOFIP)
- enabled_interrupts = MIP_LCOFIP;
- else if (enabled_interrupts & MIP_VSEIP)
- enabled_interrupts = MIP_VSEIP;
- else if (enabled_interrupts & MIP_VSSIP)
- enabled_interrupts = MIP_VSSIP;
- else if (enabled_interrupts & MIP_VSTIP)
- enabled_interrupts = MIP_VSTIP;
- else
- abort();
-
+ reg_t selected_interrupt = select_an_interrupt_with_default_priority(enabled_interrupts);
if (check_triggers_icount) TM.detect_icount_match();
- throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(enabled_interrupts));
+ throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(selected_interrupt));
}
}
@@ -327,7 +338,7 @@ void processor_t::set_privilege(reg_t prv, bool virt)
state.v_changed = state.v != state.prev_v;
}
-const char* processor_t::get_privilege_string()
+const char* processor_t::get_privilege_string() const
{
if (state.debug_mode)
return "D";
@@ -403,7 +414,7 @@ void processor_t::take_trap(trap_t& t, reg_t epc)
bool supv_double_trap = false;
if (interrupt) {
vsdeleg = (curr_virt && state.prv <= PRV_S) ? state.hideleg->read() : 0;
- hsdeleg = (state.prv <= PRV_S) ? state.mideleg->read() : 0;
+ hsdeleg = (state.prv <= PRV_S) ? (state.mideleg->read() | state.nonvirtual_sip->read()) : 0;
bit &= ~((reg_t)1 << (max_xlen - 1));
} else {
vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.medeleg->read() & state.hedeleg->read()) : 0;
@@ -420,9 +431,17 @@ void processor_t::take_trap(trap_t& t, reg_t epc)
if (supv_double_trap)
vsdeleg = hsdeleg = 0;
}
- if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) {
+ bool vti = false;
+ if (extension_enabled_const(EXT_SSAIA)) {
+ const reg_t hvictl = state.csrmap[CSR_HVICTL]->read();
+ const reg_t iid = get_field(hvictl, HVICTL_IID);
+ // It is possible that hvictl is injecting VSEIP (10) and hvictl.DPR is causing mip.VSEIP to be picked over VTI.
+ // Check vstopi == hvictl.iid
+ vti = (hvictl & HVICTL_VTI) && iid != IRQ_S_EXT && iid == bit && get_field(state.vstopi->read(), MTOPI_IID) == iid;
+ }
+ if ((state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) || vti) {
// Handle the trap in VS-mode
- const reg_t adjusted_cause = interrupt ? bit - 1 : bit; // VSSIP -> SSIP, etc
+ const reg_t adjusted_cause = interrupt && bit <= IRQ_VS_EXT && !vti ? bit - 1 : bit; // VSSIP -> SSIP, etc;
reg_t vector = (state.vstvec->read() & 1) && interrupt ? 4 * adjusted_cause : 0;
state.pc = (state.vstvec->read() & ~(reg_t)1) + vector;
state.vscause->write(adjusted_cause | (interrupt ? interrupt_bit : 0));
@@ -546,6 +565,14 @@ void processor_t::check_if_lpad_required()
}
}
+reg_t processor_t::set_lpad_expected(reg_t pc) // conditionally sets state.elp to LP_EXPECTED, then hands pc back
+{
+ auto p = this; // NOTE(review): p is not referenced directly below; presumably the ZICFILP_xLPE macro expands to code that uses it -- confirm
+ if (ZICFILP_xLPE(state.v, state.prv))
+ state.elp = elp_t::LP_EXPECTED;
+ return pc; // pc is returned unmodified
+}
+
void processor_t::disasm(insn_t insn)
{
uint64_t bits = insn.bits();
@@ -581,13 +608,6 @@ void processor_t::disasm(insn_t insn)
}
}
-int processor_t::paddr_bits()
-{
- unsigned max_xlen = isa.get_max_xlen();
- assert(xlen == max_xlen);
- return max_xlen == 64 ? 50 : 34;
-}
-
void processor_t::put_csr(int which, reg_t val)
{
val = zext_xlen(val);
@@ -628,46 +648,55 @@ reg_t illegal_instruction(processor_t UNUSED *p, insn_t insn, reg_t UNUSED pc)
throw trap_illegal_instruction(insn.bits() & 0xffffffffULL);
}
-insn_func_t processor_t::decode_insn(insn_t insn)
+reg_t processor_t::throw_instruction_address_misaligned(reg_t pc) // always throws; the reg_t return type is never produced by this body
{
- // look up opcode in hash table
- size_t idx = insn.bits() % OPCODE_CACHE_SIZE;
- auto [hit, desc] = opcode_cache[idx].lookup(insn.bits());
+ throw trap_instruction_address_misaligned(state.v, pc, 0, 0); // raised with the current state.v and the offending pc
+}
- bool rve = extension_enabled('E');
+insn_func_t processor_t::decode_insn(insn_t insn) // returns the handler of the first opcode_map entry that matches insn
+{
+ const auto& pool = opcode_map[insn.bits() % std::size(opcode_map)]; // bucket chosen by the instruction bits modulo the number of buckets
- if (unlikely(!hit)) {
- // fall back to linear search
- auto matching = [insn_bits = insn.bits()](const insn_desc_t &d) {
- return (insn_bits & d.mask) == d.match;
- };
- auto p = std::find_if(custom_instructions.begin(),
- custom_instructions.end(), matching);
- if (p == custom_instructions.end()) {
- p = std::find_if(instructions.begin(), instructions.end(), matching);
- assert(p != instructions.end());
+ for (auto p = pool.begin(); ; ++p) { // NOTE(review): no end-of-pool check; assumes every pool holds an entry that matches any instruction -- confirm
+ if ((insn.bits() & p->mask) == p->match) { // entries are tried in insertion order; the first match wins
+ return p->func;
}
- desc = &*p;
- opcode_cache[idx].replace(insn.bits(), desc);
}
-
- return desc->func(xlen, rve, log_commits_enabled);
}
-void processor_t::register_insn(insn_desc_t desc, bool is_custom) {
+void processor_t::register_insn(insn_desc_t desc, std::vector<insn_desc_t>& pool) { // appends desc to the caller-chosen list after checking all eight handlers are set
assert(desc.fast_rv32i && desc.fast_rv64i && desc.fast_rv32e && desc.fast_rv64e &&
desc.logged_rv32i && desc.logged_rv64i && desc.logged_rv32e && desc.logged_rv64e);
- if (is_custom)
- custom_instructions.push_back(desc);
- else
- instructions.push_back(desc);
+ pool.push_back(desc); // the caller decides which list (base or custom) receives the descriptor
}
void processor_t::build_opcode_map()
{
- for (size_t i = 0; i < OPCODE_CACHE_SIZE; i++)
- opcode_cache[i].reset();
+ bool rve = extension_enabled('E');
+ bool zca = extension_enabled(EXT_ZCA);
+ const size_t N = std::size(opcode_map); // number of buckets in opcode_map
+
+ auto build_one = [&](const insn_desc_t& desc) { // files one descriptor into every bucket whose index is compatible with it
+ auto func = desc.func(xlen, rve, log_commits_enabled); // handler resolved once, for the current xlen / RVE / commit-log settings
+ if (!zca && insn_length(desc.match) % 4) // without Zca, a descriptor whose insn_length is not a multiple of 4 gets the illegal-instruction handler
+ func = &::illegal_instruction;
+
+ auto stride = std::min(N, size_t(1) << ctz(~desc.mask)); // presumably 2^(count of trailing one bits in mask), capped at N -- assumes ctz counts trailing zeros
+ for (size_t i = desc.match & (stride - 1); i < N; i += stride) { // start at match's low bits and step by stride
+ if ((desc.match % N) == (i & desc.mask)) // keep bucket i only if its masked bits equal match reduced modulo N
+ opcode_map[i].push_back({desc.match, desc.mask, func});
+ }
+ };
+
+ for (auto& p : opcode_map) // start from empty buckets on every rebuild
+ p.clear();
+
+ for (auto& d : custom_instructions) // custom descriptors are filed first, so they sit ahead of base ones in each bucket
+ build_one(d);
+
+ for (auto& d : instructions)
+ build_one(d);
}
void processor_t::register_extension(extension_t *x) {
diff --git a/riscv/processor.h b/riscv/processor.h
index 6b611d7..18ac08f 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -61,6 +61,13 @@ struct insn_desc_t
static const insn_desc_t illegal_instruction;
};
+struct opcode_map_entry_t // one candidate in an opcode_map bucket
+{
+ insn_bits_t match; // value the masked instruction bits must equal
+ insn_bits_t mask; // bits of the instruction that are compared against match
+ insn_func_t func; // handler returned by decode_insn when (bits & mask) == match
+};
+
// regnum, data
typedef std::map<reg_t, freg_t> commit_log_reg_t;
@@ -70,6 +77,7 @@ typedef std::vector<std::tuple<reg_t, uint64_t, uint8_t>> commit_log_mem_t;
// architectural state of a RISC-V hart
struct state_t
{
+ void add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg);
void reset(processor_t* const proc, reg_t max_isa);
void add_csr(reg_t addr, const csr_t_p& csr);
@@ -96,6 +104,8 @@ struct state_t
wide_counter_csr_t_p mcycle;
mie_csr_t_p mie;
mip_csr_t_p mip;
+ csr_t_p nonvirtual_sip;
+ csr_t_p nonvirtual_sie;
csr_t_p medeleg;
csr_t_p mideleg;
csr_t_p mcounteren;
@@ -149,6 +159,7 @@ struct state_t
bool debug_mode;
mseccfg_csr_t_p mseccfg;
+ csr_t_p mseccfgh;
static const int max_pmp = 64;
pmpaddr_csr_t_p pmpaddr[max_pmp];
@@ -173,6 +184,11 @@ struct state_t
csr_t_p ssp;
+ csr_t_p mvien;
+ mvip_csr_t_p mvip;
+ csr_t_p hvictl;
+ csr_t_p vstopi;
+
bool serialized; // whether timer CSRs are in a well-defined state
// When true, execute a single instruction and then enter debug mode. This
@@ -198,47 +214,6 @@ struct state_t
void csr_init(processor_t* const proc, reg_t max_isa);
};
-class opcode_cache_entry_t {
- public:
- opcode_cache_entry_t()
- {
- reset();
- }
-
- void reset()
- {
- for (size_t i = 0; i < associativity; i++) {
- tag[i] = 0;
- contents[i] = &insn_desc_t::illegal_instruction;
- }
- }
-
- void replace(insn_bits_t opcode, const insn_desc_t* desc)
- {
- for (size_t i = associativity - 1; i > 0; i--) {
- tag[i] = tag[i-1];
- contents[i] = contents[i-1];
- }
-
- tag[0] = opcode;
- contents[0] = desc;
- }
-
- std::tuple<bool, const insn_desc_t*> lookup(insn_bits_t opcode)
- {
- for (size_t i = 0; i < associativity; i++)
- if (tag[i] == opcode)
- return std::tuple(true, contents[i]);
-
- return std::tuple(false, nullptr);
- }
-
- private:
- static const size_t associativity = 4;
- insn_bits_t tag[associativity];
- const insn_desc_t* contents[associativity];
-};
-
// this class represents one processor in a RISC-V machine.
class processor_t : public abstract_device_t
{
@@ -249,8 +224,8 @@ public:
FILE *log_file, std::ostream& sout_); // because of command line option --log and -s we need both
~processor_t();
- const isa_parser_t &get_isa() { return isa; }
- const cfg_t &get_cfg() { return *cfg; }
+ const isa_parser_t &get_isa() const & { return isa; }
+ const cfg_t &get_cfg() const & { return *cfg; }
void set_debug(bool value);
void set_histogram(bool value);
@@ -265,6 +240,7 @@ public:
mmu_t* get_mmu() { return mmu; }
state_t* get_state() { return &state; }
unsigned get_xlen() const { return xlen; }
+ unsigned paddr_bits() { return isa.get_max_xlen() == 64 ? 56 : 34; }
unsigned get_const_xlen() const {
// Any code that assumes a const xlen should use this method to
// document that assumption. If Spike ever changes to allow
@@ -314,6 +290,9 @@ public:
extension_enable_table[ext] = enable && isa.extension_enabled(ext);
}
void set_impl(uint8_t impl, bool val) { impl_table[impl] = val; }
+ bool has_mmu() const { return max_vaddr_bits != 0; }
+ unsigned get_max_vaddr_bits() const { return max_vaddr_bits; }
+ void set_max_vaddr_bits(unsigned);
bool supports_impl(uint8_t impl) const {
return impl_table[impl];
}
@@ -321,25 +300,23 @@ public:
const int ialign = extension_enabled(EXT_ZCA) ? 16 : 32;
return ~(reg_t)(ialign == 16 ? 0 : 2);
}
- void check_pc_alignment(reg_t pc) {
- if (unlikely(pc & ~pc_alignment_mask()))
- throw trap_instruction_address_misaligned(state.v, pc, 0, 0);
- }
+ reg_t throw_instruction_address_misaligned(reg_t pc);
reg_t legalize_privilege(reg_t);
void set_privilege(reg_t, bool);
- const char* get_privilege_string();
+ const char* get_privilege_string() const;
void update_histogram(reg_t pc);
const disassembler_t* get_disassembler() { return disassembler; }
FILE *get_log_file() { return log_file; }
void register_base_insn(insn_desc_t insn) {
- register_insn(insn, false /* is_custom */);
+ register_insn(insn, instructions);
}
void register_custom_insn(insn_desc_t insn) {
- register_insn(insn, true /* is_custom */);
+ register_insn(insn, custom_instructions);
}
void register_extension(extension_t*);
+ void build_opcode_map();
// MMIO slave interface
bool load(reg_t addr, size_t len, uint8_t* bytes) override;
@@ -349,8 +326,8 @@ public:
// When true, display disassembly of each instruction that's executed.
bool debug;
// When true, take the slow simulation path.
- bool slow_path();
- bool halted() { return state.debug_mode; }
+ bool slow_path() const;
+ bool halted() const { return state.debug_mode; }
enum {
HR_NONE, /* Halt request is inactive. */
HR_REGULAR, /* Regular halt request/debug interrupt. */
@@ -369,6 +346,9 @@ public:
bool is_waiting_for_interrupt() { return in_wfi; };
void check_if_lpad_required();
+ reg_t set_lpad_expected(reg_t pc);
+
+ reg_t select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const;
private:
const isa_parser_t isa;
@@ -381,6 +361,7 @@ private:
state_t state;
uint32_t id;
unsigned xlen;
+ unsigned max_vaddr_bits;
bool histogram_enabled;
bool log_commits_enabled;
FILE *log_file;
@@ -395,20 +376,17 @@ private:
std::bitset<NUM_ISA_EXTENSIONS> extension_dynamic;
mutable std::bitset<NUM_ISA_EXTENSIONS> extension_assumed_const;
+ std::vector<opcode_map_entry_t> opcode_map[128];
std::vector<insn_desc_t> instructions;
std::vector<insn_desc_t> custom_instructions;
std::unordered_map<reg_t,uint64_t> pc_histogram;
- static const size_t OPCODE_CACHE_SIZE = 4095;
- opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE];
-
void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); }
void take_interrupt(reg_t mask); // take first enabled interrupt in mask
void take_trap(trap_t& t, reg_t epc); // take an exception
void take_trigger_action(triggers::action_t action, reg_t breakpoint_tval, reg_t epc, bool virt);
void disasm(insn_t insn); // disassemble and print an instruction
- void register_insn(insn_desc_t, bool);
- int paddr_bits();
+ void register_insn(insn_desc_t, std::vector<insn_desc_t>& pool);
void enter_debug_mode(uint8_t cause, uint8_t ext_cause);
@@ -420,7 +398,6 @@ private:
friend class extension_t;
void parse_priv_string(const char*);
- void build_opcode_map();
void register_base_instructions();
insn_func_t decode_insn(insn_t insn);
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 7fd9890..8df8739 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -18,6 +18,7 @@ riscv_install_prog_srcs = \
riscv_install_hdrs = \
abstract_device.h \
abstract_interrupt_controller.h \
+ bloom_filter.h \
cachesim.h \
cfg.h \
common.h \
@@ -33,11 +34,13 @@ riscv_install_hdrs = \
entropy_source.h \
extension.h \
isa_parser.h \
+ jtag_dtm.h \
log_file.h \
memtracer.h \
mmu.h \
platform.h \
processor.h \
+ remote_bitbang.h \
rocc.h \
sim.h \
simif.h \
@@ -958,6 +961,14 @@ riscv_insn_ext_zicond = \
czero_eqz \
czero_nez \
+riscv_insn_ext_zvfofp4min = \
+ vfext_vf2 \
+
+riscv_insn_ext_zvfofp8min = \
+ vfncvt_f_f_q \
+ vfncvt_sat_f_f_q \
+ vfncvtbf16_sat_f_f_w \
+
riscv_insn_ext_zfbfmin = \
fcvt_bf16_s \
fcvt_s_bf16 \
@@ -1072,10 +1083,29 @@ riscv_insn_ext_zvksh = \
vsm3c_vi \
vsm3me_vv \
+riscv_insn_ext_zvbdot = \
+ vqbdotu_vv \
+ vqbdots_vv \
+ vfwbdot_vv \
+ vfbdot_vv \
+ vfqbdot_vv \
+ vfqbdot_alt_vv \
+
+riscv_insn_ext_zvldot = \
+ vqldotu_vv \
+ vqldots_vv \
+ vfwldot_vv \
+ vfqldot_vv \
+ vfqldot_alt_vv \
+
riscv_insn_ext_zimop = \
mop_r_N \
mop_rr_N \
+riscv_insn_ext_zibi = \
+ beqi \
+ bnei \
+
riscv_insn_ext_zcmop = \
c_mop_N \
@@ -1111,13 +1141,15 @@ riscv_insn_list = \
$(riscv_insn_ext_b) \
$(riscv_insn_ext_a) \
$(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
+ $(riscv_insn_ext_zvfofp4min) \
+ $(riscv_insn_ext_zvfofp8min) \
$(riscv_insn_ext_bf16) \
$(riscv_insn_ext_cmo) \
$(riscv_insn_ext_d_zfa) \
$(riscv_insn_ext_f_zfa) \
$(riscv_insn_ext_h) \
$(riscv_insn_ext_k) \
- $(riscv_insn_ext_q) \
+ $(if $(HAVE_INT128),$(riscv_insn_ext_q),) \
$(riscv_insn_ext_q_zfa) \
$(riscv_insn_ext_zacas) \
$(riscv_insn_ext_zabha) \
@@ -1128,9 +1160,12 @@ riscv_insn_list = \
$(riscv_insn_ext_zfh_zfa) \
$(riscv_insn_ext_zicond) \
$(riscv_insn_ext_zvk) \
+ $(riscv_insn_ext_zvbdot) \
+ $(riscv_insn_ext_zvldot) \
$(riscv_insn_priv) \
$(riscv_insn_smrnmi) \
$(riscv_insn_svinval) \
+ $(riscv_insn_ext_zibi) \
$(riscv_insn_ext_zimop) \
$(riscv_insn_ext_zcmop) \
$(riscv_insn_ext_zicfilp) \
diff --git a/riscv/sim.cc b/riscv/sim.cc
index fd1c6fb..4eb5ed0 100644
--- a/riscv/sim.cc
+++ b/riscv/sim.cc
@@ -88,7 +88,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
#ifndef RISCV_ENABLE_DUAL_ENDIAN
if (cfg->endianness != endianness_little) {
- fputs("Big-endian support has not been prroperly enabled; "
+ fputs("Big-endian support has not been properly enabled; "
"please rebuild the riscv-isa-sim project using "
"\"configure --enable-dual-endian\".\n",
stderr);
@@ -96,7 +96,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
}
#endif
- debug_mmu = new mmu_t(this, cfg->endianness, NULL);
+ debug_mmu = new mmu_t(this, cfg->endianness, NULL, cfg->cache_blocksz);
// When running without using a dtb, skip the fdt-based configuration steps
if (!dtb_enabled) {
@@ -118,8 +118,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
// particular, the default device tree configuration that you get without
// setting the dtb_file argument has one.
std::vector<device_factory_sargs_t> device_factories = {
- {clint_factory, {}}, // clint must be element 0
- {plic_factory, {}}, // plic must be element 1
+ {clint_factory, {}},
+ {plic_factory, {}},
{ns16550_factory, {}}};
device_factories.insert(device_factories.end(),
plugin_device_factories.begin(),
@@ -214,16 +214,16 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
// handle mmu-type
const char *mmu_type;
rc = fdt_parse_mmu_type(fdt, cpu_offset, &mmu_type);
+ procs[cpu_idx]->set_max_vaddr_bits(0);
if (rc == 0) {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SBARE);
if (strncmp(mmu_type, "riscv,sv32", strlen("riscv,sv32")) == 0) {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV32);
+ procs[cpu_idx]->set_max_vaddr_bits(32);
} else if (strncmp(mmu_type, "riscv,sv39", strlen("riscv,sv39")) == 0) {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV39);
+ procs[cpu_idx]->set_max_vaddr_bits(39);
} else if (strncmp(mmu_type, "riscv,sv48", strlen("riscv,sv48")) == 0) {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV48);
+ procs[cpu_idx]->set_max_vaddr_bits(48);
} else if (strncmp(mmu_type, "riscv,sv57", strlen("riscv,sv57")) == 0) {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV57);
+ procs[cpu_idx]->set_max_vaddr_bits(57);
} else if (strncmp(mmu_type, "riscv,sbare", strlen("riscv,sbare")) == 0) {
// has been set in the beginning
} else {
@@ -233,8 +233,6 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
<< mmu_type << ").\n";
exit(1);
}
- } else {
- procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SBARE);
}
procs[cpu_idx]->reset();
@@ -253,10 +251,15 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
std::shared_ptr<abstract_device_t> dev_ptr(device);
add_device(device_base, dev_ptr);
- if (i == 0) // clint_factory
+ if (dynamic_cast<clint_t*>(&*dev_ptr)) {
+ assert(!clint);
clint = std::static_pointer_cast<clint_t>(dev_ptr);
- else if (i == 1) // plic_factory
+ }
+
+ if (dynamic_cast<plic_t*>(&*dev_ptr)) {
+ assert(!plic);
plic = std::static_pointer_cast<plic_t>(dev_ptr);
+ }
}
}
}
@@ -273,7 +276,7 @@ int sim_t::run()
if (!debug && log)
set_procs_debug(true);
- htif_t::set_expected_xlen(harts[0]->get_isa().get_max_xlen());
+ htif_t::set_expected_xlen(harts.begin()->second->get_isa().get_max_xlen());
// htif_t::run() will repeatedly call back into sim_t::idle(), each
// invocation of which will advance target time
@@ -337,22 +340,16 @@ void sim_t::set_procs_debug(bool value)
procs[i]->set_debug(value);
}
-static bool paddr_ok(reg_t addr)
-{
- static_assert(MAX_PADDR_BITS == 8 * sizeof(addr));
- return true;
-}
-
bool sim_t::mmio_load(reg_t paddr, size_t len, uint8_t* bytes)
{
- if (paddr + len < paddr || !paddr_ok(paddr + len - 1))
+ if (paddr + len < paddr) // refuse a range whose end wraps around past paddr
return false;
return bus.load(paddr, len, bytes);
}
bool sim_t::mmio_store(reg_t paddr, size_t len, const uint8_t* bytes)
{
- if (paddr + len < paddr || !paddr_ok(paddr + len - 1))
+ if (paddr + len < paddr) // refuse a range whose end wraps around past paddr
return false;
return bus.store(paddr, len, bytes);
}
@@ -403,12 +400,20 @@ void sim_t::set_rom()
}
char* sim_t::addr_to_mem(reg_t paddr) {
- if (!paddr_ok(paddr))
- return NULL;
- auto desc = bus.find_device(paddr >> PGSHIFT << PGSHIFT, PGSIZE);
- if (auto mem = dynamic_cast<abstract_mem_t*>(desc.second))
- return mem->contents(paddr - desc.first);
- return NULL;
+ auto page_offset = paddr % PGSIZE; // offset of paddr within its page
+ auto page_addr = paddr - page_offset; // page-aligned base address, used as the cache key
+
+ if (auto it = addr_to_mem_cache.find(page_addr); it != addr_to_mem_cache.end()) // fast path: page pointer already cached
+ return it->second + page_offset;
+
+ auto desc = bus.find_device(page_addr, PGSIZE);
+ if (auto mem = dynamic_cast<abstract_mem_t*>(desc.second)) { // only devices that are abstract_mem_t yield a host pointer
+ auto res = mem->contents(page_addr - desc.first); // pointer for the start of the page, relative to the device base
+ addr_to_mem_cache.insert({page_addr, res}); // NOTE(review): no invalidation of this cache is visible in this function -- confirm entries stay valid
+ return res + page_offset;
+ }
+
+ return nullptr; // no abstract_mem_t found for this page; nothing is cached for it
}
const char* sim_t::get_symbol(uint64_t paddr)
diff --git a/riscv/sim.h b/riscv/sim.h
index da04a88..8a96395 100644
--- a/riscv/sim.h
+++ b/riscv/sim.h
@@ -13,6 +13,7 @@
#include <fesvr/htif.h>
#include <vector>
#include <map>
+#include <unordered_map>
#include <string>
#include <memory>
#include <sys/types.h>
@@ -73,6 +74,7 @@ private:
std::vector<std::pair<reg_t, abstract_mem_t*>> mems;
std::vector<processor_t*> procs;
std::map<size_t, processor_t*> harts;
+ std::unordered_map<reg_t, char*> addr_to_mem_cache;
std::pair<reg_t, reg_t> initrd_range;
std::string dts;
std::string dtb;
diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h
index b6a4b92..7f5256c 100644
--- a/riscv/v_ext_macros.h
+++ b/riscv/v_ext_macros.h
@@ -4,6 +4,8 @@
#define _RISCV_V_EXT_MACROS_H
#include "vector_unit.h"
+#include "zvbdot.h"
+#include <functional>
//
// vector: masking skip helper
@@ -58,11 +60,23 @@ static inline bool is_overlapped_widen(const int astart, int asize,
}
}
-#define VI_NARROW_CHECK_COMMON \
+#define VI_NON_ALTFMT_INSN \
+ require(P.VU.altfmt == 0); \
+
+#define require_zvfbfa \
+ require(P.VU.altfmt == 0 || p->extension_enabled(EXT_ZVFBFA)); \
+
+#define require_zvfbfa_or_zvfh \
+ require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFH); \
+
+#define require_zvfbfa_or_zvfhmin \
+ require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFHMIN); \
+
+#define VI_NARROW_CHECK_COMMON(factor) \
require_vector(true); \
- require(P.VU.vflmul <= 4); \
- require(P.VU.vsew * 2 <= P.VU.ELEN); \
- require_align(insn.rs2(), P.VU.vflmul * 2); \
+ require(P.VU.vflmul <= (8 / factor)); \
+ require(P.VU.vsew * factor <= P.VU.ELEN); \
+ require_align(insn.rs2(), P.VU.vflmul * factor); \
require_align(insn.rd(), P.VU.vflmul); \
require_vm; \
@@ -75,7 +89,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
#define VI_CHECK_ST_INDEX(elt_width) \
require_vector(false); \
- require(elt_width <= P.VU.ELEN); \
+ require(elt_width <= std::min(P.VU.ELEN, (reg_t)P.get_xlen())); \
float vemul = ((float)elt_width / P.VU.vsew * P.VU.vflmul); \
require(vemul >= 0.125 && vemul <= 8); \
reg_t UNUSED emul = vemul < 1 ? 1 : vemul; \
@@ -171,12 +185,19 @@ static inline bool is_overlapped_widen(const int astart, int asize,
}
#define VI_CHECK_SDS(is_vs1) \
- VI_NARROW_CHECK_COMMON; \
+ VI_NARROW_CHECK_COMMON(2); \
if (insn.rd() != insn.rs2()) \
require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \
if (is_vs1) \
require_align(insn.rs1(), P.VU.vflmul); \
+#define VI_CHECK_SQS(is_vs1) \
+ VI_NARROW_CHECK_COMMON(4); \
+ if (insn.rd() != insn.rs2()) \
+ require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 4); \
+ if (is_vs1) \
+ require_align(insn.rs1(), P.VU.vflmul); \
+
#define VI_CHECK_REDUCTION(is_wide) \
require_vector(true); \
if (is_wide) { \
@@ -200,7 +221,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
require_vector(true); \
reg_t vl = P.VU.vl->read(); \
reg_t UNUSED sew = P.VU.vsew; \
- reg_t rd_num = insn.rd(); \
+ reg_t UNUSED rd_num = insn.rd(); \
reg_t UNUSED rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
@@ -336,7 +357,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
#define VI_PARAMS(x) \
type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
- type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
+ type_sew_t<x>::type UNUSED simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
#define XV_PARAMS(x) \
@@ -435,7 +456,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VFP_VV_CMP_PARAMS(width)
#define VFP_VF_CMP_PARAMS(width) \
- float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \
+ float##width##_t rs1 = f##width(READ_FREG(rs1_num), P.VU.altfmt); \
float##width##_t UNUSED vs2 = P.VU.elt<float##width##_t>(rs2_num, i);
#define VFP_VF_PARAMS(width) \
@@ -545,7 +566,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VX_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VI_MERGE_LOOP(BODY) \
VI_CHECK_SSS(false); \
@@ -661,7 +682,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VV_U_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VV_LOOP(BODY) \
VI_CHECK_SSS(true) \
@@ -679,7 +700,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VV_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_V_ULOOP(BODY) \
VI_CHECK_SSS(false) \
@@ -715,7 +736,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VX_U_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VX_LOOP(BODY) \
VI_CHECK_SSS(false) \
@@ -733,7 +754,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VX_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VI_ULOOP(BODY) \
VI_CHECK_SSS(false) \
@@ -751,7 +772,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VI_U_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VI_LOOP(BODY) \
VI_CHECK_SSS(false) \
@@ -769,7 +790,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VI_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
// signed unsigned operation loop (e.g. mulhsu)
#define VI_VV_SU_LOOP(BODY) \
@@ -788,7 +809,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VV_SU_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
#define VI_VX_SU_LOOP(BODY) \
VI_CHECK_SSS(false) \
@@ -806,7 +827,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
VX_SU_PARAMS(e64); \
BODY; \
} \
- VI_LOOP_END
+ VI_LOOP_END
// narrow operation loop
#define VI_VV_LOOP_NARROW(BODY) \
@@ -1381,12 +1402,9 @@ VI_VX_ULOOP({ \
} \
P.VU.vstart->write(0);
-// vector: sign/unsiged extension
-#define VI_VV_EXT(div, type) \
+#define VI_EXT_CHECK(div) \
require(insn.rd() != insn.rs2()); \
require_vm; \
- reg_t from = P.VU.vsew / div; \
- require(from >= e8 && from <= e64); \
require(((float)P.VU.vflmul / div) >= 0.125 && ((float)P.VU.vflmul / div) <= 8 ); \
require_align(insn.rd(), P.VU.vflmul); \
require_align(insn.rs2(), P.VU.vflmul / div); \
@@ -1394,10 +1412,15 @@ VI_VX_ULOOP({ \
require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \
} else { \
require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \
- } \
+ }
+
+// vector: sign/unsigned extension
+#define VI_VV_EXT(div, type) \
+ reg_t from = P.VU.vsew / div; \
+ require(from >= e8 && from <= e64); \
+ VI_EXT_CHECK(div); \
+ VI_LOOP_BASE \
reg_t pat = (((P.VU.vsew >> 3) << 4) | from >> 3); \
- VI_GENERAL_LOOP_BASE \
- VI_LOOP_ELEMENT_SKIP(); \
switch (pat) { \
case 0x21: \
P.VU.elt<type##16_t>(rd_num, i, true) = P.VU.elt<type##8_t>(rs2_num, i); \
@@ -1420,34 +1443,37 @@ VI_VX_ULOOP({ \
default: \
break; \
} \
- VI_LOOP_END
+ VI_LOOP_END
//
// vector: vfp helper
//
-#define VI_VFP_COMMON \
+#define VI_VFP_BASE \
require_fp; \
- require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) || \
- (P.VU.vsew == e32 && p->get_isa().get_zvf()) || \
- (P.VU.vsew == e64 && p->get_isa().get_zvd())); \
require_vector(true); \
- require(STATE.frm->read() < 0x5); \
reg_t UNUSED vl = P.VU.vl->read(); \
reg_t UNUSED rd_num = insn.rd(); \
reg_t UNUSED rs1_num = insn.rs1(); \
reg_t UNUSED rs2_num = insn.rs2(); \
- softfloat_roundingMode = STATE.frm->read();
+ softfloat_roundingMode = VFP_RM
+
+// Common prologue for vector FP instructions: expands VI_VFP_BASE, then
+// requires the FP extension that matches the current SEW:
+//   e16 -> EXT_ZVFBFA when P.VU.altfmt is set, EXT_ZVFH otherwise
+//   e32 -> get_zvf()
+//   e64 -> get_zvd()
+// Any other SEW fails the require.
+#define VI_VFP_COMMON \
+ VI_VFP_BASE; \
+ require((P.VU.vsew == e16 && p->extension_enabled(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFH)) || \
+ (P.VU.vsew == e32 && p->get_isa().get_zvf()) || \
+ (P.VU.vsew == e64 && p->get_isa().get_zvd())); \
+
+// FP extension loop: validates the operands with VI_EXT_CHECK(div) and runs
+// BODY inside the VI_LOOP_BASE / VI_LOOP_END element loop.
+// For now only a divisor of two with SEW == 8 is supported; anything else
+// fails the leading require.
+#define VI_VF_EXT(div, BODY) \
+ require(div == 2 && P.VU.vsew == 8); \
+ VI_EXT_CHECK(div); \
+ VI_LOOP_BASE \
+ BODY; \
+ VI_LOOP_END
#define VI_VFP_BF16_COMMON \
- require_fp; \
+ VI_VFP_BASE; \
require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFBFWMA))); \
- require_vector(true); \
- require(STATE.frm->read() < 0x5); \
- reg_t UNUSED vl = P.VU.vl->read(); \
- reg_t UNUSED rd_num = insn.rd(); \
- reg_t UNUSED rs1_num = insn.rs1(); \
- reg_t UNUSED rs2_num = insn.rs2(); \
- softfloat_roundingMode = STATE.frm->read();
#define VI_VFP_LOOP_BASE \
VI_VFP_COMMON \
@@ -1608,6 +1634,7 @@ VI_VX_ULOOP({ \
#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \
VI_CHECK_REDUCTION(false) \
+ VI_NON_ALTFMT_INSN \
VI_VFP_COMMON \
switch (P.VU.vsew) { \
case e16: { \
@@ -1638,6 +1665,7 @@ VI_VX_ULOOP({ \
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \
VI_CHECK_REDUCTION(true) \
+ VI_NON_ALTFMT_INSN \
VI_VFP_COMMON \
require((P.VU.vsew == e16 && p->get_isa().get_zvf()) || \
(P.VU.vsew == e32 && p->get_isa().get_zvd())); \
@@ -1670,6 +1698,12 @@ VI_VX_ULOOP({ \
break; \
}; \
+// Applies the two-operand 16-bit softfloat routine <op>, using the bf16_
+// variant when P.VU.altfmt is set and the f16_ variant otherwise.
+#define VFP_OP_16(op, rs1, vs2) \
+ (P.VU.altfmt ? bf16_##op(rs1, vs2) : f16_##op(rs1, vs2))
+
+// Same format selection for the fused multiply-add: bf16_mulAdd when
+// P.VU.altfmt is set, f16_mulAdd otherwise.
+#define VFP_MULADD_16(rs1, vs2, vd) \
+ (P.VU.altfmt ? bf16_mulAdd(rs1, vs2, vd) : f16_mulAdd(rs1, vs2, vd))
+
#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
@@ -1732,6 +1766,7 @@ VI_VX_ULOOP({ \
VI_VFP_LOOP_CMP_BASE \
switch (P.VU.vsew) { \
case e16: { \
+ require_zvfbfa_or_zvfh; \
VFP_VF_CMP_PARAMS(16); \
BODY16; \
set_fp_exceptions; \
@@ -1755,14 +1790,24 @@ VI_VX_ULOOP({ \
}; \
VI_VFP_LOOP_CMP_END \
+// Extra extension check for the widening FP loops, keyed on P.VU.vsew:
+// e16 requires get_zvf() and e32 requires get_zvd(); other SEW values are
+// not checked here.
+// NOTE: expands to a bare if / else-if chain with no enclosing braces, so it
+// must not be used as the body of an unbraced if.
+#define VI_CHECK_VFP_WIDE \
+ if (P.VU.vsew == e16) \
+ require(p->get_isa().get_zvf()); \
+ else if (P.VU.vsew == e32) \
+ require(p->get_isa().get_zvd()); \
+
#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DSS(false); \
+ VI_CHECK_VFP_WIDE \
VI_VFP_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
+ require_zvfbfa_or_zvfh; \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
- float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
- float32_t rs1 = f16_to_f32(FRS1_H); \
+ float32_t vs2 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)) \
+ : f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
+ float32_t rs1 = P.VU.altfmt ? bf16_to_f32(FRS1_BF) \
+ : f16_to_f32(FRS1_H); \
BODY16; \
set_fp_exceptions; \
break; \
@@ -1803,12 +1848,16 @@ VI_VX_ULOOP({ \
#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DSS(true); \
+ VI_CHECK_VFP_WIDE \
VI_VFP_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
+ require_zvfbfa; \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
- float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
- float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
+ float32_t vs2 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<float16_t>(rs2_num, i)) \
+ : f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
+ float32_t vs1 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<float16_t>(rs1_num, i)) \
+ : f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
BODY16; \
set_fp_exceptions; \
break; \
@@ -1849,12 +1898,15 @@ VI_VX_ULOOP({ \
#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DDS(false); \
+ VI_CHECK_VFP_WIDE \
VI_VFP_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
+ require_zvfbfa; \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- float32_t rs1 = f16_to_f32(FRS1_H); \
+ float32_t rs1 = P.VU.altfmt ? bf16_to_f32(FRS1_BF) \
+ : f16_to_f32(FRS1_H); \
BODY16; \
set_fp_exceptions; \
break; \
@@ -1875,12 +1927,14 @@ VI_VX_ULOOP({ \
#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DDS(true); \
+ VI_CHECK_VFP_WIDE \
VI_VFP_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
+ float32_t vs1 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<bfloat16_t>(rs1_num, i)) \
+ : f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \
BODY16; \
set_fp_exceptions; \
break; \
@@ -1900,14 +1954,7 @@ VI_VX_ULOOP({ \
VI_VFP_LOOP_END
#define VI_VFP_LOOP_SCALE_BASE \
- require_fp; \
- require_vector(true); \
- require(STATE.frm->read() < 0x5); \
- reg_t vl = P.VU.vl->read(); \
- reg_t rd_num = insn.rd(); \
- reg_t UNUSED rs1_num = insn.rs1(); \
- reg_t rs2_num = insn.rs2(); \
- softfloat_roundingMode = STATE.frm->read(); \
+ VI_VFP_BASE; \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
@@ -1921,6 +1968,7 @@ VI_VX_ULOOP({ \
#define VI_VFP_CVT_INT_TO_FP(BODY16, BODY32, BODY64, sign) \
VI_CHECK_SSS(false); \
+ VI_NON_ALTFMT_INSN \
VI_VFP_COMMON \
switch (P.VU.vsew) { \
case e16: \
@@ -1945,6 +1993,7 @@ VI_VX_ULOOP({ \
#define VI_VFP_CVT_FP_TO_INT(BODY16, BODY32, BODY64, sign) \
VI_CHECK_SSS(false); \
+ VI_NON_ALTFMT_INSN \
VI_VFP_COMMON \
switch (P.VU.vsew) { \
case e16: \
@@ -1982,17 +2031,32 @@ VI_VX_ULOOP({ \
break; \
}
-#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \
+// FIXME
+#define VI_VFP_WCVT_OFP8_BF16_FP(BODY8, BODY16, CHECK8, CHECK16) \
VI_CHECK_DSS(false); \
switch (P.VU.vsew) { \
+ case e8: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(8, 16), CHECK8, BODY8); } \
+ break; \
case e16: \
- { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK16, BODY16); } \
break; \
default: \
require(0); \
break; \
}
+// Widening FP conversion that is only defined for SEW = e16: runs
+// VI_VFP_CVT_LOOP with CVT_FP_TO_FP_PARAMS(16, 32), CHECK and BODY.
+// Every other SEW reaches require(0).
+#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \
+VI_CHECK_DSS(false); \
+switch (P.VU.vsew) { \
+case e16: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \
+ break; \
+default: \
+ require(0); \
+ break; \
+}
+
#define VI_VFP_WCVT_INT_TO_FP(BODY8, BODY16, BODY32, \
CHECK8, CHECK16, CHECK32, \
sign) \
@@ -2043,6 +2107,42 @@ VI_VX_ULOOP({ \
break; \
}
+// Narrowing FP conversion from 32-bit sources to 8-bit results. Only
+// SEW = e8 is accepted (CVT_FP_TO_FP_PARAMS(32, 8)); any other SEW reaches
+// require(0). Operand legality is delegated to VI_CHECK_SQS(false).
+#define VI_VFP_NCVT_FP_TO_OFP8(BODY, CHECK) \
+ VI_CHECK_SQS(false); \
+ switch (P.VU.vsew) { \
+ case e8: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 8), CHECK, BODY); } \
+ break; \
+ default: \
+ require(0); \
+ break; \
+ }
+
+// Narrowing FP conversion with two accepted SEW values:
+//   e8  -> 16-bit source to 8-bit result, using CHECK16 / BODY16
+//   e16 -> 32-bit source to 16-bit result, using CHECK32 / BODY32
+// Any other SEW reaches require(0).
+#define VI_VFP_NCVT_FP_BF16_OFP8(BODY16, BODY32, CHECK16, CHECK32) \
+ VI_CHECK_SDS(false); \
+ switch (P.VU.vsew) { \
+ case e8: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 8), CHECK16, BODY16); } \
+ break; \
+ case e16: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK32, BODY32); } \
+ break; \
+ default: \
+ require(0); \
+ break; \
+ }
+
+// Narrowing conversion from 16-bit sources to 8-bit results; only
+// SEW = e8 is accepted. Whether the result saturates is decided by the
+// BODY supplied by the caller, not by this macro.
+#define VI_VFP_NCVT_SAT_BF16_TO_OFP8(BODY, CHECK) \
+ VI_CHECK_SDS(false); \
+ switch (P.VU.vsew) { \
+ case e8: \
+ { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 8), CHECK, BODY); } \
+ break; \
+ default: \
+ require(0); \
+ break; \
+ }
+
#define VI_VFP_NCVT_BF16_TO_FP(BODY, CHECK) \
VI_CHECK_SDS(false); \
switch (P.VU.vsew) { \
@@ -2089,6 +2189,78 @@ VI_VX_ULOOP({ \
break; \
}
+// Legality checks performed before a ZVLDOT_*_LOOP:
+//  - the vector unit is enabled and vstart is 0
+//  - vs1 and vs2 are aligned to LMUL
+//  - require_vm holds
+//  - vd, treated as a single register, does not overlap the vs1 or vs2
+//    register groups
+// NOTE(review): the `widen` argument is not referenced by the expansion.
+// The last statement has no trailing semicolon; the call site supplies it.
+#define ZVLDOT_INIT(widen) \
+ require_vector(true); \
+ require(P.VU.vstart->read() == 0); \
+ require_align(insn.rs1(), P.VU.vflmul); \
+ require_align(insn.rs2(), P.VU.vflmul); \
+ require_vm; \
+ require_noover(insn.rd(), 1, insn.rs1(), P.VU.vflmul); \
+ require_noover(insn.rd(), 1, insn.rs2(), P.VU.vflmul)
+
+// Legality checks and shared locals for a ZVBDOT_*_LOOP. Declares, in the
+// expanding scope:
+//   vd_eew  - destination element width, SEW * widen
+//   vd_emul - max(1, 8 * vd_eew / VLEN), used as the register-group size of vd
+//   vs2     - rs2 with its low three bits cleared (base of an 8-register group)
+//   ci      - (low three bits of rs2) * 8, the first destination element index
+// Requires: vector unit enabled, vstart == 0, LMUL == 1,
+// ci * vd_eew < VLEN, vd aligned to vd_emul, require_vm, and no overlap
+// between the vd group and either vs1 (one register) or the 8 registers
+// starting at vs2.
+// The last statement has no trailing semicolon; the call site supplies it.
+#define ZVBDOT_INIT(widen) \
+ require_vector(true); \
+ unsigned vd_eew = P.VU.vsew * (widen); \
+ unsigned vd_emul = std::max(1U, unsigned((8 * vd_eew) / P.VU.VLEN)); \
+ unsigned vs2 = insn.rs2() & ~7; \
+ unsigned ci = (insn.rs2() & 7) * 8; \
+ require(P.VU.vstart->read() == 0); \
+ require(P.VU.vflmul == 1); \
+ require(ci * vd_eew < P.VU.VLEN); \
+ require_align(insn.rd(), vd_emul); \
+ require_vm; \
+ require_noover(insn.rd(), vd_emul, insn.rs1(), 1); \
+ require_noover(insn.rd(), vd_emul, vs2, 8)
+
+// Folds the pairs (a[i], b[i]) into the accumulator c, in ascending index
+// order, by repeatedly applying c = macc(a[i], b[i], c). The trip count is
+// a.size(), so b must hold at least as many elements as a.
+template<typename a_t, typename b_t, typename c_t>
+c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_t c, std::function<c_t(a_t, b_t, c_t)> macc)
+{
+  const size_t count = a.size();
+  size_t idx = 0;
+  while (idx != count) {
+    c = macc(a[idx], b[idx], c);
+    ++idx;
+  }
+  return c;
+}
+
+// Copies vl elements of vs1 and vs2 into two std::vectors (pre-filled with
+// a_t() / b_t()), then stores dot(a, b, acc) into element 0 of vd, where acc
+// is the previous value of that element. Iterations for which
+// VI_LOOP_ELEMENT_SKIP() skips the rest of the body leave the pre-filled
+// value in place (presumably masked-off elements; the macro is defined
+// elsewhere).
+// The last statement has no trailing semicolon; the call site supplies it.
+#define ZVLDOT_LOOP(a_t, b_t, c_t, dot) \
+ std::vector<a_t> a(P.VU.vl->read(), a_t()); \
+ std::vector<b_t> b(P.VU.vl->read(), b_t()); \
+ for (reg_t i = 0; i < a.size(); i++) { \
+ VI_LOOP_ELEMENT_SKIP(); \
+ a[i] = P.VU.elt<a_t>(insn.rs1(), i); \
+ b[i] = P.VU.elt<b_t>(insn.rs2(), i); \
+ } \
+ auto& acc = P.VU.elt<c_t>(insn.rd(), 0, true); \
+ acc = dot(a, b, acc)
+
+// Builds `dot` by binding the per-element callable macc into
+// generic_dot_product<a_t, b_t, c_t>, then runs ZVLDOT_LOOP with it.
+// Declares a local named `dot` in the expanding scope.
+#define ZVLDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) \
+ auto dot = std::bind(generic_dot_product<a_t, b_t, c_t>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, macc); \
+ ZVLDOT_LOOP(a_t, b_t, c_t, dot)
+
+// ZVLDOT_GENERIC_LOOP with a plain multiply-accumulate: both operands are
+// converted to the accumulator type before c + a * b is evaluated.
+// Declares a local named `macc` in the expanding scope.
+#define ZVLDOT_SIMPLE_LOOP(a_t, b_t, c_t) \
+ auto macc = [](auto a, auto b, auto c) { return c + decltype(c)(a) * decltype(c)(b); }; \
+ ZVLDOT_GENERIC_LOOP(a_t, b_t, c_t, macc)
+
+// Runs eight dot products, one per register vs2 + idx (idx = 0..7). For each
+// one, vl elements of vs1 and of register vs2 + idx are copied into
+// std::vectors and dot(a, b, acc) is stored into element ci + idx of vd,
+// where acc is the previous value of that element.
+// Relies on `ci` and `vs2` being in scope (both are declared by ZVBDOT_INIT).
+// `i` is declared before VI_LOOP_ELEMENT_SKIP(), which can skip the
+// iteration (presumably based on the mask bit for element i; the macro is
+// defined elsewhere).
+#define ZVBDOT_LOOP(a_t, b_t, c_t, dot) \
+ for (reg_t idx = 0; idx < 8; idx++) { \
+ reg_t i = ci + idx; \
+ VI_LOOP_ELEMENT_SKIP(); \
+ std::vector<a_t> a(P.VU.vl->read(), a_t()); \
+ std::vector<b_t> b(P.VU.vl->read(), b_t()); \
+ for (reg_t k = 0; k < a.size(); k++) { \
+ a[k] = P.VU.elt<a_t>(insn.rs1(), k); \
+ b[k] = P.VU.elt<b_t>(vs2 + idx, k); \
+ } \
+ auto& acc = P.VU.elt<c_t>(insn.rd(), i, true); \
+ acc = dot(a, b, acc); \
+ }
+
+// Builds `dot` by binding the per-element callable macc into
+// generic_dot_product<a_t, b_t, c_t>, then runs ZVBDOT_LOOP with it.
+// Declares a local named `dot` in the expanding scope.
+#define ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) \
+ auto dot = std::bind(generic_dot_product<a_t, b_t, c_t>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, macc); \
+ ZVBDOT_LOOP(a_t, b_t, c_t, dot)
+
+// ZVBDOT_GENERIC_LOOP with a plain multiply-accumulate: both operands are
+// converted to the accumulator type before c + a * b is evaluated.
+// Declares a local named `macc` in the expanding scope.
+#define ZVBDOT_SIMPLE_LOOP(a_t, b_t, c_t) \
+ auto macc = [](auto a, auto b, auto c) { return c + decltype(c)(a) * decltype(c)(b); }; \
+ ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc)
+
#define P_SET_OV(ov) \
if (ov) P.VU.vxsat->write(1);
diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc
index 7c6633c..a7ba018 100644
--- a/riscv/vector_unit.cc
+++ b/riscv/vector_unit.cc
@@ -38,10 +38,36 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new
vlmax = (VLEN/vsew) * vflmul;
vta = extract64(newType, 6, 1);
vma = extract64(newType, 7, 1);
+ altfmt = extract64(newType, 8, 1);
+
+ bool ill_altfmt = true;
+ if (altfmt) {
+ if (p->extension_enabled(EXT_ZVQBDOT8I) && vsew == 8)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVQBDOT16I) && vsew == 16)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFQBDOT8F) && vsew == 8)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFWBDOT16BF) && vsew == 16)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVQLDOT8I) && vsew == 8)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVQLDOT16I) && vsew == 16)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFQLDOT8F) && vsew == 8)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFWLDOT16BF) && vsew == 16)
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFBFA) && (vsew == 16 || vsew == 8))
+ ill_altfmt = false;
+ else if (p->extension_enabled(EXT_ZVFOFP8MIN) && vsew == 8)
+ ill_altfmt = false;
+ }
vill = !(vflmul >= 0.125 && vflmul <= 8)
|| vsew > std::min(vflmul, 1.0f) * ELEN
- || (newType >> 8) != 0
+ || (newType >> 9) != 0
+ || (altfmt && ill_altfmt)
|| (rd == 0 && rs1 == 0 && old_vlmax != vlmax);
if (vill) {
@@ -64,94 +90,10 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new
}
vstart->write_raw(0);
- setvl_count++;
return vl->read();
}
-template<class T> T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write) {
- assert(vsew != 0);
- assert((VLEN >> 3)/sizeof(T) > 0);
- reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T));
- vReg += n / elts_per_reg;
- n = n % elts_per_reg;
-#ifdef WORDS_BIGENDIAN
- // "V" spec 0.7.1 requires lower indices to map to lower significant
- // bits when changing SEW, thus we need to index from the end on BE.
- n ^= elts_per_reg - 1;
-#endif
- reg_referenced[vReg] = 1;
-
- if (unlikely(p->get_log_commits_enabled() && is_write))
+void vectorUnit_t::log_elt_write_if_needed(reg_t vReg) const {
+ if (unlikely(p->get_log_commits_enabled()))
p->get_state()->log_reg_write[((vReg) << 4) | 2] = {0, 0};
-
- T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3));
- return regStart[n];
-}
-
-// The logic differences between 'elt()' and 'elt_group()' come from
-// the fact that, while 'elt()' requires that the element is fully
-// contained in a single vector register, the element group may span
-// multiple registers in a single register group (LMUL>1).
-//
-// Notes:
-// - We do NOT check that a single element - i.e., the T in the element
-// group type std::array<T, N> - fits within a single register, or that
-// T is smaller or equal to VSEW. Implementations of the instructions
-// sometimes use a different T than what the specification suggests.
-// Instructon implementations should 'require()' what the specification
-// dictates.
-// - We do NOT check that 'vReg' is a valid register group, or that
-// 'n+1' element groups fit in the register group 'vReg'. It is
-// the responsibility of the caller to validate those preconditions.
-template<typename EG> EG&
-vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) {
-#ifdef WORDS_BIGENDIAN
- fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n",
- stderr);
- abort();
-#endif
- using T = typename EG::value_type;
- constexpr std::size_t N = std::tuple_size<EG>::value;
- assert(N > 0);
-
- assert(vsew != 0);
- constexpr reg_t elt_group_size = N * sizeof(T);
- const reg_t reg_group_size = (VLEN >> 3) * vflmul;
- assert(((n + 1) * elt_group_size) <= reg_group_size);
-
- const reg_t start_byte = n * elt_group_size;
- const reg_t bytes_per_reg = VLEN >> 3;
-
- // Inclusive first/last register indices.
- const reg_t reg_first = vReg + start_byte / bytes_per_reg;
- const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg;
-
- // Element groups per register groups
- for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) {
- reg_referenced[vidx] = 1;
-
- if (unlikely(p->get_log_commits_enabled() && is_write)) {
- p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0};
- }
- }
-
- return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte);
}
-
-template signed char& vectorUnit_t::elt<signed char>(reg_t, reg_t, bool);
-template short& vectorUnit_t::elt<short>(reg_t, reg_t, bool);
-template int& vectorUnit_t::elt<int>(reg_t, reg_t, bool);
-template long& vectorUnit_t::elt<long>(reg_t, reg_t, bool);
-template long long& vectorUnit_t::elt<long long>(reg_t, reg_t, bool);
-template uint8_t& vectorUnit_t::elt<uint8_t>(reg_t, reg_t, bool);
-template uint16_t& vectorUnit_t::elt<uint16_t>(reg_t, reg_t, bool);
-template uint32_t& vectorUnit_t::elt<uint32_t>(reg_t, reg_t, bool);
-template uint64_t& vectorUnit_t::elt<uint64_t>(reg_t, reg_t, bool);
-template float16_t& vectorUnit_t::elt<float16_t>(reg_t, reg_t, bool);
-template float32_t& vectorUnit_t::elt<float32_t>(reg_t, reg_t, bool);
-template float64_t& vectorUnit_t::elt<float64_t>(reg_t, reg_t, bool);
-
-template EGU32x4_t& vectorUnit_t::elt_group<EGU32x4_t>(reg_t, reg_t, bool);
-template EGU32x8_t& vectorUnit_t::elt_group<EGU32x8_t>(reg_t, reg_t, bool);
-template EGU64x4_t& vectorUnit_t::elt_group<EGU64x4_t>(reg_t, reg_t, bool);
-template EGU8x16_t& vectorUnit_t::elt_group<EGU8x16_t>(reg_t, reg_t, bool);
diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h
index 0e80618..88d4399 100644
--- a/riscv/vector_unit.h
+++ b/riscv/vector_unit.h
@@ -87,26 +87,87 @@ using EGU8x16_t = std::array<uint8_t, 16>;
class vectorUnit_t
{
public:
- processor_t* p;
- void *reg_file;
- char reg_referenced[NVPR];
- int setvl_count;
- reg_t vlmax;
- reg_t vlenb;
- csr_t_p vxsat;
- vector_csr_t_p vxrm, vstart, vl, vtype;
- reg_t vma, vta;
- reg_t vsew;
- float vflmul;
- reg_t ELEN, VLEN;
- bool vill;
- bool vstart_alu;
+ processor_t* p = nullptr;
+ void *reg_file = nullptr;
+ int setvl_count = 0;
+ reg_t vlmax = 0;
+ reg_t vlenb = 0;
+ csr_t_p vxsat = 0;
+ vector_csr_t_p vxrm = 0, vstart = 0, vl = 0, vtype = 0;
+ reg_t vma = 0, vta = 0;
+ reg_t vsew = 0;
+ float vflmul = 0;
+ reg_t altfmt = 0;
+ reg_t ELEN = 0, VLEN = 0;
+ bool vill = false;
+ bool vstart_alu = false;
// vector element for various SEW
- template<class T> T& elt(reg_t vReg, reg_t n, bool is_write = false);
+ template<typename T> T& elt(reg_t vReg, reg_t n, bool is_write = false) {
+ assert(vsew != 0);
+ assert((VLEN >> 3)/sizeof(T) > 0);
+ reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T));
+ vReg += n / elts_per_reg;
+ n = n % elts_per_reg;
+#ifdef WORDS_BIGENDIAN
+ // "V" spec 0.7.1 requires lower indices to map to lower significant
+ // bits when changing SEW, thus we need to index from the end on BE.
+ n ^= elts_per_reg - 1;
+#endif
+ if (is_write)
+ log_elt_write_if_needed(vReg);
+
+ T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3));
+ return regStart[n];
+ }
+
// vector element group access, where EG is a std::array<T, N>.
+ // The logic differences between 'elt()' and 'elt_group()' come from
+ // the fact that, while 'elt()' requires that the element is fully
+ // contained in a single vector register, the element group may span
+ // multiple registers in a single register group (LMUL>1).
+ //
+ // Notes:
+ // - We do NOT check that a single element - i.e., the T in the element
+ // group type std::array<T, N> - fits within a single register, or that
+ // T is smaller or equal to VSEW. Implementations of the instructions
+ // sometimes use a different T than what the specification suggests.
+ // Instruction implementations should 'require()' what the specification
+ // dictates.
+ // - We do NOT check that 'vReg' is a valid register group, or that
+ // 'n+1' element groups fit in the register group 'vReg'. It is
+ // the responsibility of the caller to validate those preconditions.
+
template<typename EG> EG&
- elt_group(reg_t vReg, reg_t n, bool is_write = false);
+ elt_group(reg_t vReg, reg_t n, bool is_write = false) {
+#ifdef WORDS_BIGENDIAN
+ fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n",
+ stderr);
+ abort();
+#endif
+ using T = typename EG::value_type;
+ constexpr std::size_t N = std::tuple_size<EG>::value;
+ assert(N > 0);
+
+ assert(vsew != 0);
+ constexpr reg_t elt_group_size = N * sizeof(T);
+ const reg_t reg_group_size = (VLEN >> 3) * vflmul;
+ assert(((n + 1) * elt_group_size) <= reg_group_size);
+
+ const reg_t start_byte = n * elt_group_size;
+ const reg_t bytes_per_reg = VLEN >> 3;
+
+ // Inclusive first/last register indices.
+ const reg_t reg_first = vReg + start_byte / bytes_per_reg;
+ const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg;
+
+ // Element groups per register groups
+ for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx)
+ if (is_write)
+ log_elt_write_if_needed(vidx);
+
+ return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte);
+ }
bool mask_elt(reg_t vReg, reg_t n)
{
@@ -119,31 +180,15 @@ public:
e = (e & ~(1U << (n % 8))) | (value << (n % 8));
}
+private:
+
+ void log_elt_write_if_needed(reg_t vReg) const;
+
public:
void reset();
- vectorUnit_t():
- p(0),
- reg_file(0),
- reg_referenced{0},
- setvl_count(0),
- vlmax(0),
- vlenb(0),
- vxsat(0),
- vxrm(0),
- vstart(0),
- vl(0),
- vtype(0),
- vma(0),
- vta(0),
- vsew(0),
- vflmul(0),
- ELEN(0),
- VLEN(0),
- vill(false),
- vstart_alu(false) {
- }
+ vectorUnit_t() {}
~vectorUnit_t() {
free(reg_file);
diff --git a/riscv/zvbdot.h b/riscv/zvbdot.h
new file mode 100644
index 0000000..67a204b
--- /dev/null
+++ b/riscv/zvbdot.h
@@ -0,0 +1,59 @@
+#ifndef _RISCV_ZVBDOT_H
+#define _RISCV_ZVBDOT_H
+
+#include "bulknormdot.h"
+#include <vector>
+#include <algorithm>
+
+// Adds a and b under softfloat_round_odd, whatever rounding mode is
+// currently selected. The caller's rounding mode is restored on return, and
+// only the overflow and invalid flags raised by the addition are merged into
+// the caller's accumulated exception flags.
+static inline float32_t f32_add_odd(float32_t a, float32_t b)
+{
+  const auto saved_rm = softfloat_roundingMode;
+  const auto saved_flags = softfloat_exceptionFlags;
+  const auto reported = softfloat_flag_overflow | softfloat_flag_invalid;
+
+  // Start from a clean flag state so the flags of this addition can be
+  // told apart from the ones already accumulated.
+  softfloat_exceptionFlags = 0;
+  softfloat_roundingMode = softfloat_round_odd;
+  auto sum = f32_add(a, b);
+
+  // On overflow, step the bit pattern up by one (FLT_MAX -> INF).
+  if ((softfloat_exceptionFlags & softfloat_flag_overflow) != 0)
+    sum.v++;
+
+  const auto raised = softfloat_exceptionFlags & reported;
+  softfloat_exceptionFlags = saved_flags | raised;
+  softfloat_roundingMode = saved_rm;
+
+  return sum;
+}
+
+// Computes the dot product of the bf16 bit patterns in a and b with
+// bulk_norm_dot_bf16 and adds it to the f32 accumulator c via f32_add_odd.
+// The exception flags reported by the bulk dot product are OR-ed into
+// softfloat_exceptionFlags.
+//
+// The second DotConfig argument is int_log2(n) plus one when n is not a
+// power of two (presumably ceil(log2(n)); int_log2 is defined elsewhere).
+static inline float32_t zvfwbdot16bf_dot_acc(const std::vector<uint16_t>& a, const std::vector<uint16_t>& b, float32_t c)
+{
+  std::vector<bf16_t> fa(a.size());
+  std::transform(a.begin(), a.end(), fa.begin(), [](auto f) { return f; });
+
+  std::vector<bf16_t> fb(b.size());
+  std::transform(b.begin(), b.end(), fb.begin(), [](auto f) { return f; });
+
+  DotConfig cfg(a.size(), int_log2(a.size()) + ((a.size() & (a.size() - 1)) != 0));
+  // data() is well defined on an empty vector, whereas &v[0] is not.
+  auto res = bulk_norm_dot_bf16(cfg, fa.data(), fb.data());
+  softfloat_exceptionFlags |= res.flags;
+  return f32_add_odd(f32(res.out), c);
+}
+
+// Computes the dot product of the 8-bit FP bit patterns in a and b, which
+// are interpreted as element types A and B, with bulk_norm_dot_ofp8 and adds
+// it to the f32 accumulator c via f32_add_odd. The exception flags reported
+// by the bulk dot product are OR-ed into softfloat_exceptionFlags.
+//
+// The second DotConfig argument is int_log2(n) plus one when n is not a
+// power of two (presumably ceil(log2(n)); int_log2 is defined elsewhere).
+template<typename A, typename B>
+float32_t zvfqbdot8f_dot_acc(const std::vector<uint8_t>& a, const std::vector<uint8_t>& b, float32_t c)
+{
+  std::vector<A> fa(a.size());
+  std::transform(a.begin(), a.end(), fa.begin(), [](auto f) { return f; });
+
+  std::vector<B> fb(b.size());
+  std::transform(b.begin(), b.end(), fb.begin(), [](auto f) { return f; });
+
+  DotConfig cfg(a.size(), int_log2(a.size()) + ((a.size() & (a.size() - 1)) != 0));
+  // data() is well defined on an empty vector, whereas &v[0] is not.
+  auto res = bulk_norm_dot_ofp8(cfg, fa.data(), fb.data());
+  softfloat_exceptionFlags |= res.flags;
+  return f32_add_odd(f32(res.out), c);
+}
+
+#endif
diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h
index f094629..e96e0a8 100644
--- a/riscv/zvk_ext_macros.h
+++ b/riscv/zvk_ext_macros.h
@@ -13,6 +13,14 @@
// Predicate Macros
//
+// Ensures that the ZVKB extension (vector crypto bitmanip subset) is present,
+// and the vector unit is enabled and in a valid state.
+// The vector-unit check runs before the extension check. The body is wrapped
+// in do { } while (0) so the expansion behaves as a single statement.
+#define require_zvkb \
+ do { \
+ require_vector(true); \
+ require_extension(EXT_ZVKB); \
+ } while (0)
+
// Ensures that the ZVBB extension (vector crypto bitmanip) is present,
// and the vector unit is enabled and in a valid state.
#define require_zvbb \
@@ -86,6 +94,32 @@
// (LMUL * VLEN) <= EGW
#define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul))
+// Ensures that a register index is aligned to EMUL, evaluated as
+// EGW / VLEN.
+// The check is only performed when this value is greater than one (no index
+// alignment is required for a fractional EMUL).
+//
+// EGW      - element group width in bits
+// VREG_NUM - vector register index to check
+//
+// The ratio is computed in floating point so that a fractional EMUL is
+// represented as such instead of being truncated by an integer division,
+// and both arguments are parenthesized so that expression arguments expand
+// safely.
+#define require_vreg_align_eglmul(EGW, VREG_NUM) \
+  do { \
+    const float vfeglmul = (float)(EGW) / P.VU.VLEN; \
+    if (vfeglmul > 1) { \
+      require_align((VREG_NUM), vfeglmul); \
+    } \
+  } while (0)
+
+// Same check applied to the vs2 operand of the current instruction.
+#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul((EGW), insn.rs2())
+
+// Ensures that rs2 and rd do not overlap, assuming rd encodes an LMUL-wide
+// vector register group and rs2 encodes a vs2_EMUL = ceil(EGW / VLEN) vector
+// register group.
+// Assumption: LMUL >= vs2_EMUL, which is enforced independently through
+// require_egw_fits.
+//
+// Both register indices are divided by the vd group size (1 for a fractional
+// LMUL) and the resulting group numbers must differ. The arguments are
+// parenthesized so that expression arguments are divided as a whole.
+#define require_noover_eglmul(vd, vs2) \
+  do { \
+    const int vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \
+    const int aligned_vd = (vd) / vd_emul; \
+    const int aligned_vs2 = (vs2) / vd_emul; \
+    require(aligned_vd != aligned_vs2); \
+  } while (0)
+
// Checks that the vector unit state (vtype and vl) can be interpreted
// as element groups with EEW=32, EGS=4 (four 32-bits elements per group),
// for an effective element group width of EGW=128 bits.
diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h
index db705c7..d94ddc2 100644
--- a/riscv/zvkned_ext_macros.h
+++ b/riscv/zvkned_ext_macros.h
@@ -2,6 +2,7 @@
// the RISC-V Zvkned extension (vector AES single round).
#include "insns/aes_common.h"
+#include "zvk_ext_macros.h"
#ifndef RISCV_ZVKNED_EXT_MACROS_H_
#define RISCV_ZVKNED_EXT_MACROS_H_
@@ -9,16 +10,22 @@
// vaes*.vs instruction constraints:
// - Zvkned is enabled
// - EGW (128) <= LMUL * VLEN
+// - vd is LMUL aligned
+// - vs2 is ceil(EGW / VLEN) aligned
// - vd and vs2 cannot overlap
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
#define require_vaes_vs_constraints \
do { \
+ const uint32_t EGS = 4; \
require_zvkned; \
+ require(P.VU.vl->read() % EGS == 0); \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
- require(insn.rd() != insn.rs2()); \
+ require_align(insn.rd(), P.VU.vflmul); \
+ require_vs2_align_eglmul(128); \
+ require_noover_eglmul(insn.rd(), insn.rs2()); \
} while (false)
// vaes*.vv instruction constraints. Those are the same as the .vs ones,
@@ -30,17 +37,24 @@
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
#define require_vaes_vv_constraints \
do { \
+ const uint32_t EGS = 4; \
require_zvkned; \
+ require(P.VU.vl->read() % EGS == 0); \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
+ VI_CHECK_SSS(false) \
} while (false)
// vaeskf*.vi instruction constraints. Those are the same as the .vv ones.
#define require_vaeskf_vi_constraints \
do { \
+ const uint32_t EGS = 4; \
require_zvkned; \
+ require(P.VU.vstart->read() % EGS == 0); \
+ require(P.VU.vl->read() % EGS == 0); \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
+ VI_CHECK_SSS(false) \
} while (false)
#define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0))
diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h
index b50818b..98236b0 100644
--- a/riscv/zvknh_ext_macros.h
+++ b/riscv/zvknh_ext_macros.h
@@ -15,6 +15,7 @@
// macros.
#define require_vsha2_common_constraints \
do { \
+ VI_CHECK_SSS(true) \
require(P.VU.vsew == 32 || P.VU.vsew == 64); \
require(insn.rd() != insn.rs1()); \
require(insn.rd() != insn.rs2()); \
diff --git a/riscv/zvksed_ext_macros.h b/riscv/zvksed_ext_macros.h
index 46e399b..3ffa272 100644
--- a/riscv/zvksed_ext_macros.h
+++ b/riscv/zvksed_ext_macros.h
@@ -16,9 +16,12 @@
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
#define require_vsm4_constraints \
do { \
+ const uint32_t EGS = 4; \
require_zvksed; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
+ require(P.VU.vstart->read() % EGS == 0); \
+ require(P.VU.vl->read() % EGS == 0); \
} while (false)
// Returns a uint32_t value constructed from the 4 bytes (uint8_t)
diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h
index 71c5a09..c4549da 100644
--- a/riscv/zvksh_ext_macros.h
+++ b/riscv/zvksh_ext_macros.h
@@ -16,9 +16,12 @@
// is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
#define require_vsm3_constraints \
do { \
+ const uint32_t EGS = 8; \
require_zvksh; \
require(P.VU.vsew == 32); \
require_egw_fits(256); \
+ require(P.VU.vstart->read() % EGS == 0); \
+ require(P.VU.vl->read() % EGS == 0); \
require(insn.rd() != insn.rs2()); \
} while (false)
diff --git a/softfloat/bf16_to_e4m3.c b/softfloat/bf16_to_e4m3.c
new file mode 100644
index 0000000..032f21b
--- /dev/null
+++ b/softfloat/bf16_to_e4m3.c
@@ -0,0 +1,48 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the bfloat16 value `a' to E4M3 by first converting it to a 32-bit
+| float and then narrowing that value with f32_to_e4m3, passing
+| `saturationMode' through unchanged.
+*----------------------------------------------------------------------------*/
+e4m3_t bf16_to_e4m3( bfloat16_t a, bool saturationMode )
+{
+    const float32_t widened = bf16_to_f32( a );
+
+    return f32_to_e4m3( widened, saturationMode );
+
+}
+
diff --git a/softfloat/bf16_to_e5m2.c b/softfloat/bf16_to_e5m2.c
new file mode 100644
index 0000000..3d30ec9
--- /dev/null
+++ b/softfloat/bf16_to_e5m2.c
@@ -0,0 +1,48 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the bfloat16 value `a' to E5M2 by first converting it to a 32-bit
+| float and then narrowing that value with f32_to_e5m2, passing
+| `saturationMode' through unchanged.
+*----------------------------------------------------------------------------*/
+e5m2_t bf16_to_e5m2( bfloat16_t a, bool saturationMode )
+{
+    const float32_t widened = bf16_to_f32( a );
+
+    return f32_to_e5m2( widened, saturationMode );
+
+}
+
diff --git a/softfloat/e4m3_to_bf16.c b/softfloat/e4m3_to_bf16.c
new file mode 100644
index 0000000..d1ca770
--- /dev/null
+++ b/softfloat/e4m3_to_bf16.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the E4M3 value `a' to the bfloat16 bit layout.
+|
+| The input is split into sign, exponent and fraction fields.  The encoding
+| with exponent 0xF and fraction 0x7 is treated as NaN and translated through
+| the commonNaN helpers.  A zero exponent with a zero fraction yields a signed
+| zero.  A zero exponent with a nonzero fraction is first normalized with
+| softfloat_normSubnormalE4M3Sig.  Every remaining value is packed with the
+| exponent offset by 0x78 and the fraction shifted left by 4.
+*----------------------------------------------------------------------------*/
+float16_t e4m3_to_bf16( float8_t a )
+{
+    union ui8_f8 in;
+    union ui16_f16 out;
+    uint_fast8_t bits;
+    bool isNeg;
+    int_fast8_t e;
+    uint_fast8_t m;
+    struct commonNaN nanInfo;
+    struct exp8_sig8 norm;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    in.f = a;
+    bits = in.ui;
+    isNeg = signE4M3UI( bits );
+    e = expE4M3UI( bits );
+    m = fracE4M3UI( bits );
+    /*------------------------------------------------------------------------
+    | Classify the input and build the result bits.
+    *------------------------------------------------------------------------*/
+    if ( (e == 0xF) && (m == 0x7) ) {
+        /* NaN */
+        softfloat_E4M3UIToCommonNaN( bits, &nanInfo );
+        out.ui = softfloat_commonNaNToBF16UI( &nanInfo );
+    } else if ( e || m ) {
+        if ( ! e ) {
+            /* subnormal: normalize before re-biasing */
+            norm = softfloat_normSubnormalE4M3Sig( m );
+            e = norm.exp - 1;
+            m = norm.sig;
+        }
+        out.ui = packToBF16UI( isNeg, e + 0x78, (uint_fast16_t) m<<4 );
+    } else {
+        /* zero */
+        out.ui = packToBF16UI( isNeg, 0, 0 );
+    }
+    return out.f;
+}
+
diff --git a/softfloat/e4m3_to_f16.c b/softfloat/e4m3_to_f16.c
new file mode 100644
index 0000000..d086b3c
--- /dev/null
+++ b/softfloat/e4m3_to_f16.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the E4M3 value `a' to binary16.
+|
+| The encoding with exponent 0xF and fraction 0x7 is treated as NaN and
+| translated through the commonNaN helpers.  A zero exponent with a zero
+| fraction yields a signed zero.  A zero exponent with a nonzero fraction is
+| normalized with softfloat_normSubnormalE4M3Sig first.  All remaining values
+| are packed with the exponent offset by 0x8 and the fraction shifted left
+| by 7.
+*----------------------------------------------------------------------------*/
+float16_t e4m3_to_f16( float8_t a )
+{
+    union ui8_f8 in;
+    union ui16_f16 out;
+    uint_fast8_t bits;
+    bool isNeg;
+    int_fast8_t e;
+    uint_fast8_t m;
+    struct commonNaN nanInfo;
+    struct exp8_sig8 norm;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    in.f = a;
+    bits = in.ui;
+    isNeg = signE4M3UI( bits );
+    e = expE4M3UI( bits );
+    m = fracE4M3UI( bits );
+    /*------------------------------------------------------------------------
+    | NaN.
+    *------------------------------------------------------------------------*/
+    if ( (e == 0xF) && (m == 0x7) ) {
+        softfloat_E4M3UIToCommonNaN( bits, &nanInfo );
+        out.ui = softfloat_commonNaNToF16UI( &nanInfo );
+        return out.f;
+    }
+    /*------------------------------------------------------------------------
+    | Zero and subnormal inputs.
+    *------------------------------------------------------------------------*/
+    if ( ! e ) {
+        if ( ! m ) {
+            out.ui = packToF16UI( isNeg, 0, 0 );
+            return out.f;
+        }
+        norm = softfloat_normSubnormalE4M3Sig( m );
+        e = norm.exp - 1;
+        m = norm.sig;
+    }
+    /*------------------------------------------------------------------------
+    | Re-bias the exponent and widen the fraction.
+    *------------------------------------------------------------------------*/
+    out.ui = packToF16UI( isNeg, e + 0x8, (uint_fast16_t) m<<7 );
+    return out.f;
+}
+
diff --git a/softfloat/e5m2_to_bf16.c b/softfloat/e5m2_to_bf16.c
new file mode 100644
index 0000000..6b4db1f
--- /dev/null
+++ b/softfloat/e5m2_to_bf16.c
@@ -0,0 +1,98 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the E5M2 value `a' to the bfloat16 bit layout.
+|
+| An exponent field of 0x1F selects the special cases: a nonzero fraction is
+| a NaN, translated through the commonNaN helpers, and a zero fraction is an
+| infinity, packed with exponent 0xFF.  A zero exponent with a zero fraction
+| yields a signed zero; a zero exponent with a nonzero fraction is normalized
+| with softfloat_normSubnormalE5M2Sig.  All remaining values are packed with
+| the exponent offset by 0x70 and the fraction shifted left by 5.
+*----------------------------------------------------------------------------*/
+float16_t e5m2_to_bf16( float8_t a )
+{
+    union ui8_f8 in;
+    union ui16_f16 out;
+    uint_fast8_t bits;
+    bool isNeg;
+    int_fast8_t e;
+    uint_fast8_t m;
+    struct commonNaN nanInfo;
+    struct exp8_sig8 norm;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    in.f = a;
+    bits = in.ui;
+    isNeg = signE5M2UI( bits );
+    e = expE5M2UI( bits );
+    m = fracE5M2UI( bits );
+    /*------------------------------------------------------------------------
+    | Classify the input and build the result bits.
+    *------------------------------------------------------------------------*/
+    if ( e == 0x1F ) {
+        if ( m ) {
+            /* NaN */
+            softfloat_E5M2UIToCommonNaN( bits, &nanInfo );
+            out.ui = softfloat_commonNaNToBF16UI( &nanInfo );
+        } else {
+            /* Inf */
+            out.ui = packToBF16UI( isNeg, 0xFF, 0 );
+        }
+    } else if ( e || m ) {
+        if ( ! e ) {
+            /* subnormal */
+            /* TODO: a dedicated normalization helper might be unnecessary */
+            norm = softfloat_normSubnormalE5M2Sig( m );
+            e = norm.exp - 1;
+            m = norm.sig;
+        }
+        out.ui = packToBF16UI( isNeg, e + 0x70, (uint_fast16_t) m<<5 );
+    } else {
+        /* zero */
+        out.ui = packToBF16UI( isNeg, 0, 0 );
+    }
+    return out.f;
+}
+
diff --git a/softfloat/e5m2_to_f16.c b/softfloat/e5m2_to_f16.c
new file mode 100644
index 0000000..d0d9acf
--- /dev/null
+++ b/softfloat/e5m2_to_f16.c
@@ -0,0 +1,98 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the E5M2 value `a' to binary16.
+|
+| An exponent field of 0x1F selects the special cases: a nonzero fraction is
+| a NaN, translated through the commonNaN helpers, and a zero fraction is an
+| infinity.  Every other input (zero, subnormal or normal) is packed with its
+| exponent field unchanged and its fraction shifted left by 8.
+|
+| NOTE(review): the direct packing of subnormals relies on E5M2 and binary16
+| sharing the same exponent width and bias, so that an E5M2 subnormal is also
+| a binary16 subnormal and must not be normalized -- confirm against the
+| format definitions used by this project.
+*----------------------------------------------------------------------------*/
+float16_t e5m2_to_f16( float8_t a )
+{
+    union ui8_f8 uA;
+    uint_fast8_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint_fast8_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ;
+    union ui16_f16 uZ;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signE5M2UI( uiA );
+    exp = expE5M2UI( uiA );
+    frac = fracE5M2UI( uiA );
+    /*------------------------------------------------------------------------
+    | Inf and NaN use the all-ones exponent 0x1F (the same test that
+    | e5m2_to_bf16 uses); 0xF is an ordinary exponent value.
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            /* NaN */
+            softfloat_E5M2UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToF16UI( &commonNaN );
+        } else {
+            /* Inf */
+            uiZ = packToF16UI( sign, 0x1F, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Zero, subnormal and normal values: the exponent field carries over
+    | as-is and the fraction is widened.  Subnormals are deliberately not
+    | normalized, because a normalized exponent would fall below zero and
+    | wrap when packed into the binary16 exponent field.
+    *------------------------------------------------------------------------*/
+    uiZ = packToF16UI( sign, exp, (uint_fast16_t) frac<<8 );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+}
+
diff --git a/softfloat/f16_to_e4m3.c b/softfloat/f16_to_e4m3.c
new file mode 100644
index 0000000..d050d83
--- /dev/null
+++ b/softfloat/f16_to_e4m3.c
@@ -0,0 +1,89 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the binary16 value `a' to the E4M3 format.
+|
+| NaN inputs are translated through the commonNaN helpers.  An infinite input
+| becomes the encoding with exponent 0xF and fraction 0x6 (keeping the sign)
+| when `saturationMode' is true, and otherwise whatever
+| softfloat_commonNaNToE4M3UI produces.  A zero input yields a signed zero.
+| All other inputs are rounded and packed by softfloat_roundPackToE4M3, which
+| also receives `saturationMode'.
+*----------------------------------------------------------------------------*/
+e4m3_t f16_to_e4m3( float16_t a, bool saturationMode )
+{
+    union ui16_f16 uA;
+    uint_fast16_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint_fast16_t frac, frac8;
+    /* Zero-initialized because the Inf path below passes this object to
+       softfloat_commonNaNToE4M3UI without filling it in first. */
+    struct commonNaN commonNaN = { 0 };
+    uint_fast8_t uiZ;
+    union ui8_f8 uZ;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF16UI( uiA );
+    exp = expF16UI( uiA );
+    frac = fracF16UI( uiA );
+    /*------------------------------------------------------------------------
+    | NaN and Inf inputs.
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            /* NaN */
+            softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToE4M3UI( &commonNaN );
+        } else {
+            /* Inf: saturate to exponent 0xF / fraction 0x6, or produce a NaN */
+            uiZ = saturationMode
+                      ? packToE4M3UI( sign, 0xF, 0x6 )
+                      : softfloat_commonNaNToE4M3UI( &commonNaN );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Keep 3+4 fraction bits for rounding; the lowest bit is a sticky bit
+    | collecting the three bits shifted out.
+    *------------------------------------------------------------------------*/
+    frac8 = frac>>3 | ((frac & 0x7) != 0);
+    if ( ! (exp | frac8) ) {
+        /* zero */
+        uiZ = packToE4M3UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Add the implicit leading 1 to the fraction and shift exp by (15-7)+1.
+    *------------------------------------------------------------------------*/
+    return
+        softfloat_roundPackToE4M3(
+            sign, exp - 0x9, frac8 | 0x80, saturationMode );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+}
+
diff --git a/softfloat/f16_to_e5m2.c b/softfloat/f16_to_e5m2.c
new file mode 100644
index 0000000..deee4a0
--- /dev/null
+++ b/softfloat/f16_to_e5m2.c
@@ -0,0 +1,89 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the binary16 value `a' to the E5M2 format.
+|
+| NaN inputs are translated through the commonNaN helpers.  An infinite input
+| becomes the encoding with exponent 0x1E and fraction 0x3 (keeping the sign)
+| when `saturationMode' is true, matching what f32_to_e5m2 produces for the
+| same case, and an infinity otherwise.  A zero input yields a signed zero.
+| Subnormal inputs are normalized first, so that the implicit leading 1 added
+| before rounding is really present in the value.  All finite nonzero inputs
+| are rounded and packed by softfloat_roundPackToE5M2, which also receives
+| `saturationMode'.
+|
+| NOTE(review): the subnormal handling assumes softfloat_roundPackToE5M2
+| interprets (exp, sig) the same way for negative `exp' as it does for the
+| normal-number calls made here -- confirm against its definition.
+*----------------------------------------------------------------------------*/
+e5m2_t f16_to_e5m2( float16_t a, bool saturationMode )
+{
+    union ui16_f16 uA;
+    uint_fast16_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint_fast16_t frac, frac8;
+    struct commonNaN commonNaN;
+    uint_fast8_t uiZ;
+    union ui8_f8 uZ;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF16UI( uiA );
+    exp = expF16UI( uiA );
+    frac = fracF16UI( uiA );
+    /*------------------------------------------------------------------------
+    | NaN and Inf inputs.
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            /* NaN */
+            softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToE5M2UI( &commonNaN );
+        } else {
+            /* Inf: saturate to exponent 0x1E / fraction 0x3, or keep Inf */
+            uiZ = saturationMode
+                      ? packToE5M2UI( sign, 0x1E, 0x3 )
+                      : packToE5M2UI( sign, 0x1F, 0x0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Zero and subnormal inputs.  A subnormal has no implicit leading 1, so
+    | it is shifted left until bit 10 is set, lowering the exponent by one
+    | per shift, and that bit is then dropped from the fraction.
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            /* zero */
+            uiZ = packToE5M2UI( sign, 0, 0 );
+            goto uiZ;
+        }
+        exp = 1;
+        while ( ! (frac & 0x400) ) {
+            frac <<= 1;
+            --exp;
+        }
+        frac &= 0x3FF;
+    }
+    /*------------------------------------------------------------------------
+    | Keep 2+4 fraction bits for rounding; the lowest bit is a sticky bit
+    | collecting the four bits shifted out.
+    *------------------------------------------------------------------------*/
+    frac8 = frac>>4 | ((frac & 0xF) != 0);
+    /*------------------------------------------------------------------------
+    | Add the implicit leading 1 to the fraction and shift exp by 1.
+    *------------------------------------------------------------------------*/
+    return
+        softfloat_roundPackToE5M2(
+            sign, exp - 1, frac8 | 0x40, saturationMode );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+}
+
diff --git a/softfloat/f32_to_e4m3.c b/softfloat/f32_to_e4m3.c
new file mode 100644
index 0000000..2b96c66
--- /dev/null
+++ b/softfloat/f32_to_e4m3.c
@@ -0,0 +1,90 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the binary32 value `a' to the E4M3 format.
+|
+| NaN inputs are translated through the commonNaN helpers.  An infinite input
+| becomes the encoding with exponent 0xF and fraction 0x6 (keeping the sign)
+| when `saturationMode' is true, and otherwise whatever
+| softfloat_commonNaNToE4M3UI produces.  A zero input yields a signed zero.
+| All other inputs are rounded and packed by softfloat_roundPackToE4M3, which
+| also receives `saturationMode'.
+*----------------------------------------------------------------------------*/
+float8_t f32_to_e4m3( float32_t a, bool saturationMode )
+{
+    union ui32_f32 uA;
+    uint_fast32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    uint_fast16_t frac8;
+    /* Zero-initialized because the Inf path below passes this object to
+       softfloat_commonNaNToE4M3UI without filling it in first. */
+    struct commonNaN commonNaN = { 0 };
+    uint_fast8_t uiZ;
+    union ui8_f8 uZ;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    | NaN and Inf inputs.
+    *------------------------------------------------------------------------*/
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            /* NaN */
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToE4M3UI( &commonNaN );
+        } else {
+            /* Inf: saturate to exponent 0xF / fraction 0x6, or produce a NaN */
+            uiZ = saturationMode
+                      ? packToE4M3UI( sign, 0xF, 0x6 )
+                      : softfloat_commonNaNToE4M3UI( &commonNaN );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Keep 3+4 fraction bits for rounding; the lowest bit is a sticky bit
+    | collecting the sixteen bits shifted out.
+    *------------------------------------------------------------------------*/
+    frac8 = frac>>16 | ((frac & 0xFFFF) != 0);
+    if ( ! (exp | frac8) ) {
+        /* zero */
+        uiZ = packToE4M3UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    | Add the implicit leading 1 to the fraction and shift exp by (127-7)+1.
+    *------------------------------------------------------------------------*/
+    return
+        softfloat_roundPackToE4M3(
+            sign, exp - 121, frac8 | 0x80, saturationMode );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+}
+
diff --git a/softfloat/f32_to_e5m2.c b/softfloat/f32_to_e5m2.c
new file mode 100644
index 0000000..b03cf53
--- /dev/null
+++ b/softfloat/f32_to_e5m2.c
@@ -0,0 +1,91 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the binary32 value `a' to the E5M2 format.
+|
+| NaN inputs are translated through the commonNaN helpers.  An infinite input
+| becomes the encoding with exponent 0x1E and fraction 0x3 (keeping the sign)
+| when `saturationMode' is true, and an infinity otherwise.  A zero input
+| yields a signed zero.  All other inputs are rounded and packed by
+| softfloat_roundPackToE5M2, which also receives `saturationMode'.
+*----------------------------------------------------------------------------*/
+float8_t f32_to_e5m2( float32_t a, bool saturationMode )
+{
+    union ui32_f32 in;
+    union ui8_f8 out;
+    uint_fast32_t bits;
+    bool isNeg;
+    int_fast16_t e;
+    uint_fast32_t m;
+    uint_fast16_t m8;
+    struct commonNaN nanInfo;
+
+    /*------------------------------------------------------------------------
+    | Unpack the input fields.
+    *------------------------------------------------------------------------*/
+    in.f = a;
+    bits = in.ui;
+    isNeg = signF32UI( bits );
+    e = expF32UI( bits );
+    m = fracF32UI( bits );
+    /*------------------------------------------------------------------------
+    | NaN and Inf inputs.
+    *------------------------------------------------------------------------*/
+    if ( e == 0xFF ) {
+        if ( m ) {
+            /* NaN */
+            softfloat_f32UIToCommonNaN( bits, &nanInfo );
+            out.ui = softfloat_commonNaNToE5M2UI( &nanInfo );
+        } else if ( saturationMode ) {
+            /* Inf, saturated */
+            out.ui = packToE5M2UI( isNeg, 0x1E, 0x3 );
+        } else {
+            /* Inf */
+            out.ui = packToE5M2UI( isNeg, 0x1F, 0x0 );
+        }
+        return out.f;
+    }
+    /*------------------------------------------------------------------------
+    | Keep 2+4 fraction bits for rounding; the lowest bit is a sticky bit
+    | collecting the seventeen bits shifted out.
+    *------------------------------------------------------------------------*/
+    m8 = m>>17 | ((m & 0x1FFFF) != 0);
+    if ( e | m8 ) {
+        /* Add the implicit leading 1 and shift exp by (127-15)+1 */
+        return
+            softfloat_roundPackToE5M2(
+                isNeg, e - 113, m8 | 0x40, saturationMode );
+    }
+    /* zero */
+    out.ui = packToE5M2UI( isNeg, 0, 0 );
+    return out.f;
+}
+
diff --git a/softfloat/f32_to_i8.c b/softfloat/f32_to_i8.c
new file mode 100644
index 0000000..2b7fb0e
--- /dev/null
+++ b/softfloat/f32_to_i8.c
@@ -0,0 +1,58 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts the binary32 value `a' to an 8-bit signed integer.
+|
+| The conversion itself is delegated to f32_to_i32 with the same
+| `roundingMode' and `exact' arguments.  If the 32-bit result fits in
+| [INT8_MIN, INT8_MAX] it is returned as-is.  Otherwise the exception flags
+| are set to their value from before the f32_to_i32 call plus the invalid
+| flag (so any flags raised by f32_to_i32 are discarded), and
+| i8_fromPosOverflow or i8_fromNegOverflow is returned according to the
+| direction of the overflow.
+*----------------------------------------------------------------------------*/
+int_fast8_t f32_to_i8( float32_t a, uint_fast8_t roundingMode, bool exact )
+{
+    const uint_fast8_t flagsBefore = softfloat_exceptionFlags;
+    const int_fast32_t wide = f32_to_i32( a, roundingMode, exact );
+
+    if ( (INT8_MIN <= wide) && (wide <= INT8_MAX) ) {
+        return wide;
+    }
+    softfloat_exceptionFlags = flagsBefore | softfloat_flag_invalid;
+    return (wide > INT8_MAX) ? i8_fromPosOverflow : i8_fromNegOverflow;
+}
+
diff --git a/softfloat/f32_to_ui8.c b/softfloat/f32_to_ui8.c
new file mode 100644
index 0000000..8fdc8fc
--- /dev/null
+++ b/softfloat/f32_to_ui8.c
@@ -0,0 +1,54 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Converts `a' to an unsigned 8-bit integer by first converting to 32 bits
+| with f32_to_ui32 and then range-checking the result.  When the 32-bit result
+| exceeds UINT8_MAX, the exception flags are reset to their value on entry
+| plus `invalid' (discarding any flags the 32-bit conversion raised) and
+| ui8_fromPosOverflow is returned.
+*----------------------------------------------------------------------------*/
+uint_fast8_t f32_to_ui8( float32_t a, uint_fast8_t roundingMode, bool exact )
+{
+    const uint_fast8_t savedFlags = softfloat_exceptionFlags;
+    const uint_fast32_t wide = f32_to_ui32( a, roundingMode, exact );
+
+    /* In range: keep whatever flags the 32-bit conversion produced. */
+    if ( wide <= UINT8_MAX ) return wide;
+
+    softfloat_exceptionFlags = savedFlags | softfloat_flag_invalid;
+    return ui8_fromPosOverflow;
+}
+
diff --git a/softfloat/internals.h b/softfloat/internals.h
index f397ce5..e01da60 100644
--- a/softfloat/internals.h
+++ b/softfloat/internals.h
@@ -4,7 +4,7 @@
This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2025 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
+/* Overlays a float8_t with its raw 8-bit pattern; ui8_e4m3 and ui8_e5m2 are aliases of the same union. */
+union ui8_f8 { uint8_t ui; float8_t f; };
+typedef union ui8_f8 ui8_e4m3;
+typedef union ui8_f8 ui8_e5m2;
union ui16_bf16 { uint16_t ui; bfloat16_t f; };
union ui16_f16 { uint16_t ui; float16_t f; };
union ui32_f32 { uint32_t ui; float32_t f; };
@@ -85,6 +88,39 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
+
+/*
+ * OCP F8
+ * E4M3 |sign(1)|exp(4)|frac(3)|
+ * E5M2 |sign(1)|exp(5)|frac(2)|
+ *
+ * sign*UI extracts bit 7, exp*UI the exponent field and frac*UI the fraction
+ * field of an 8-bit pattern.  packTo*UI combines the three fields by
+ * addition, so a `sig' value wider than the fraction field carries into the
+ * exponent field.
+ */
+
+#define signF8UI( a ) ((bool) ((uint8_t) (a)>>7))
+#define signE4M3UI( a ) signF8UI( a )
+#define signE5M2UI( a ) signF8UI( a )
+#define expE4M3UI( a ) ((int_fast8_t) ((a)>>3) & 0xF)
+#define fracE4M3UI( a ) ((a) & 0x7)
+#define packToE4M3UI( sign, exp, sig ) (((uint8_t) (sign)<<7) + ((uint16_t) (exp)<<3) + (sig))
+#define expE5M2UI( a ) ((int_fast8_t) ((a)>>2) & 0x1F)
+#define fracE5M2UI( a ) ((a) & 0x3)
+#define packToE5M2UI( sign, exp, sig ) (((uint8_t) (sign)<<7) + ((uint16_t) (exp)<<2) + (sig))
+
+/*
+ * Special-value encodings (S = sign bit, ignored by the tests below):
+ * |     | E4M3       | E5M2                  |
+ * | Inf | N/A        | S.11111.00            |
+ * | NaN | S.1111.111 | S.11111.{01, 10, 11}  |
+ *
+ * E4M3: NaN is the single pattern with all exponent and fraction bits set.
+ * E5M2: the exponent field is bits 6..2 (mask 0x7C) and the fraction is
+ * bits 1..0 (mask 0x03); Inf has a zero fraction, NaN a nonzero one.
+ */
+#define isNaNE4M3UI( a ) ((~(a) & 0x7F) == 0)
+#define isInfE5M2UI( a ) (((a) & 0x7F) == 0x7C)
+#define isNaNE5M2UI( a ) ((((a) & 0x7C) == 0x7C) && ((a) & 0x03))
+
+/* Exponent/significand pair returned by the OFP8 subnormal normalizers. */
+struct exp8_sig8 { int_fast8_t exp; uint_fast8_t sig; };
+/* Normalize a subnormal fraction field: return the shifted significand and the matching exponent. */
+struct exp8_sig8 softfloat_normSubnormalE4M3Sig( uint_fast8_t );
+struct exp8_sig8 softfloat_normSubnormalE5M2Sig( uint_fast8_t );
+/* Round and pack (sign, exp, sig, saturationMode) into an OFP8 value using the global rounding mode. */
+float8_t softfloat_roundPackToE4M3( bool, int_fast16_t, uint_fast16_t, bool );
+float8_t softfloat_roundPackToE5M2( bool, int_fast16_t, uint_fast16_t, bool );
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F)
@@ -169,6 +205,28 @@ float64_t
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
+/*
+ * Field constants and accessors for BF16 values.
+ *   bf16 |sign(1)|exp(8)|frac(7)|
+ *   f32  |sign(1)|exp(8)|frac(23)|
+ * Both layouts have an 8-bit exponent, so BF16 reuses the F32 exponent
+ * width, mask and bias.
+ */
+#define F32_EXP_BITS 8
+#define F32_EXP_BIAS ((1 << (F32_EXP_BITS - 1)) - 1)
+#define F32_SIG_BITS 23
+#define F32_EXP_MASK (((uint32_t)1 << F32_EXP_BITS) - 1)
+#define F32_SIG_MASK (((uint32_t)1 << F32_SIG_BITS) - 1)
+#define BF16_SIG_BITS 7
+#define BF16_EXP_BIAS F32_EXP_BIAS
+#define BF16_IMPLICIT_ONE (1 << BF16_SIG_BITS)
+
+/*
+ * Accessors taking a value `n' whose raw 16-bit pattern is in member `v'.
+ * The argument is parenthesized so that non-trivial expressions such as
+ * `*p' expand to `(*p).v' rather than `*p.v'.
+ */
+#define SIG(n) ((n).v & 0x7F)
+#define EXP_T(n) (((n).v >> 7) & (F32_EXP_MASK))
+#define SIGN(n) ((n).v >> 15)
+#define SPECIAL(n) (EXP_T(n) == F32_EXP_MASK)
+#define INF(n) (SPECIAL(n) && SIG(n) == 0)
+#define NAN_T(n) (SPECIAL(n) && SIG(n) != 0)
+
+
+
+
+
+
struct exp32_sig64 { int_fast32_t exp; uint64_t sig; };
struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t );
diff --git a/softfloat/s_normSubnormalE4M3Sig.c b/softfloat/s_normSubnormalE4M3Sig.c
new file mode 100644
index 0000000..7475202
--- /dev/null
+++ b/softfloat/s_normSubnormalE4M3Sig.c
@@ -0,0 +1,52 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+/*----------------------------------------------------------------------------
+| Normalizes the fraction `sig' of a subnormal E4M3 value.  The fraction is
+| shifted left until its leading 1 sits at bit 3 (leading-zero count of the
+| 8-bit value minus 4), and the returned exponent is 1 minus that shift.
+*----------------------------------------------------------------------------*/
+struct exp8_sig8 softfloat_normSubnormalE4M3Sig( uint_fast8_t sig )
+{
+    struct exp8_sig8 normalized;
+    const int_fast8_t leftShift =
+        softfloat_countLeadingZeros8[(uint8_t) sig] - 4;
+
+    normalized.sig = sig<<leftShift;
+    normalized.exp = 1 - leftShift;
+    return normalized;
+}
+
+
diff --git a/softfloat/s_normSubnormalE5M2Sig.c b/softfloat/s_normSubnormalE5M2Sig.c
new file mode 100644
index 0000000..af1316d
--- /dev/null
+++ b/softfloat/s_normSubnormalE5M2Sig.c
@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+/*----------------------------------------------------------------------------
+| Normalizes the 2-bit fraction `sig' of a subnormal E5M2 value.  The
+| fraction is shifted left by 1 when bit 1 is set and by 2 otherwise, and the
+| returned exponent is 1 minus that shift.
+*----------------------------------------------------------------------------*/
+struct exp8_sig8 softfloat_normSubnormalE5M2Sig( uint_fast8_t sig )
+{
+    struct exp8_sig8 normalized;
+    int_fast8_t leftShift = 2;
+
+    if ( sig & 0x2 ) leftShift = 1;
+
+    normalized.sig = sig<<leftShift;
+    normalized.exp = 1 - leftShift;
+    return normalized;
+}
diff --git a/softfloat/s_roundPackToE4M3.c b/softfloat/s_roundPackToE4M3.c
new file mode 100644
index 0000000..01b4fdb
--- /dev/null
+++ b/softfloat/s_roundPackToE4M3.c
@@ -0,0 +1,119 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+#include "specialize.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the value described by `sign', `exp', and `sig' to the OFP8 E4M3
+| format and returns the packed result.  The low 4 bits of `sig' are the bits
+| rounded away; the remaining bits are added to `exp'<<3 by packToE4M3UI, so
+| a carry out of the significand increments the exponent field.  Rounding
+| follows the global softfloat_roundingMode.
+|   Underflow is raised when the result is tiny and inexact; inexact is
+| raised whenever nonzero bits are rounded away.  On overflow, overflow and
+| inexact are raised and the result is the largest finite magnitude
+| (S.1111.110) when `saturationMode' is true or the rounding increment is
+| zero, and the default E4M3 NaN otherwise.
+*----------------------------------------------------------------------------*/
+float8_t softfloat_roundPackToE4M3( bool sign, int_fast16_t exp, uint_fast16_t sig, bool saturationMode )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint_fast8_t roundIncrement, roundBits;
+    bool isTiny;
+    uint_fast8_t uiZ;
+    union ui8_f8 uZ;
+
+    /*------------------------------------------------------------------------
+    | Pick the rounding increment: half an ULP (0x8) for the two nearest
+    | modes, just under one ULP (0xF) when the mode rounds away from zero for
+    | this sign, and 0 for every other mode.
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    roundIncrement = 0x8;
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        roundIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                ? 0xF
+                : 0;
+    }
+    roundBits = sig & 0xF;
+    /*------------------------------------------------------------------------
+    | The unsigned comparison is true both for negative `exp' (possible
+    | underflow) and for `exp' >= 0xE (possible overflow).
+    *------------------------------------------------------------------------*/
+    if ( 0xE <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            /*----------------------------------------------------------------
+            | Subnormal result: decide tininess before the significand is
+            | shifted, then shift right with sticky bit and recompute the
+            | round bits.
+            *----------------------------------------------------------------*/
+            isTiny =
+                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
+                    || (exp < -1) || (sig + roundIncrement < 0x100);
+            sig = softfloat_shiftRightJam32( sig, -exp );
+            exp = 0;
+            roundBits = sig & 0xF;
+            if ( isTiny && roundBits ) {
+                softfloat_raiseFlags( softfloat_flag_underflow );
+            }
+        } else if ( 0xE < exp || (sig + roundIncrement >= (0xF0 + roundNearEven)) ) {
+            /*----------------------------------------------------------------
+            | Overflow.  At `exp' == 0xE a rounded significand of 0xF would
+            | pack to the NaN encoding, so it counts as overflow.  Under
+            | round-near-even the threshold is one higher because the exact
+            | tie (sig == 0xE8) rounds down to the even significand 0xE.
+            *----------------------------------------------------------------*/
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            /* Max finite value when saturating or when the increment is 0;
+             * default NaN otherwise.  The NaN macro ignores its argument, so
+             * a null pointer constant is passed instead of the address of an
+             * undeclared object. */
+            uiZ = (saturationMode || !roundIncrement)
+                      ? packToE4M3UI( sign, 0xF, 0x6 )
+                      : softfloat_commonNaNToE4M3UI( 0 );
+            goto uiZ;
+
+        }
+    }
+    /*------------------------------------------------------------------------
+    | Apply the increment and drop the 4 round bits.
+    *------------------------------------------------------------------------*/
+    sig = (sig + roundIncrement)>>4;
+    if ( roundBits ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        /* NOTE(review): with `exp' == 0xE and a truncated significand of 0xE,
+         * setting the low bit packs to S.1111.111, the NaN encoding --
+         * confirm whether round-to-odd should take the overflow path here. */
+        if ( roundingMode == softfloat_round_odd ) {
+            sig |= 1;
+            goto packReturn;
+        }
+#endif
+    }
+    /* On an exact tie under round-near-even, clear the low bit (ties to even). */
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven);
+    if ( ! sig ) exp = 0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ packReturn:
+    uiZ = packToE4M3UI( sign, exp, sig );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+
diff --git a/softfloat/s_roundPackToE5M2.c b/softfloat/s_roundPackToE5M2.c
new file mode 100644
index 0000000..731c012
--- /dev/null
+++ b/softfloat/s_roundPackToE5M2.c
@@ -0,0 +1,117 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2025 The Regents of the University of California. All rights
+reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the value described by `sign', `exp', and `sig' to the OFP8 E5M2
+| format and returns the packed result.  The low 4 bits of `sig' are rounded
+| away according to the global softfloat_roundingMode; the remaining bits are
+| added to `exp'<<2 by packToE5M2UI.  On overflow the result is the largest
+| finite magnitude when `saturationMode' is true; otherwise it is infinity,
+| lowered by one encoding step when the rounding increment is zero.
+*----------------------------------------------------------------------------*/
+float8_t softfloat_roundPackToE5M2( bool sign, int_fast16_t exp, uint_fast16_t sig, bool saturationMode )
+{
+    const uint_fast8_t mode = softfloat_roundingMode;
+    const bool tiesToEven = (mode == softfloat_round_near_even);
+    uint_fast8_t increment, discarded;
+    uint_fast8_t bits;
+    union ui8_f8 result;
+
+    /* Half an ULP for the nearest modes, just under one ULP when rounding
+     * away from zero for this sign, nothing otherwise. */
+    if ( tiesToEven || (mode == softfloat_round_near_maxMag) ) {
+        increment = 0x8;
+    } else if ( mode == (sign ? softfloat_round_min : softfloat_round_max) ) {
+        increment = 0xF;
+    } else {
+        increment = 0;
+    }
+    discarded = sig & 0xF;
+
+    /* The unsigned comparison catches both negative and too-large exponents. */
+    if ( 0x1D <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            /* Subnormal result: tininess is decided before the shift. */
+            const bool tiny =
+                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
+                    || (exp < -1) || (sig + increment < 0x80);
+            sig = softfloat_shiftRightJam32( sig, -exp );
+            exp = 0;
+            discarded = sig & 0xF;
+            if ( tiny && discarded ) {
+                softfloat_raiseFlags( softfloat_flag_underflow );
+            }
+        } else if ( (0x1D < exp) || (0x80 <= sig + increment) ) {
+            /* Overflow: saturate to max finite, or return infinity (one step
+             * below it when the increment is zero). */
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            if ( saturationMode ) {
+                bits = packToE5M2UI( sign, 0x1E, 0x3 );
+            } else {
+                bits = packToE5M2UI( sign, 0x1F, 0x0 ) - ! increment;
+            }
+            goto done;
+        }
+    }
+
+    sig = (sig + increment)>>4;
+    if ( discarded ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( mode == softfloat_round_odd ) {
+            sig |= 1;
+            goto pack;
+        }
+#endif
+    }
+    /* Exact tie under round-near-even: clear the low bit. */
+    sig &= ~(uint_fast16_t) (! (discarded ^ 8) & tiesToEven);
+    if ( ! sig ) exp = 0;
+
+ pack:
+    bits = packToE5M2UI( sign, exp, sig );
+ done:
+    result.ui = bits;
+    return result.f;
+
+}
+
+
diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h
index 9c57404..269434b 100644
--- a/softfloat/softfloat.h
+++ b/softfloat/softfloat.h
@@ -4,7 +4,7 @@
This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2025 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -122,6 +122,10 @@ float128_t ui64_to_f128( uint64_t );
#endif
void ui64_to_extF80M( uint64_t, extFloat80_t * );
void ui64_to_f128M( uint64_t, float128_t * );
+/* OFP8 (E4M3 / E5M2) inputs producing BF16 or F16 results. */
+bfloat16_t e4m3_to_bf16( e4m3_t );
+float16_t e4m3_to_f16( e4m3_t );
+float16_t e5m2_to_f16( e5m2_t );
+bfloat16_t e5m2_to_bf16( e5m2_t );
bfloat16_t i32_to_bf16( int32_t );
float16_t i32_to_f16( int32_t );
float32_t i32_to_f32( int32_t );
@@ -194,6 +198,8 @@ uint_fast8_t bf16_to_ui8( bfloat16_t, uint_fast8_t, bool );
uint_fast32_t bf16_to_ui32( bfloat16_t, uint_fast8_t, bool );
int_fast8_t bf16_to_i8( bfloat16_t, uint_fast8_t, bool );
int_fast32_t bf16_to_i32( bfloat16_t, uint_fast8_t, bool );
+/* NOTE(review): the bool is presumably the saturation-mode flag taken by softfloat_roundPackToE4M3/E5M2 -- confirm against the definitions. */
+e4m3_t bf16_to_e4m3( bfloat16_t, bool );
+e5m2_t bf16_to_e5m2( bfloat16_t, bool );
float32_t bf16_to_f32( bfloat16_t );
float64_t bf16_to_f64( bfloat16_t );
bfloat16_t bf16_add( bfloat16_t, bfloat16_t );
@@ -225,6 +231,8 @@ uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
+/* NOTE(review): the bool is presumably the saturation-mode flag taken by softfloat_roundPackToE4M3/E5M2 -- confirm against the definitions. */
+e4m3_t f32_to_e4m3( float32_t, bool );
+e5m2_t f32_to_e5m2( float32_t, bool );
bfloat16_t f32_to_bf16( float32_t );
float16_t f32_to_f16( float32_t );
float64_t f32_to_f64( float32_t );
diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in
index 899f00a..44a5b81 100644
--- a/softfloat/softfloat.mk.in
+++ b/softfloat/softfloat.mk.in
@@ -1,6 +1,10 @@
softfloat_subproject_deps =
softfloat_c_srcs = \
+ e4m3_to_f16.c \
+ e5m2_to_f16.c \
+ e4m3_to_bf16.c \
+ e5m2_to_bf16.c \
bf16_add.c \
bf16_div.c \
bf16_mul.c \
@@ -9,6 +13,8 @@ softfloat_c_srcs = \
bf16_sub.c \
bf16_cmp.c \
bf16_classify.c \
+ bf16_to_e4m3.c \
+ bf16_to_e5m2.c \
bf16_to_f32.c \
bf16_to_f64.c \
bf16_to_i8.c \
@@ -61,6 +67,8 @@ softfloat_c_srcs = \
f16_to_f128.c \
f16_to_f32.c \
f16_to_f64.c \
+ f16_to_e4m3.c \
+ f16_to_e5m2.c \
f16_to_i8.c \
f16_to_i16.c \
f16_to_i32.c \
@@ -89,6 +97,8 @@ softfloat_c_srcs = \
f32_roundToInt.c \
f32_sqrt.c \
f32_sub.c \
+ f32_to_e4m3.c \
+ f32_to_e5m2.c \
f32_to_bf16.c \
f32_to_f128.c \
f32_to_f16.c \
@@ -103,6 +113,8 @@ softfloat_c_srcs = \
f32_to_ui32_r_minMag.c \
f32_to_ui64.c \
f32_to_ui64_r_minMag.c \
+ f32_to_i8.c \
+ f32_to_ui8.c \
f64_add.c \
f64_classify.c \
f64_div.c \
@@ -185,6 +197,8 @@ softfloat_c_srcs = \
s_normRoundPackToF32.c \
s_normRoundPackToF64.c \
s_normSubnormalF128Sig.c \
+ s_normSubnormalE4M3Sig.c \
+ s_normSubnormalE5M2Sig.c \
s_normSubnormalF16Sig.c \
s_normSubnormalF32Sig.c \
s_normSubnormalF64Sig.c \
@@ -199,6 +213,8 @@ softfloat_c_srcs = \
s_roundMToUI64.c \
s_roundPackMToI64.c \
s_roundPackMToUI64.c \
+ s_roundPackToE4M3.c \
+ s_roundPackToE5M2.c \
s_roundPackToBF16.c \
s_roundPackToF128.c \
s_roundPackToF16.c \
diff --git a/softfloat/softfloat_types.h b/softfloat/softfloat_types.h
index 34c518f..9c69623 100644
--- a/softfloat/softfloat_types.h
+++ b/softfloat/softfloat_types.h
@@ -4,8 +4,8 @@
This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
-California. All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2017, 2025 The Regents of the University
+of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -40,19 +40,27 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
/*----------------------------------------------------------------------------
-| Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point
-| arguments and results to/from functions. These types must be exactly
+| Types used to pass 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit floating-point
+| arguments and results to/from functions. These types must be exactly 8 bits,
| 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively. Where a
| platform has "native" support for IEEE-Standard floating-point formats,
| the types below may, if desired, be defined as aliases for the native types
| (typically 'float' and 'double', and possibly 'long double').
*----------------------------------------------------------------------------*/
+typedef struct { uint8_t v; } float8_t;
typedef struct { uint16_t v; } float16_t;
typedef float16_t bfloat16_t;
typedef struct { uint32_t v; } float32_t;
typedef struct { uint64_t v; } float64_t;
typedef struct { uint64_t v[2]; } float128_t;
+
+/*----------------------------------------------------------------------------
+| OCP 8-bit floating-point (OFP8) types.
+*----------------------------------------------------------------------------*/
+typedef float8_t e4m3_t;
+typedef float8_t e5m2_t;
+
/*----------------------------------------------------------------------------
| The format of an 80-bit extended floating-point number in memory. This
| structure must contain a 16-bit field named 'signExp' and a 64-bit field
diff --git a/softfloat/specialize.h b/softfloat/specialize.h
index adbc081..7236720 100644
--- a/softfloat/specialize.h
+++ b/softfloat/specialize.h
@@ -4,8 +4,8 @@
This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
-California. All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2025 The Regents of the University
+of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -94,6 +94,12 @@ extern "C" {
struct commonNaN { char _unused; };
/*----------------------------------------------------------------------------
+| The bit pattern for a default generated 8-bit floating-point NaN.
+*----------------------------------------------------------------------------*/
+#define defaultNaNE4M3 0x7F
+#define defaultNaNE5M2 0x7F
+
+/*----------------------------------------------------------------------------
| The bit pattern for a default generated 16-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF16UI 0x7E00
@@ -118,6 +124,14 @@ struct commonNaN { char _unused; };
#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
/*----------------------------------------------------------------------------
+| Assuming `uiA' has the bit pattern of an 8-bit floating-point NaN, converts
+| this NaN to the common NaN form, and stores the resulting common NaN at the
+| location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+#define softfloat_E4M3UIToCommonNaN( uiA, zPtr ) (void) (uiA), (void) (zPtr)
+#define softfloat_E5M2UIToCommonNaN( uiA, zPtr ) (void) (uiA), (void) (zPtr)
+
+/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a 16-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
@@ -137,6 +151,13 @@ struct commonNaN { char _unused; };
| Converts the common NaN pointed to by `aPtr' into a binary 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
+#define softfloat_commonNaNToE4M3UI( aPtr ) ((uint_fast8_t) defaultNaNE4M3)
+#define softfloat_commonNaNToE5M2UI( aPtr ) ((uint_fast8_t) defaultNaNE5M2)
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by `aPtr' into a binary 16-bit floating-point
+| NaN, and returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
/*----------------------------------------------------------------------------
@@ -146,6 +167,11 @@ struct commonNaN { char _unused; };
#define softfloat_commonNaNToF16UI( aPtr ) ((uint_fast16_t) defaultNaNF16UI)
/*----------------------------------------------------------------------------
+| The bit pattern for a default generated BF16 NaN.
+*----------------------------------------------------------------------------*/
+#define defaultNaNBF16UI 0x7FC0
+
+/*----------------------------------------------------------------------------
| Interpreting `uiA' and `uiB' as the bit patterns of two 16-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
diff --git a/spike_main/spike.cc b/spike_main/spike.cc
index b8a1b5c..5617a82 100644
--- a/spike_main/spike.cc
+++ b/spike_main/spike.cc
@@ -71,6 +71,7 @@ static void help(int exit_code = 1)
fprintf(stderr, " --real-time-clint Increment clint time at real-time rate\n");
fprintf(stderr, " --triggers=<n> Number of supported triggers [default 4]\n");
fprintf(stderr, " --dm-progsize=<words> Progsize for the debug module [default 2]\n");
+ fprintf(stderr, " --dm-datacount=<n> Number of data registers available for the debug module [default 2]\n");
fprintf(stderr, " --dm-sba=<bits> Debug system bus access supports up to "
"<bits> wide accesses [default 0]\n");
fprintf(stderr, " --dm-auth Debug module requires debugger to authenticate\n");
@@ -83,6 +84,7 @@ static void help(int exit_code = 1)
fprintf(stderr, " --dm-no-abstract-fpr Debug module won't support abstract FPR access\n");
fprintf(stderr, " --dm-no-halt-groups Debug module won't support halt groups\n");
fprintf(stderr, " --dm-no-impebreak Debug module won't support implicit ebreak in program buffer\n");
+ fprintf(stderr, " --dm-no-abstractauto Debug module won't support the abstractauto register\n");
fprintf(stderr, " --blocksz=<size> Cache block size (B) for CMO operations(powers of 2) [default 64]\n");
fprintf(stderr, " --instructions=<n> Stop after n instructions\n");
@@ -413,6 +415,8 @@ int main(int argc, char** argv)
});
parser.option(0, "dm-progsize", 1,
[&](const char* s){dm_config.progbufsize = atoul_safe(s);});
+ parser.option(0, "dm-datacount", 1,
+ [&](const char* s){dm_config.datacount = atoul_safe(s);});
parser.option(0, "dm-no-impebreak", 0,
[&](const char UNUSED *s){dm_config.support_impebreak = false;});
parser.option(0, "dm-sba", 1,
@@ -431,6 +435,8 @@ int main(int argc, char** argv)
[&](const char UNUSED *s){dm_config.support_abstract_fpr_access = false;});
parser.option(0, "dm-no-halt-groups", 0,
[&](const char UNUSED *s){dm_config.support_haltgroups = false;});
+ parser.option(0, "dm-no-abstractauto", 0,
+ [&](const char UNUSED *s){dm_config.support_abstractauto = false;});
parser.option(0, "log-commits", 0,
[&](const char UNUSED *s){log_commits = true;});
parser.option(0, "log", 1,
@@ -451,6 +457,7 @@ int main(int argc, char** argv)
min_blocksz, max_blocksz);
exit(-1);
}
+ cfg.cache_blocksz = blocksz;
});
parser.option(0, "instructions", 1, [&](const char* s){
instructions = strtoull(s, 0, 0);
@@ -541,7 +548,6 @@ int main(int argc, char** argv)
if (dc) s.get_core(i)->get_mmu()->register_memtracer(&*dc);
for (auto e : extensions)
s.get_core(i)->register_extension(e());
- s.get_core(i)->get_mmu()->set_cache_blocksz(blocksz);
}
s.set_debug(debug);