diff options
254 files changed, 5253 insertions, 1110 deletions
diff --git a/.github/workflows/apt-packages.txt b/.github/workflows/apt-packages.txt index e153391..cef0337 100644 --- a/.github/workflows/apt-packages.txt +++ b/.github/workflows/apt-packages.txt @@ -1,2 +1,4 @@ build-essential device-tree-compiler +g++-riscv64-linux-gnu +libc6-dev-riscv64-cross diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 517c74e..c6f73d5 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -18,7 +18,7 @@ on: jobs: test: name: Test Spike build (Ubuntu) - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v2 with: @@ -26,7 +26,9 @@ jobs: fetch-depth: 0 - name: Install Dependencies - run: sudo xargs apt-get install -y < .github/workflows/apt-packages.txt + run: | + sudo apt-get update + sudo xargs apt-get install -y < .github/workflows/apt-packages.txt - run: | for commit in $(git rev-list origin/master..HEAD | tac); do @@ -53,5 +55,4 @@ jobs: git checkout $commit echo "Checking commit $commit" ci-tests/build-spike - ci-tests/test-spike done diff --git a/.github/workflows/debug-smoke.yml b/.github/workflows/debug-smoke.yml index 7d6cc00..7559616 100644 --- a/.github/workflows/debug-smoke.yml +++ b/.github/workflows/debug-smoke.yml @@ -13,12 +13,13 @@ on: jobs: test: name: Test debug (Ubuntu) - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v2 - name: Install Dependencies run: | + sudo apt-get update sudo xargs apt-get install -y < .github/workflows/apt-packages.txt - name: Download OpenOCD @@ -55,10 +56,10 @@ jobs: ./gdbserver.py targets/RISC-V/spike32.py --print-failures \ --gcc $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gcc \ --gdb $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gdb \ - --sim_cmd $GITHUB_WORKSPACE/build/install/bin/spike \ + --sim_cmd $GITHUB_WORKSPACE/install/bin/spike \ --server_cmd 
$GITHUB_WORKSPACE/riscv-openocd/src/openocd ./gdbserver.py targets/RISC-V/spike64-2.py --print-failures \ --gcc $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gcc \ --gdb $GITHUB_WORKSPACE/xpack-riscv-none-elf-gcc-12.2.0-1/bin/riscv-none-elf-gdb \ - --sim_cmd $GITHUB_WORKSPACE/build/install/bin/spike \ + --sim_cmd $GITHUB_WORKSPACE/install/bin/spike \ --server_cmd $GITHUB_WORKSPACE/riscv-openocd/src/openocd @@ -30,6 +30,7 @@ Spike supports the following RISC-V ISA features: - Zbc extension, v1.0 - Zbs extension, v1.0 - Zfh and Zfhmin half-precision floating-point extensions, v1.0 + - Zfa extension, v1.0 - Zfinx extension, v1.0 - Zmmul integer multiplication extension, v1.0 - Zicbom, Zicbop, Zicboz cache-block maintenance extensions, v1.0 @@ -78,6 +79,7 @@ Spike supports the following RISC-V ISA features: - Zicond extension, v1.0 - Zilsd extension, v1.0 - Zclsd extension, v1.0 + - Zimop extension, v1.0 Versioning and APIs ------------------- diff --git a/ci-tests/.gitignore b/ci-tests/.gitignore new file mode 100644 index 0000000..b2b07d4 --- /dev/null +++ b/ci-tests/.gitignore @@ -0,0 +1,3 @@ +/install +/build +/run diff --git a/ci-tests/atomics.c b/ci-tests/atomics.c new file mode 100644 index 0000000..ece5a38 --- /dev/null +++ b/ci-tests/atomics.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include <stdatomic.h> + +atomic_int acnt = 0; +atomic_int bcnt = 0; + +int foo() { + for(int n = 0; n < 1000; ++n) { + ++acnt; + if(acnt % 10 == 0) + ++bcnt; + } + return acnt; +} + +int main(void) { + int acnt = foo(); + printf("First atomic counter is %u, second is %u\n", acnt, bcnt); + return 0; +} diff --git a/ci-tests/build-spike b/ci-tests/build-spike index 0a1b315..ed7de8f 100755 --- a/ci-tests/build-spike +++ b/ci-tests/build-spike @@ -1,17 +1,25 @@ #!/bin/bash set -e -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +ROOT=`git rev-parse --show-toplevel` +NPROCS="$(nproc 2> /dev/null || sysctl -n hw.ncpu)" +HERE=`pwd` +CI="$( 
cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +INSTALL=$HERE/install +BUILD=$HERE/build -rm -rf build +rm -rf $INSTALL $BUILD +mkdir $INSTALL $BUILD -mkdir build -cd build -mkdir install -CXXFLAGS="-Wnon-virtual-dtor" CFLAGS="-Werror -Wall -Wextra -Wvla" $DIR/../configure --prefix=`pwd`/install -make -j"$(nproc 2> /dev/null || sysctl -n hw.ncpu)" +# build spike +mkdir $BUILD/spike +cd $BUILD/spike +CFLAGS="-Werror -Wall -Wextra -Wvla" +CXXFLAGS="-Wnon-virtual-dtor $CFLAGS" +CXXFLAGS="$CXXFLAGS" CFLAGS="$CFLAGS" $ROOT/configure --prefix=$INSTALL +make -j$NPROCS make check make install install-hdrs-list.h # check that help message prints without error -install/bin/spike -h +$INSTALL/bin/spike -h diff --git a/ci-tests/create-ci-binary-tarball b/ci-tests/create-ci-binary-tarball deleted file mode 100755 index 73a549e..0000000 --- a/ci-tests/create-ci-binary-tarball +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -set -e - -rm -rf build - -mkdir -p build/pk && cd "$_" -`git rev-parse --show-toplevel`/../riscv-pk/configure --host=riscv64-unknown-elf --with-arch=rv64gc_zifencei -make -j4 -cd - - -mkdir -p build/hello && cd "$_" -riscv64-unknown-elf-gcc -O2 -o hello `git rev-parse --show-toplevel`/ci-tests/hello.c -cd - - -mkdir -p build/dummy-slliuw && cd "$_" -riscv64-unknown-elf-gcc -O2 -o dummy-slliuw `git rev-parse --show-toplevel`/ci-tests/dummy-slliuw.c -cd - - -mkdir -p build/dummycsr && cd "$_" -riscv64-unknown-elf-gcc -O2 -o customcsr `git rev-parse --show-toplevel`/ci-tests/customcsr.c -cd - - -mv build/pk/pk . -mv build/hello/hello . -mv build/dummy-slliuw/dummy-slliuw . -mv build/dummycsr/customcsr . 
-tar -cf spike-ci.tar pk hello dummy-slliuw customcsr - -rm pk hello dummy-slliuw customcsr diff --git a/ci-tests/custom-csr.cc b/ci-tests/custom-csr.cc index 857c9c3..89b0149 100644 --- a/ci-tests/custom-csr.cc +++ b/ci-tests/custom-csr.cc @@ -37,7 +37,7 @@ struct xdummycsr_t : public extension_t { } }; -REGISTER_EXTENSION(dummycsr, []() { return new xdummycsr_t; }) +REGISTER_EXTENSION(dummycsr, []() { static xdummycsr_t ext; return &ext; }) // Copied from spike main. // TODO: This should really be provided in libriscv diff --git a/ci-tests/run-snippy-test.sh b/ci-tests/run-snippy-test.sh new file mode 100755 index 0000000..f6a3c90 --- /dev/null +++ b/ci-tests/run-snippy-test.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -e -x +set -o pipefail + +ROOT="$1" +NUMINSTRS="$2" +BOOTCODE="$3" +TRIPLE="$4" +ARCH="$5" +EXTENSIONS="$6" +ABI="$7" +SPIKE_PATH="$8" + +CONFIGDIR="$ROOT"/ci-tests/snippy-tests + +CONFIG="test-$ARCH-$ABI.yaml" + +base=$(basename "$CONFIG" .yaml) +testfile="$base".elf +tmpelf="$base".tmp.elf +# exclude C_JR and C_JALR and some othe compressed opcodes as snippy has issues with them +# exclude EBREAK/ECALL as we want non-privileged instructions +# exclude lr.rl and sc.aq as they don't make sense +"$CONFIGDIR"/generate-snippy-test.sh --march "$ARCH" --mtriple "$TRIPLE" --extensions "$EXTENSIONS" --num-instrs $NUMINSTRS --ignore-opcode-regex "C_JR|C_JALR|EBREAK|ECALL|C_.*(SP|HINT|UNIMP).*|LR_.*_RL|SC_.*_AQ" > "$CONFIG" +llvm-snippy "$CONFIG" -o "$tmpelf" --seed 1 -riscv-disable-misaligned-access --layout-include-dir "$ROOT"/ci-tests/snippy-tests +riscv64-linux-gnu-gcc -O0 -march="$ARCH" -mabi="$ABI" -T "$tmpelf".ld -T "$CONFIGDIR"/linker-entry.ld "$tmpelf" "$BOOTCODE" -nostdlib -static -o "$testfile" -Wl,--build-id=none + +error=0 +if ! 
timeout --foreground 60s "$SPIKE_PATH" -l --log-commits --isa "$ARCH" "$testfile" +then + echo "TIMEOUT: $testfile" + error=1 +else + echo "SUCCESS: $testfile" +fi +exit $error diff --git a/ci-tests/run-snippy-tests.sh b/ci-tests/run-snippy-tests.sh new file mode 100755 index 0000000..8f67d3a --- /dev/null +++ b/ci-tests/run-snippy-tests.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +set -e -x + +WORKDIR="$1" +CONFIGDIR="$2" +SPIKE_PATH="$3" +RESULTDIR="$WORKDIR"/snippy-tests + +mkdir -p "$WORKDIR" +mkdir -p "$RESULTDIR" + +ROOT=`git rev-parse --show-toplevel` +run_test_script="$ROOT"/ci-tests/run-snippy-test.sh + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei "i" lp64 "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei "i" ilp32 "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv64 rv64ic_zicsr_zifencei "c - d" lp64 "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv32 rv32ic_zicsr_zifencei "c - d" ilp32 "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifd_zicsr_zifencei "d - c - zfa - zvfh" lp64d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifd_zicsr_zifencei "d - c - zfa - zvfh" ilp32d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64if_zicsr_zifencei "f - c" lp64f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32if_zicsr_zifencei "f - c" ilp32f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifc_zicsr_zifencei "f" lp64f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifc_zicsr_zifencei "f" ilp32f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 2000 \ + "$CONFIGDIR"/boot-code-vf.s riscv64 rv64gcv_zfa_zvfh "v" lp64d "$SPIKE_PATH" + +"$run_test_script" 
"$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifc_zicsr_zifencei_zfhmin "f + zfhmin - d" lp64f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifc_zicsr_zifencei_zfhmin "f + zfhmin - d" ilp32f "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei "d - zfa - zvfh" lp64d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei "d - zfa - zvfh" ilp32d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei_zfhmin "d - zfa + zfhmin" lp64d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei_zfhmin "d - zfa + zfhmin" ilp32d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifdc_zicsr_zifencei_zfh_zfa "d + zfh" lp64d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifdc_zicsr_zifencei_zfh_zfa "d + zfh" ilp32d "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei_zca "zca" lp64 "$SPIKE_PATH" + +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei_zca "zca" ilp32 "$SPIKE_PATH" + +# rv32-only zcf +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32if_zicsr_zifencei_zca_zcf "zcf" ilp32f "$SPIKE_PATH" +# zcd +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv64 rv64ifd_zicsr_zifencei_zca_zcd "zca + zcd - zfa - zfh" lp64d "$SPIKE_PATH" +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code-f.s riscv32 rv32ifd_zicsr_zifencei_zca_zcd "zca + zcd - zfa - zfh" ilp32d "$SPIKE_PATH" +# zcb +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv64 rv64i_zicsr_zifencei_zca_zcb_zmmul_zba_zbb "zca + zcb" lp64 "$SPIKE_PATH" +"$run_test_script" "$ROOT" 3000 \ + 
"$CONFIGDIR"/boot-code.s riscv32 rv32i_zicsr_zifencei_zca_zcb_zmmul_zba_zbb "zca + zcb" ilp32 "$SPIKE_PATH" +# zawrs +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv64 rv64ia_zicsr_zifencei_zawrs "zawrs + zalrsc" lp64 "$SPIKE_PATH" +"$run_test_script" "$ROOT" 3000 \ + "$CONFIGDIR"/boot-code.s riscv32 rv32ia_zicsr_zifencei_zawrs "zawrs + zalrsc" ilp32 "$SPIKE_PATH" + diff --git a/ci-tests/snippy-tests/boot-code-f.s b/ci-tests/snippy-tests/boot-code-f.s new file mode 100644 index 0000000..b30fccc --- /dev/null +++ b/ci-tests/snippy-tests/boot-code-f.s @@ -0,0 +1,41 @@ +.option norvc + +.global _entry +.global fromhost +.global tohost + +.text +_entry: + la t0, exception_handler + csrw mtvec, t0 + csrr t1, mstatus +# Setting bit number 13 (mstatus.FS) + li t3, 1 + slli t3, t3, 13 + or t1, t1, t3 + csrw mstatus, t1 + la t0, SNIPPY_ENTRY + jalr t0 + +exception_handler: + csrr x10, mcause +# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize. +# Otherwise it's not the expected behavior and we go into an infinite loop. + li x11, 3 + beq x10, x11, exit + j infinite_loop + +exit: + li ra, 1 + la sp, tohost + sw ra, 0(sp) + +infinite_loop: + j infinite_loop + +.balign 64 +tohost: +.8byte 0x0 +.balign 64 +fromhost: +.8byte 0x0 diff --git a/ci-tests/snippy-tests/boot-code-vf.s b/ci-tests/snippy-tests/boot-code-vf.s new file mode 100644 index 0000000..8c32c0f --- /dev/null +++ b/ci-tests/snippy-tests/boot-code-vf.s @@ -0,0 +1,45 @@ +.option norvc + +.global _entry +.global fromhost +.global tohost + +.text +_entry: + la t0, exception_handler + csrw mtvec, t0 + csrr t1, mstatus +# Setting bit number 9 (mstatus.VS) + li t2, 1 + slli t2, t2, 9 + or t1, t1, t2 +# Setting bit number 13 (mstatus.FS) + li t3, 1 + slli t3, t3, 13 + or t1, t1, t3 + csrw mstatus, t1 + la t0, SNIPPY_ENTRY + jalr t0 + +exception_handler: + csrr x10, mcause +# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize. 
+# Otherwise it's not the expected behavior and we go into an infinite loop. + li x11, 3 + beq x10, x11, exit + j infinite_loop + +exit: + li ra, 1 + la sp, tohost + sd ra, 0(sp) + +infinite_loop: + j infinite_loop + +.balign 64 +tohost: +.8byte 0x0 +.balign 64 +fromhost: +.8byte 0x0 diff --git a/ci-tests/snippy-tests/boot-code.s b/ci-tests/snippy-tests/boot-code.s new file mode 100644 index 0000000..9dfae53 --- /dev/null +++ b/ci-tests/snippy-tests/boot-code.s @@ -0,0 +1,30 @@ +.option norvc +.global _entry +.global fromhost +.global tohost +.text +_entry: + la t0, exception_handler + csrw mtvec, t0 + la t0, SNIPPY_ENTRY + jalr t0 + j exit +exception_handler: + csrr x10, mcause +# In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize. +# Otherwise it's not the expected behavior and we go into an infinite loop. + li x11, 3 + beq x10, x11, exit + j infinite_loop +exit: + li ra, 1 + la sp, tohost + sw ra, 0(sp) +infinite_loop: + j infinite_loop +.balign 64 +tohost: +.8byte 0x0 +fromhost: +.8byte 0x0 + diff --git a/ci-tests/snippy-tests/generate-snippy-test.sh b/ci-tests/snippy-tests/generate-snippy-test.sh new file mode 100755 index 0000000..6e23768 --- /dev/null +++ b/ci-tests/snippy-tests/generate-snippy-test.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash + +set -euo pipefail + +args=("$@") + +march= +mtriple= +extensions= +ignore_regex='^$' +includes=("./sections.yaml") +num_instrs=1000 + +usage() { + { + echo "isa-tests-gen.sh [options]" + echo " --march : Target architecture [required]" + echo " --mtriple : Target triple" + echo " --extensions : Target extensions string" + echo " --include : Additional include" + echo " --num-instrs : Number of instructions to generate (default: $num_instrs)" + echo " --ignore-opcode-regex : Regex to filter out opcodes" + echo " -h, --help : Print this help message" + } >&2 +} + +while [[ $# -gt 0 ]]; do + case $1 in + --march) + shift + march=$1 + shift + ;; + --mtriple) + shift + mtriple=$1 + shift + ;; + 
--extensions) + shift + extensions="$1" + shift + ;; + --include) + shift + includes+=("$1") + shift + ;; + --num-instrs) + shift + num_instrs="$1" + shift + ;; + --ignore-opcode-regex) + shift + ignore_regex="$1" + shift + ;; + -h | --help) + usage + exit 0 + ;; + *) + usage + exit 1 + ;; + esac +done + +declare -a ie_args + +case $mtriple in +riscv32) + ie_args+=("-arch=riscv" "--rv32" "-riscv-ext" "$extensions") + ;; +riscv64) + ie_args+=("-arch=riscv" "--rv64" "-riscv-ext" "$extensions") + ;; +*) + echo "error: Unrecognized --mtriple" >&2 + usage + exit 1 + ;; +esac + +ie_args+=("--disable-pseudo") + +if [[ -z "$march" ]]; then + echo "error: --march hasn't been specified" >&2 + exit 1 +fi + +if ! [ -x "$(command -v llvm-ie)" ]; then + echo "error: 'llvm-ie' is not in PATH" >&2 + exit 1 +fi + +mapfile -t opcodes < <(llvm-ie "${ie_args[@]}") +filtered_opcodes=() + +for opc in "${opcodes[@]}"; do + if [[ ! "$opc" =~ $ignore_regex ]]; then + filtered_opcodes+=("$opc") + fi +done + +if [[ ${#filtered_opcodes[@]} -eq 0 ]]; then + echo "error: No opcodes matched" >&2 + exit 1 +fi + +echo "# generated with" "$(basename "$0"), to regenerate run:" +printf "# %s" "$(basename "$0")" +for arg in "${args[@]}"; do + printf " %q" "$arg" +done +printf "\n" +echo "include:" +for inc in "${includes[@]}"; do + echo " - \"$inc\"" +done + +cat <<EOF +options: + march: ${march} + mtriple: ${mtriple} + num-instrs: ${num_instrs} + model-plugin: None + entry-point: SNIPPY_ENTRY + riscv-init-fregs-from-memory: true +histogram: +EOF +for opc in "${filtered_opcodes[@]}"; do + echo " - [$opc, 1.0]" +done + +printf "\n" diff --git a/ci-tests/snippy-tests/linker-entry.ld b/ci-tests/snippy-tests/linker-entry.ld new file mode 100644 index 0000000..f94c957 --- /dev/null +++ b/ci-tests/snippy-tests/linker-entry.ld @@ -0,0 +1 @@ +ENTRY(_entry) diff --git a/ci-tests/snippy-tests/sections.yaml b/ci-tests/snippy-tests/sections.yaml new file mode 100644 index 0000000..bc5a399 --- /dev/null +++ 
b/ci-tests/snippy-tests/sections.yaml @@ -0,0 +1,22 @@ +sections: + - name: 0 + VMA: 0x80000000 + SIZE: 0x10000 + LMA: 0x80000000 + ACCESS: r + - name: 1 + VMA: 0x80020000 + SIZE: 0x20000 + LMA: 0x80020000 + ACCESS: rx + - name: 2 + VMA: 0x80040000 + SIZE: 0x10000 + LMA: 0x80040000 + ACCESS: rw + - name: stack + VMA: 0x80050000 + SIZE: 0x10000 + LMA: 0x80050000 + ACCESS: rw + diff --git a/ci-tests/test-customext.cc b/ci-tests/test-customext.cc index 77c739f..90cdb35 100644 --- a/ci-tests/test-customext.cc +++ b/ci-tests/test-customext.cc @@ -46,7 +46,7 @@ struct xslliuw_dummy_t : public extension_t { } }; -REGISTER_EXTENSION(dummyslliuw, []() { return new xslliuw_dummy_t; }) +REGISTER_EXTENSION(dummyslliuw, []() { static xslliuw_dummy_t ext; return &ext; }) // Copied from spike main. // TODO: This should really be provided in libriscv diff --git a/ci-tests/test-spike b/ci-tests/test-spike index 36b748a..ebec4c6 100755 --- a/ci-tests/test-spike +++ b/ci-tests/test-spike @@ -1,25 +1,51 @@ #!/bin/bash set -e -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +ROOT=`git rev-parse --show-toplevel` +NPROCS="$(nproc 2> /dev/null || sysctl -n hw.ncpu)" +HERE=`pwd` +CI="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +INSTALL=$HERE/install +BUILD=$HERE/build +RUN=$HERE/run -cd build +# build pk +rm -rf $BUILD/pk +mkdir $BUILD/pk +cd $BUILD/pk +git clone https://github.com/riscv-software-src/riscv-pk.git +riscv-pk/configure --host=riscv64-linux-gnu --prefix=$INSTALL +make -j$NPROCS +make install -# run a program and check for correct output -mkdir run -cd run -wget https://github.com/riscv-software-src/riscv-isa-sim/releases/download/dummy-tag-for-ci-storage/spike-ci.tar -tar xf spike-ci.tar -time ../install/bin/spike --isa=rv64gc pk hello | grep "Hello, world! Pi is approximately 3.141588." 
+# build tests +rm -rf $RUN +mkdir -p $RUN +cd $RUN +riscv64-linux-gnu-gcc -static -O2 -o hello $CI/hello.c +riscv64-linux-gnu-gcc -static -O2 -o dummy-slliuw $CI/dummy-slliuw.c +riscv64-linux-gnu-gcc -static -O2 -o customcsr $CI/customcsr.c +riscv64-linux-gnu-gcc -static -O2 -o atomics $CI/atomics.c + +# run snippy-based tests +wget https://github.com/syntacore/snippy/releases/download/snippy-2.1/snippy-x86_64-linux.tar.xz +tar xf snippy-x86_64-linux.tar.xz + +# test that snippy runs +bin/llvm-snippy --version | grep "Snippy version: 2.1.0" +PATH="$PATH:$RUN/bin" "$ROOT"/ci-tests/run-snippy-tests.sh "$RUN" "$ROOT"/ci-tests/snippy-tests "$INSTALL"/bin/spike # check that including sim.h in an external project works -g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o test-libriscv -g++ -std=c++2a -I../install/include -L../install/lib $DIR/test-customext.cc -lriscv -o test-customext -g++ -std=c++2a -I../install/include -L../install/lib $DIR/custom-csr.cc -lriscv -o test-custom-csr +g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o test-libriscv +g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/test-customext.cc -lriscv -o test-customext +g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/custom-csr.cc -lriscv -o test-custom-csr # check that all installed headers are functional -g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o /dev/null -include ../install-hdrs-list.h +g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o /dev/null -include $BUILD/spike/install-hdrs-list.h -LD_LIBRARY_PATH=../install/lib ./test-libriscv pk hello| grep "Hello, world! Pi is approximately 3.141588." -LD_LIBRARY_PATH=../install/lib ./test-customext pk dummy-slliuw | grep "Executed successfully" -LD_LIBRARY_PATH=../install/lib ./test-custom-csr pk customcsr | grep "Executed successfully" +# run tests +time $INSTALL/bin/spike --isa=rv64gc $BUILD/pk/pk hello | grep "Hello, world! 
Pi is approximately 3.141588." +$INSTALL/bin/spike --log-commits --isa=rv64gc $BUILD/pk/pk atomics 2> /dev/null | grep "First atomic counter is 1000, second is 100" +LD_LIBRARY_PATH=$INSTALL/lib ./test-libriscv $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588." +LD_LIBRARY_PATH=$INSTALL/lib ./test-customext $BUILD/pk/pk dummy-slliuw | grep "Executed successfully" +LD_LIBRARY_PATH=$INSTALL/lib ./test-custom-csr $BUILD/pk/pk customcsr | grep "Executed successfully" diff --git a/customext/cflush.cc b/customext/cflush.cc index c090e88..5a9d279 100644 --- a/customext/cflush.cc +++ b/customext/cflush.cc @@ -40,4 +40,4 @@ class cflush_t : public extension_t } }; -REGISTER_EXTENSION(cflush, []() { return new cflush_t; }) +REGISTER_EXTENSION(cflush, []() { static cflush_t ext; return &ext; }) diff --git a/customext/dummy_rocc.cc b/customext/dummy_rocc.cc index 6669887..bc23939 100644 --- a/customext/dummy_rocc.cc +++ b/customext/dummy_rocc.cc @@ -44,4 +44,4 @@ class dummy_rocc_t : public rocc_t reg_t acc[num_acc]; }; -REGISTER_EXTENSION(dummy_rocc, []() { return new dummy_rocc_t; }) +REGISTER_EXTENSION(dummy_rocc, []() { static dummy_rocc_t ext; return &ext; }) diff --git a/debug_rom/debug_rom.S b/debug_rom/debug_rom.S index 2d36139..378c568 100755 --- a/debug_rom/debug_rom.S +++ b/debug_rom/debug_rom.S @@ -7,6 +7,19 @@ .global entry .global exception +// This macro handles mem access with proper management of the MPRVEN +// Usage: MEMORY_ACCESS_WITH_MPRV(<your code>) +#define MEMORY_ACCESS_WITH_MPRV(...) \ + csrrci s0, CSR_DCSR, DCSR_MPRVEN; \ + andi s0, s0, DCSR_MPRVEN; \ + bnez s0, 1f; \ + __VA_ARGS__; \ + j 2f; \ +1: \ + __VA_ARGS__; \ + csrrsi zero, CSR_DCSR, DCSR_MPRVEN; \ +2: + // Entry location on ebreak, Halt, or Breakpoint // It is the same for all harts. They branch when // their GO or RESUME bit is set. @@ -30,13 +43,22 @@ _entry: // We keep checking both whether there is something the debugger wants // us to do, or whether we should resume. 
entry_loop: - csrr s0, CSR_MHARTID - sw s0, DEBUG_ROM_HALTED(zero) - lbu s0, DEBUG_ROM_FLAGS(s0) // 1 byte flag per hart. Only one hart advances here. + // 1 byte flag per hart. Only one hart advances here. + MEMORY_ACCESS_WITH_MPRV( + csrr s0, CSR_MHARTID; + sw s0, DEBUG_ROM_HALTED(zero); + lbu s0, DEBUG_ROM_FLAGS(s0); + ) + andi s0, s0, (1 << DEBUG_ROM_FLAG_GO) bnez s0, going - csrr s0, CSR_MHARTID - lbu s0, DEBUG_ROM_FLAGS(s0) // multiple harts can resume here + + // multiple harts can resume here + MEMORY_ACCESS_WITH_MPRV( + csrr s0, CSR_MHARTID; + lbu s0, DEBUG_ROM_FLAGS(s0); + ) + andi s0, s0, (1 << DEBUG_ROM_FLAG_RESUME) bnez s0, _resume wfi @@ -46,13 +68,23 @@ _exception: // Restore S0, which we always save to dscratch. // We need this in case the user tried an abstract write to a // non-existent CSR. - csrr s0, CSR_DSCRATCH0 - sw zero, DEBUG_ROM_EXCEPTION(zero) // Let debug module know you got an exception. + + + // Let debug module know you got an exception. + MEMORY_ACCESS_WITH_MPRV( + csrr s0, CSR_DSCRATCH0; + sw zero, DEBUG_ROM_EXCEPTION(zero); + ) + ebreak going: - csrr s0, CSR_MHARTID - sw s0, DEBUG_ROM_GOING(zero) // When debug module sees this write, the GO flag is reset. + // When debug module sees this write, the GO flag is reset. + MEMORY_ACCESS_WITH_MPRV( + csrr s0, CSR_MHARTID; + sw s0, DEBUG_ROM_GOING(zero); + ) + csrr s0, CSR_DSCRATCH0 // Restore s0 here fence fence.i @@ -61,8 +93,12 @@ going: // because jalr is special there) _resume: - csrr s0, CSR_MHARTID - sw s0, DEBUG_ROM_RESUMING(zero) // When Debug Module sees this write, the RESUME flag is reset. + // When Debug Module sees this write, the RESUME flag is reset. 
+ MEMORY_ACCESS_WITH_MPRV( + csrr s0, CSR_MHARTID; + sw s0, DEBUG_ROM_RESUMING(zero); + ) + csrr s0, CSR_DSCRATCH0 // Restore s0 dret diff --git a/debug_rom/debug_rom.h b/debug_rom/debug_rom.h index 7edd5f6..d3d89a2 100644 --- a/debug_rom/debug_rom.h +++ b/debug_rom/debug_rom.h @@ -1,13 +1,25 @@ static const unsigned char debug_rom_raw[] = { - 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x00, 0x06, 0x6f, 0x00, 0x80, 0x03, - 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x24, 0x40, 0xf1, - 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x13, 0x74, 0x14, 0x00, - 0x63, 0x14, 0x04, 0x02, 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, - 0x13, 0x74, 0x24, 0x00, 0x63, 0x18, 0x04, 0x02, 0x73, 0x00, 0x50, 0x10, - 0x6f, 0xf0, 0x9f, 0xfd, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10, - 0x73, 0x00, 0x10, 0x00, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x22, 0x80, 0x10, - 0x73, 0x24, 0x20, 0x7b, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, - 0x67, 0x00, 0x00, 0x30, 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, + 0x6f, 0x00, 0xc0, 0x00, 0x6f, 0x00, 0x40, 0x0d, 0x6f, 0x00, 0x40, 0x07, + 0x0f, 0x00, 0xf0, 0x0f, 0x73, 0x10, 0x24, 0x7b, 0x73, 0x74, 0x08, 0x7b, + 0x13, 0x74, 0x04, 0x01, 0x63, 0x1a, 0x04, 0x00, 0x73, 0x24, 0x40, 0xf1, + 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, 0x6f, 0x00, 0x40, 0x01, + 0x73, 0x24, 0x40, 0xf1, 0x23, 0x20, 0x80, 0x10, 0x03, 0x44, 0x04, 0x40, + 0x73, 0x60, 0x08, 0x7b, 0x13, 0x74, 0x14, 0x00, 0x63, 0x10, 0x04, 0x06, + 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00, + 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, 0x6f, 0x00, 0x00, 0x01, + 0x73, 0x24, 0x40, 0xf1, 0x03, 0x44, 0x04, 0x40, 0x73, 0x60, 0x08, 0x7b, + 0x13, 0x74, 0x24, 0x00, 0x63, 0x14, 0x04, 0x06, 0x73, 0x00, 0x50, 0x10, + 0x6f, 0xf0, 0xdf, 0xf9, 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01, + 0x63, 0x18, 0x04, 0x00, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10, + 0x6f, 0x00, 0x00, 0x01, 0x73, 0x24, 0x20, 0x7b, 0x23, 0x26, 0x00, 0x10, + 0x73, 0x60, 0x08, 0x7b, 
0x73, 0x00, 0x10, 0x00, 0x73, 0x74, 0x08, 0x7b, + 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00, 0x73, 0x24, 0x40, 0xf1, + 0x23, 0x22, 0x80, 0x10, 0x6f, 0x00, 0x00, 0x01, 0x73, 0x24, 0x40, 0xf1, + 0x23, 0x22, 0x80, 0x10, 0x73, 0x60, 0x08, 0x7b, 0x73, 0x24, 0x20, 0x7b, + 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x10, 0x00, 0x00, 0x67, 0x00, 0x00, 0x30, + 0x73, 0x74, 0x08, 0x7b, 0x13, 0x74, 0x04, 0x01, 0x63, 0x18, 0x04, 0x00, + 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x6f, 0x00, 0x00, 0x01, + 0x73, 0x24, 0x40, 0xf1, 0x23, 0x24, 0x80, 0x10, 0x73, 0x60, 0x08, 0x7b, 0x73, 0x24, 0x20, 0x7b, 0x73, 0x00, 0x20, 0x7b }; -static const unsigned int debug_rom_raw_len = 116; +static const unsigned int debug_rom_raw_len = 260; diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 49f2794..7b505b0 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -123,6 +123,12 @@ struct : public arg_t { struct : public arg_t { std::string to_string(insn_t insn) const { + return frm_name(insn.rm()); + } +} rm; + +struct : public arg_t { + std::string to_string(insn_t insn) const { return fpr_name[insn.rd()]; } } frd; @@ -562,6 +568,12 @@ struct : public arg_t { struct : public arg_t { std::string to_string(insn_t insn) const { + return std::to_string((int)insn.b_imm5()); + } +} b_imm5; + +struct : public arg_t { + std::string to_string(insn_t insn) const { return std::to_string((int)insn.bs()); } } bs; @@ -637,7 +649,17 @@ static void NOINLINE add_fstore_insn(disassembler_t* d, const char* name, uint32 static void NOINLINE add_xamo_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) { - d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs2, &base_only_address})); + const char *suffix[] = {"", ".rl", ".aq", ".aqrl"}; + char new_name[128]; + uint32_t new_mask = mask | (0x3 << 25); + uint32_t new_match; + + for (uint32_t idx = 0; idx < sizeof(suffix) / sizeof(suffix[0]); ++idx) { + snprintf(new_name, sizeof(new_name), "%s%s", name, suffix[idx]); + new_match = match | 
(idx << 25); + + d->add_insn(new disasm_insn_t(new_name, new_match, new_mask, {&xrd, &xrs2, &base_only_address})); + } } static void NOINLINE add_xlr_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) @@ -655,6 +677,11 @@ static void NOINLINE add_btype_insn(disassembler_t* d, const char* name, uint32_ d->add_insn(new disasm_insn_t(name, match, mask, {&xrs1, &xrs2, &branch_target})); } +static void NOINLINE add_bimmtype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) +{ + d->add_insn(new disasm_insn_t(name, match, mask, {&xrs1, &b_imm5, &branch_target})); +} + static void NOINLINE add_b1type_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) { const uint32_t mask_rs2 = 0x1fUL << 20; @@ -696,6 +723,11 @@ static void NOINLINE add_fx2type_insn(disassembler_t* d, const char* name, uint3 d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &frs1, &frs2})); } +static void NOINLINE add_fxrtype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) +{ + d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &frs1, &rm})); +} + static void NOINLINE add_flitype_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask) { d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &fli_imm})); @@ -833,6 +865,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) #define DEFINE_PREFETCH(code) DISASM_INSN(#code, code, 0, {&store_address}) #define DEFINE_LTYPE(code) DISASM_INSN(#code, code, 0, {&xrd, &bigimm}) #define DEFINE_BTYPE(code) add_btype_insn(this, #code, match_##code, mask_##code); + #define DEFINE_BIMMTYPE(code) add_bimmtype_insn(this, #code, match_##code, mask_##code); #define DEFINE_B1TYPE(name, code) add_b1type_insn(this, name, match_##code, mask_##code); #define DEFINE_XLOAD(code) add_xload_insn(this, #code, match_##code, mask_##code); #define DEFINE_XSTORE(code) add_xstore_insn(this, #code, match_##code, mask_##code); @@ -846,6 +879,7 @@ 
void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) #define DEFINE_FR3TYPE(code) add_fr3type_insn(this, #code, match_##code, mask_##code); #define DEFINE_FXTYPE(code) add_fxtype_insn(this, #code, match_##code, mask_##code); #define DEFINE_FX2TYPE(code) add_fx2type_insn(this, #code, match_##code, mask_##code); + #define DEFINE_FXRTYPE(code) add_fxrtype_insn(this, #code, match_##code, mask_##code); #define DEFINE_FLITYPE(code) add_flitype_insn(this, #code, match_##code, mask_##code); #define DEFINE_XFTYPE(code) add_xftype_insn(this, #code, match_##code, mask_##code); #define DEFINE_XF2TYPE(code) add_xf2type_insn(this, #code, match_##code, mask_##code); @@ -1260,6 +1294,7 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) DEFINE_FR1TYPE(froundnx_d); DEFINE_FX2TYPE(fleq_d); DEFINE_FX2TYPE(fltq_d); + DEFINE_FXRTYPE(fcvtmod_w_d); if (xlen_eq(32)) { DEFINE_XF2TYPE(fmvp_d_x); @@ -1368,6 +1403,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) //DEFINE_R1TYPE(fcvt_q_h); } + if (ext_enabled(EXT_ZIBI)) { + DEFINE_BIMMTYPE(beqi) + DEFINE_BIMMTYPE(bnei) + } + if (ext_enabled('Q')) { DEFINE_FLOAD(flq) DEFINE_FSTORE(fsq) @@ -1946,6 +1986,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) #undef DISASM_VFUNARY0_INSN } + if (ext_enabled(EXT_ZVFOFP4MIN)) { + DEFINE_VECTOR_V(vfext_vf2); + } + + if (ext_enabled(EXT_ZVFOFP8MIN)) { + DEFINE_VECTOR_V(vfncvt_f_f_q); + DEFINE_VECTOR_V(vfncvt_sat_f_f_q); + DEFINE_VECTOR_V(vfncvtbf16_sat_f_f_w); + } + if (ext_enabled(EXT_ZVFBFMIN)) { DEFINE_VECTOR_V(vfncvtbf16_f_f_w); DEFINE_VECTOR_V(vfwcvtbf16_f_f_v); diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index baedc3f..930ef47 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -130,6 +130,12 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) // Zvfh implies Zfhmin extension_table[EXT_ZFHMIN] = true; } + } else if (ext_str == "zvfbfa") { + 
extension_table[EXT_ZVFBFA] = true; + } else if (ext_str == "zvfofp4min") { + extension_table[EXT_ZVFOFP4MIN] = true; + } else if (ext_str == "zvfofp8min") { + extension_table[EXT_ZVFOFP8MIN] = true; } else if (ext_str == "zicsr") { // Spike necessarily has Zicsr, because // Zicsr is implied by the privileged architecture @@ -140,6 +146,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) // HINTs encoded in base-ISA instructions are always present. } else if (ext_str == "zihintntl") { // HINTs encoded in base-ISA instructions are always present. + } else if (ext_str == "ziccid") { + extension_table[EXT_ZICCID] = true; + } else if (ext_str == "ziccif") { + // aligned instruction fetch is always atomic in Spike } else if (ext_str == "zaamo") { extension_table[EXT_ZAAMO] = true; } else if (ext_str == "zalrsc") { @@ -201,6 +211,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZCMP] = true; } else if (ext_str == "zcmt") { extension_table[EXT_ZCMT] = true; + } else if (ext_str == "zibi") { + extension_table[EXT_ZIBI] = true; } else if (ext_str == "zk") { extension_table[EXT_ZBKB] = true; extension_table[EXT_ZBKC] = true; @@ -239,6 +251,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SMEPMP] = true; } else if (ext_str == "smstateen") { extension_table[EXT_SMSTATEEN] = true; + } else if (ext_str == "smpmpmt") { + extension_table[EXT_SMPMPMT] = true; } else if (ext_str == "smrnmi") { extension_table[EXT_SMRNMI] = true; } else if (ext_str == "sscofpmf") { @@ -253,6 +267,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_SVPBMT] = true; } else if (ext_str == "svinval") { extension_table[EXT_SVINVAL] = true; + } else if (ext_str == "svukte") { + if (max_xlen != 64) + bad_isa_string(str, "'svukte' requires RV64"); + extension_table[EXT_SVUKTE] = true; } else if (ext_str == "zfa") { extension_table[EXT_ZFA] = true; } else if (ext_str == "zicbom") { @@ 
-272,7 +290,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZILSD] = true; } else if (ext_str == "zclsd") { extension_table[EXT_ZCLSD] = true; + } else if (ext_str == "zvkb") { + extension_table[EXT_ZVKB] = true; } else if (ext_str == "zvbb") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; } else if (ext_str == "zvbc") { extension_table[EXT_ZVBC] = true; @@ -283,15 +304,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } else if (ext_str == "zvkg") { extension_table[EXT_ZVKG] = true; } else if (ext_str == "zvkn") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvknc") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvkng") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKNED] = true; @@ -303,15 +327,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } else if (ext_str == "zvknhb") { extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvks") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvksc") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvksg") { + extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKSED] = true; @@ -322,6 +349,24 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvqdotq") { 
extension_table[EXT_ZVQDOTQ] = true; + } else if (ext_str == "zvqbdot8i") { + extension_table[EXT_ZVQBDOT8I] = true; + } else if (ext_str == "zvqbdot16i") { + extension_table[EXT_ZVQBDOT16I] = true; + } else if (ext_str == "zvfqbdot8f") { + extension_table[EXT_ZVFQBDOT8F] = true; + } else if (ext_str == "zvfwbdot16bf") { + extension_table[EXT_ZVFWBDOT16BF] = true; + } else if (ext_str == "zvfbdot32f") { + extension_table[EXT_ZVFBDOT32F] = true; + } else if (ext_str == "zvqldot8i") { + extension_table[EXT_ZVQLDOT8I] = true; + } else if (ext_str == "zvqldot16i") { + extension_table[EXT_ZVQLDOT16I] = true; + } else if (ext_str == "zvfqldot8f") { + extension_table[EXT_ZVFQLDOT8F] = true; + } else if (ext_str == "zvfwldot16bf") { + extension_table[EXT_ZVFWLDOT16BF] = true; } else if (ext_str == "zvkt") { } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; @@ -364,6 +409,9 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, ("Invalid Zvl string: " + ext_str).c_str()); vlen = std::max(vlen, new_vlen); } else if (ext_str.substr(0, 3) == "zve") { + if (ext_str.size() != 6) { + bad_isa_string(str, ("Invalid Zve string: " + ext_str).c_str()); + } reg_t new_elen; try { new_elen = safe_stoul(ext_str.substr(3, ext_str.size() - 4)); @@ -382,10 +430,19 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) if (new_elen != 32 && new_elen != 64) bad_isa_string(str, ("Invalid Zve string: " + ext_str).c_str()); elen = std::max(elen, new_elen); + vlen = std::max(vlen, new_elen); } else if (ext_str == "ssdbltrp") { extension_table[EXT_SSDBLTRP] = true; } else if (ext_str == "smdbltrp") { extension_table[EXT_SMDBLTRP] = true; + } else if (ext_str == "smaia") { + extension_table[EXT_SMAIA] = true; + extension_table[EXT_SSAIA] = true; + extension_table[EXT_SMCSRIND] = true; + extension_table[EXT_SSCSRIND] = true; + } else if (ext_str == "ssaia") { + extension_table[EXT_SSAIA] = true; + extension_table[EXT_SSCSRIND] = true; } else if 
(ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { @@ -433,16 +490,32 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) bad_isa_string(str, "'Zclsd' extension requires 'Zca' and 'Zilsd' extensions"); } - if (extension_table[EXT_ZFBFMIN] && !extension_table['F']) { + if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZFHMIN]) { + extension_table[EXT_INTERNAL_ZFH_MOVE] = true; + } + + if (extension_table[EXT_ZFBFMIN] && (!extension_table['F'])) { bad_isa_string(str, "'Zfbfmin' extension requires 'F' extension"); } - if ((extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZVFBFWMA]) && !extension_table['V']) { - bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension"); + if (extension_table[EXT_ZVFBFMIN] && (vlen == 0 || !zvf)) { + bad_isa_string(str, "'Zvfbfmin' extension requires 'Zve32f' extension"); } - if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZFHMIN]) { - extension_table[EXT_INTERNAL_ZFH_MOVE] = true; + if (extension_table[EXT_ZVFBFA] && (!has_any_vector() || !extension_table[EXT_ZFBFMIN] || !get_zvf())) { + bad_isa_string(str, "'zvfbfa' extension requires at least 'Zve32f', and 'Zfbfmin'"); + } + + if (extension_table[EXT_ZVFBFWMA] && (!extension_table[EXT_ZFBFMIN] || !extension_table[EXT_ZVFBFMIN])) { + bad_isa_string(str, "'Zvfbfwma' extension requires 'Zfbfmin' and 'Zvfbfmin' extensions"); + } + + if (extension_table[EXT_ZVFOFP4MIN] && (!has_any_vector() || !get_zvf())) { + bad_isa_string(str, "'Zvfofp4min' extension requires either 'V' or 'Zve32f' extension"); + } + + if (extension_table[EXT_ZVFOFP8MIN] && (!has_any_vector() || !get_zvf())) { + bad_isa_string(str, "'Zvfofp8min' extension requires either 'V' or 'Zve32f' extension"); } if (extension_table[EXT_ZFINX] && extension_table['F']) { @@ -475,7 +548,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) } if (extension_table[EXT_ZAWRS] && !extension_table[EXT_ZALRSC]) { - 
bad_isa_string(str, "'Zabha' extension requires either the 'A' or the 'Zalrsc' extension"); + bad_isa_string(str, "'Zawrs' extension requires either the 'A' or the 'Zalrsc' extension"); } // When SSE is 0, Zicfiss behavior is defined by Zicmop @@ -498,11 +571,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) #endif if (vlen > 4096) { - bad_isa_string(str, "Spike does not currently support VLEN > 4096b"); - } - - if ((vlen != 0) ^ (elen != 0)) { - bad_isa_string(str, "Invalid Zvl/Zve configuration"); + bad_isa_string(str, "Spike does not support VLEN > 4096"); } if (extension_table[EXT_ZVFHMIN] && (vlen == 0 || elen == 0 || !zvf)) { diff --git a/disasm/regnames.cc b/disasm/regnames.cc index 0a7fd4d..42b1328 100644 --- a/disasm/regnames.cc +++ b/disasm/regnames.cc @@ -31,3 +31,21 @@ const char* csr_name(int which) { } return "unknown-csr"; } + +const char* frm_name(int which) { + switch (which) { + case 0: + return "rne"; + case 1: + return "rtz"; + case 2: + return "rdn"; + case 3: + return "rup"; + case 4: + return "rmm"; + case 7: + return "dyn"; + } + return "unknown-frm"; +} diff --git a/fesvr/term.cc b/fesvr/term.cc index c4cba0c..e0acff6 100644 --- a/fesvr/term.cc +++ b/fesvr/term.cc @@ -1,9 +1,13 @@ #include "term.h" +#include "common.h" #include <termios.h> #include <unistd.h> #include <poll.h> #include <signal.h> #include <stdlib.h> +#include <string.h> + +static int tcsetattr_ttou(int fd, int optional_actions, const struct termios *p); class canonical_termios_t { @@ -15,7 +19,7 @@ class canonical_termios_t { struct termios new_tios = old_tios; new_tios.c_lflag &= ~(ICANON | ECHO); - if (tcsetattr(0, TCSANOW, &new_tios) == 0) + if (tcsetattr_ttou(0, TCSANOW, &new_tios) == 0) restore_tios = true; } } @@ -23,7 +27,7 @@ class canonical_termios_t ~canonical_termios_t() { if (restore_tios) - tcsetattr(0, TCSANOW, &old_tios); + tcsetattr_ttou(0, TCSANOW, &old_tios); } private: struct termios old_tios; @@ -51,3 +55,34 @@ void 
canonical_terminal_t::write(char ch) if (::write(1, &ch, 1) != 1) abort(); } + +static volatile sig_atomic_t sigttou_caught; + +static void sigttou_handler(int UNUSED signum) { + sigttou_caught = 1; +} + +static int tcsetattr_ttou(int fd, int optional_actions, const struct termios *p) +{ + struct sigaction sa, old_sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = sigttou_handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGTTOU, &sa, &old_sa)) + abort(); + + sigttou_caught = 0; + + int result = tcsetattr(fd, optional_actions, p); + + if (sigttou_caught) { + sigaction(SIGTTOU, &old_sa, NULL); + return -1; + } + + if (sigaction(SIGTTOU, &old_sa, NULL)) + abort(); + + return result; +} diff --git a/riscv/abstract_device.h b/riscv/abstract_device.h index d8ddbab..41f5c3f 100644 --- a/riscv/abstract_device.h +++ b/riscv/abstract_device.h @@ -46,4 +46,11 @@ mmio_device_map_t& mmio_device_map(); std::string generate_dts(const sim_t* sim, const std::vector<std::string>& sargs) const override { return generate(sim, sargs); } \ }; device_factory_t *name##_factory = new name##_factory_t(); +#define REGISTER_BUILTIN_DEVICE(name, parse, generate) \ + class name##_factory_t : public device_factory_t { \ + public: \ + name##_t* parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& sargs) const override { return parse(fdt, sim, base, sargs); } \ + std::string generate_dts(const sim_t* sim, const std::vector<std::string>& sargs) const override { return generate(sim, sargs); } \ + }; device_factory_t *name##_factory = new name##_factory_t(); + #endif diff --git a/riscv/bloom_filter.h b/riscv/bloom_filter.h new file mode 100644 index 0000000..a3285bd --- /dev/null +++ b/riscv/bloom_filter.h @@ -0,0 +1,64 @@ +// See LICENSE for license details. 
+ +#ifndef _RISCV_BLOOM_FILTER_H +#define _RISCV_BLOOM_FILTER_H + +#include <bitset> +#include <cstdint> + +struct simple_hash1 { + uint64_t operator()(uint64_t x) const + { + x = (x ^ (x >> 33)) * 0xff51afd7ed558ccd; + x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53; + return x ^ (x >> 33); + } +}; + +struct simple_hash2 { + uint64_t operator()(uint64_t x) const + { + x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9; + x = (x ^ (x >> 27)) * 0x94d049b13c66a8ed; + return x ^ (x >> 31); + } +}; + +template <typename T, typename H1, typename H2, size_t M, size_t K> // M: bit array size, K: number of hash functions +class bloom_filter_t { + public: + void clear() + { + bits.reset(); + } + + void insert(T value) + { + uint64_t h1 = H1()(value); + uint64_t h2 = H2()(value); + + for (size_t i = 0; i < K; i++) { + size_t idx = (h1 + i * h2) % M; + bits[idx] = true; + } + } + + bool contains(T value) const + { + uint64_t h1 = H1()(value); + uint64_t h2 = H2()(value); + + for (size_t i = 0; i < K; i++) { + size_t idx = (h1 + i * h2) % M; + if (!bits[idx]) + return false; + } + + return true; + } + + private: + std::bitset<M> bits; +}; + +#endif diff --git a/riscv/bulknormdot.h b/riscv/bulknormdot.h new file mode 100644 index 0000000..37981ae --- /dev/null +++ b/riscv/bulknormdot.h @@ -0,0 +1,328 @@ +#ifndef _RISCV_BULKNORMDOT_H +#define _RISCV_BULKNORMDOT_H + +#include <cstdint> +#include <vector> +#include "softfloat.h" + +struct bulk_norm_out_t { + uint32_t out; + uint8_t flags; +}; + +template<typename T> +static int int_log2(T n) +{ + int res = 0; + while (n >>= 1) + res++; + return res; +} + +template<typename T> +static T shift_right_jam(T n, int amt) +{ + int width = 8 * sizeof(T); + T shifted = amt >= width ? 0 : n >> amt; + T jam_mask = amt >= width ? 
T(-1) : (T(1) << amt) - 1; + bool jam = (n & jam_mask) != 0; + return shifted | jam; +} + +/** Configuration description for dot product */ +class DotConfig { + public: + int n; // number of products + int guardBits; // number of guard bits + bool flushSub; // flush subnormal (input/output) to zero + DotConfig(int numProd, int numGuardBits) : n(numProd), guardBits(numGuardBits), flushSub(false) {} +}; + +const static int f32_exp_bits = 8; +const static int f32_exp_bias = (1 << (f32_exp_bits - 1)) - 1; +const static int f32_mant_bits = 23; // number of mantissa bits (excluding implicit one) +const static int f32_exp_mask = (uint32_t(1) << f32_exp_bits) - 1; +const static uint32_t f32_mant_mask = (uint32_t(1) << f32_mant_bits) - 1; + +/** Template for a floating-point format class */ +template <typename U, typename M, typename E> class FloatFormat { + virtual M mant() const = 0; + virtual M sig() const = 0; + virtual E exp() const = 0; + + virtual bool subOrZero() const = 0; + + virtual bool inf() const = 0; + virtual bool nan() const = 0; + virtual bool sigNan() const = 0; + virtual bool special() const = 0; + +public: + virtual ~FloatFormat() = default; +}; + +/** Template for an IEEE-754 floating-point format class */ +template <typename U, typename M, typename E, unsigned expWidth, unsigned mantWidth> class IEEEFloatFormat : FloatFormat<U, M, E> { +public: + U n; + IEEEFloatFormat(U _n) : n(_n) {} + IEEEFloatFormat() {} + + int bias = (1 << (expWidth - 1)) - 1; + int sigBits = mantWidth + 1; + int mant_bits = mantWidth; +public: + /* raw exponent field */ + E exp() const { return (n >> mantWidth) & ((1 << expWidth) - 1); } + + /* raw exponent field with correction for subnormal */ + E expSubFixed() const { return exp() + subOrZero(); } + + /** number sign */ + bool sign() const { return n >> (expWidth + mantWidth); } + + /** bit mask for mantissa */ + M mantMask() const { return (1 << mantWidth) - 1; } + + /** Number mantissa */ + M mant() const { return n & 
mantMask(); } + + /** Number significand */ + M sig() const { return mant() ^ (!subOrZero() << mantWidth);} + + /** bit mask for exponent */ + E expMask() const { return (1 << expWidth) - 1; } + + /* predicate: is the value a subnormal number or a zero */ + bool subOrZero() const { return exp() == 0; } + + /** predicate: is the value a special value (infinity or NaN) */ + virtual bool special() const { return exp() == expMask(); } + + /** predicate: is the value an infinity */ + virtual bool inf() const { return special() && mant() == 0; } + + /** predicate: is the value a NaN (Not A Number) */ + virtual bool nan() const { return special() && mant() != 0; } + + virtual bool sigNan() const { return nan() && !inf() && ( ( mant() >> (mantWidth - 1)) == 0); } + + bool isZero() const { return exp() == 0 && mant() == 0; } +}; + +class bf16_t final : public IEEEFloatFormat<uint16_t, uint8_t, uint8_t, 8, 7> { + public: + operator uint16_t() const { return n; } + + bf16_t() {} + bf16_t(uint16_t _n) : IEEEFloatFormat(_n) {} + + bf16_t flushed() const + { + if (exp() == 0) + return bf16_t(uint16_t(sign() << 15)); + return *this; + } +}; + +/** OpenCompute 8-bit Floating-point E5M2 (5-bit exponent, 2-bit mantissa) */ +class ofp8_e5m2 final : public IEEEFloatFormat<uint8_t, uint8_t, uint8_t, 5, 2> { + public: + operator uint8_t() const { return n; } + ofp8_e5m2() {} + ofp8_e5m2(uint8_t _n) : IEEEFloatFormat(_n) {} + + // OFP8 does not have signaling NaNs + bool sigNan() const { return false; } + + ofp8_e5m2 flushed() const + { + if (exp() == 0) + return ofp8_e5m2(uint8_t(sign() << 7)); + return *this; + } +}; + +/** OpenCompute 8-bit Floating-point E4M3 (4-bit exponent, 3-bit mantissa) */ +class ofp8_e4m3 final : public IEEEFloatFormat<uint8_t, uint8_t, uint8_t, 4, 3> { + public: + operator uint8_t() const { return n; } + ofp8_e4m3() {} + ofp8_e4m3(uint8_t _n) : IEEEFloatFormat(_n) {} + + // E4M3 does not have infinities + bool inf() const { return false; } + + bool nan() const 
{ return exp() == expMask() && mant() == mantMask(); } + + bool special() const { return nan(); } + + // OFP8 does not have signaling NaNs + bool sigNan() const { return false; } + + ofp8_e4m3 flushed() const + { + if (exp() == 0) + return ofp8_e4m3(uint8_t(sign() << 7)); + return *this; + } +}; + +/** bulk-normalization dot product (without accumulation) with binary32 result + * + * The actual products of significands is provided as an argument such that the model can be used + * to match against RTL implementations with external product implementation. + * + * @param cfg dot-product configuration + * @param a left-hand-side operand array + * @param b right-hand-side operand array + * @param prod_signs array of products of significands + * + */ +template<typename ValueTypeLHS, typename ValueTypeRHS, typename SigProdType> bulk_norm_out_t bulk_norm_dot_no_mult(const DotConfig cfg, const ValueTypeLHS* a, const ValueTypeRHS* b, const SigProdType* prod_sigs) +{ + std::vector<int> approx_prod_exp(cfg.n); + std::vector<int> flushed_prods(cfg.n); + + bool any_pos_inf = false; + bool any_neg_inf = false; + bool any_nan = false; + bool any_invalid_nan = false; + bool any_sigNan = false; + + // extracting format parameters from the first element in each input arrays + int lhs_bias = a[0].bias; + int rhs_bias = b[0].bias; + + int lhs_mant_bits = a[0].mant_bits; + int rhs_mant_bits = b[0].mant_bits; + + for (int i = 0; i < cfg.n; i++) { + flushed_prods[i] = (cfg.flushSub && (a[i].subOrZero() || b[i].subOrZero())); + approx_prod_exp[i] = flushed_prods[i] ? 0 : // flush input subnormals + a[i].isZero() || b[i].isZero() ? 
(f32_exp_bias - (lhs_bias + rhs_bias)) : // minimalize exp of zero product + a[i].expSubFixed() + b[i].expSubFixed() + (f32_exp_bias - (lhs_bias + rhs_bias)); + + bool either_inf = a[i].inf() || b[i].inf(); + any_pos_inf |= either_inf && a[i].sign() == b[i].sign(); + any_neg_inf |= either_inf && a[i].sign() != b[i].sign(); + + any_invalid_nan |= + (a[i].inf() && ((b[i].subOrZero() && cfg.flushSub) || b[i].isZero())) || + (b[i].inf() && ((a[i].subOrZero() && cfg.flushSub) || a[i].isZero())); + + any_nan |= any_invalid_nan || a[i].nan() || b[i].nan(); + + any_sigNan |= a[i].sigNan() || b[i].sigNan(); + } + + // find largest exponent + int max_approx_prod_exp = approx_prod_exp[0]; + for (int i = 1; i < cfg.n; i++) { + max_approx_prod_exp = std::max(max_approx_prod_exp, approx_prod_exp[i]); + } + + bool acc_sign = false; // assuming the accumulator is positive + + int64_t acc = 0; + + // compute products, normalize to largest exponent, accumulate + for (int i = 0; i < cfg.n; i++) { + int prod_sign = a[i].sign() ^ b[i].sign(); + uint64_t prod_sig = uint64_t(prod_sigs[i]); // 16 to 64-bit zero extension + // align the product so the width of its fractional part is: f32_mant_bits(23) + guardBits + prod_sig <<= f32_mant_bits - lhs_mant_bits - rhs_mant_bits + cfg.guardBits; + + int shiftAmt = max_approx_prod_exp - approx_prod_exp[i]; + uint64_t shifted_sig = shift_right_jam(prod_sig, shiftAmt); + acc += flushed_prods[i]? 0 : // flush input subnormals + (prod_sign != acc_sign ? -shifted_sig : shifted_sig); + } + + // normalize result to f32 + bool sign = (acc < 0) != acc_sign; + uint64_t mag = acc < 0 ? -acc : acc; // absolute magnitude + int norm_dist = int_log2(mag); + int exp = max_approx_prod_exp - f32_mant_bits - cfg.guardBits + norm_dist; + + // fixing normalization distance for subnormal results + int sig_bits = (!cfg.flushSub && exp <= 0) ? 
f32_mant_bits - (1-exp) : f32_mant_bits; + sig_bits = std::max(sig_bits, 0); + uint32_t rounded_sig = shift_right_jam(uint64_t(mag) << sig_bits, norm_dist); + + bool any_inf = any_pos_inf || any_neg_inf; + bool overflow = (exp >= f32_exp_mask && mag != 0) || any_inf; + bool op_sign_inf = (any_pos_inf && any_neg_inf); + bool nan_out = any_nan || op_sign_inf; + bool overflowflag = (exp >= f32_exp_mask && mag != 0) && !any_inf && !nan_out; + + if (nan_out) { + sign = 0; + exp = f32_exp_mask; + rounded_sig = uint32_t(1) << (f32_mant_bits - 1); + } else if (overflow) { + exp = f32_exp_mask; + rounded_sig = 0; + if (any_inf) + sign = any_neg_inf; + } else if (mag == 0) { + // exact zero result + exp = 0; + } else if (exp <= 0) { + if (cfg.flushSub) { + // flush output subnormals + exp = 0; + rounded_sig = 0; + } else { + exp = 0; + // rounded_sig should have been properly denormalized previously + } + } + + bulk_norm_out_t su; + su.flags = 0; + su.out = (rounded_sig & f32_mant_mask) + | (exp << f32_mant_bits) + | (uint32_t(sign) << (f32_exp_bits + f32_mant_bits)); + + if (any_sigNan) { + su.flags |= softfloat_flag_invalid; + } + if (any_invalid_nan || op_sign_inf) { + su.flags |= softfloat_flag_invalid; + } + if (overflowflag) { + su.flags |= softfloat_flag_overflow; + } + + return su; +} + +/** bf16_t dot product (without accumulation) */ +static inline bulk_norm_out_t bulk_norm_dot_bf16(const DotConfig cfg, const bf16_t* a, const bf16_t* b) +{ + // product are extracted so that the no-mult version can be more easily matched against the RTL implementation + std::vector<uint16_t> prod_sigs(cfg.n); + + // compute products, normalize to largest exponent, accumulate + for (int i = 0; i < cfg.n; i++) { + prod_sigs[i] = a[i].sig() * (uint16_t) b[i].sig(); + } + + return bulk_norm_dot_no_mult<bf16_t, bf16_t, uint16_t>(cfg, a, b, &prod_sigs[0]); +} + +template <typename L, typename R> +bulk_norm_out_t bulk_norm_dot_ofp8(const DotConfig cfg, const L* a, const R* b) +{ + // 
products are extracted so that the no-mult version can be more easily matched against the RTL implementation + std::vector<uint16_t> prod_sigs(cfg.n); + + // compute products, normalize to largest exponent, accumulate + for (int i = 0; i < cfg.n; i++) { + prod_sigs[i] = a[i].sig() * (uint16_t) b[i].sig(); + } + return bulk_norm_dot_no_mult<L, R, uint16_t>(cfg, a, b, &prod_sigs[0]); +} + +#endif diff --git a/riscv/cfg.cc b/riscv/cfg.cc index 2f9a229..cc39a54 100644 --- a/riscv/cfg.cc +++ b/riscv/cfg.cc @@ -47,4 +47,5 @@ cfg_t::cfg_t() explicit_hartids = false; real_time_clint = false; trigger_count = 4; + cache_blocksz = 64; } diff --git a/riscv/cfg.h b/riscv/cfg.h index 388030b..8032856 100644 --- a/riscv/cfg.h +++ b/riscv/cfg.h @@ -78,6 +78,7 @@ public: bool explicit_hartids; bool real_time_clint; reg_t trigger_count; + reg_t cache_blocksz; std::optional<abstract_sim_if_t*> external_simulator; size_t nprocs() const { return hartids.size(); } diff --git a/riscv/clint.cc b/riscv/clint.cc index 3d5c984..e16ebdd 100644 --- a/riscv/clint.cc +++ b/riscv/clint.cc @@ -145,4 +145,4 @@ std::string clint_generate_dts(const sim_t* sim, const std::vector<std::string>& return s.str(); } -REGISTER_DEVICE(clint, clint_parse_from_fdt, clint_generate_dts) +REGISTER_BUILTIN_DEVICE(clint, clint_parse_from_fdt, clint_generate_dts) diff --git a/riscv/common.h b/riscv/common.h index a354ced..b55657b 100644 --- a/riscv/common.h +++ b/riscv/common.h @@ -19,4 +19,16 @@ # define UNUSED #endif +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if __has_cpp_attribute(assume) +# define assume(x) [[assume(x)]] +#elif __has_builtin(__builtin_assume) +# define assume(x) __builtin_assume(x) +#else +# define assume(x) ((void) 0) +#endif + #endif diff --git a/riscv/csr_init.cc b/riscv/csr_init.cc index cabb7c2..4a05a9c 100644 --- a/riscv/csr_init.cc +++ b/riscv/csr_init.cc @@ -12,6 +12,24 @@ void state_t::add_csr(reg_t addr, const csr_t_p& csr) #define add_supervisor_csr(addr, csr) 
add_const_ext_csr('S', addr, csr) #define add_hypervisor_csr(addr, csr) add_ext_csr('H', addr, csr) +void state_t::add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg) +{ + // This assumes xlen is always max_xlen, which is true today (see + // mstatus_csr_t::unlogged_write()): + auto xlen = proc->get_isa().get_max_xlen(); + + const reg_t iprio0_addr = 0x30; + for (int i=0; i<16; i+=2) { + csr_t_p iprio = std::make_shared<aia_csr_t>(proc, iprio0_addr + i, 0, 0); + if (xlen == 32) { + ireg->add_ireg_proxy(iprio0_addr + i, std::make_shared<rv32_low_csr_t>(proc, iprio0_addr + i, iprio)); + ireg->add_ireg_proxy(iprio0_addr + i + 1, std::make_shared<rv32_high_csr_t>(proc, iprio0_addr + i + 1, iprio)); + } else { + ireg->add_ireg_proxy(iprio0_addr + i, iprio); + } + } +} + void state_t::csr_init(processor_t* const proc, reg_t max_isa) { // This assumes xlen is always max_xlen, which is true today (see @@ -87,8 +105,17 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) } } add_const_ext_csr(EXT_SSCOFPMF, CSR_SCOUNTOVF, std::make_shared<scountovf_csr_t>(proc, CSR_SCOUNTOVF)); - add_csr(CSR_MIE, mie = std::make_shared<mie_csr_t>(proc, CSR_MIE)); - add_csr(CSR_MIP, mip = std::make_shared<mip_csr_t>(proc, CSR_MIP)); + mie = std::make_shared<mie_csr_t>(proc, CSR_MIE); + mip = std::make_shared<mip_csr_t>(proc, CSR_MIP); + if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) { + add_csr(CSR_MIE, std::make_shared<rv32_low_csr_t>(proc, CSR_MIE, mie)); + add_csr(CSR_MIEH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIEH, mie)); + add_csr(CSR_MIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MIP, mip)); + add_csr(CSR_MIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIPH, mip)); + } else { + add_csr(CSR_MIE, mie); + add_csr(CSR_MIP, mip); + } auto sip_sie_accr = std::make_shared<generic_int_accessor_t>( this, ~MIP_HS_MASK, // read_mask @@ -116,21 +143,49 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) 1 // 
shiftamt ); - auto nonvirtual_sip = std::make_shared<mip_proxy_csr_t>(proc, CSR_SIP, sip_sie_accr); + nonvirtual_sip = std::make_shared<sip_csr_t>(proc, CSR_SIP, sip_sie_accr); auto vsip = std::make_shared<mip_proxy_csr_t>(proc, CSR_VSIP, vsip_vsie_accr); - add_hypervisor_csr(CSR_VSIP, vsip); - add_supervisor_csr(CSR_SIP, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip)); + auto sip = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip); + if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) { + add_hypervisor_csr(CSR_VSIP, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIP, vsip)); + add_hypervisor_csr(CSR_VSIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIPH, vsip)); + add_supervisor_csr(CSR_SIP, std::make_shared<rv32_low_csr_t>(proc, CSR_SIP, sip)); + add_supervisor_csr(CSR_SIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIPH, sip)); + } else { + add_hypervisor_csr(CSR_VSIP, vsip); + add_supervisor_csr(CSR_SIP, sip); + } add_hypervisor_csr(CSR_HIP, std::make_shared<mip_proxy_csr_t>(proc, CSR_HIP, hip_hie_accr)); - add_hypervisor_csr(CSR_HVIP, hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0)); + hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0); + if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) { + add_hypervisor_csr(CSR_HVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIP, hvip)); + add_hypervisor_csr(CSR_HVIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HVIPH, hvip)); + } else { + add_hypervisor_csr(CSR_HVIP, hvip); + } - auto nonvirtual_sie = std::make_shared<mie_proxy_csr_t>(proc, CSR_SIE, sip_sie_accr); + nonvirtual_sie = std::make_shared<sie_csr_t>(proc, CSR_SIE, sip_sie_accr); auto vsie = std::make_shared<mie_proxy_csr_t>(proc, CSR_VSIE, vsip_vsie_accr); - add_hypervisor_csr(CSR_VSIE, vsie); - add_supervisor_csr(CSR_SIE, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie)); + auto sie = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie); + if (xlen == 32 && 
proc->extension_enabled_const(EXT_SSAIA)) { + add_hypervisor_csr(CSR_VSIE, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIE, vsie)); + add_hypervisor_csr(CSR_VSIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIEH, vsie)); + add_supervisor_csr(CSR_SIE, std::make_shared<rv32_low_csr_t>(proc, CSR_SIE, sie)); + add_supervisor_csr(CSR_SIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIEH, sie)); + } else { + add_hypervisor_csr(CSR_VSIE, vsie); + add_supervisor_csr(CSR_SIE, sie); + } add_hypervisor_csr(CSR_HIE, std::make_shared<mie_proxy_csr_t>(proc, CSR_HIE, hip_hie_accr)); add_supervisor_csr(CSR_MEDELEG, medeleg = std::make_shared<medeleg_csr_t>(proc, CSR_MEDELEG)); - add_supervisor_csr(CSR_MIDELEG, mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG)); + mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG); + if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) { + add_supervisor_csr(CSR_MIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_MIDELEG, mideleg)); + add_supervisor_csr(CSR_MIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_MIDELEGH, mideleg)); + } else { + add_supervisor_csr(CSR_MIDELEG, mideleg); + } const reg_t counteren_mask = (proc->extension_enabled_const(EXT_ZICNTR) ? 0x7UL : 0x0) | (proc->extension_enabled_const(EXT_ZIHPM) ? 
0xfffffff8ULL : 0x0); add_user_csr(CSR_MCOUNTEREN, mcounteren = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTEREN, counteren_mask, 0)); add_csr(CSR_MCOUNTINHIBIT, mcountinhibit = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTINHIBIT, counteren_mask & (~MCOUNTEREN_TIME), 0)); @@ -162,7 +217,32 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) add_hypervisor_csr(CSR_HSTATUS, hstatus = std::make_shared<hstatus_csr_t>(proc, CSR_HSTATUS)); add_hypervisor_csr(CSR_HGEIE, std::make_shared<const_csr_t>(proc, CSR_HGEIE, 0)); add_hypervisor_csr(CSR_HGEIP, std::make_shared<const_csr_t>(proc, CSR_HGEIP, 0)); - add_hypervisor_csr(CSR_HIDELEG, hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg)); + hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg); + if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) { + add_hypervisor_csr(CSR_HIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_HIDELEG, hideleg)); + add_hypervisor_csr(CSR_HIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HIDELEGH, hideleg)); + } else { + add_hypervisor_csr(CSR_HIDELEG, hideleg); + } + + const reg_t menvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? MENVCFG_CBCFE | MENVCFG_CBIE : 0) | + (proc->extension_enabled(EXT_ZICBOZ) ? MENVCFG_CBZE : 0) | + (proc->extension_enabled(EXT_SMNPM) ? MENVCFG_PMM : 0) | + (proc->extension_enabled(EXT_SVADU) ? MENVCFG_ADUE: 0) | + (proc->extension_enabled(EXT_SVPBMT) ? MENVCFG_PBMTE : 0) | + (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0) | + (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) | + (proc->extension_enabled(EXT_ZICFISS) ? MENVCFG_SSE : 0) | + (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0)| + (proc->extension_enabled(EXT_SMCDELEG) ? 
MENVCFG_CDE : 0); + menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0); + if (xlen == 32) { + add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg)); + add_user_csr(CSR_MENVCFGH, std::make_shared<rv32_high_csr_t>(proc, CSR_MENVCFGH, menvcfg)); + } else { + add_user_csr(CSR_MENVCFG, menvcfg); + } + const reg_t hedeleg_mask = (1 << CAUSE_MISALIGNED_FETCH) | (1 << CAUSE_FETCH_ACCESS) | @@ -220,7 +300,14 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) auto hcontext = std::make_shared<masked_csr_t>(proc, CSR_HCONTEXT, (reg_t(1) << hcontext_length) - 1, 0); add_hypervisor_csr(CSR_HCONTEXT, hcontext); add_csr(CSR_MCONTEXT, mcontext = std::make_shared<proxy_csr_t>(proc, CSR_MCONTEXT, hcontext)); - add_csr(CSR_MSECCFG, mseccfg = std::make_shared<mseccfg_csr_t>(proc, CSR_MSECCFG)); + + mseccfg = std::make_shared<mseccfg_csr_t>(proc, CSR_MSECCFG); + if (xlen == 32) { + add_csr(CSR_MSECCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MSECCFG, mseccfg)); + add_csr(CSR_MSECCFGH, mseccfgh = std::make_shared<rv32_high_csr_t>(proc, CSR_MSECCFGH, mseccfg)); + } else { + add_csr(CSR_MSECCFG, mseccfg); + } for (int i = 0; i < max_pmp; ++i) { add_csr(CSR_PMPADDR0 + i, pmpaddr[i] = std::make_shared<pmpaddr_csr_t>(proc, CSR_PMPADDR0 + i)); @@ -242,25 +329,9 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) add_csr(CSR_MVENDORID, std::make_shared<const_csr_t>(proc, CSR_MVENDORID, 0)); add_csr(CSR_MHARTID, std::make_shared<const_csr_t>(proc, CSR_MHARTID, proc->get_id())); add_csr(CSR_MCONFIGPTR, std::make_shared<const_csr_t>(proc, CSR_MCONFIGPTR, 0)); - const reg_t menvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? MENVCFG_CBCFE | MENVCFG_CBIE : 0) | - (proc->extension_enabled(EXT_ZICBOZ) ? MENVCFG_CBZE : 0) | - (proc->extension_enabled(EXT_SMNPM) ? MENVCFG_PMM : 0) | - (proc->extension_enabled(EXT_SVADU) ? MENVCFG_ADUE: 0) | - (proc->extension_enabled(EXT_SVPBMT) ? 
MENVCFG_PBMTE : 0) | - (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0) | - (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) | - (proc->extension_enabled(EXT_ZICFISS) ? MENVCFG_SSE : 0) | - (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0)| - (proc->extension_enabled(EXT_SMCSRIND) ? MENVCFG_CDE : 0); - menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0); - if (xlen == 32) { - add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg)); - add_user_csr(CSR_MENVCFGH, std::make_shared<rv32_high_csr_t>(proc, CSR_MENVCFGH, menvcfg)); - } else { - add_user_csr(CSR_MENVCFG, menvcfg); - } const reg_t senvcfg_mask = (proc->extension_enabled(EXT_ZICBOM) ? SENVCFG_CBCFE | SENVCFG_CBIE : 0) | (proc->extension_enabled(EXT_ZICBOZ) ? SENVCFG_CBZE : 0) | + (proc->extension_enabled(EXT_SVUKTE) ? SENVCFG_UKTE : 0) | (proc->extension_enabled(EXT_SSNPM) ? SENVCFG_PMM : 0) | (proc->extension_enabled(EXT_ZICFILP) ? SENVCFG_LPE : 0) | (proc->extension_enabled(EXT_ZICFISS) ? SENVCFG_SSE : 0); @@ -285,7 +356,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) const reg_t sstateen0_mask = (proc->extension_enabled(EXT_ZFINX) ? SSTATEEN0_FCSR : 0) | (proc->extension_enabled(EXT_ZCMT) ? SSTATEEN0_JVT : 0) | SSTATEEN0_CS; - const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN; + const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_CSRIND | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN; const reg_t mstateen0_mask = hstateen0_mask | (proc->extension_enabled(EXT_SSQOSID) ? MSTATEEN0_PRIV114 : 0); for (int i = 0; i < 4; i++) { const reg_t mstateen_mask = i == 0 ? 
mstateen0_mask : MSTATEEN_HSTATEEN; @@ -321,7 +392,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) if (proc->extension_enabled_const(EXT_SSTC)) { stimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_STIMECMP, MIP_STIP); vstimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_VSTIMECMP, MIP_VSTIP); - auto virtualized_stimecmp = std::make_shared<virtualized_stimecmp_csr_t>(proc, stimecmp, vstimecmp); + auto virtualized_stimecmp = std::make_shared<virtualized_with_special_permission_csr_t>(proc, stimecmp, vstimecmp); if (xlen == 32) { add_supervisor_csr(CSR_STIMECMP, std::make_shared<rv32_low_csr_t>(proc, CSR_STIMECMP, virtualized_stimecmp)); add_supervisor_csr(CSR_STIMECMPH, std::make_shared<rv32_high_csr_t>(proc, CSR_STIMECMPH, virtualized_stimecmp)); @@ -348,20 +419,41 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) csr_t_p miselect = std::make_shared<basic_csr_t>(proc, CSR_MISELECT, 0); add_csr(CSR_MISELECT, miselect); - const reg_t mireg_csrs[] = { CSR_MIREG, CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 }; + sscsrind_reg_csr_t::sscsrind_reg_csr_t_p mireg; + add_csr(CSR_MIREG, mireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_MIREG, miselect)); + add_ireg_proxy(proc, mireg); + const reg_t mireg_csrs[] = { CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 }; for (auto csr : mireg_csrs) add_csr(csr, std::make_shared<sscsrind_reg_csr_t>(proc, csr, miselect)); } if (proc->extension_enabled_const(EXT_SSCSRIND)) { - csr_t_p vsiselect = std::make_shared<basic_csr_t>(proc, CSR_VSISELECT, 0); + csr_t_p vsiselect = std::make_shared<siselect_csr_t>(proc, CSR_VSISELECT, 0); add_hypervisor_csr(CSR_VSISELECT, vsiselect); - csr_t_p siselect = std::make_shared<basic_csr_t>(proc, CSR_SISELECT, 0); - add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_csr_t>(proc, siselect, vsiselect)); + csr_t_p siselect = std::make_shared<siselect_csr_t>(proc, CSR_SISELECT, 0); + add_supervisor_csr(CSR_SISELECT, 
std::make_shared<virtualized_with_special_permission_csr_t>(proc, siselect, vsiselect)); + + auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_VSIREG, vsiselect); + add_hypervisor_csr(CSR_VSIREG, vsireg); - const reg_t vsireg_csrs[] = { CSR_VSIREG, CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 }; - const reg_t sireg_csrs[] = { CSR_SIREG, CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 }; + auto sireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_SIREG, siselect); + add_ireg_proxy(proc, sireg); + add_supervisor_csr(CSR_SIREG, std::make_shared<virtualized_indirect_csr_t>(proc, sireg, vsireg)); + if (proc->extension_enabled(EXT_SSCCFG) || proc->extension_enabled(EXT_SMCDELEG)) { + // case CSR_SIREG + if (proc->extension_enabled_const(EXT_ZICNTR)) { + sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle); + sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret); + } + if (proc->extension_enabled_const(EXT_ZIHPM)) { + for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++) + sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]); + } + } + + const reg_t vsireg_csrs[] = { CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 }; + const reg_t sireg_csrs[] = { CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 }; for (size_t i = 0; i < std::size(vsireg_csrs); i++) { auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, vsireg_csrs[i], vsiselect); add_hypervisor_csr(vsireg_csrs[i], vsireg); @@ -372,16 +464,6 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) // Smcdeleg if (proc->extension_enabled(EXT_SSCCFG) || proc->extension_enabled(EXT_SMCDELEG)) { switch (sireg_csrs[i]) { - case CSR_SIREG: - if (proc->extension_enabled_const(EXT_ZICNTR)) { - sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle); - sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret); - } - if (proc->extension_enabled_const(EXT_ZIHPM)) { - for (size_t 
j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++) - sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]); - } - break; case CSR_SIREG4: if (xlen == 32) { if (proc->extension_enabled_const(EXT_ZICNTR)) { @@ -438,4 +520,44 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa) const reg_t srmcfg_mask = SRMCFG_MCID | SRMCFG_RCID; add_const_ext_csr(EXT_SSQOSID, CSR_SRMCFG, std::make_shared<srmcfg_csr_t>(proc, CSR_SRMCFG, srmcfg_mask, 0)); + + mvien = std::make_shared<masked_csr_t>(proc, CSR_MVIEN, MIP_SEIP | MIP_SSIP, 0); + mvip = std::make_shared<mvip_csr_t>(proc, CSR_MVIP, 0); + if (proc->extension_enabled_const(EXT_SMAIA)) { + add_csr(CSR_MTOPI, std::make_shared<mtopi_csr_t>(proc, CSR_MTOPI)); + if (xlen == 32) { + add_supervisor_csr(CSR_MVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIEN, mvien)); + add_supervisor_csr(CSR_MVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIENH, mvien)); + add_supervisor_csr(CSR_MVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIP, mvip)); + add_supervisor_csr(CSR_MVIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIPH, mvip)); + } else { + add_supervisor_csr(CSR_MVIEN, mvien); + add_supervisor_csr(CSR_MVIP, mvip); + } + } + + hvictl = std::make_shared<aia_csr_t>(proc, CSR_HVICTL, HVICTL_VTI | HVICTL_IID | HVICTL_DPR | HVICTL_IPRIOM | HVICTL_IPRIO, 0); + vstopi = std::make_shared<vstopi_csr_t>(proc, CSR_VSTOPI); + if (proc->extension_enabled_const(EXT_SSAIA)) { // Included by EXT_SMAIA + csr_t_p nonvirtual_stopi = std::make_shared<nonvirtual_stopi_csr_t>(proc, CSR_STOPI); + add_supervisor_csr(CSR_STOPI, std::make_shared<virtualized_with_special_permission_csr_t>(proc, nonvirtual_stopi, vstopi)); + add_supervisor_csr(CSR_STOPEI, std::make_shared<inaccessible_csr_t>(proc, CSR_STOPEI)); + auto hvien = std::make_shared<aia_csr_t>(proc, CSR_HVIEN, 0, 0); + auto hviprio1 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO1, 0, 0); + auto hviprio2 = 
std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO2, 0, 0); + if (xlen == 32) { + add_hypervisor_csr(CSR_HVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIEN, hvien)); + add_hypervisor_csr(CSR_HVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIENH, hvien)); + add_hypervisor_csr(CSR_HVIPRIO1, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO1, hviprio1)); + add_hypervisor_csr(CSR_HVIPRIO1H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO1H, hviprio1)); + add_hypervisor_csr(CSR_HVIPRIO2, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO2, hviprio2)); + add_hypervisor_csr(CSR_HVIPRIO2H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO2H, hviprio2)); + } else { + add_hypervisor_csr(CSR_HVIEN, hvien); + add_hypervisor_csr(CSR_HVIPRIO1, hviprio1); + add_hypervisor_csr(CSR_HVIPRIO2, hviprio2); + } + add_hypervisor_csr(CSR_HVICTL, hvictl); + add_hypervisor_csr(CSR_VSTOPI, vstopi); + } } diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 1873f7e..914662a 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -15,6 +15,8 @@ #include "insn_macros.h" // For CSR_DCSR_V: #include "debug_defines.h" +// For ctz: +#include "arith.h" // STATE macro used by require_privilege() macro: #undef STATE @@ -119,7 +121,7 @@ bool pmpaddr_csr_t::unlogged_write(const reg_t val) noexcept { const bool locked = !lock_bypass && (cfg & PMP_L); if (pmpidx < proc->n_pmp && !locked && !next_locked_and_tor()) { - this->val = val & ((reg_t(1) << (MAX_PADDR_BITS - PMP_SHIFT)) - 1); + this->val = val & ((reg_t(1) << (proc->paddr_bits() - PMP_SHIFT)) - 1); } else return false; @@ -247,7 +249,10 @@ bool pmpcfg_csr_t::unlogged_write(const reg_t val) noexcept { if (i < proc->n_pmp) { const bool locked = (state->pmpaddr[i]->cfg & PMP_L); if (rlb || !locked) { - uint8_t cfg = (val >> (8 * (i - i0))) & (PMP_R | PMP_W | PMP_X | PMP_A | PMP_L); + uint8_t all_cfg_fields = (PMP_R | PMP_W | PMP_X | PMP_A | + (proc->extension_enabled(EXT_SMPMPMT) ? 
PMP_MT : 0) | + PMP_L); + uint8_t cfg = (val >> (8 * (i - i0))) & all_cfg_fields; // Drop R=0 W=1 when MML = 0 // Remove the restriction when MML = 1 if (!mml) { @@ -256,6 +261,9 @@ bool pmpcfg_csr_t::unlogged_write(const reg_t val) noexcept { // Disallow A=NA4 when granularity > 4 if (proc->lg_pmp_granularity != PMP_SHIFT && (cfg & PMP_A) == PMP_NA4) cfg |= PMP_NAPOT; + // MT value 0x3 is reserved + if (get_field(cfg, PMP_MT) == 0x3) + cfg = set_field(cfg, PMP_MT, 0); /* * Adding a rule with executable privileges that either is M-mode-only or a locked Shared-Region * is not possible and such pmpcfg writes are ignored, leaving pmpcfg unchanged. @@ -313,31 +321,31 @@ bool mseccfg_csr_t::get_sseed() const noexcept { } bool mseccfg_csr_t::unlogged_write(const reg_t val) noexcept { - if (proc->n_pmp == 0) - return false; - - // pmpcfg.L is 1 in any rule or entry (including disabled entries) - const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp, - [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } ); reg_t new_val = read(); - // When RLB is 0 and pmplock_recorded, RLB is locked to 0. - // Otherwise set the RLB bit according val - if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) { - new_val &= ~MSECCFG_RLB; - new_val |= (val & MSECCFG_RLB); - } + if (proc->n_pmp != 0) { + // pmpcfg.L is 1 in any rule or entry (including disabled entries) + const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp, + [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } ); + + // When RLB is 0 and pmplock_recorded, RLB is locked to 0. 
+ // Otherwise set the RLB bit according val + if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) { + new_val &= ~MSECCFG_RLB; + new_val |= (val & MSECCFG_RLB); + } - new_val |= (val & MSECCFG_MMWP); //MMWP is sticky - new_val |= (val & MSECCFG_MML); //MML is sticky + new_val |= (val & MSECCFG_MMWP); //MMWP is sticky + new_val |= (val & MSECCFG_MML); //MML is sticky + + proc->get_mmu()->flush_tlb(); + } if (proc->extension_enabled(EXT_ZKR)) { uint64_t mask = MSECCFG_USEED | MSECCFG_SSEED; new_val = (new_val & ~mask) | (val & mask); } - proc->get_mmu()->flush_tlb(); - if (proc->extension_enabled(EXT_ZICFILP)) { new_val &= ~MSECCFG_MLPE; new_val |= (val & MSECCFG_MLPE); @@ -423,7 +431,7 @@ reg_t cause_csr_t::read() const noexcept { // implement class base_status_csr_t base_status_csr_t::base_status_csr_t(processor_t* const proc, const reg_t addr): csr_t(proc, addr), - has_page(proc->extension_enabled_const('S') && proc->supports_impl(IMPL_MMU)), + has_page(proc->extension_enabled_const('S') && proc->has_mmu()), sstatus_write_mask(compute_sstatus_write_mask()), sstatus_read_mask(sstatus_write_mask | SSTATUS_UBE | SSTATUS_UXL | (proc->get_const_xlen() == 32 ? SSTATUS32_SD : SSTATUS64_SD)) { @@ -441,7 +449,7 @@ reg_t base_status_csr_t::compute_sstatus_write_mask() const noexcept { | (has_fs ? SSTATUS_FS : 0) | (proc->any_custom_extensions() ? SSTATUS_XS : 0) | (has_vs ? SSTATUS_VS : 0) - | (proc->extension_enabled(EXT_ZICFILP) ? SSTATUS_SPELP : 0) + | (proc->extension_enabled('S') && proc->extension_enabled(EXT_ZICFILP) ? SSTATUS_SPELP : 0) | (proc->extension_enabled(EXT_SSDBLTRP) ? SSTATUS_SDT : 0) ; } @@ -536,11 +544,16 @@ mstatus_csr_t::mstatus_csr_t(processor_t* const proc, const reg_t addr): val(compute_mstatus_initial_value()) { } +reg_t mstatus_csr_t::read() const noexcept { + return val & ~reg_t(state->menvcfg->read() & MENVCFG_DTE ? 
0 : MSTATUS_SDT); +} + bool mstatus_csr_t::unlogged_write(const reg_t val) noexcept { const bool has_mpv = proc->extension_enabled('H'); const bool has_gva = has_mpv; + const reg_t adj_write_mask = sstatus_write_mask & ~reg_t(state->menvcfg->read() & MENVCFG_DTE ? 0 : SSTATUS_SDT); - const reg_t mask = sstatus_write_mask + const reg_t mask = adj_write_mask | MSTATUS_MIE | MSTATUS_MPIE | (proc->extension_enabled('U') ? MSTATUS_MPRV : 0) | MSTATUS_MPP | MSTATUS_TW @@ -549,13 +562,12 @@ bool mstatus_csr_t::unlogged_write(const reg_t val) noexcept { | (has_gva ? MSTATUS_GVA : 0) | (has_mpv ? MSTATUS_MPV : 0) | (proc->extension_enabled(EXT_SMDBLTRP) ? MSTATUS_MDT : 0) - | (proc->extension_enabled(EXT_ZICFILP) ? (MSTATUS_SPELP | MSTATUS_MPELP) : 0) - | (proc->extension_enabled(EXT_SSDBLTRP) ? SSTATUS_SDT : 0) + | (proc->extension_enabled(EXT_ZICFILP) ? (MSTATUS_MPELP | (proc->extension_enabled('S') ? MSTATUS_SPELP : 0)) : 0) ; const reg_t requested_mpp = proc->legalize_privilege(get_field(val, MSTATUS_MPP)); const reg_t adjusted_val = set_field(val, MSTATUS_MPP, requested_mpp); - reg_t new_mstatus = (read() & ~mask) | (adjusted_val & mask); + reg_t new_mstatus = (this->val & ~mask) | (adjusted_val & mask); new_mstatus = (new_mstatus & MSTATUS_MDT) ? (new_mstatus & ~MSTATUS_MIE) : new_mstatus; new_mstatus = (new_mstatus & MSTATUS_SDT) ? 
(new_mstatus & ~MSTATUS_SIE) : new_mstatus; maybe_flush_tlb(new_mstatus); @@ -639,6 +651,22 @@ reg_t rv32_high_csr_t::written_value() const noexcept { return (orig->written_value() >> 32) & 0xffffffffU; } +aia_rv32_high_csr_t::aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig): + rv32_high_csr_t(proc, addr, orig) { +} + +void aia_rv32_high_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + rv32_high_csr_t::verify_permissions(insn, write); +} + // implement class sstatus_csr_t sstatus_csr_t::sstatus_csr_t(processor_t* const proc, sstatus_proxy_csr_t_p orig, vsstatus_csr_t_p virt): virtualized_csr_t(proc, orig, virt), @@ -708,17 +736,18 @@ bool misa_csr_t::unlogged_write(const reg_t val) noexcept { const bool prev_h = old_misa & (1L << ('H' - 'A')); const reg_t new_misa = (adjusted_val & write_mask) | (old_misa & ~write_mask); const bool new_h = new_misa & (1L << ('H' - 'A')); + const bool new_v = proc->get_isa().has_any_vector(); proc->set_extension_enable(EXT_ZCA, (new_misa & (1L << ('C' - 'A'))) || !proc->get_isa().extension_enabled('C')); - proc->set_extension_enable(EXT_ZCF, (new_misa & (1L << ('F' - 'A'))) && proc->extension_enabled(EXT_ZCA)); + proc->set_extension_enable(EXT_ZCF, (new_misa & (1L << ('F' - 'A'))) && proc->extension_enabled(EXT_ZCA) && proc->get_xlen() == 32); proc->set_extension_enable(EXT_ZCD, (new_misa & (1L << ('D' - 'A'))) && proc->extension_enabled(EXT_ZCA)); proc->set_extension_enable(EXT_ZCB, proc->extension_enabled(EXT_ZCA)); proc->set_extension_enable(EXT_ZCMP, proc->extension_enabled(EXT_ZCA)); proc->set_extension_enable(EXT_ZCMT, proc->extension_enabled(EXT_ZCA)); proc->set_extension_enable(EXT_ZFH, new_misa & 
(1L << ('F' - 'A'))); proc->set_extension_enable(EXT_ZFHMIN, new_misa & (1L << ('F' - 'A'))); - proc->set_extension_enable(EXT_ZVFH, (new_misa & (1L << ('V' - 'A'))) && proc->extension_enabled(EXT_ZFHMIN)); - proc->set_extension_enable(EXT_ZVFHMIN, new_misa & (1L << ('V' - 'A'))); + proc->set_extension_enable(EXT_ZVFH, new_v && proc->get_isa().get_zvf() && proc->extension_enabled(EXT_ZFHMIN)); + proc->set_extension_enable(EXT_ZVFHMIN, new_v && proc->get_isa().get_zvf()); proc->set_extension_enable(EXT_ZAAMO, (new_misa & (1L << ('A' - 'A'))) || !proc->get_isa().extension_enabled('A')); proc->set_extension_enable(EXT_ZALRSC, (new_misa & (1L << ('A' - 'A'))) || !proc->get_isa().extension_enabled('A')); proc->set_extension_enable(EXT_ZBA, (new_misa & (1L << ('B' - 'A'))) || !proc->get_isa().extension_enabled('B')); @@ -749,6 +778,9 @@ bool misa_csr_t::unlogged_write(const reg_t val) noexcept { } } + proc->get_mmu()->flush_tlb(); + proc->build_opcode_map(); + return basic_csr_t::unlogged_write(new_misa); } @@ -781,8 +813,14 @@ mip_csr_t::mip_csr_t(processor_t* const proc, const reg_t addr): mip_or_mie_csr_t(proc, addr) { } +void mip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { + if (!(state->mvien->read() & MIP_SEIP) && (mask & MIP_SEIP)) + state->mvip->write_with_mask(MIP_SEIP, val); // mvip.SEIP is an alias of mip.SEIP when mvien.SEIP=0 + mip_or_mie_csr_t::write_with_mask(mask & ~MIP_SEIP, val); +} + reg_t mip_csr_t::read() const noexcept { - return val | state->hvip->basic_csr_t::read(); + return val | state->hvip->basic_csr_t::read() | ((state->mvien->read() & MIP_SEIP) ? 
0 : (state->mvip->basic_csr_t::read() & MIP_SEIP)); } void mip_csr_t::backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept { @@ -864,6 +902,15 @@ mip_proxy_csr_t::mip_proxy_csr_t(processor_t* const proc, const reg_t addr, gene accr(accr) { } +void mip_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { + csr_t::verify_permissions(insn, write); + if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) { + if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && + proc->extension_enabled('S') && state->v) + throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sip when hvictl.VTI=1 + } +} + reg_t mip_proxy_csr_t::read() const noexcept { return accr->ip_read(); } @@ -879,6 +926,15 @@ mie_proxy_csr_t::mie_proxy_csr_t(processor_t* const proc, const reg_t addr, gene accr(accr) { } +void mie_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { + csr_t::verify_permissions(insn, write); + if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) { + if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && + proc->extension_enabled('S') && state->v) + throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sie when hvictl.VTI=1 + } +} + reg_t mie_proxy_csr_t::read() const noexcept { return accr->ie_read(); } @@ -924,8 +980,11 @@ medeleg_csr_t::medeleg_csr_t(processor_t* const proc, const reg_t addr): | (1 << CAUSE_FETCH_GUEST_PAGE_FAULT) | (1 << CAUSE_LOAD_GUEST_PAGE_FAULT) | (1 << CAUSE_VIRTUAL_INSTRUCTION) - | (1 << CAUSE_STORE_GUEST_PAGE_FAULT) - ) { + | (1 << CAUSE_STORE_GUEST_PAGE_FAULT)), + mmu_exceptions(0 + | (1 << CAUSE_FETCH_PAGE_FAULT) + | (1 << CAUSE_LOAD_PAGE_FAULT) + | (1 << CAUSE_STORE_PAGE_FAULT)) { } void medeleg_csr_t::verify_permissions(insn_t insn, bool write) const { @@ -946,9 +1005,7 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept { | (1 << CAUSE_STORE_ACCESS) | (1 << CAUSE_USER_ECALL) | (1 << CAUSE_SUPERVISOR_ECALL) - 
| (1 << CAUSE_FETCH_PAGE_FAULT) - | (1 << CAUSE_LOAD_PAGE_FAULT) - | (1 << CAUSE_STORE_PAGE_FAULT) + | (proc->has_mmu() ? mmu_exceptions : 0) | (proc->extension_enabled('H') ? hypervisor_exceptions : 0) | (1 << CAUSE_SOFTWARE_CHECK_FAULT) | (1 << CAUSE_HARDWARE_ERROR_FAULT) @@ -956,6 +1013,38 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept { return basic_csr_t::unlogged_write((read() & ~mask) | (val & mask)); } +sip_csr_t::sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr): + mip_proxy_csr_t(proc, addr, accr) { +} + +reg_t sip_csr_t::read() const noexcept { + const reg_t mask = ~state->mideleg->read() & state->mvien->read(); + return (mip_proxy_csr_t::read() & ~mask) | (state->mvip->read() & mask); +} + +bool sip_csr_t::unlogged_write(const reg_t val) noexcept { + const reg_t mask = ~state->mideleg->read() & state->mvien->read(); + state->mvip->write_with_mask(mask & accr->get_ip_write_mask(), val); + return mip_proxy_csr_t::unlogged_write(val & ~mask); +} + +sie_csr_t::sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr): + mie_proxy_csr_t(proc, addr, accr), + val(0) { +} + +reg_t sie_csr_t::read() const noexcept { + const reg_t mask = ~state->mideleg->read() & state->mvien->read(); + return (mie_proxy_csr_t::read() & ~mask) | (val & mask); +} + +bool sie_csr_t::unlogged_write(const reg_t val) noexcept { + const reg_t mask = ~state->mideleg->read() & state->mvien->read(); + this->val = (this->val & ~mask) | (val & mask); + mie_proxy_csr_t::unlogged_write(val & ~mask); + return true; +} + // implement class masked_csr_t masked_csr_t::masked_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init): basic_csr_t(proc, addr, init), @@ -999,7 +1088,7 @@ base_atp_csr_t::base_atp_csr_t(processor_t* const proc, const reg_t addr): } bool base_atp_csr_t::unlogged_write(const reg_t val) noexcept { - const reg_t newval = proc->supports_impl(IMPL_MMU) ? 
compute_new_satp(val) : 0; + const reg_t newval = proc->has_mmu() ? compute_new_satp(val) : 0; if (newval != read()) proc->get_mmu()->flush_tlb(); return basic_csr_t::unlogged_write(newval); @@ -1008,23 +1097,23 @@ bool base_atp_csr_t::unlogged_write(const reg_t val) noexcept { bool base_atp_csr_t::satp_valid(reg_t val) const noexcept { if (proc->get_xlen() == 32) { switch (get_field(val, SATP32_MODE)) { - case SATP_MODE_SV32: return proc->supports_impl(IMPL_MMU_SV32); case SATP_MODE_OFF: return true; + case SATP_MODE_SV32: return proc->get_max_vaddr_bits() >= 32; default: return false; } } else { switch (get_field(val, SATP64_MODE)) { - case SATP_MODE_SV39: return proc->supports_impl(IMPL_MMU_SV39); - case SATP_MODE_SV48: return proc->supports_impl(IMPL_MMU_SV48); - case SATP_MODE_SV57: return proc->supports_impl(IMPL_MMU_SV57); case SATP_MODE_OFF: return true; + case SATP_MODE_SV39: return proc->get_max_vaddr_bits() >= 39; + case SATP_MODE_SV48: return proc->get_max_vaddr_bits() >= 48; + case SATP_MODE_SV57: return proc->get_max_vaddr_bits() >= 57; default: return false; } } } reg_t base_atp_csr_t::compute_new_satp(reg_t val) const noexcept { - reg_t rv64_ppn_mask = (reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1; + reg_t rv64_ppn_mask = (reg_t(1) << (proc->paddr_bits() - PGSHIFT)) - 1; reg_t mode_mask = proc->get_xlen() == 32 ? SATP32_MODE : SATP64_MODE; reg_t asid_mask_if_enabled = proc->get_xlen() == 32 ? SATP32_ASID : SATP64_ASID; @@ -1252,13 +1341,13 @@ bool hgatp_csr_t::unlogged_write(const reg_t val) noexcept { HGATP32_MODE | (proc->supports_impl(IMPL_MMU_VMID) ? HGATP32_VMID : 0); } else { - mask = (HGATP64_PPN & ((reg_t(1) << (MAX_PADDR_BITS - PGSHIFT)) - 1)) | + mask = (HGATP64_PPN & ((reg_t(1) << (proc->paddr_bits() - PGSHIFT)) - 1)) | (proc->supports_impl(IMPL_MMU_VMID) ? 
HGATP64_VMID : 0); if (get_field(val, HGATP64_MODE) == HGATP_MODE_OFF || - (proc->supports_impl(IMPL_MMU_SV39) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4) || - (proc->supports_impl(IMPL_MMU_SV48) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4) || - (proc->supports_impl(IMPL_MMU_SV57) && get_field(val, HGATP64_MODE) == HGATP_MODE_SV57X4)) + (proc->get_max_vaddr_bits() >= 39 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV39X4) || + (proc->get_max_vaddr_bits() >= 48 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV48X4) || + (proc->get_max_vaddr_bits() >= 57 && get_field(val, HGATP64_MODE) == HGATP_MODE_SV57X4)) mask |= HGATP64_MODE; } mask &= ~(reg_t)3; @@ -1347,6 +1436,7 @@ dcsr_csr_t::dcsr_csr_t(processor_t* const proc, const reg_t addr): ebreakvs(false), ebreakvu(false), v(false), + mprven(false), cause(0), ext_cause(0), cetrig(0), @@ -1376,6 +1466,7 @@ reg_t dcsr_csr_t::read() const noexcept { result = set_field(result, DCSR_STEP, step); result = set_field(result, DCSR_PRV, prv); result = set_field(result, CSR_DCSR_V, v); + result = set_field(result, DCSR_MPRVEN, mprven); result = set_field(result, DCSR_PELP, pelp); return result; } @@ -1390,6 +1481,7 @@ bool dcsr_csr_t::unlogged_write(const reg_t val) noexcept { ebreakvs = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_EBREAKVS) : false; ebreakvu = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_EBREAKVU) : false; v = proc->extension_enabled('H') ? get_field(val, CSR_DCSR_V) : false; + mprven = get_field(val, CSR_DCSR_MPRVEN); pelp = proc->extension_enabled(EXT_ZICFILP) ? static_cast<elp_t>(get_field(val, DCSR_PELP)) : elp_t::NO_LP_EXPECTED; cetrig = proc->extension_enabled(EXT_SMDBLTRP) ? 
get_field(val, DCSR_CETRIG) : false; @@ -1645,10 +1737,6 @@ bool stimecmp_csr_t::unlogged_write(const reg_t val) noexcept { return basic_csr_t::unlogged_write(val); } -virtualized_stimecmp_csr_t::virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt): - virtualized_csr_t(proc, orig, virt) { -} - void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const { if (!(state->menvcfg->read() & MENVCFG_STCE)) { // access to (v)stimecmp with MENVCFG.STCE = 0 @@ -1664,9 +1752,18 @@ void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const { } basic_csr_t::verify_permissions(insn, write); + + if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) { + if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && state->v && write) + throw trap_virtual_instruction(insn.bits()); + } +} + +virtualized_with_special_permission_csr_t::virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt): + virtualized_csr_t(proc, orig, virt) { } -void virtualized_stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const { +void virtualized_with_special_permission_csr_t::verify_permissions(insn_t insn, bool write) const { orig_csr->verify_permissions(insn, write); } @@ -1754,20 +1851,22 @@ sscsrind_reg_csr_t::sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr } void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND)) + throw trap_illegal_instruction(insn.bits()); + } + // Don't call base verify_permission for VS registers remapped to S-mode if (insn.csr() == address) csr_t::verify_permissions(insn, write); if (proc->extension_enabled(EXT_SMSTATEEN)) { - if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND)) - throw trap_illegal_instruction(insn.bits()); - if (state->v && !(state->hstateen[0]->read() & 
HSTATEEN0_CSRIND)) throw trap_virtual_instruction(insn.bits()); } if (proc->extension_enabled(EXT_SMCDELEG)) { - if (insn.csr() >= CSR_VSIREG && insn.csr() <= CSR_VSIREG6) { + if (address >= CSR_VSIREG && address <= CSR_VSIREG6) { if (!state->v) { // An attempt to access any vsireg* from M or S mode raises an illegal instruction exception. throw trap_illegal_instruction(insn.bits()); @@ -1785,7 +1884,7 @@ void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const { } } } - if (insn.csr() >= CSR_SIREG && insn.csr() <= CSR_SIREG6) { + if (address >= CSR_SIREG && address <= CSR_SIREG6) { // attempts to access any sireg* when menvcfg.CDE = 0; if ((state->menvcfg->read() & MENVCFG_CDE) != MENVCFG_CDE) { if (!state->v) { @@ -1936,8 +2035,9 @@ hstatus_csr_t::hstatus_csr_t(processor_t* const proc, const reg_t addr): } bool hstatus_csr_t::unlogged_write(const reg_t val) noexcept { - const reg_t mask = HSTATUS_VTSR | HSTATUS_VTW - | (proc->supports_impl(IMPL_MMU) ? HSTATUS_VTVM : 0) + const reg_t mask = (proc->extension_enabled(EXT_SVUKTE) ? HSTATUS_HUKTE : 0) + | HSTATUS_VTSR | HSTATUS_VTW + | (proc->has_mmu() ? HSTATUS_VTVM : 0) | (proc->extension_enabled(EXT_SSNPM) ? 
HSTATUS_HUPMM : 0) | HSTATUS_HU | HSTATUS_SPVP | HSTATUS_SPV | HSTATUS_GVA; @@ -1973,3 +2073,176 @@ bool scntinhibit_csr_t::unlogged_write(const reg_t val) noexcept { reg_t scntinhibit_csr_t::read() const noexcept { return state->mcounteren->read() & state->mcountinhibit->read(); } + +mtopi_csr_t::mtopi_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +reg_t mtopi_csr_t::read() const noexcept { + reg_t enabled_interrupts = state->mip->read() & state->mie->read() & ~state->mideleg->read(); + if (!enabled_interrupts) + return 0; // no enabled pending interrupt to M-mode + + reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts); + reg_t identity = ctz(selected_interrupt); + return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0 +} + +bool mtopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept { + return false; +} + +mvip_csr_t::mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init): + basic_csr_t(proc, addr, init) { +} + +reg_t mvip_csr_t::read() const noexcept { + const reg_t val = basic_csr_t::read(); + const reg_t mvien = state->mvien->read(); + const reg_t mip = state->mip->read(); + const reg_t menvcfg = state->menvcfg->read(); + return 0 + | (val & MIP_SEIP) + | ((menvcfg & MENVCFG_STCE) ? 0 : (mip & MIP_STIP)) + | (((mvien & MIP_SSIP) ? val : mip) & MIP_SSIP) + ; +} + +bool mvip_csr_t::unlogged_write(const reg_t val) noexcept { + if (!(state->menvcfg->read() & MENVCFG_STCE)) + state->mip->write_with_mask(MIP_STIP, val); // mvip.STIP is an alias of mip.STIP when mip.STIP is writable + if (!(state->mvien->read() & MIP_SSIP)) + state->mip->write_with_mask(MIP_SSIP, val); // mvip.SSIP is an alias of mip.SSIP when mvien.SSIP=0 + + const reg_t new_val = (val & MIP_SEIP) | (((state->mvien->read() & MIP_SSIP) ? 
val : basic_csr_t::read()) & MIP_SSIP); + return basic_csr_t::unlogged_write(new_val); +} + +void mvip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { + basic_csr_t::unlogged_write((basic_csr_t::read() & ~mask) | (val & mask)); + log_write(); +} + +nonvirtual_stopi_csr_t::nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void nonvirtual_stopi_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + csr_t::verify_permissions(insn, write); +} + +reg_t nonvirtual_stopi_csr_t::read() const noexcept { + reg_t enabled_interrupts = state->nonvirtual_sip->read() & state->nonvirtual_sie->read() & ~state->hideleg->read(); + if (!enabled_interrupts) + return 0; // no enabled pending interrupt to S-mode + + reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts); + reg_t identity = ctz(selected_interrupt); + return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0 +} + +bool nonvirtual_stopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept { + return false; +} + +inaccessible_csr_t::inaccessible_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void inaccessible_csr_t::verify_permissions(insn_t insn, bool UNUSED write) const { + if (state->v) + throw trap_virtual_instruction(insn.bits()); + else + throw trap_illegal_instruction(insn.bits()); +} + +vstopi_csr_t::vstopi_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void vstopi_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() 
& MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + csr_t::verify_permissions(insn, write); +} + +reg_t vstopi_csr_t::read() const noexcept { + reg_t hvictl = state->hvictl->read(); + bool vti = hvictl & HVICTL_VTI; + reg_t iid = get_field(hvictl, HVICTL_IID); + bool dpr = hvictl & HVICTL_DPR; + bool ipriom = hvictl & HVICTL_IPRIOM; + reg_t iprio = get_field(hvictl, HVICTL_IPRIO); + + reg_t enabled_interrupts = state->mip->read() & state->mie->read() & state->hideleg->read(); + enabled_interrupts >>= 1; // VSSIP -> SSIP, etc + reg_t vgein = get_field(state->hstatus->read(), HSTATUS_VGEIN); + reg_t virtual_sei_priority = (vgein == 0 && iid == IRQ_S_EXT && iprio != 0) ? iprio : 255; // vstopi.IPRIO is 255 for priority number 256 + + reg_t identity, priority; + if (vti) { + if (!(enabled_interrupts & MIP_SEIP) && iid == IRQ_S_EXT) + return 0; + + identity = ((enabled_interrupts & MIP_SEIP) && (iid == IRQ_S_EXT || dpr)) ? IRQ_S_EXT : iid; + priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : ((iprio != 0 || !dpr) ? iprio : 255); + } else { + if (!enabled_interrupts) + return 0; // no enabled pending interrupt to VS-mode + + reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts); + identity = ctz(selected_interrupt); + priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : 255; // vstopi.IPRIO is 255 for interrupt with default priority lower than VSEI + } + return set_field((reg_t)(ipriom ? 
priority : 1), MTOPI_IID, identity); +} + +bool vstopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept { + return false; +} + +siselect_csr_t::siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init): + basic_csr_t(proc, addr, init) { +} + +void siselect_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND)) + throw trap_virtual_instruction(insn.bits()); + } + + basic_csr_t::verify_permissions(insn, write); +} + +aia_csr_t::aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init): + masked_csr_t(proc, addr, mask, init) { +} + +void aia_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + basic_csr_t::verify_permissions(insn, write); +} diff --git a/riscv/csrs.h b/riscv/csrs.h index 33ac33e..b1d5a3b 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -255,9 +255,7 @@ class mstatus_csr_t final: public base_status_csr_t { public: mstatus_csr_t(processor_t* const proc, const reg_t addr); - reg_t read() const noexcept override { - return val; - } + reg_t read() const noexcept override; protected: virtual bool unlogged_write(const reg_t val) noexcept override; @@ -301,6 +299,12 @@ class rv32_high_csr_t: public csr_t { csr_t_p orig; }; +class aia_rv32_high_csr_t: public rv32_high_csr_t { + public: + aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; + // sstatus.sdt is 
read_only 0 when menvcfg.dte = 0 class sstatus_proxy_csr_t final: public base_status_csr_t { public: @@ -356,7 +360,7 @@ class mip_or_mie_csr_t: public csr_t { mip_or_mie_csr_t(processor_t* const proc, const reg_t addr); virtual reg_t read() const noexcept override; - void write_with_mask(const reg_t mask, const reg_t val) noexcept; + virtual void write_with_mask(const reg_t mask, const reg_t val) noexcept; protected: virtual bool unlogged_write(const reg_t val) noexcept override final; @@ -371,6 +375,8 @@ class mip_csr_t: public mip_or_mie_csr_t { mip_csr_t(processor_t* const proc, const reg_t addr); virtual reg_t read() const noexcept override final; + void write_with_mask(const reg_t mask, const reg_t val) noexcept override; + // Does not log. Used by external things (clint) that wiggle bits in mip. void backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept; private: @@ -406,6 +412,7 @@ class generic_int_accessor_t { void ip_write(const reg_t val) noexcept; reg_t ie_read() const noexcept; void ie_write(const reg_t val) noexcept; + reg_t get_ip_write_mask() { return ip_write_mask; } private: state_t* const state; const reg_t read_mask; @@ -423,10 +430,10 @@ typedef std::shared_ptr<generic_int_accessor_t> generic_int_accessor_t_p; class mip_proxy_csr_t: public csr_t { public: mip_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual void verify_permissions(insn_t insn, bool write) const override; virtual reg_t read() const noexcept override; protected: virtual bool unlogged_write(const reg_t val) noexcept override; - private: generic_int_accessor_t_p accr; }; @@ -434,6 +441,7 @@ class mip_proxy_csr_t: public csr_t { class mie_proxy_csr_t: public csr_t { public: mie_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual void verify_permissions(insn_t insn, bool write) const override; virtual reg_t read() const noexcept override; protected: virtual bool 
unlogged_write(const reg_t val) noexcept override; @@ -458,6 +466,25 @@ class medeleg_csr_t: public basic_csr_t { virtual bool unlogged_write(const reg_t val) noexcept override; private: const reg_t hypervisor_exceptions; + const reg_t mmu_exceptions; +}; + +class sip_csr_t: public mip_proxy_csr_t { + public: + sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual reg_t read() const noexcept override; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + +class sie_csr_t: public mie_proxy_csr_t { + public: + sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual reg_t read() const noexcept override; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; + private: + reg_t val; }; // For CSRs with certain bits hardwired @@ -697,6 +724,7 @@ class dcsr_csr_t: public csr_t { bool ebreakvs; bool ebreakvu; bool v; + bool mprven; uint8_t cause; uint8_t ext_cause; bool cetrig; @@ -805,9 +833,9 @@ class stimecmp_csr_t: public basic_csr_t { reg_t intr_mask; }; -class virtualized_stimecmp_csr_t: public virtualized_csr_t { +class virtualized_with_special_permission_csr_t: public virtualized_csr_t { public: - virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt); + virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt); virtual void verify_permissions(insn_t insn, bool write) const override; }; @@ -909,4 +937,63 @@ class scntinhibit_csr_t: public basic_csr_t { virtual bool unlogged_write(const reg_t val) noexcept override; }; +class mtopi_csr_t: public csr_t { + public: + mtopi_csr_t(processor_t* const proc, const reg_t addr); + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class mvip_csr_t : public basic_csr_t { + public: + mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init); + 
reg_t read() const noexcept override; + + void write_with_mask(const reg_t mask, const reg_t val) noexcept; + + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + +typedef std::shared_ptr<mvip_csr_t> mvip_csr_t_p; + +class nonvirtual_stopi_csr_t: public csr_t { + public: + nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class inaccessible_csr_t: public csr_t { + public: + inaccessible_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + reg_t read() const noexcept override { return 0; } + protected: + bool unlogged_write(const reg_t UNUSED val) noexcept override { return false; } +}; + +class vstopi_csr_t: public csr_t { + public: + vstopi_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class siselect_csr_t: public basic_csr_t { + public: + siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; + +class aia_csr_t: public masked_csr_t { + public: + aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; #endif diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index a89a4ff..410e0b3 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -1,4 +1,8 @@ +#include <algorithm> +#include <array> #include <cassert> +#include <iterator> +#include <limits> #include "simif.h" #include "devices.h" @@ -32,6 +36,25 @@ static 
unsigned field_width(unsigned n) ///////////////////////// debug_module_t +static bool region_descriptor_comparator(const region_descriptor &lhs, + const region_descriptor &rhs) { + return lhs.addr < rhs.addr; +} + +template <typename It> +static bool has_intersection(It begin, It end) { + assert(std::is_sorted(begin, end, region_descriptor_comparator)); + + // If current interval's end > next interval's start, they intersect + auto intersecion = + std::adjacent_find(begin, end, [](const auto &lhs, const auto &rhs) { + assert(std::numeric_limits<reg_t>::max() - lhs.addr >= lhs.len); + return lhs.addr + lhs.len > rhs.addr; + }); + + return intersecion != end; +} + debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config) : config(config), program_buffer_bytes((config.support_impebreak ? 4 : 0) + 4*config.progbufsize), @@ -57,11 +80,18 @@ debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config exit(1); } + constexpr unsigned max_data_reg = 12; + constexpr unsigned min_data_reg = 1; + if (config.datacount < min_data_reg || config.datacount > max_data_reg) { + fprintf(stderr, "dm-datacount must be between 1 and 12 (got %u)\n", config.datacount); + exit(1); + } + + dmdata.resize(config.datacount * dmdata_reg_size); program_buffer = new uint8_t[program_buffer_bytes]; memset(debug_rom_flags, 0, sizeof(debug_rom_flags)); memset(program_buffer, 0, program_buffer_bytes); - memset(dmdata, 0, sizeof(dmdata)); if (config.support_impebreak) { program_buffer[4*config.progbufsize] = ebreak(); @@ -78,6 +108,20 @@ debug_module_t::debug_module_t(simif_t *sim, const debug_module_config_t &config hart_available_state[i] = true; } + debug_memory_regions = { + region_descriptor{DEBUG_ROM_ENTRY, debug_rom_raw_len, debug_rom_raw}, + region_descriptor{DEBUG_ROM_WHERETO, sizeof(debug_rom_whereto), debug_rom_whereto}, + region_descriptor{DEBUG_ROM_FLAGS, sizeof(debug_rom_flags), debug_rom_flags}, + region_descriptor{debug_data_start, 
dmdata.size(), dmdata.data()}, + region_descriptor{debug_abstract_start, sizeof(debug_abstract), debug_abstract}, + region_descriptor{debug_progbuf_start, program_buffer_bytes, program_buffer}, + }; + + std::sort(debug_memory_regions.begin(), debug_memory_regions.end(), + region_descriptor_comparator); + assert(!has_intersection(debug_memory_regions.begin(), + debug_memory_regions.end())); + reset(); } @@ -100,7 +144,7 @@ void debug_module_t::reset() dmstatus.version = 2; memset(&abstractcs, 0, sizeof(abstractcs)); - abstractcs.datacount = sizeof(dmdata) / 4; + abstractcs.datacount = config.datacount; abstractcs.progbufsize = config.progbufsize; memset(&abstractauto, 0, sizeof(abstractauto)); @@ -122,38 +166,27 @@ void debug_module_t::reset() challenge = random(); } +static bool belongs_to_range(reg_t access_addr, size_t access_len, + reg_t range_addr, size_t range_len) +{ + assert(std::numeric_limits<reg_t>::max() - access_addr >= access_len); + assert(std::numeric_limits<reg_t>::max() - range_addr >= range_len); + return access_addr >= range_addr && (access_addr < range_addr + range_len) && + ((access_addr + access_len) <= (range_addr + range_len)); +} + bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes) { addr = DEBUG_START + addr; - if (addr >= DEBUG_ROM_ENTRY && - (addr + len) <= (DEBUG_ROM_ENTRY + debug_rom_raw_len)) { - memcpy(bytes, debug_rom_raw + addr - DEBUG_ROM_ENTRY, len); - return true; - } - - if (addr >= DEBUG_ROM_WHERETO && (addr + len) <= (DEBUG_ROM_WHERETO + 4)) { - memcpy(bytes, debug_rom_whereto + addr - DEBUG_ROM_WHERETO, len); - return true; - } - - if (addr >= DEBUG_ROM_FLAGS && ((addr + len) <= DEBUG_ROM_FLAGS + 1024)) { - memcpy(bytes, debug_rom_flags + addr - DEBUG_ROM_FLAGS, len); - return true; - } - - if (addr >= debug_abstract_start && ((addr + len) <= (debug_abstract_start + sizeof(debug_abstract)))) { - memcpy(bytes, debug_abstract + addr - debug_abstract_start, len); - return true; - } + const auto interval_ptr = + 
std::find_if(debug_memory_regions.begin(), debug_memory_regions.end(), + [addr, len](const auto &range) { + return belongs_to_range(addr, len, range.addr, range.len); + }); - if (addr >= debug_data_start && (addr + len) <= (debug_data_start + sizeof(dmdata))) { - memcpy(bytes, dmdata + addr - debug_data_start, len); - return true; - } - - if (addr >= debug_progbuf_start && ((addr + len) <= (debug_progbuf_start + program_buffer_bytes))) { - memcpy(bytes, program_buffer + addr - debug_progbuf_start, len); + if (interval_ptr != debug_memory_regions.end()) { + std::copy_n(std::next(interval_ptr->bytes, addr - interval_ptr->addr), len, bytes); return true; } @@ -163,6 +196,15 @@ bool debug_module_t::load(reg_t addr, size_t len, uint8_t* bytes) return false; } +static bool handle_range_store(reg_t input_addr, size_t input_len, const uint8_t *bytes, + reg_t range_addr, size_t range_len, uint8_t *data) +{ + if (!belongs_to_range(input_addr, input_len, range_addr, range_len)) + return false; + std::copy_n(bytes, input_len, std::next(data, input_addr - range_addr)); + return true; +} + bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) { D( @@ -188,16 +230,11 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) addr = DEBUG_START + addr; - if (addr >= debug_data_start && (addr + len) <= (debug_data_start + sizeof(dmdata))) { - memcpy(dmdata + addr - debug_data_start, bytes, len); + if (handle_range_store(addr, len, bytes, debug_data_start, dmdata.size(), dmdata.data())) return true; - } - - if (addr >= debug_progbuf_start && ((addr + len) <= (debug_progbuf_start + program_buffer_bytes))) { - memcpy(program_buffer + addr - debug_progbuf_start, bytes, len); + if (handle_range_store(addr, len, bytes, debug_progbuf_start, program_buffer_bytes, program_buffer)) return true; - } if (addr == DEBUG_ROM_HALTED) { assert (len == 4); @@ -283,6 +320,16 @@ unsigned debug_module_t::sb_access_bits() return 8 << sbcs.sbaccess; } +uint8_t 
*debug_module_t::get_dmdata_checked(size_t required_size) +{ + if(dmdata.size() < required_size) { + fprintf(stderr, "dmdata size (%ld) less then required (%ld)\n", + dmdata.size(), required_size); + exit(1); + } + return dmdata.data(); +} + void debug_module_t::sb_autoincrement() { if (!sbcs.autoincrement || !config.max_sba_data_width) @@ -392,7 +439,8 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) D(fprintf(stderr, "dmi_read(0x%x) -> ", address)); if (address >= DM_DATA0 && address < DM_DATA0 + abstractcs.datacount) { unsigned i = address - DM_DATA0; - result = read32(dmdata, i); + assert(dmdata.size() >= 4); + result = read32(get_dmdata_checked(i + 1), i); if (abstractcs.busy) { result = -1; D(fprintf(stderr, "\ndmi_read(0x%02x (data[%d]) -> -1 because abstractcs.busy==true\n", address, i)); @@ -649,130 +697,152 @@ bool debug_module_t::perform_abstract_command() return true; } - if ((command >> 24) == 0) { - // register access - unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE); - bool write = get_field(command, AC_ACCESS_REGISTER_WRITE); - unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO); + auto cmdtype = get_field(command, DM_COMMAND_CMDTYPE); + constexpr decltype(cmdtype) CMDTYPE_ACCESS_REGISTER = 0ULL; + constexpr decltype(cmdtype) CMDTYPE_ACCESS_MEMORY = 2ULL; + + if (cmdtype == CMDTYPE_ACCESS_REGISTER) + return perform_abstract_register_access(); + + if (cmdtype == CMDTYPE_ACCESS_MEMORY) + return perform_abstract_memory_access(); + + abstractcs.cmderr = CMDERR_NOTSUP; + return true; +} + +bool debug_module_t::perform_abstract_register_access() +{ + // register access + unsigned size = get_field(command, AC_ACCESS_REGISTER_AARSIZE); + bool write = get_field(command, AC_ACCESS_REGISTER_WRITE); + unsigned regno = get_field(command, AC_ACCESS_REGISTER_REGNO); if (!selected_hart_state().halted) { abstractcs.cmderr = CMDERR_HALTRESUME; return true; } - unsigned i = 0; - if (get_field(command, 
AC_ACCESS_REGISTER_TRANSFER)) { + assert(size < 8); + // Check if register fit in dmdata + if ((1U << size) > dmdata.size()) { + abstractcs.cmderr = CMDERR_NOTSUP; + return true; + } + + unsigned i = 0; + if (get_field(command, AC_ACCESS_REGISTER_TRANSFER)) { + + if (is_fpu_reg(regno)) { + // Save S0 + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + // Save mstatus + write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS)); + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1)); + // Set mstatus.fs + assert((MSTATUS_FS & 0xfff) == 0); + write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12)); + write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS)); + } - if (is_fpu_reg(regno)) { - // Save S0 + if (regno < 0x1000 && config.support_abstract_csr_access) { + if (!is_fpu_reg(regno)) { write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); - // Save mstatus - write32(debug_abstract, i++, csrr(S0, CSR_MSTATUS)); - write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH1)); - // Set mstatus.fs - assert((MSTATUS_FS & 0xfff) == 0); - write32(debug_abstract, i++, lui(S0, MSTATUS_FS >> 12)); - write32(debug_abstract, i++, csrrs(ZERO, S0, CSR_MSTATUS)); } - if (regno < 0x1000 && config.support_abstract_csr_access) { - if (!is_fpu_reg(regno)) { - write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); - } - - if (write) { - switch (size) { - case 2: - write32(debug_abstract, i++, lw(S0, ZERO, debug_data_start)); - break; - case 3: - write32(debug_abstract, i++, ld(S0, ZERO, debug_data_start)); - break; - default: - abstractcs.cmderr = CMDERR_NOTSUP; - return true; - } - write32(debug_abstract, i++, csrw(S0, regno)); - - } else { - write32(debug_abstract, i++, csrr(S0, regno)); - switch (size) { - case 2: - write32(debug_abstract, i++, sw(S0, ZERO, debug_data_start)); - break; - case 3: - write32(debug_abstract, i++, sd(S0, ZERO, debug_data_start)); - break; - default: - abstractcs.cmderr = CMDERR_NOTSUP; - return true; - } - } - if (!is_fpu_reg(regno)) { - 
write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + if (write) { + switch (size) { + case 2: + write32(debug_abstract, i++, lw(S0, ZERO, debug_data_start)); + break; + case 3: + write32(debug_abstract, i++, ld(S0, ZERO, debug_data_start)); + break; + default: + abstractcs.cmderr = CMDERR_NOTSUP; + return true; } + write32(debug_abstract, i++, csrw(S0, regno)); - } else if (regno >= 0x1000 && regno < 0x1020) { - unsigned regnum = regno - 0x1000; - + } else { + write32(debug_abstract, i++, csrr(S0, regno)); switch (size) { case 2: - if (write) - write32(debug_abstract, i++, lw(regnum, ZERO, debug_data_start)); - else - write32(debug_abstract, i++, sw(regnum, ZERO, debug_data_start)); + write32(debug_abstract, i++, sw(S0, ZERO, debug_data_start)); break; case 3: - if (write) - write32(debug_abstract, i++, ld(regnum, ZERO, debug_data_start)); - else - write32(debug_abstract, i++, sd(regnum, ZERO, debug_data_start)); + write32(debug_abstract, i++, sd(S0, ZERO, debug_data_start)); break; default: abstractcs.cmderr = CMDERR_NOTSUP; return true; } + } + if (!is_fpu_reg(regno)) { + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } - if (regno == 0x1000 + S0 && write) { - /* - * The exception handler starts out be restoring dscratch to s0, - * which was saved before executing the abstract memory region. Since - * we just wrote s0, also make sure to write that same value to - * dscratch in case an exception occurs in a program buffer that - * might be executed later. 
- */ - write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); - } + } else if (regno >= 0x1000 && regno < 0x1020) { + unsigned regnum = regno - 0x1000; - } else if (regno >= 0x1020 && regno < 0x1040 && config.support_abstract_fpr_access) { - unsigned fprnum = regno - 0x1020; + switch (size) { + case 2: + if (write) + write32(debug_abstract, i++, lw(regnum, ZERO, debug_data_start)); + else + write32(debug_abstract, i++, sw(regnum, ZERO, debug_data_start)); + break; + case 3: + if (write) + write32(debug_abstract, i++, ld(regnum, ZERO, debug_data_start)); + else + write32(debug_abstract, i++, sd(regnum, ZERO, debug_data_start)); + break; + default: + abstractcs.cmderr = CMDERR_NOTSUP; + return true; + } - if (write) { - switch (size) { - case 2: - write32(debug_abstract, i++, flw(fprnum, ZERO, debug_data_start)); - break; - case 3: - write32(debug_abstract, i++, fld(fprnum, ZERO, debug_data_start)); - break; - default: - abstractcs.cmderr = CMDERR_NOTSUP; - return true; - } + if (regno == 0x1000 + S0 && write) { + /* + * The exception handler starts out be restoring dscratch to s0, + * which was saved before executing the abstract memory region. Since + * we just wrote s0, also make sure to write that same value to + * dscratch in case an exception occurs in a program buffer that + * might be executed later. 
+ */ + write32(debug_abstract, i++, csrw(S0, CSR_DSCRATCH0)); + } - } else { - switch (size) { - case 2: - write32(debug_abstract, i++, fsw(fprnum, ZERO, debug_data_start)); - break; - case 3: - write32(debug_abstract, i++, fsd(fprnum, ZERO, debug_data_start)); - break; - default: - abstractcs.cmderr = CMDERR_NOTSUP; - return true; - } + } else if (regno >= 0x1020 && regno < 0x1040 && config.support_abstract_fpr_access) { + unsigned fprnum = regno - 0x1020; + + if (write) { + switch (size) { + case 2: + write32(debug_abstract, i++, flw(fprnum, ZERO, debug_data_start)); + break; + case 3: + write32(debug_abstract, i++, fld(fprnum, ZERO, debug_data_start)); + break; + default: + abstractcs.cmderr = CMDERR_NOTSUP; + return true; + } + + } else { + switch (size) { + case 2: + write32(debug_abstract, i++, fsw(fprnum, ZERO, debug_data_start)); + break; + case 3: + write32(debug_abstract, i++, fsd(fprnum, ZERO, debug_data_start)); + break; + default: + abstractcs.cmderr = CMDERR_NOTSUP; + return true; } + } } else if (regno >= 0xc000 && (regno & 1) == 1) { // Support odd-numbered custom registers, to allow for debugger testing. @@ -781,46 +851,146 @@ bool debug_module_t::perform_abstract_command() if (write) { // Writing V to custom register N will cause future reads of N to // return V, reads of N-1 will return V-1, etc. 
- custom_base = read32(dmdata, 0) - custom_number; + assert(dmdata.size() >= 4); + custom_base = read32(get_dmdata_checked(1), 0) - custom_number; } else { - write32(dmdata, 0, custom_number + custom_base); - write32(dmdata, 1, 0); + write32(get_dmdata_checked(1), 0, custom_number + custom_base); + write32(get_dmdata_checked(2), 1, 0); } return true; - } else { - abstractcs.cmderr = CMDERR_NOTSUP; - return true; - } - - if (is_fpu_reg(regno)) { - // restore mstatus - write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1)); - write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS)); - // restore s0 - write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); - } - } - - if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) { - write32(debug_abstract, i, - jal(ZERO, debug_progbuf_start - debug_abstract_start - 4 * i)); - i++; } else { - write32(debug_abstract, i++, ebreak()); + abstractcs.cmderr = CMDERR_NOTSUP; + return true; } - debug_rom_flags[selected_hart_id()] |= 1 << DEBUG_ROM_FLAG_GO; - rti_remaining = config.abstract_rti; - abstract_command_completed = false; + if (is_fpu_reg(regno)) { + // restore mstatus + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH1)); + write32(debug_abstract, i++, csrw(S0, CSR_MSTATUS)); + // restore s0 + write32(debug_abstract, i++, csrr(S0, CSR_DSCRATCH0)); + } + } - abstractcs.busy = true; + if (get_field(command, AC_ACCESS_REGISTER_POSTEXEC)) { + write32(debug_abstract, i, + jal(ZERO, debug_progbuf_start - debug_abstract_start - 4 * i)); + i++; } else { + write32(debug_abstract, i++, ebreak()); + } + + debug_rom_flags[selected_hart_id()] |= 1 << DEBUG_ROM_FLAG_GO; + rti_remaining = config.abstract_rti; + abstract_command_completed = false; + + abstractcs.busy = true; + return true; +} + +static unsigned idx(unsigned xlen) +{ + return field_width(xlen) - 3U; +} + +bool debug_module_t::perform_abstract_memory_access() { + unsigned aamsize = get_field(command, AC_ACCESS_MEMORY_AAMSIZE); + bool aampostincrement = get_field(command, 
AC_ACCESS_MEMORY_AAMPOSTINCREMENT); + bool aamvirtual = get_field(command, AC_ACCESS_MEMORY_AAMVIRTUAL); + bool is_write = get_field(command, AC_ACCESS_MEMORY_WRITE); + auto xlen = sim->get_harts().at(selected_hart_id())->get_xlen(); + + if (!selected_hart_state().halted) { + abstractcs.cmderr = CMDERR_HALTRESUME; + return true; + } + + if (aamsize > idx(xlen)) { abstractcs.cmderr = CMDERR_NOTSUP; + return true; } + + unsigned offset = 0; + generate_initial_sequence(aamvirtual, offset); + is_write ? handle_memory_write(xlen, aamsize, offset) + : handle_memory_read(xlen, aamsize, offset); + + if (aampostincrement) + handle_post_increment(xlen, aamsize, offset); + + generate_termination_sequence(offset); + start_command_execution(); + + abstractcs.cmderr = CMDERR_NONE; return true; } +using handle_memory_func = uint32_t (*)(unsigned rd_src, unsigned base, uint16_t offset); +using handle_mstatus_func = uint32_t(*)(unsigned rd, unsigned rs1, unsigned csr); +static constexpr std::array<handle_memory_func, 4> lx = {&lb, &lh, &lw, &ld}; +static constexpr std::array<handle_memory_func, 4> sx = {&sb, &sh, &sw, &sd}; +static constexpr std::array<handle_mstatus_func, 2> csrrx = {&csrrc, &csrrs}; + +unsigned debug_module_t::arg(unsigned xlen, unsigned idx) +{ + return debug_data_start + idx * xlen / 8; +} + +void debug_module_t::handle_memory_read(size_t xlen, unsigned aamsize, unsigned &offset) +{ + write32(debug_abstract, offset++, lx[idx(xlen)](S1, ZERO, arg(xlen, 1))); + write32(debug_abstract, offset++, lx[aamsize](S1, S1, 0)); + write32(debug_abstract, offset++, sx[idx(xlen)](S1, ZERO, arg(xlen, 0))); +} + +void debug_module_t::handle_memory_write(size_t xlen, unsigned aamsize, unsigned &offset) +{ + // Use Arg1 as temporary storage for old mstatus value + write32(debug_abstract, offset++, lx[idx(xlen)](S1, ZERO, arg(xlen, 1))); // Arg1 -> S1 + write32(debug_abstract, offset++, sx[idx(xlen)](S0, ZERO, arg(xlen, 1))); // S0 -> Arg1 + write32(debug_abstract, offset++, 
lx[idx(xlen)](S0, ZERO, arg(xlen, 0))); // Arg0 -> S0 + + write32(debug_abstract, offset++, sx[aamsize](S0, S1, 0)); + + write32(debug_abstract, offset++, lx[idx(xlen)](S0, ZERO, arg(xlen, 1))); // Restore S0 +} + +void debug_module_t::handle_post_increment(size_t xlen, unsigned aamsize, unsigned &offset) +{ + write32(debug_abstract, offset++, lx[idx(xlen)](S1, ZERO, arg(xlen, 1))); + write32(debug_abstract, offset++, addi(S1, S1, 1U << aamsize)); + write32(debug_abstract, offset++, sx[idx(xlen)](S1, ZERO, arg(xlen, 1))); +} + +void debug_module_t::generate_initial_sequence(bool aamvirtual, unsigned &offset) +{ + write32(debug_abstract, offset++, csrw(S0, CSR_DSCRATCH0)); + write32(debug_abstract, offset++, csrw(S1, CSR_DSCRATCH1)); + + // Modify mstatus.mprv and save old mstatus + write32(debug_abstract, offset++, lui(S0, MSTATUS_MPRV >> 12)); + write32(debug_abstract, offset++, csrrx[aamvirtual](S0, S0, CSR_MSTATUS)); +} + +void debug_module_t::generate_termination_sequence(unsigned &offset) +{ + // Restore mstatus + write32(debug_abstract, offset++, csrw(S0, CSR_MSTATUS)); + + write32(debug_abstract, offset++, csrr(S0, CSR_DSCRATCH0)); + write32(debug_abstract, offset++, csrr(S1, CSR_DSCRATCH1)); + write32(debug_abstract, offset++, ebreak()); +} + +void debug_module_t::start_command_execution() +{ + debug_rom_flags[selected_hart_id()] |= 1 << DEBUG_ROM_FLAG_GO; + rti_remaining = config.abstract_rti; + abstract_command_completed = false; + abstractcs.busy = true; +} + bool debug_module_t::dmi_write(unsigned address, uint32_t value) { D(fprintf(stderr, "dmi_write(0x%x, 0x%x)\n", address, value)); @@ -832,7 +1002,7 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) if (address >= DM_DATA0 && address < DM_DATA0 + abstractcs.datacount) { unsigned i = address - DM_DATA0; if (!abstractcs.busy) - write32(dmdata, address - DM_DATA0, value); + write32(get_dmdata_checked(address - DM_DATA0), address - DM_DATA0, value); if (abstractcs.busy && 
abstractcs.cmderr == CMDERR_NONE) { abstractcs.cmderr = CMDERR_BUSY; @@ -870,8 +1040,6 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) dmcontrol.ndmreset = get_field(value, DM_DMCONTROL_NDMRESET); if (config.support_hasel) dmcontrol.hasel = get_field(value, DM_DMCONTROL_HASEL); - else - dmcontrol.hasel = 0; dmcontrol.hartsel = get_field(value, DM_DMCONTROL_HARTSELHI) << DM_DMCONTROL_HARTSELLO_LENGTH; dmcontrol.hartsel |= get_field(value, DM_DMCONTROL_HARTSELLO); @@ -931,10 +1099,12 @@ bool debug_module_t::dmi_write(unsigned address, uint32_t value) return true; case DM_ABSTRACTAUTO: - abstractauto.autoexecprogbuf = get_field(value, - DM_ABSTRACTAUTO_AUTOEXECPROGBUF); - abstractauto.autoexecdata = get_field(value, - DM_ABSTRACTAUTO_AUTOEXECDATA); + if (config.support_abstractauto) { + abstractauto.autoexecprogbuf = get_field(value, + DM_ABSTRACTAUTO_AUTOEXECPROGBUF); + abstractauto.autoexecdata = get_field(value, + DM_ABSTRACTAUTO_AUTOEXECDATA); + } return true; case DM_SBCS: sbcs.readonaddr = get_field(value, DM_SBCS_SBREADONADDR); diff --git a/riscv/debug_module.h b/riscv/debug_module.h index 904f03e..831df10 100644 --- a/riscv/debug_module.h +++ b/riscv/debug_module.h @@ -2,7 +2,7 @@ #ifndef _RISCV_DEBUG_MODULE_H #define _RISCV_DEBUG_MODULE_H -#include <set> +#include <array> #include <vector> #include "abstract_device.h" @@ -15,6 +15,7 @@ struct debug_module_config_t { // Size of program_buffer in 32-bit words, as exposed to the rest of the // world. 
unsigned progbufsize = 2; + unsigned datacount = 2; unsigned max_sba_data_width = 0; bool require_authentication = false; unsigned abstract_rti = 0; @@ -23,6 +24,7 @@ struct debug_module_config_t { bool support_abstract_fpr_access = true; bool support_haltgroups = true; bool support_impebreak = true; + bool support_abstractauto = true; }; struct dmcontrol_t { @@ -99,6 +101,13 @@ struct hart_debug_state_t { uint8_t haltgroup; }; +// structure to describe mmio region +struct region_descriptor { + reg_t addr; // 1st addr in a range + size_t len; // range size + const uint8_t *bytes; // data +}; + class debug_module_t : public abstract_device_t { public: @@ -131,7 +140,6 @@ class debug_module_t : public abstract_device_t void proc_reset(unsigned id); private: - static const unsigned datasize = 2; debug_module_config_t config; // Actual size of the program buffer, which is 1 word bigger than we let on // to implement the implicit ebreak at the end. @@ -139,7 +147,7 @@ class debug_module_t : public abstract_device_t static const unsigned debug_data_start = 0x380; unsigned debug_progbuf_start; - static const unsigned debug_abstract_size = 12; + static const unsigned debug_abstract_size = 24; unsigned debug_abstract_start; // R/W this through custom registers, to allow debuggers to test that // functionality. 
@@ -150,7 +158,8 @@ class debug_module_t : public abstract_device_t uint8_t debug_rom_whereto[4]; uint8_t debug_abstract[debug_abstract_size * 4]; uint8_t *program_buffer; - uint8_t dmdata[datasize * 4]; + static constexpr unsigned dmdata_reg_size = 4; + std::vector<uint8_t> dmdata; std::vector<hart_debug_state_t> hart_state; uint8_t debug_rom_flags[1024]; @@ -174,6 +183,8 @@ class debug_module_t : public abstract_device_t unsigned sb_access_bits(); + uint8_t *get_dmdata_checked(size_t required_size); + dmcontrol_t dmcontrol; dmstatus_t dmstatus; abstractcs_t abstractcs; @@ -191,7 +202,20 @@ class debug_module_t : public abstract_device_t bool hart_selected(unsigned hartid) const; void reset(); + bool perform_abstract_command(); + bool perform_abstract_register_access(); + bool perform_abstract_memory_access(); + + unsigned arg(unsigned xlen, unsigned i); + + void handle_post_increment(size_t xlen, unsigned aamsize, unsigned &offset); + void handle_memory_read(size_t xlen, unsigned aamsize, unsigned &offset); + void handle_memory_write(size_t xlen, unsigned aamsize, unsigned &offset); + + void generate_initial_sequence(bool aamvirtual, unsigned &offset); + void generate_termination_sequence(unsigned &offset); + void start_command_execution(); bool abstract_command_completed; unsigned rti_remaining; @@ -206,6 +230,8 @@ class debug_module_t : public abstract_device_t bool hart_available(unsigned hart_id) const; unsigned sb_read_wait, sb_write_wait; + + std::array<region_descriptor, 6> debug_memory_regions; }; #endif diff --git a/riscv/decode.h b/riscv/decode.h index d17cb6b..0c13528 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -79,6 +79,10 @@ public: insn_t(insn_bits_t bits) : b(bits) {} insn_bits_t bits() { return b; } int length() { return insn_length(b); } + [[maybe_unused]] int64_t opcode() { return x(0, 7); } + [[maybe_unused]] int64_t funct7() { return x(25, 7); } + [[maybe_unused]] int64_t funct3() { return x(12, 3); } + [[maybe_unused]] int64_t 
funct2() { return x(25, 2); } int64_t i_imm() { return xs(20, 12); } int64_t shamt() { return x(20, 6); } int64_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); } @@ -95,6 +99,7 @@ public: uint64_t bs() { return x(30, 2); } // Crypto ISE - SM4/AES32 byte select. uint64_t rcon() { return x(20, 4); } // Crypto ISE - AES64 round const. + [[maybe_unused]] int64_t rvc_opcode() { return x(0, 2); } int64_t rvc_imm() { return x(2, 5) + (xs(12, 1) << 5); } int64_t rvc_zimm() { return x(2, 5) + (x(12, 1) << 5); } int64_t rvc_addi4spn_imm() { return (x(6, 1) << 2) + (x(5, 1) << 3) + (x(11, 2) << 4) + (x(7, 4) << 6); } @@ -149,6 +154,8 @@ public: uint64_t p_imm5() { return x(20, 5); } uint64_t p_imm6() { return x(20, 6); } + uint64_t b_imm5() { return (x(20, 5) == 0) ? -1ul : x(20, 5); } + uint64_t zcmp_regmask() { unsigned mask = 0; uint64_t rlist = rvc_rlist(); @@ -240,7 +247,4 @@ private: #define set_field(reg, mask, val) \ (((reg) & ~(std::remove_cv<decltype(reg)>::type)(mask)) | (((std::remove_cv<decltype(reg)>::type)(val) * ((mask) & ~((mask) << 1))) & (std::remove_cv<decltype(reg)>::type)(mask))) -#define DEBUG_START 0x0 -#define DEBUG_END (0x1000 - 1) - #endif diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 892515f..6f24799 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -146,10 +146,9 @@ do { \ #define SHAMT (insn.i_imm() & 0x3F) #define BRANCH_TARGET (pc + insn.sb_imm()) #define JUMP_TARGET (pc + insn.uj_imm()) -#define RM ({ int rm = insn.rm(); \ - if (rm == 7) rm = STATE.frm->read(); \ - if (rm > 4) throw trap_illegal_instruction(insn.bits()); \ - rm; }) +#define validate_rm(rm) ({ require(rm < 5); rm; }) +#define VFP_RM validate_rm(STATE.frm->read()) +#define RM (insn.rm() == 7 ? 
VFP_RM : validate_rm(insn.rm())) static inline bool is_aligned(const unsigned val, const unsigned pos) { @@ -164,7 +163,6 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define require_rv32 require(xlen == 32) #define require_extension(s) require(p->extension_enabled(s)) #define require_either_extension(A,B) require(p->extension_enabled(A) || p->extension_enabled(B)); -#define require_impl(s) require(p->supports_impl(s)) #define require_fp STATE.fflags->verify_permissions(insn, false) #define require_accelerator require(STATE.sstatus->enabled(SSTATUS_XS)) #define require_vector_vs require(p->any_vector_extensions() && STATE.sstatus->enabled(SSTATUS_VS)) @@ -226,7 +224,8 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define zext_xlen(x) zext(x, xlen) #define set_pc(x) \ - do { p->check_pc_alignment(x); \ + do { if (unlikely((x) & ~p->pc_alignment_mask())) \ + return p->throw_instruction_address_misaligned(x); \ npc = sext_xlen(x); \ } while (0) @@ -268,15 +267,21 @@ inline bfloat16_t bf16(freg_t r) { return bf16(unboxBF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } +inline float16_t f16(freg_t r, reg_t altfmt) { return altfmt ? bf16(r) : f16(r); } +inline float32_t f32(freg_t r, UNUSED reg_t altfmt) { return f32(r); } +inline float64_t f64(freg_t r, UNUSED reg_t altfmt) { return f64(r); } inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; } inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; } inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; } inline freg_t freg(float128_t f) { return f; } #define F16_SIGN ((uint16_t)1 << 15) +#define BF16_SIGN F16_SIGN #define F32_SIGN ((uint32_t)1 << 31) #define F64_SIGN ((uint64_t)1 << 63) #define fsgnj16(a, b, n, x) \ f16((f16(a).v & ~F16_SIGN) | ((((x) ? 
f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN)) +#define bfsgnj16(a, b, n, x) \ + bf16((bf16(a).v & ~BF16_SIGN) | ((((x) ? bf16(a).v : (n) ? BF16_SIGN : 0) ^ bf16(b).v) & BF16_SIGN)) #define fsgnj32(a, b, n, x) \ f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? F32_SIGN : 0) ^ f32(b).v) & F32_SIGN)) #define fsgnj64(a, b, n, x) \ @@ -369,3 +374,10 @@ inline long double to_f(float128_t f) { long double r; memcpy(&r, &f, sizeof(r)) #define ZICFILP_IS_LP_EXPECTED(reg_num) \ (((reg_num) != 1 && (reg_num) != 5 && (reg_num) != 7) ? \ elp_t::LP_EXPECTED : elp_t::NO_LP_EXPECTED) +#define maybe_set_elp(reg_num) \ + if (unlikely(p->extension_enabled(EXT_ZICFILP))) { \ + if (unlikely(ZICFILP_IS_LP_EXPECTED(reg_num) == elp_t::LP_EXPECTED)) { \ + serialize(); \ + return p->set_lpad_expected(npc); \ + } \ + } diff --git a/riscv/devices.cc b/riscv/devices.cc index fb5bb5a..b816ca1 100644 --- a/riscv/devices.cc +++ b/riscv/devices.cc @@ -156,21 +156,21 @@ void mem_t::dump(std::ostream& o) { } } -external_sim_device_t::external_sim_device_t(void* sim) +external_sim_device_t::external_sim_device_t(abstract_sim_if_t* sim) : external_simulator(sim) {} -void external_sim_device_t::set_simulator(void* sim) { +void external_sim_device_t::set_simulator(abstract_sim_if_t* sim) { external_simulator = sim; } bool external_sim_device_t::load(reg_t addr, size_t len, uint8_t* bytes) { if (unlikely(external_simulator == nullptr)) return false; - return static_cast<abstract_sim_if_t*>(external_simulator)->load(addr, len, bytes); + return external_simulator->load(addr, len, bytes); } bool external_sim_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) { if (unlikely(external_simulator == nullptr)) return false; - return static_cast<abstract_sim_if_t*>(external_simulator)->store(addr, len, bytes); + return external_simulator->store(addr, len, bytes); } reg_t external_sim_device_t::size() { diff --git a/riscv/devices.h b/riscv/devices.h index e7b80ad..ccb5c9b 100644 --- 
a/riscv/devices.h +++ b/riscv/devices.h @@ -80,14 +80,14 @@ public: class external_sim_device_t : public abstract_device_t { public: - external_sim_device_t(void* sim); - void set_simulator(void* sim); + external_sim_device_t(abstract_sim_if_t* sim); + void set_simulator(abstract_sim_if_t* sim); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; reg_t size() override; private: - void* external_simulator; + abstract_sim_if_t* external_simulator; }; class clint_t : public abstract_device_t { diff --git a/riscv/disasm.h b/riscv/disasm.h index 4a1ea42..64cfd2e 100644 --- a/riscv/disasm.h +++ b/riscv/disasm.h @@ -15,6 +15,7 @@ extern const char* xpr_name[NXPR]; extern const char* fpr_name[NFPR]; extern const char* vr_name[NVPR]; extern const char* csr_name(int which); +extern const char* frm_name(int which); class arg_t { diff --git a/riscv/encoding.h b/riscv/encoding.h index bcc1ace..776a2ae 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (8899b32) + * https://github.com/riscv/riscv-opcodes (3deaa8c) */ #ifndef RISCV_CSR_ENCODING_H @@ -65,16 +65,17 @@ #define SSTATUS_UXL 0x0000000300000000 #define SSTATUS64_SD 0x8000000000000000 -#define HSTATUS_VSXL 0x300000000 -#define HSTATUS_VTSR 0x00400000 -#define HSTATUS_VTW 0x00200000 -#define HSTATUS_VTVM 0x00100000 -#define HSTATUS_VGEIN 0x0003f000 -#define HSTATUS_HU 0x00000200 -#define HSTATUS_SPVP 0x00000100 -#define HSTATUS_SPV 0x00000080 -#define HSTATUS_GVA 0x00000040 #define HSTATUS_VSBE 0x00000020 +#define HSTATUS_GVA 0x00000040 +#define HSTATUS_SPV 0x00000080 +#define HSTATUS_SPVP 0x00000100 +#define HSTATUS_HU 0x00000200 +#define HSTATUS_VGEIN 0x0003f000 +#define HSTATUS_VTVM 0x00100000 +#define HSTATUS_VTW 0x00200000 +#define HSTATUS_VTSR 0x00400000 +#define HSTATUS_HUKTE 0x01000000 +#define HSTATUS_VSXL 0x0000000300000000 
#define HSTATUS_HUPMM 0x0003000000000000 #define USTATUS_UIE 0x00000001 @@ -197,18 +198,20 @@ #define MSTATEEN0_FCSR 0x00000002 #define MSTATEEN0_JVT 0x00000004 #define MSTATEEN0_CTR 0x0040000000000000 -#define MSTATEEN0_PRIV113 0x0100000000000000 #define MSTATEEN0_PRIV114 0x0080000000000000 +#define MSTATEEN0_PRIV113 0x0100000000000000 #define MSTATEEN0_HCONTEXT 0x0200000000000000 +#define MSTATEEN0_IMSIC 0x0400000000000000 #define MSTATEEN0_AIA 0x0800000000000000 #define MSTATEEN0_CSRIND 0x1000000000000000 #define MSTATEEN0_HENVCFG 0x4000000000000000 #define MSTATEEN_HSTATEEN 0x8000000000000000 #define MSTATEEN0H_CTR 0x00400000 -#define MSTATEEN0H_PRIV113 0x01000000 #define MSTATEEN0H_PRIV114 0x00800000 +#define MSTATEEN0H_PRIV113 0x01000000 #define MSTATEEN0H_HCONTEXT 0x02000000 +#define MSTATEEN0H_IMSIC 0x04000000 #define MSTATEEN0H_AIA 0x08000000 #define MSTATEEN0H_CSRIND 0x10000000 #define MSTATEEN0H_HENVCFG 0x40000000 @@ -268,11 +271,25 @@ #define SISELECT_SMCDELEG_HPMEVENT_3 0x43 #define SISELECT_SMCDELEG_END 0x5f +#define MISELECT_IPRIO 0x30 +#define MISELECT_IPRIO_TOP 0x3f +#define MISELECT_IMSIC 0x70 +#define MISELECT_IMSIC_TOP 0xff + +#define SISELECT_IPRIO 0x30 +#define SISELECT_IPRIO_TOP 0x3f +#define SISELECT_IMSIC 0x70 +#define SISELECT_IMSIC_TOP 0xff + +#define VSISELECT_IMSIC 0x70 +#define VSISELECT_IMSIC_TOP 0xff + #define HSTATEEN0_CS 0x00000001 #define HSTATEEN0_FCSR 0x00000002 #define HSTATEEN0_JVT 0x00000004 #define HSTATEEN0_CTR 0x0040000000000000 #define HSTATEEN0_SCONTEXT 0x0200000000000000 +#define HSTATEEN0_IMSIC 0x0400000000000000 #define HSTATEEN0_AIA 0x0800000000000000 #define HSTATEEN0_CSRIND 0x1000000000000000 #define HSTATEEN0_SENVCFG 0x4000000000000000 @@ -280,6 +297,7 @@ #define HSTATEEN0H_CTR 0x00400000 #define HSTATEEN0H_SCONTEXT 0x02000000 +#define HSTATEEN0H_IMSIC 0x04000000 #define HSTATEEN0H_AIA 0x08000000 #define HSTATEEN0H_CSRIND 0x10000000 #define HSTATEEN0H_SENVCFG 0x40000000 @@ -291,6 +309,7 @@ #define SENVCFG_CBIE 
0x00000030 #define SENVCFG_CBCFE 0x00000040 #define SENVCFG_CBZE 0x00000080 +#define SENVCFG_UKTE 0x00000100 #define SENVCFG_PMM 0x0000000300000000 #define SSTATEEN0_CS 0x00000001 @@ -356,6 +375,7 @@ #define PMP_W 0x02 #define PMP_X 0x04 #define PMP_A 0x18 +#define PMP_MT 0x60 #define PMP_L 0x80 #define PMP_SHIFT 2 @@ -363,6 +383,9 @@ #define PMP_NA4 0x10 #define PMP_NAPOT 0x18 +#define SPMP_U 0x100 +#define SPMP_SHARED 0x200 + #define MCTRCTL_U 0x0000000000000001 #define MCTRCTL_S 0x0000000000000002 #define MCTRCTL_M 0x0000000000000004 @@ -384,6 +407,7 @@ #define MCTRCTL_RETINH 0x0000200000000000 #define MCTRCTL_INDLJMPINH 0x0000400000000000 #define MCTRCTL_DIRLJMPINH 0x0000800000000000 +#define MCTRCTL_CUSTOM 0xF000000000000000 #define SCTRCTL_U 0x0000000000000001 #define SCTRCTL_S 0x0000000000000002 @@ -424,12 +448,25 @@ #define VSCTRCTL_RETINH 0x0000200000000000 #define VSCTRCTL_INDLJMPINH 0x0000400000000000 #define VSCTRCTL_DIRLJMPINH 0x0000800000000000 +#define VSCTRCTL_CUSTOM 0xF000000000000000 #define SCTRDEPTH_DEPTH 0x00000007 #define SCTRSTATUS_WRPTR 0x000000FF #define SCTRSTATUS_FROZEN 0x80000000 +#define SCTR_ENTRY_BASE 0x200 + +#define SCTR_SOURCE_V 0x0000000000000001 +#define SCTR_SOURCE_PC 0xFFFFFFFFFFFFFFFE + +#define SCTR_TARGET_MISP 0x0000000000000001 +#define SCTR_TARGET_PC 0xFFFFFFFFFFFFFFFE + +#define SCTR_DATA_TYPE 0x000000000000000F +#define SCTR_DATA_CCV 0x0000000000008000 +#define SCTR_DATA_CC 0x00000000FFFF0000 + #define IRQ_U_SOFT 0 #define IRQ_S_SOFT 1 #define IRQ_VS_SOFT 2 @@ -458,7 +495,8 @@ #define PTE_A 0x040 /* Accessed */ #define PTE_D 0x080 /* Dirty */ #define PTE_SOFT 0x300 /* Reserved for Software */ -#define PTE_RSVD 0x1FC0000000000000 /* Reserved for future standard use */ +#define PTE_SVRSW60T59B 0x1800000000000000 /* Svrsw60t59b: Reserved for software use */ +#define PTE_RSVD 0x07C0000000000000 /* Reserved for future standard use */ #define PTE_PBMT 0x6000000000000000 /* Svpbmt: Page-based memory types */ #define PTE_N 
0x8000000000000000 /* Svnapot: NAPOT translation contiguity */ #define PTE_ATTR 0xFFC0000000000000 /* All attributes and reserved bits */ @@ -657,6 +695,8 @@ #define MASK_BCLRI 0xfc00707f #define MATCH_BEQ 0x63 #define MASK_BEQ 0x707f +#define MATCH_BEQI 0x2063 +#define MASK_BEQI 0x707f #define MATCH_BEXT 0x48005033 #define MASK_BEXT 0xfe00707f #define MATCH_BEXTI 0x48005013 @@ -675,6 +715,8 @@ #define MASK_BLTU 0x707f #define MATCH_BNE 0x1063 #define MASK_BNE 0x707f +#define MATCH_BNEI 0x3063 +#define MASK_BNEI 0x707f #define MATCH_BSET 0x28001033 #define MASK_BSET 0xfe00707f #define MATCH_BSETI 0x28001013 @@ -1671,6 +1713,8 @@ #define MASK_VFADD_VF 0xfc00707f #define MATCH_VFADD_VV 0x1057 #define MASK_VFADD_VV 0xfc00707f +#define MATCH_VFBDOT_VV 0xac001077 +#define MASK_VFBDOT_VV 0xfc00707f #define MATCH_VFCLASS_V 0x4c081057 #define MASK_VFCLASS_V 0xfc0ff07f #define MATCH_VFCVT_F_X_V 0x48019057 @@ -1689,6 +1733,8 @@ #define MASK_VFDIV_VF 0xfc00707f #define MATCH_VFDIV_VV 0x80001057 #define MASK_VFDIV_VV 0xfc00707f +#define MATCH_VFEXT_VF2 0x480b2057 +#define MASK_VFEXT_VF2 0xfc0ff07f #define MATCH_VFIRST_M 0x4008a057 #define MASK_VFIRST_M 0xfc0ff07f #define MATCH_VFMACC_VF 0xb0005057 @@ -1727,6 +1773,8 @@ #define MASK_VFMV_S_F 0xfff0707f #define MATCH_VFMV_V_F 0x5e005057 #define MASK_VFMV_V_F 0xfff0707f +#define MATCH_VFNCVT_F_F_Q 0x480c9057 +#define MASK_VFNCVT_F_F_Q 0xfc0ff07f #define MATCH_VFNCVT_F_F_W 0x480a1057 #define MASK_VFNCVT_F_F_W 0xfc0ff07f #define MATCH_VFNCVT_F_X_W 0x48099057 @@ -1739,12 +1787,16 @@ #define MASK_VFNCVT_RTZ_X_F_W 0xfc0ff07f #define MATCH_VFNCVT_RTZ_XU_F_W 0x480b1057 #define MASK_VFNCVT_RTZ_XU_F_W 0xfc0ff07f +#define MATCH_VFNCVT_SAT_F_F_Q 0x480d9057 +#define MASK_VFNCVT_SAT_F_F_Q 0xfc0ff07f #define MATCH_VFNCVT_X_F_W 0x48089057 #define MASK_VFNCVT_X_F_W 0xfc0ff07f #define MATCH_VFNCVT_XU_F_W 0x48081057 #define MASK_VFNCVT_XU_F_W 0xfc0ff07f #define MATCH_VFNCVTBF16_F_F_W 0x480e9057 #define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f +#define 
MATCH_VFNCVTBF16_SAT_F_F_W 0x480f9057 +#define MASK_VFNCVTBF16_SAT_F_F_W 0xfc0ff07f #define MATCH_VFNMACC_VF 0xb4005057 #define MASK_VFNMACC_VF 0xfc00707f #define MATCH_VFNMACC_VV 0xb4001057 @@ -1761,6 +1813,14 @@ #define MASK_VFNMSUB_VF 0xfc00707f #define MATCH_VFNMSUB_VV 0xac001057 #define MASK_VFNMSUB_VV 0xfc00707f +#define MATCH_VFQBDOT_ALT_VV 0xbc001077 +#define MASK_VFQBDOT_ALT_VV 0xfc00707f +#define MATCH_VFQBDOT_VV 0xb8001077 +#define MASK_VFQBDOT_VV 0xfc00707f +#define MATCH_VFQLDOT_ALT_VV 0x9c001077 +#define MASK_VFQLDOT_ALT_VV 0xfc00707f +#define MATCH_VFQLDOT_VV 0x98001077 +#define MASK_VFQLDOT_VV 0xfc00707f #define MATCH_VFRDIV_VF 0x84005057 #define MASK_VFRDIV_VF 0xfc00707f #define MATCH_VFREC7_V 0x4c029057 @@ -1807,6 +1867,8 @@ #define MASK_VFWADD_WF 0xfc00707f #define MATCH_VFWADD_WV 0xd0001057 #define MASK_VFWADD_WV 0xfc00707f +#define MATCH_VFWBDOT_VV 0xb0001077 +#define MASK_VFWBDOT_VV 0xfc00707f #define MATCH_VFWCVT_F_F_V 0x48061057 #define MASK_VFWCVT_F_F_V 0xfc0ff07f #define MATCH_VFWCVT_F_X_V 0x48059057 @@ -1823,6 +1885,8 @@ #define MASK_VFWCVT_XU_F_V 0xfc0ff07f #define MATCH_VFWCVTBF16_F_F_V 0x48069057 #define MASK_VFWCVTBF16_F_F_V 0xfc0ff07f +#define MATCH_VFWLDOT_VV 0x90001077 +#define MASK_VFWLDOT_VV 0xfc00707f #define MATCH_VFWMACC_VF 0xf0005057 #define MASK_VFWMACC_VF 0xfc00707f #define MATCH_VFWMACC_VV 0xf0001057 @@ -2145,6 +2209,10 @@ #define MASK_VOR_VV 0xfc00707f #define MATCH_VOR_VX 0x28004057 #define MASK_VOR_VX 0xfc00707f +#define MATCH_VQBDOTS_VV 0xbc000077 +#define MASK_VQBDOTS_VV 0xfc00707f +#define MATCH_VQBDOTU_VV 0xb8000077 +#define MASK_VQBDOTU_VV 0xfc00707f #define MATCH_VQDOT_VV 0xb0002057 #define MASK_VQDOT_VV 0xfc00707f #define MATCH_VQDOT_VX 0xb0006057 @@ -2159,6 +2227,10 @@ #define MASK_VQDOTU_VX 0xfc00707f #define MATCH_VQDOTUS_VX 0xb8006057 #define MASK_VQDOTUS_VX 0xfc00707f +#define MATCH_VQLDOTS_VV 0x9c000077 +#define MASK_VQLDOTS_VV 0xfc00707f +#define MATCH_VQLDOTU_VV 0x98000077 +#define MASK_VQLDOTU_VV 
0xfc00707f #define MATCH_VREDAND_VS 0x4002057 #define MASK_VREDAND_VS 0xfc00707f #define MATCH_VREDMAX_VS 0x1c002057 @@ -2498,8 +2570,6 @@ #define CSR_VTYPE 0xc21 #define CSR_VLENB 0xc22 #define CSR_SSTATUS 0x100 -#define CSR_SEDELEG 0x102 -#define CSR_SIDELEG 0x103 #define CSR_SIE 0x104 #define CSR_STVEC 0x105 #define CSR_SCOUNTEREN 0x106 @@ -2967,7 +3037,6 @@ #define INSN_FIELD_IMM4 0xf00000 #define INSN_FIELD_IMM5 0x1f00000 #define INSN_FIELD_IMM6 0x3f00000 -#define INSN_FIELD_ZIMM 0xf8000 #define INSN_FIELD_OPCODE 0x7f #define INSN_FIELD_FUNCT7 0xfe000000 #define INSN_FIELD_VD 0xf80 @@ -3033,6 +3102,12 @@ #define INSN_FIELD_C_RS2 0x7c #define INSN_FIELD_C_SREG1 0x380 #define INSN_FIELD_C_SREG2 0x1c +#define INSN_FIELD_RD_P_E 0x18 +#define INSN_FIELD_RS2_P_E 0x18 +#define INSN_FIELD_RD_N0_E 0xf00 +#define INSN_FIELD_C_RS2_E 0x78 +#define INSN_FIELD_RD_E 0xf00 +#define INSN_FIELD_RS2_E 0x1e00000 #define INSN_FIELD_MOP_R_T_30 0x40000000 #define INSN_FIELD_MOP_R_T_27_26 0xc000000 #define INSN_FIELD_MOP_R_T_21_20 0x300000 @@ -3106,6 +3181,7 @@ DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) DECLARE_INSN(bclr, MATCH_BCLR, MASK_BCLR) DECLARE_INSN(bclri, MATCH_BCLRI, MASK_BCLRI) DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) +DECLARE_INSN(beqi, MATCH_BEQI, MASK_BEQI) DECLARE_INSN(bext, MATCH_BEXT, MASK_BEXT) DECLARE_INSN(bexti, MATCH_BEXTI, MASK_BEXTI) DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) @@ -3115,6 +3191,7 @@ DECLARE_INSN(binvi, MATCH_BINVI, MASK_BINVI) DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) +DECLARE_INSN(bnei, MATCH_BNEI, MASK_BNEI) DECLARE_INSN(bset, MATCH_BSET, MASK_BSET) DECLARE_INSN(bseti, MATCH_BSETI, MASK_BSETI) DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) @@ -3613,6 +3690,7 @@ DECLARE_INSN(vdivu_vv, MATCH_VDIVU_VV, MASK_VDIVU_VV) DECLARE_INSN(vdivu_vx, MATCH_VDIVU_VX, MASK_VDIVU_VX) DECLARE_INSN(vfadd_vf, MATCH_VFADD_VF, MASK_VFADD_VF) DECLARE_INSN(vfadd_vv, MATCH_VFADD_VV, MASK_VFADD_VV) 
+DECLARE_INSN(vfbdot_vv, MATCH_VFBDOT_VV, MASK_VFBDOT_VV) DECLARE_INSN(vfclass_v, MATCH_VFCLASS_V, MASK_VFCLASS_V) DECLARE_INSN(vfcvt_f_x_v, MATCH_VFCVT_F_X_V, MASK_VFCVT_F_X_V) DECLARE_INSN(vfcvt_f_xu_v, MATCH_VFCVT_F_XU_V, MASK_VFCVT_F_XU_V) @@ -3622,6 +3700,7 @@ DECLARE_INSN(vfcvt_x_f_v, MATCH_VFCVT_X_F_V, MASK_VFCVT_X_F_V) DECLARE_INSN(vfcvt_xu_f_v, MATCH_VFCVT_XU_F_V, MASK_VFCVT_XU_F_V) DECLARE_INSN(vfdiv_vf, MATCH_VFDIV_VF, MASK_VFDIV_VF) DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) +DECLARE_INSN(vfext_vf2, MATCH_VFEXT_VF2, MASK_VFEXT_VF2) DECLARE_INSN(vfirst_m, MATCH_VFIRST_M, MASK_VFIRST_M) DECLARE_INSN(vfmacc_vf, MATCH_VFMACC_VF, MASK_VFMACC_VF) DECLARE_INSN(vfmacc_vv, MATCH_VFMACC_VV, MASK_VFMACC_VV) @@ -3641,15 +3720,18 @@ DECLARE_INSN(vfmul_vv, MATCH_VFMUL_VV, MASK_VFMUL_VV) DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F) DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) +DECLARE_INSN(vfncvt_f_f_q, MATCH_VFNCVT_F_F_Q, MASK_VFNCVT_F_F_Q) DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W) DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W) DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W) DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W) DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W) DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W) +DECLARE_INSN(vfncvt_sat_f_f_q, MATCH_VFNCVT_SAT_F_F_Q, MASK_VFNCVT_SAT_F_F_Q) DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W) DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W) DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W) +DECLARE_INSN(vfncvtbf16_sat_f_f_w, MATCH_VFNCVTBF16_SAT_F_F_W, MASK_VFNCVTBF16_SAT_F_F_W) DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF) DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV) 
DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF) @@ -3658,6 +3740,10 @@ DECLARE_INSN(vfnmsac_vf, MATCH_VFNMSAC_VF, MASK_VFNMSAC_VF) DECLARE_INSN(vfnmsac_vv, MATCH_VFNMSAC_VV, MASK_VFNMSAC_VV) DECLARE_INSN(vfnmsub_vf, MATCH_VFNMSUB_VF, MASK_VFNMSUB_VF) DECLARE_INSN(vfnmsub_vv, MATCH_VFNMSUB_VV, MASK_VFNMSUB_VV) +DECLARE_INSN(vfqbdot_alt_vv, MATCH_VFQBDOT_ALT_VV, MASK_VFQBDOT_ALT_VV) +DECLARE_INSN(vfqbdot_vv, MATCH_VFQBDOT_VV, MASK_VFQBDOT_VV) +DECLARE_INSN(vfqldot_alt_vv, MATCH_VFQLDOT_ALT_VV, MASK_VFQLDOT_ALT_VV) +DECLARE_INSN(vfqldot_vv, MATCH_VFQLDOT_VV, MASK_VFQLDOT_VV) DECLARE_INSN(vfrdiv_vf, MATCH_VFRDIV_VF, MASK_VFRDIV_VF) DECLARE_INSN(vfrec7_v, MATCH_VFREC7_V, MASK_VFREC7_V) DECLARE_INSN(vfredmax_vs, MATCH_VFREDMAX_VS, MASK_VFREDMAX_VS) @@ -3681,6 +3767,7 @@ DECLARE_INSN(vfwadd_vf, MATCH_VFWADD_VF, MASK_VFWADD_VF) DECLARE_INSN(vfwadd_vv, MATCH_VFWADD_VV, MASK_VFWADD_VV) DECLARE_INSN(vfwadd_wf, MATCH_VFWADD_WF, MASK_VFWADD_WF) DECLARE_INSN(vfwadd_wv, MATCH_VFWADD_WV, MASK_VFWADD_WV) +DECLARE_INSN(vfwbdot_vv, MATCH_VFWBDOT_VV, MASK_VFWBDOT_VV) DECLARE_INSN(vfwcvt_f_f_v, MATCH_VFWCVT_F_F_V, MASK_VFWCVT_F_F_V) DECLARE_INSN(vfwcvt_f_x_v, MATCH_VFWCVT_F_X_V, MASK_VFWCVT_F_X_V) DECLARE_INSN(vfwcvt_f_xu_v, MATCH_VFWCVT_F_XU_V, MASK_VFWCVT_F_XU_V) @@ -3689,6 +3776,7 @@ DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V) DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V) DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V) DECLARE_INSN(vfwcvtbf16_f_f_v, MATCH_VFWCVTBF16_F_F_V, MASK_VFWCVTBF16_F_F_V) +DECLARE_INSN(vfwldot_vv, MATCH_VFWLDOT_VV, MASK_VFWLDOT_VV) DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF) DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV) DECLARE_INSN(vfwmaccbf16_vf, MATCH_VFWMACCBF16_VF, MASK_VFWMACCBF16_VF) @@ -3850,6 +3938,8 @@ DECLARE_INSN(vnsrl_wx, MATCH_VNSRL_WX, MASK_VNSRL_WX) DECLARE_INSN(vor_vi, MATCH_VOR_VI, MASK_VOR_VI) DECLARE_INSN(vor_vv, 
MATCH_VOR_VV, MASK_VOR_VV) DECLARE_INSN(vor_vx, MATCH_VOR_VX, MASK_VOR_VX) +DECLARE_INSN(vqbdots_vv, MATCH_VQBDOTS_VV, MASK_VQBDOTS_VV) +DECLARE_INSN(vqbdotu_vv, MATCH_VQBDOTU_VV, MASK_VQBDOTU_VV) DECLARE_INSN(vqdot_vv, MATCH_VQDOT_VV, MASK_VQDOT_VV) DECLARE_INSN(vqdot_vx, MATCH_VQDOT_VX, MASK_VQDOT_VX) DECLARE_INSN(vqdotsu_vv, MATCH_VQDOTSU_VV, MASK_VQDOTSU_VV) @@ -3857,6 +3947,8 @@ DECLARE_INSN(vqdotsu_vx, MATCH_VQDOTSU_VX, MASK_VQDOTSU_VX) DECLARE_INSN(vqdotu_vv, MATCH_VQDOTU_VV, MASK_VQDOTU_VV) DECLARE_INSN(vqdotu_vx, MATCH_VQDOTU_VX, MASK_VQDOTU_VX) DECLARE_INSN(vqdotus_vx, MATCH_VQDOTUS_VX, MASK_VQDOTUS_VX) +DECLARE_INSN(vqldots_vv, MATCH_VQLDOTS_VV, MASK_VQLDOTS_VV) +DECLARE_INSN(vqldotu_vv, MATCH_VQLDOTU_VV, MASK_VQLDOTU_VV) DECLARE_INSN(vredand_vs, MATCH_VREDAND_VS, MASK_VREDAND_VS) DECLARE_INSN(vredmax_vs, MATCH_VREDMAX_VS, MASK_VREDMAX_VS) DECLARE_INSN(vredmaxu_vs, MATCH_VREDMAXU_VS, MASK_VREDMAXU_VS) @@ -4051,8 +4143,6 @@ DECLARE_CSR(vl, CSR_VL) DECLARE_CSR(vtype, CSR_VTYPE) DECLARE_CSR(vlenb, CSR_VLENB) DECLARE_CSR(sstatus, CSR_SSTATUS) -DECLARE_CSR(sedeleg, CSR_SEDELEG) -DECLARE_CSR(sideleg, CSR_SIDELEG) DECLARE_CSR(sie, CSR_SIE) DECLARE_CSR(stvec, CSR_STVEC) DECLARE_CSR(scounteren, CSR_SCOUNTEREN) diff --git a/riscv/execute.cc b/riscv/execute.cc index 39d5ca4..97c90de 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -201,7 +201,7 @@ static inline reg_t execute_insn_logged(processor_t* p, reg_t pc, insn_fetch_t f return npc; } -bool processor_t::slow_path() +bool processor_t::slow_path() const { return debug || state.single_step != state.STEP_NONE || state.debug_mode || log_commits_enabled || histogram_enabled || in_wfi || check_triggers_icount; @@ -210,6 +210,8 @@ bool processor_t::slow_path() // fetch/decode/execute loop void processor_t::step(size_t n) { + mmu_t* _mmu = mmu; + if (!state.debug_mode) { if (halt_request == HR_REGULAR) { enter_debug_mode(DCSR_CAUSE_DEBUGINT, 0); @@ -224,11 +226,10 @@ void processor_t::step(size_t n) while (n > 
0) { size_t instret = 0; reg_t pc = state.pc; - mmu_t* _mmu = mmu; state.prv_changed = false; state.v_changed = false; - #define advance_pc() \ + #define advance_pc() { \ if (unlikely(invalid_pc(pc))) { \ switch (pc) { \ case PC_SERIALIZE_BEFORE: state.serialized = true; break; \ @@ -236,11 +237,11 @@ void processor_t::step(size_t n) default: abort(); \ } \ pc = state.pc; \ - break; \ + goto serialize; \ } else { \ state.pc = pc; \ instret++; \ - } + }} try { @@ -301,19 +302,21 @@ void processor_t::step(size_t n) else while (instret < n) { // Main simulation loop, fast path. - for (auto ic_entry = _mmu->access_icache(pc); ; ) { + for (auto ic_entry = _mmu->access_icache(pc); instret < n; instret++) { auto fetch = ic_entry->data; - pc = execute_insn_fast(this, pc, fetch); ic_entry = ic_entry->next; - if (unlikely(ic_entry->tag != pc)) - break; - if (unlikely(instret + 1 == n)) - break; - instret++; - state.pc = pc; + auto new_pc = execute_insn_fast(this, pc, fetch); + if (unlikely(ic_entry->tag != new_pc)) { + ic_entry = &_mmu->icache[_mmu->icache_index(new_pc)]; + _mmu->icache[_mmu->icache_index(pc)].next = ic_entry; + if (ic_entry->tag != new_pc) { + pc = new_pc; + advance_pc(); + break; + } + } + state.pc = pc = ic_entry->tag; } - - advance_pc(); } } catch(trap_t& t) @@ -360,6 +363,7 @@ void processor_t::step(size_t n) in_wfi = true; } +serialize: state.minstret->bump((state.mcountinhibit->read() & MCOUNTINHIBIT_IR) ? 0 : instret); // Model a hart whose CPI is 1. 
diff --git a/riscv/insn_template.cc b/riscv/insn_template.cc index 168e2dc..12d564b 100644 --- a/riscv/insn_template.cc +++ b/riscv/insn_template.cc @@ -6,7 +6,8 @@ #define DECODE_MACRO_USAGE_LOGGED 0 #define PROLOGUE \ - reg_t npc = sext_xlen(pc + insn_length(OPCODE)) + reg_t npc = sext_xlen(pc + insn_length(OPCODE)); \ + if (!p->extension_enabled(EXT_ZCA)) assume(insn_length(OPCODE) % 4 == 0) #define EPILOGUE \ trace_opcode(p, OPCODE, insn); \ diff --git a/riscv/insns/amoadd_d.h b/riscv/insns/amoadd_d.h index 8573aa5..f9ccd89 100644 --- a/riscv/insns/amoadd_d.h +++ b/riscv/insns/amoadd_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs + RS2; })); diff --git a/riscv/insns/amoadd_w.h b/riscv/insns/amoadd_w.h index c288b3b..8f1265b 100644 --- a/riscv/insns/amoadd_w.h +++ b/riscv/insns/amoadd_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs + RS2; }))); diff --git a/riscv/insns/amoand_d.h b/riscv/insns/amoand_d.h index 2df7ce2..e44cd21 100644 --- a/riscv/insns/amoand_d.h +++ b/riscv/insns/amoand_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs & RS2; })); diff --git a/riscv/insns/amoand_w.h b/riscv/insns/amoand_w.h index 962165f..05ff2db 100644 --- a/riscv/insns/amoand_w.h +++ b/riscv/insns/amoand_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs & RS2; }))); diff --git a/riscv/insns/amomax_d.h b/riscv/insns/amomax_d.h index ab95da0..7445fe3 100644 --- a/riscv/insns/amomax_d.h +++ b/riscv/insns/amomax_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](int64_t lhs) { return std::max(lhs, int64_t(RS2)); 
})); diff --git a/riscv/insns/amomax_w.h b/riscv/insns/amomax_w.h index 132c2e0..a2b65fd 100644 --- a/riscv/insns/amomax_w.h +++ b/riscv/insns/amomax_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](int32_t lhs) { return std::max(lhs, int32_t(RS2)); }))); diff --git a/riscv/insns/amomaxu_d.h b/riscv/insns/amomaxu_d.h index e2371aa..32c6d95 100644 --- a/riscv/insns/amomaxu_d.h +++ b/riscv/insns/amomaxu_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return std::max(lhs, RS2); })); diff --git a/riscv/insns/amomaxu_w.h b/riscv/insns/amomaxu_w.h index ebbdd41..030ce30 100644 --- a/riscv/insns/amomaxu_w.h +++ b/riscv/insns/amomaxu_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return std::max(lhs, uint32_t(RS2)); }))); diff --git a/riscv/insns/amomin_d.h b/riscv/insns/amomin_d.h index 419e42e..97f5173 100644 --- a/riscv/insns/amomin_d.h +++ b/riscv/insns/amomin_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](int64_t lhs) { return std::min(lhs, int64_t(RS2)); })); diff --git a/riscv/insns/amomin_w.h b/riscv/insns/amomin_w.h index 749149c..266f574 100644 --- a/riscv/insns/amomin_w.h +++ b/riscv/insns/amomin_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](int32_t lhs) { return std::min(lhs, int32_t(RS2)); }))); diff --git a/riscv/insns/amominu_d.h b/riscv/insns/amominu_d.h index b4bab47..9f67295 100644 --- a/riscv/insns/amominu_d.h +++ b/riscv/insns/amominu_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return std::min(lhs, RS2); })); diff --git a/riscv/insns/amominu_w.h 
b/riscv/insns/amominu_w.h index 680eef2..34475d7 100644 --- a/riscv/insns/amominu_w.h +++ b/riscv/insns/amominu_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return std::min(lhs, uint32_t(RS2)); }))); diff --git a/riscv/insns/amoor_d.h b/riscv/insns/amoor_d.h index c201d88..0b255d3 100644 --- a/riscv/insns/amoor_d.h +++ b/riscv/insns/amoor_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs | RS2; })); diff --git a/riscv/insns/amoor_w.h b/riscv/insns/amoor_w.h index 0adac5b..6dc2e33 100644 --- a/riscv/insns/amoor_w.h +++ b/riscv/insns/amoor_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs | RS2; }))); diff --git a/riscv/insns/amoswap_d.h b/riscv/insns/amoswap_d.h index 62a95b0..2fb1398 100644 --- a/riscv/insns/amoswap_d.h +++ b/riscv/insns/amoswap_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t UNUSED lhs) { return RS2; })); diff --git a/riscv/insns/amoswap_w.h b/riscv/insns/amoswap_w.h index 819579c..3ca7513 100644 --- a/riscv/insns/amoswap_w.h +++ b/riscv/insns/amoswap_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t UNUSED lhs) { return RS2; }))); diff --git a/riscv/insns/amoxor_d.h b/riscv/insns/amoxor_d.h index a40050f..4f257df 100644 --- a/riscv/insns/amoxor_d.h +++ b/riscv/insns/amoxor_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; WRITE_RD(MMU.amo<uint64_t>(RS1, [&](uint64_t lhs) { return lhs ^ RS2; })); diff --git a/riscv/insns/amoxor_w.h b/riscv/insns/amoxor_w.h index af025d6..6eb7a20 100644 --- a/riscv/insns/amoxor_w.h +++ b/riscv/insns/amoxor_w.h @@ -1,2 +1,2 @@ 
-require_extension('A'); +require_extension(EXT_ZAAMO); WRITE_RD(sext32(MMU.amo<uint32_t>(RS1, [&](uint32_t lhs) { return lhs ^ RS2; }))); diff --git a/riscv/insns/beqi.h b/riscv/insns/beqi.h new file mode 100644 index 0000000..14555fe --- /dev/null +++ b/riscv/insns/beqi.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZIBI); + +if (RS1 == insn.b_imm5()) { + set_pc(BRANCH_TARGET); +} diff --git a/riscv/insns/bnei.h b/riscv/insns/bnei.h new file mode 100644 index 0000000..ac557ed --- /dev/null +++ b/riscv/insns/bnei.h @@ -0,0 +1,5 @@ +require_extension(EXT_ZIBI); + +if (RS1 != insn.b_imm5()) { + set_pc(BRANCH_TARGET); +} diff --git a/riscv/insns/c_add.h b/riscv/insns/c_add.h index 796e634..0c97e3e 100644 --- a/riscv/insns/c_add.h +++ b/riscv/insns/c_add.h @@ -1,3 +1,2 @@ require_extension(EXT_ZCA); -require(insn.rvc_rs2() != 0); WRITE_RD(sext_xlen(RVC_RS1 + RVC_RS2)); diff --git a/riscv/insns/c_jalr.h b/riscv/insns/c_jalr.h index 694f183..df91254 100644 --- a/riscv/insns/c_jalr.h +++ b/riscv/insns/c_jalr.h @@ -1,10 +1,6 @@ require_extension(EXT_ZCA); -require(insn.rvc_rs1() != 0); reg_t tmp = npc; set_pc(RVC_RS1 & ~reg_t(1)); WRITE_REG(X_RA, tmp); -if (ZICFILP_xLPE(STATE.v, STATE.prv)) { - STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rvc_rs1()); - serialize(); -} +maybe_set_elp(insn.rvc_rs1()); diff --git a/riscv/insns/c_jr.h b/riscv/insns/c_jr.h index af43dd3..1a1d14e 100644 --- a/riscv/insns/c_jr.h +++ b/riscv/insns/c_jr.h @@ -2,7 +2,4 @@ require_extension(EXT_ZCA); require(insn.rvc_rs1() != 0); set_pc(RVC_RS1 & ~reg_t(1)); -if (ZICFILP_xLPE(STATE.v, STATE.prv)) { - STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rvc_rs1()); - serialize(); -} +maybe_set_elp(insn.rvc_rs1()); diff --git a/riscv/insns/c_mv.h b/riscv/insns/c_mv.h index b227005..8edefc5 100644 --- a/riscv/insns/c_mv.h +++ b/riscv/insns/c_mv.h @@ -1,3 +1,2 @@ require_extension(EXT_ZCA); -require(insn.rvc_rs2() != 0); WRITE_RD(RVC_RS2); diff --git a/riscv/insns/fli_h.h b/riscv/insns/fli_h.h index ddf41a9..71fd64d 100644 --- 
a/riscv/insns/fli_h.h +++ b/riscv/insns/fli_h.h @@ -1,4 +1,4 @@ -require_extension(EXT_ZFH); +require_either_extension(EXT_ZFH, EXT_ZVFH); require_extension(EXT_ZFA); require_fp; { diff --git a/riscv/insns/flq.h b/riscv/insns/flq.h index 81d225c..6a60c0c 100644 --- a/riscv/insns/flq.h +++ b/riscv/insns/flq.h @@ -1,3 +1,5 @@ require_extension('Q'); require_fp; -WRITE_FRD(MMU.load_float128(RS1 + insn.i_imm())); +uint128_t v = MMU.load<uint128_t>(RS1 + insn.i_imm()); +float128_t f = { uint64_t(v), uint64_t(v >> 64) }; +WRITE_FRD(f); diff --git a/riscv/insns/fsq.h b/riscv/insns/fsq.h index 610960e..7a4bdd8 100644 --- a/riscv/insns/fsq.h +++ b/riscv/insns/fsq.h @@ -1,3 +1,4 @@ require_extension('Q'); require_fp; -MMU.store_float128(RS1 + insn.s_imm(), FRS2); +uint128_t v = FRS2.v[0] | (uint128_t(FRS2.v[1]) << 64); +MMU.store<uint128_t>(RS1 + insn.s_imm(), v); diff --git a/riscv/insns/jalr.h b/riscv/insns/jalr.h index de84e89..abff855 100644 --- a/riscv/insns/jalr.h +++ b/riscv/insns/jalr.h @@ -3,7 +3,4 @@ reg_t tmp = npc; set_pc((RS1 + insn.i_imm()) & ~reg_t(1)); WRITE_RD(tmp); -if (ZICFILP_xLPE(STATE.v, STATE.prv)) { - STATE.elp = ZICFILP_IS_LP_EXPECTED(insn.rs1()); - serialize(); -} +maybe_set_elp(insn.rs1()); diff --git a/riscv/insns/lr_d.h b/riscv/insns/lr_d.h index 214daff..32a16e5 100644 --- a/riscv/insns/lr_d.h +++ b/riscv/insns/lr_d.h @@ -1,3 +1,3 @@ -require_extension('A'); +require_extension(EXT_ZALRSC); require_rv64; WRITE_RD(MMU.load_reserved<int64_t>(RS1)); diff --git a/riscv/insns/lr_w.h b/riscv/insns/lr_w.h index 354590f..fb0005c 100644 --- a/riscv/insns/lr_w.h +++ b/riscv/insns/lr_w.h @@ -1,2 +1,2 @@ -require_extension('A'); +require_extension(EXT_ZALRSC); WRITE_RD(MMU.load_reserved<int32_t>(RS1)); diff --git a/riscv/insns/sc_d.h b/riscv/insns/sc_d.h index ac82c3e..1b6880b 100644 --- a/riscv/insns/sc_d.h +++ b/riscv/insns/sc_d.h @@ -1,4 +1,4 @@ -require_extension('A'); +require_extension(EXT_ZALRSC); require_rv64; bool have_reservation = 
MMU.store_conditional<uint64_t>(RS1, RS2); diff --git a/riscv/insns/sc_w.h b/riscv/insns/sc_w.h index 48fea4b..6df6a67 100644 --- a/riscv/insns/sc_w.h +++ b/riscv/insns/sc_w.h @@ -1,4 +1,4 @@ -require_extension('A'); +require_extension(EXT_ZALRSC); bool have_reservation = MMU.store_conditional<uint32_t>(RS1, RS2); diff --git a/riscv/insns/sfence_inval_ir.h b/riscv/insns/sfence_inval_ir.h index 6f76a3f..42fb177 100644 --- a/riscv/insns/sfence_inval_ir.h +++ b/riscv/insns/sfence_inval_ir.h @@ -1,4 +1,4 @@ require_extension('S'); require_extension(EXT_SVINVAL); -require_impl(IMPL_MMU); +require(p->has_mmu()); require_privilege_hs_qualified(PRV_S); diff --git a/riscv/insns/sfence_vma.h b/riscv/insns/sfence_vma.h index 7d6c01a..156331d 100644 --- a/riscv/insns/sfence_vma.h +++ b/riscv/insns/sfence_vma.h @@ -1,5 +1,5 @@ require_extension('S'); -require_impl(IMPL_MMU); +require(p->has_mmu()); if (STATE.v) { if (STATE.prv == PRV_U || get_field(STATE.hstatus->read(), HSTATUS_VTVM)) require_novirt(); diff --git a/riscv/insns/sret.h b/riscv/insns/sret.h index efb4fa6..3bbdb82 100644 --- a/riscv/insns/sret.h +++ b/riscv/insns/sret.h @@ -30,7 +30,7 @@ if (ZICFILP_xLPE(prev_virt, prev_prv)) { if (STATE.prv == PRV_M) { STATE.mstatus->write(STATE.mstatus->read() & ~MSTATUS_MDT); if (prev_prv == PRV_U || prev_virt) - STATE.mstatus->write(STATE.mstatus->read() & ~MSTATUS_SDT); + s = set_field(s, SSTATUS_SDT, 0); if (prev_virt && prev_prv == PRV_U) STATE.vsstatus->write(STATE.vsstatus->read() & ~SSTATUS_SDT); } diff --git a/riscv/insns/ssamoswap_d.h b/riscv/insns/ssamoswap_d.h index 10ea5ef..4169ac3 100644 --- a/riscv/insns/ssamoswap_d.h +++ b/riscv/insns/ssamoswap_d.h @@ -1,5 +1,5 @@ require_extension(EXT_ZICFISS); -require_extension('A'); +require_extension(EXT_ZAAMO); require_rv64; DECLARE_XENVCFG_VARS(SSE); diff --git a/riscv/insns/ssamoswap_w.h b/riscv/insns/ssamoswap_w.h index 3cdefc7..d971ebe 100644 --- a/riscv/insns/ssamoswap_w.h +++ b/riscv/insns/ssamoswap_w.h @@ -1,7 +1,6 
@@ require_extension(EXT_ZICFISS); -require_extension('A'); +require_extension(EXT_ZAAMO); DECLARE_XENVCFG_VARS(SSE); require_envcfg(SSE); WRITE_RD(sext32(MMU.ssamoswap<uint32_t>(RS1, RS2))); - diff --git a/riscv/insns/vandn_vv.h b/riscv/insns/vandn_vv.h index d85e47d..411c97d 100644 --- a/riscv/insns/vandn_vv.h +++ b/riscv/insns/vandn_vv.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; VI_VV_LOOP ({ diff --git a/riscv/insns/vandn_vx.h b/riscv/insns/vandn_vx.h index 1c66a40..417b8d2 100644 --- a/riscv/insns/vandn_vx.h +++ b/riscv/insns/vandn_vx.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; VI_VX_LOOP ({ diff --git a/riscv/insns/vbrev8_v.h b/riscv/insns/vbrev8_v.h index a6d3cda..19fa723 100644 --- a/riscv/insns/vbrev8_v.h +++ b/riscv/insns/vbrev8_v.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; VI_V_ULOOP ({ diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h index 2b808e0..be3bd9a 100644 --- a/riscv/insns/vfadd_vf.h +++ b/riscv/insns/vfadd_vf.h @@ -1,7 +1,9 @@ // vfadd.vf vd, vs2, rs1 +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_add(rs1, vs2); + vd = VFP_OP_16(add, vs2, rs1); }, { vd = f32_add(rs1, vs2); diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h index ce94921..67ddd5c 100644 --- a/riscv/insns/vfadd_vv.h +++ b/riscv/insns/vfadd_vv.h @@ -1,7 +1,9 @@ // vfadd.vv vd, vs2, vs1 +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_add(vs1, vs2); + vd = VFP_OP_16(add, vs2, vs1); }, { vd = f32_add(vs1, vs2); diff --git a/riscv/insns/vfbdot_vv.h b/riscv/insns/vfbdot_vv.h new file mode 100644 index 0000000..8d4c792 --- /dev/null +++ b/riscv/insns/vfbdot_vv.h @@ -0,0 +1,16 @@ +VI_VFP_BASE; +ZVBDOT_INIT(1); + +switch (P.VU.vsew) { + case 32: { + // This implementation rounds intermediate products to FP32 then sums them + // sequentially; other implementations are also valid. If a more + // realistic scheme (e.g. 
binary reduction tree, plus final accumulation) + // becomes popular, we might change this implementation accordingly. + require_extension(EXT_ZVFBDOT32F); + auto macc = [](auto a, auto b, auto c) { return f32_add(c, f32_mul(a, b)); }; + ZVBDOT_GENERIC_LOOP(float32_t, float32_t, float32_t, macc); + break; + } + default: require(false); +} diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h index a307d2d..4378bd4 100644 --- a/riscv/insns/vfclass_v.h +++ b/riscv/insns/vfclass_v.h @@ -1,7 +1,7 @@ // vfclass.v vd, vs2, vm VI_VFP_V_LOOP ({ - vd = f16(f16_classify(vs2)); + vd = P.VU.altfmt ? bf16(bf16_classify(vs2)) : f16(f16_classify(vs2)); }, { vd = f32(f32_classify(vs2)); diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h index a703ef0..2707daa 100644 --- a/riscv/insns/vfdiv_vf.h +++ b/riscv/insns/vfdiv_vf.h @@ -1,4 +1,6 @@ // vfdiv.vf vd, vs2, rs1 +VI_NON_ALTFMT_INSN + VI_VFP_VF_LOOP ({ vd = f16_div(vs2, rs1); diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h index c66d751..5f5ed74 100644 --- a/riscv/insns/vfdiv_vv.h +++ b/riscv/insns/vfdiv_vv.h @@ -1,4 +1,6 @@ // vfdiv.vv vd, vs2, vs1 +VI_NON_ALTFMT_INSN + VI_VFP_VV_LOOP ({ vd = f16_div(vs2, vs1); diff --git a/riscv/insns/vfext_vf2.h b/riscv/insns/vfext_vf2.h new file mode 100644 index 0000000..523bb28 --- /dev/null +++ b/riscv/insns/vfext_vf2.h @@ -0,0 +1,14 @@ +static const uint8_t ofp4_to_e4m3[16] = { + 0x00, 0x30, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, // positive values (sign bit 0) + 0x80, 0xb0, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc // negative values (sign bit 1) +}; + +require_extension(EXT_ZVFOFP4MIN); +VI_NON_ALTFMT_INSN +VI_VF_EXT(2, + { + uint_fast8_t packed_ofp4_reg = P.VU.elt<uint8_t>(rs2_num, i / 2); + uint_fast8_t data = ((packed_ofp4_reg >> ((i & 1UL)*4)) & 0xF); + P.VU.elt<uint8_t>(rd_num, i, true) = ofp4_to_e4m3[data]; + } +) diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h index 61578d3..293a06b 100644 --- a/riscv/insns/vfmacc_vf.h +++ 
b/riscv/insns/vfmacc_vf.h @@ -1,7 +1,10 @@ // vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(rs1, vs2, vd); + vd = VFP_MULADD_16(rs1, vs2, vd); }, { vd = f32_mulAdd(rs1, vs2, vd); diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h index 499b1d4..3fadb66 100644 --- a/riscv/insns/vfmacc_vv.h +++ b/riscv/insns/vfmacc_vv.h @@ -1,7 +1,10 @@ // vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(vs1, vs2, vd); + vd = VFP_MULADD_16(vs1, vs2, vd); }, { vd = f32_mulAdd(vs1, vs2, vd); diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h index 2a01429..258e620 100644 --- a/riscv/insns/vfmadd_vf.h +++ b/riscv/insns/vfmadd_vf.h @@ -1,7 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(vd, rs1, vs2); + vd = VFP_MULADD_16(vd, rs1, vs2); }, { vd = f32_mulAdd(vd, rs1, vs2); diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h index 7ef734f..42ece0a 100644 --- a/riscv/insns/vfmadd_vv.h +++ b/riscv/insns/vfmadd_vv.h @@ -1,7 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(vd, vs1, vs2); + vd = VFP_MULADD_16(vd, vs1, vs2); }, { vd = f32_mulAdd(vd, vs1, vs2); diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h index c4b74cb..3bd8013 100644 --- a/riscv/insns/vfmax_vf.h +++ b/riscv/insns/vfmax_vf.h @@ -1,7 +1,10 @@ // vfmax + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_max(vs2, rs1); + vd = VFP_OP_16(max, vs2, rs1); }, { vd = f32_max(vs2, rs1); diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h index 6439c89..4801b86 100644 --- a/riscv/insns/vfmax_vv.h +++ b/riscv/insns/vfmax_vv.h @@ -1,7 +1,10 @@ // vfmax + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_max(vs2, vs1); + vd = VFP_OP_16(max, vs2, vs1); }, { vd = f32_max(vs2, vs1); diff --git a/riscv/insns/vfmin_vf.h 
b/riscv/insns/vfmin_vf.h index 1560cdf7..efae5a1 100644 --- a/riscv/insns/vfmin_vf.h +++ b/riscv/insns/vfmin_vf.h @@ -1,7 +1,10 @@ // vfmin vd, vs2, rs1 + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_min(vs2, rs1); + vd = VFP_OP_16(min, vs2, rs1); }, { vd = f32_min(vs2, rs1); diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h index 882a774..9e2bee2 100644 --- a/riscv/insns/vfmin_vv.h +++ b/riscv/insns/vfmin_vv.h @@ -1,7 +1,10 @@ // vfmin vd, vs2, vs1 + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_min(vs2, vs1); + vd = VFP_OP_16(min, vs2, vs1); }, { vd = f32_min(vs2, vs1); diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h index 8af397b..2251e93 100644 --- a/riscv/insns/vfmsac_vf.h +++ b/riscv/insns/vfmsac_vf.h @@ -1,7 +1,11 @@ // vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(rs1, vs2, bf16(vd.v ^ BF16_SIGN)) + : f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); }, { vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h index 3bb50e5..3b9cf12 100644 --- a/riscv/insns/vfmsac_vv.h +++ b/riscv/insns/vfmsac_vv.h @@ -1,7 +1,11 @@ // vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(vs1, vs2, bf16(vd.v ^ BF16_SIGN)) + : f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); }, { vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h index ab77b4c..5ce6a44 100644 --- a/riscv/insns/vfmsub_vf.h +++ b/riscv/insns/vfmsub_vf.h @@ -1,7 +1,11 @@ // vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); + vd = P.VU.altfmt ? 
bf16_mulAdd(vd, rs1, bf16(vs2.v ^ BF16_SIGN)) + : f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); }, { vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h index 3cac937..bedc934 100644 --- a/riscv/insns/vfmsub_vv.h +++ b/riscv/insns/vfmsub_vv.h @@ -1,7 +1,11 @@ // vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(vd, vs1, bf16(vs2.v ^ BF16_SIGN)) + : f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); }, { vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h index f5f63e4..93ca216 100644 --- a/riscv/insns/vfmul_vf.h +++ b/riscv/insns/vfmul_vf.h @@ -1,7 +1,9 @@ // vfmul.vf vd, vs2, rs1, vm +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mul(vs2, rs1); + vd = VFP_OP_16(mul, vs2, rs1); }, { vd = f32_mul(vs2, rs1); diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h index 7930fd0..75daad6 100644 --- a/riscv/insns/vfmul_vv.h +++ b/riscv/insns/vfmul_vv.h @@ -1,7 +1,9 @@ // vfmul.vv vd, vs1, vs2, vm +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mul(vs1, vs2); + vd = VFP_OP_16(mul, vs1, vs2); }, { vd = f32_mul(vs1, vs2); diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index 1ad6bc6..65a3cff 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -1,12 +1,6 @@ // vfmv_f_s: rd = vs2[0] (rs1=0) -require_vector(true); -require_fp; -require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) || - (P.VU.vsew == e32 && p->extension_enabled('F')) || - (P.VU.vsew == e64 && p->extension_enabled('D'))); -require(STATE.frm->read() < 0x5); +VI_VFP_COMMON; -reg_t rs2_num = insn.rs2(); uint64_t vs2_0 = 0; const reg_t sew = P.VU.vsew; switch (sew) { diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index 4b1b955..917948d 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -1,19 +1,14 @@ // vfmv_s_f: vd[0] = rs1 
(vs2=0) -require_vector(true); -require_fp; -require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) || - (P.VU.vsew == e32 && p->extension_enabled('F')) || - (P.VU.vsew == e64 && p->extension_enabled('D'))); -require(STATE.frm->read() < 0x5); +require_zvfbfa -reg_t vl = P.VU.vl->read(); +VI_VFP_COMMON; if (vl > 0 && P.VU.vstart->read() < vl) { reg_t rd_num = insn.rd(); switch (P.VU.vsew) { case e16: - P.VU.elt<uint16_t>(rd_num, 0, true) = f16(FRS1).v; + P.VU.elt<uint16_t>(rd_num, 0, true) = P.VU.altfmt ? bf16(FRS1).v : f16(FRS1).v; break; case e32: P.VU.elt<uint32_t>(rd_num, 0, true) = f32(FRS1).v; diff --git a/riscv/insns/vfncvt_f_f_q.h b/riscv/insns/vfncvt_f_f_q.h new file mode 100644 index 0000000..abbe5e8 --- /dev/null +++ b/riscv/insns/vfncvt_f_f_q.h @@ -0,0 +1,7 @@ +// vfncvt.f.f.q vd, vs2, vm +VI_VFP_NCVT_FP_TO_OFP8( + { + vd = P.VU.altfmt ? f32_to_e5m2(vs2, false) : f32_to_e4m3(vs2, false); + }, // BODY + { require_extension(EXT_ZVFOFP8MIN); } // CHECK +) diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h index 97de40e..ede707d 100644 --- a/riscv/insns/vfncvt_f_f_w.h +++ b/riscv/insns/vfncvt_f_f_w.h @@ -1,7 +1,7 @@ // vfncvt.f.f.w vd, vs2, vm VI_VFP_NCVT_FP_TO_FP( - { vd = f32_to_f16(vs2); }, // BODY32 - { vd = f64_to_f32(vs2); }, // BODY64 - { require_extension(EXT_ZVFHMIN); }, // CHECK32 - { require_extension('D'); } // CHECK64 + { vd = P.VU.altfmt ? 
f32_to_bf16(vs2) : f32_to_f16(vs2); }, // BODY32 + { vd = f64_to_f32(vs2); }, // BODY64 + { require_zvfbfa_or_zvfhmin }, // CHECK32 + { require(p->get_isa().get_zvd()); } // CHECK64 ) diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h index 46f2d92..676cd3a 100644 --- a/riscv/insns/vfncvt_f_x_w.h +++ b/riscv/insns/vfncvt_f_x_w.h @@ -1,8 +1,10 @@ // vfncvt.f.x.w vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_NCVT_INT_TO_FP( - { vd = i32_to_f16(vs2); }, // BODY32 - { vd = i64_to_f32(vs2); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK32 - { require_extension('F'); }, // CHECK64 - int // sign + { vd = i32_to_f16(vs2); }, // BODY32 + { vd = i64_to_f32(vs2); }, // BODY64 + { require_extension(EXT_ZVFH); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK64 + int // sign ) diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h index 729fb52..f90d3d7 100644 --- a/riscv/insns/vfncvt_f_xu_w.h +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -1,8 +1,10 @@ // vfncvt.f.xu.w vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_NCVT_INT_TO_FP( - { vd = ui32_to_f16(vs2); }, // BODY32 - { vd = ui64_to_f32(vs2); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK32 - { require_extension('F'); }, // CHECK64 - uint // sign + { vd = ui32_to_f16(vs2); }, // BODY32 + { vd = ui64_to_f32(vs2); }, // BODY64 + { require_extension(EXT_ZVFH); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK64 + uint // sign ) diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h index 93002dc..020a4df 100644 --- a/riscv/insns/vfncvt_rod_f_f_w.h +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -1,13 +1,13 @@ // vfncvt.rod.f.f.w vd, vs2, vm VI_VFP_NCVT_FP_TO_FP( - { // BODY32 + { // BODY32 softfloat_roundingMode = softfloat_round_odd; - vd = f32_to_f16(vs2); + vd = P.VU.altfmt ? 
f32_to_bf16(vs2) : f32_to_f16(vs2); }, - { // BODY64 + { // BODY64 softfloat_roundingMode = softfloat_round_odd; vd = f64_to_f32(vs2); }, - { require_extension(EXT_ZVFH); }, // CHECK32 - { require_extension('F'); } // CHECK64 + { require_zvfbfa_or_zvfh; }, // CHECK32 + { require(p->get_isa().get_zvd()); } // CHECK64 ) diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h index ee47e22..ad9ce21 100644 --- a/riscv/insns/vfncvt_rtz_x_f_w.h +++ b/riscv/insns/vfncvt_rtz_x_f_w.h @@ -1,10 +1,12 @@ // vfncvt.rtz.x.f.w vd, vs2, vm + VI_VFP_NCVT_FP_TO_INT( - { vd = f16_to_i8(vs2, softfloat_round_minMag, true); }, // BODY16 + { vd = P.VU.altfmt ? bf16_to_i8(vs2, softfloat_round_minMag, true) + : f16_to_i8(vs2, softfloat_round_minMag, true); }, // BODY16 { vd = f32_to_i16(vs2, softfloat_round_minMag, true); }, // BODY32 { vd = f64_to_i32(vs2, softfloat_round_minMag, true); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK16 - { require(p->extension_enabled('F')); }, // CHECK32 - { require(p->extension_enabled('D')); }, // CHECK64 + { require_zvfbfa_or_zvfh; }, // CHECK16 + { require(p->get_isa().get_zvf()); }, // CHECK32 + { require(p->get_isa().get_zvd()); }, // CHECK64 int // sign ) diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h index 3d029f3..d258aea 100644 --- a/riscv/insns/vfncvt_rtz_xu_f_w.h +++ b/riscv/insns/vfncvt_rtz_xu_f_w.h @@ -1,10 +1,12 @@ // vfncvt.rtz.xu.f.w vd, vs2, vm + VI_VFP_NCVT_FP_TO_INT( - { vd = f16_to_ui8(vs2, softfloat_round_minMag, true); }, // BODY16 + { vd = P.VU.altfmt ? 
bf16_to_ui8(vs2, softfloat_round_minMag, true) + : f16_to_ui8(vs2, softfloat_round_minMag, true); }, // BODY16 { vd = f32_to_ui16(vs2, softfloat_round_minMag, true); }, // BODY32 { vd = f64_to_ui32(vs2, softfloat_round_minMag, true); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK16 - { require(p->extension_enabled('F')); }, // CHECK32 - { require(p->extension_enabled('D')); }, // CHECK64 + { require_zvfbfa_or_zvfh; }, // CHECK16 + { require(p->get_isa().get_zvf()); }, // CHECK32 + { require(p->get_isa().get_zvd()); }, // CHECK64 uint // sign ) diff --git a/riscv/insns/vfncvt_sat_f_f_q.h b/riscv/insns/vfncvt_sat_f_f_q.h new file mode 100644 index 0000000..3d545a9 --- /dev/null +++ b/riscv/insns/vfncvt_sat_f_f_q.h @@ -0,0 +1,7 @@ +// vfncvt.sat.f.f.q vd, vs2, vm +VI_VFP_NCVT_FP_TO_OFP8( + { + vd = P.VU.altfmt ? f32_to_e5m2(vs2, true) : f32_to_e4m3(vs2, true); + }, // BODY + { require_extension(EXT_ZVFOFP8MIN); } // CHECK +) diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h index 0da5a75..929ae62 100644 --- a/riscv/insns/vfncvt_x_f_w.h +++ b/riscv/insns/vfncvt_x_f_w.h @@ -1,10 +1,12 @@ // vfncvt.x.f.w vd, vs2, vm + VI_VFP_NCVT_FP_TO_INT( - { vd = f16_to_i8(vs2, softfloat_roundingMode, true); }, // BODY16 + { vd = P.VU.altfmt ? 
bf16_to_i8(vs2, softfloat_roundingMode, true) + : f16_to_i8(vs2, softfloat_roundingMode, true); }, // BODY16 { vd = f32_to_i16(vs2, softfloat_roundingMode, true); }, // BODY32 { vd = f64_to_i32(vs2, softfloat_roundingMode, true); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK16 - { require(p->extension_enabled('F')); }, // CHECK32 - { require(p->extension_enabled('D')); }, // CHECK64 + { require_zvfbfa_or_zvfh; }, // CHECK16 + { require(p->get_isa().get_zvf()); }, // CHECK32 + { require(p->get_isa().get_zvd()); }, // CHECK64 int // sign ) diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h index da5a52d..c56bb53 100644 --- a/riscv/insns/vfncvt_xu_f_w.h +++ b/riscv/insns/vfncvt_xu_f_w.h @@ -1,10 +1,12 @@ // vfncvt.xu.f.w vd, vs2, vm + VI_VFP_NCVT_FP_TO_INT( - { vd = f16_to_ui8(vs2, softfloat_roundingMode, true); }, // BODY16 + { vd = P.VU.altfmt ? bf16_to_ui8(vs2, softfloat_roundingMode, true) + : f16_to_ui8(vs2, softfloat_roundingMode, true); }, // BODY16 { vd = f32_to_ui16(vs2, softfloat_roundingMode, true); }, // BODY32 { vd = f64_to_ui32(vs2, softfloat_roundingMode, true); }, // BODY64 - { require_extension(EXT_ZVFH); }, // CHECK16 - { require(p->extension_enabled('F')); }, // CHECK32 - { require(p->extension_enabled('D')); }, // CHECK64 + { require_zvfbfa_or_zvfh; }, // CHECK16 + { require(p->get_isa().get_zvf()); }, // CHECK32 + { require(p->get_isa().get_zvd()); }, // CHECK64 uint // sign ) diff --git a/riscv/insns/vfncvtbf16_f_f_w.h b/riscv/insns/vfncvtbf16_f_f_w.h index 4708802..6248328 100644 --- a/riscv/insns/vfncvtbf16_f_f_w.h +++ b/riscv/insns/vfncvtbf16_f_f_w.h @@ -1,5 +1,9 @@ // vfncvtbf16.f.f.w vd, vs2, vm -VI_VFP_NCVT_BF16_TO_FP( - { vd = f32_to_bf16(vs2); }, // BODY16 - { require_extension(EXT_ZVFBFMIN); } // CHECK16 +VI_VFP_NCVT_FP_BF16_OFP8( + { + vd = P.VU.altfmt ? 
bf16_to_e5m2(vs2, false) : bf16_to_e4m3(vs2, false); + }, // BODY16 + { vd = f32_to_bf16(vs2); }, // BODY32 + { require(p->extension_enabled(EXT_ZVFOFP8MIN)); }, // CHECK16 + { require_extension(EXT_ZVFBFMIN); } // CHECK32 ) diff --git a/riscv/insns/vfncvtbf16_sat_f_f_w.h b/riscv/insns/vfncvtbf16_sat_f_f_w.h new file mode 100644 index 0000000..238e415 --- /dev/null +++ b/riscv/insns/vfncvtbf16_sat_f_f_w.h @@ -0,0 +1,8 @@ +// vfncvtbf16.f.f.w vd, vs2, vm +VI_VFP_NCVT_SAT_BF16_TO_OFP8( + { + vd = P.VU.altfmt ? bf16_to_e5m2(vs2, true) : bf16_to_e4m3(vs2, true); + }, // BODY16 + { require_extension(EXT_ZVFOFP8MIN); } // CHECK16 + ) + diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h index 1b99302..e9688ea 100644 --- a/riscv/insns/vfnmacc_vf.h +++ b/riscv/insns/vfnmacc_vf.h @@ -1,7 +1,12 @@ // vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] + + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(rs1, bf16(vs2.v ^ BF16_SIGN), bf16(vd.v ^ BF16_SIGN)) + : f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); }, { vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h index 7200e06..c8f22be 100644 --- a/riscv/insns/vfnmacc_vv.h +++ b/riscv/insns/vfnmacc_vv.h @@ -1,7 +1,11 @@ // vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); + vd = P.VU.altfmt ? 
bf16_mulAdd(bf16(vs2.v ^ BF16_SIGN), vs1, bf16(vd.v ^ BF16_SIGN)) + : f16_mulAdd( f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); }, { vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h index cb9c217..32754c3 100644 --- a/riscv/insns/vfnmadd_vf.h +++ b/riscv/insns/vfnmadd_vf.h @@ -1,7 +1,11 @@ // vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), rs1, bf16(vs2.v ^ BF16_SIGN)) + : f16_mulAdd( f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); }, { vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h index 7160ed7..d71f9ed 100644 --- a/riscv/insns/vfnmadd_vv.h +++ b/riscv/insns/vfnmadd_vv.h @@ -1,7 +1,11 @@ // vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); + vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), vs1, bf16(vs2.v ^ BF16_SIGN)) + : f16_mulAdd( f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); }, { vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h index aa6baa3..a69603f 100644 --- a/riscv/insns/vfnmsac_vf.h +++ b/riscv/insns/vfnmsac_vf.h @@ -1,7 +1,11 @@ // vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); + vd = P.VU.altfmt ? 
bf16_mulAdd(rs1, bf16(vs2.v ^ BF16_SIGN), vd) + : f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); }, { vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h index 47db61d..f150641 100644 --- a/riscv/insns/vfnmsac_vv.h +++ b/riscv/insns/vfnmsac_vv.h @@ -1,7 +1,11 @@ // vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd); + vd = P.VU.altfmt ? bf16_mulAdd(bf16(vs1.v ^ BF16_SIGN), vs2, vd) + : f16_mulAdd( f16(vs1.v ^ F16_SIGN), vs2, vd); }, { vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h index 43aa9e2..55be4d1 100644 --- a/riscv/insns/vfnmsub_vf.h +++ b/riscv/insns/vfnmsub_vf.h @@ -1,7 +1,11 @@ // vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] + +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2); + vd = P.VU.altfmt ? bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), rs1, vs2) + : f16_mulAdd( f16(vd.v ^ F16_SIGN), rs1, vs2); }, { vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h index 2a45c8f..a9ff58c 100644 --- a/riscv/insns/vfnmsub_vv.h +++ b/riscv/insns/vfnmsub_vv.h @@ -1,7 +1,11 @@ // vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] + +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2); + vd = P.VU.altfmt ? 
bf16_mulAdd(bf16(vd.v ^ BF16_SIGN), vs1, vs2) + : f16_mulAdd( f16(vd.v ^ F16_SIGN), vs1, vs2); }, { vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); diff --git a/riscv/insns/vfqbdot_alt_vv.h b/riscv/insns/vfqbdot_alt_vv.h new file mode 100644 index 0000000..f1df781 --- /dev/null +++ b/riscv/insns/vfqbdot_alt_vv.h @@ -0,0 +1,17 @@ +VI_VFP_BASE; +ZVBDOT_INIT(4); + +#define COMMA , + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVFQBDOT8F); + if (P.VU.altfmt) { + ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e5m2>); + } else { + ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e5m2>); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vfqbdot_vv.h b/riscv/insns/vfqbdot_vv.h new file mode 100644 index 0000000..fe3e652 --- /dev/null +++ b/riscv/insns/vfqbdot_vv.h @@ -0,0 +1,17 @@ +VI_VFP_BASE; +ZVBDOT_INIT(4); + +#define COMMA , + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVFQBDOT8F); + if (P.VU.altfmt) { + ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e4m3>); + } else { + ZVBDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e4m3>); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vfqldot_alt_vv.h b/riscv/insns/vfqldot_alt_vv.h new file mode 100644 index 0000000..ea18828 --- /dev/null +++ b/riscv/insns/vfqldot_alt_vv.h @@ -0,0 +1,17 @@ +VI_VFP_BASE; +ZVLDOT_INIT(4); + +#define COMMA , + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVFQLDOT8F); + if (P.VU.altfmt) { + ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e5m2>); + } else { + ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e5m2>); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vfqldot_vv.h b/riscv/insns/vfqldot_vv.h new file mode 100644 index 0000000..b03ec29 --- /dev/null +++ b/riscv/insns/vfqldot_vv.h @@ -0,0 
+1,17 @@ +VI_VFP_BASE; +ZVLDOT_INIT(4); + +#define COMMA , + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVFQLDOT8F); + if (P.VU.altfmt) { + ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e5m2 COMMA ofp8_e4m3>); + } else { + ZVLDOT_LOOP(uint8_t, uint8_t, float32_t, zvfqbdot8f_dot_acc<ofp8_e4m3 COMMA ofp8_e4m3>); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h index b283343..399a496 100644 --- a/riscv/insns/vfrdiv_vf.h +++ b/riscv/insns/vfrdiv_vf.h @@ -1,4 +1,6 @@ // vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] +VI_NON_ALTFMT_INSN + VI_VFP_VF_LOOP ({ vd = f16_div(rs1, vs2); diff --git a/riscv/insns/vfrec7_v.h b/riscv/insns/vfrec7_v.h index 69c026b..40bc33c 100644 --- a/riscv/insns/vfrec7_v.h +++ b/riscv/insns/vfrec7_v.h @@ -1,7 +1,7 @@ // vfclass.v vd, vs2, vm VI_VFP_V_LOOP ({ - vd = f16_recip7(vs2); + vd = P.VU.altfmt ? bf16_recip7(vs2) : f16_recip7(vs2); }, { vd = f32_recip7(vs2); diff --git a/riscv/insns/vfrsqrt7_v.h b/riscv/insns/vfrsqrt7_v.h index 2505639..8b3290e 100644 --- a/riscv/insns/vfrsqrt7_v.h +++ b/riscv/insns/vfrsqrt7_v.h @@ -1,7 +1,7 @@ // vfrsqrt7.v vd, vs2, vm VI_VFP_V_LOOP ({ - vd = f16_rsqrte7(vs2); + vd = P.VU.altfmt ? bf16_rsqrte7(vs2) : f16_rsqrte7(vs2); }, { vd = f32_rsqrte7(vs2); diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h index 7fb26a5..2ae5f7c 100644 --- a/riscv/insns/vfrsub_vf.h +++ b/riscv/insns/vfrsub_vf.h @@ -1,7 +1,7 @@ // vfsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ - vd = f16_sub(rs1, vs2); + vd = VFP_OP_16(sub, rs1, vs2); }, { vd = f32_sub(rs1, vs2); diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h index ce06185..c84361a 100644 --- a/riscv/insns/vfsgnj_vf.h +++ b/riscv/insns/vfsgnj_vf.h @@ -1,7 +1,8 @@ // vfsgnj vd, vs2, vs1 VI_VFP_VF_LOOP ({ - vd = fsgnj16(vs2.v, rs1.v, false, false); + vd = P.VU.altfmt ? 
bfsgnj16(vs2.v, rs1.v, false, false) + : fsgnj16(vs2.v, rs1.v, false, false); }, { vd = fsgnj32(vs2.v, rs1.v, false, false); diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h index 722cb29..a31c8ba 100644 --- a/riscv/insns/vfsgnj_vv.h +++ b/riscv/insns/vfsgnj_vv.h @@ -1,7 +1,8 @@ // vfsgnj VI_VFP_VV_LOOP ({ - vd = fsgnj16(vs2.v, vs1.v, false, false); + vd = P.VU.altfmt ? bfsgnj16(vs2.v, vs1.v, false, false) + : fsgnj16(vs2.v, vs1.v, false, false); }, { vd = fsgnj32(vs2.v, vs1.v, false, false); diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h index e489412..9313cf3 100644 --- a/riscv/insns/vfsgnjn_vf.h +++ b/riscv/insns/vfsgnjn_vf.h @@ -1,7 +1,8 @@ // vfsgnn VI_VFP_VF_LOOP ({ - vd = fsgnj16(vs2.v, rs1.v, true, false); + vd = P.VU.altfmt ? bfsgnj16(vs2.v, rs1.v, true, false) + : fsgnj16(vs2.v, rs1.v, true, false); }, { vd = fsgnj32(vs2.v, rs1.v, true, false); diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h index 1d91f69..3b6b270 100644 --- a/riscv/insns/vfsgnjn_vv.h +++ b/riscv/insns/vfsgnjn_vv.h @@ -1,7 +1,8 @@ // vfsgnn VI_VFP_VV_LOOP ({ - vd = fsgnj16(vs2.v, vs1.v, true, false); + vd = P.VU.altfmt ? bfsgnj16(vs2.v, vs1.v, true, false) + : fsgnj16(vs2.v, vs1.v, true, false); }, { vd = fsgnj32(vs2.v, vs1.v, true, false); diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h index 7be164c..081f797 100644 --- a/riscv/insns/vfsgnjx_vf.h +++ b/riscv/insns/vfsgnjx_vf.h @@ -1,7 +1,8 @@ // vfsgnx VI_VFP_VF_LOOP ({ - vd = fsgnj16(vs2.v, rs1.v, false, true); + vd = P.VU.altfmt ? bfsgnj16(vs2.v, rs1.v, false, true) + : fsgnj16(vs2.v, rs1.v, false, true); }, { vd = fsgnj32(vs2.v, rs1.v, false, true); diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h index b04b845..db05439 100644 --- a/riscv/insns/vfsgnjx_vv.h +++ b/riscv/insns/vfsgnjx_vv.h @@ -1,7 +1,8 @@ // vfsgnx VI_VFP_VV_LOOP ({ - vd = fsgnj16(vs2.v, vs1.v, false, true); + vd = P.VU.altfmt ? 
bfsgnj16(vs2.v, vs1.v, false, true) + : fsgnj16(vs2.v, vs1.v, false, true); }, { vd = fsgnj32(vs2.v, vs1.v, false, true); diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h index 40f3c18..e8374f4 100644 --- a/riscv/insns/vfslide1down_vf.h +++ b/riscv/insns/vfslide1down_vf.h @@ -23,7 +23,7 @@ if (i != vl - 1) { } else { switch (P.VU.vsew) { case e16: - P.VU.elt<float16_t>(rd_num, vl - 1, true) = FRS1_H; + P.VU.elt<float16_t>(rd_num, vl - 1, true) = P.VU.altfmt ? FRS1_BF : FRS1_H; break; case e32: P.VU.elt<float32_t>(rd_num, vl - 1, true) = FRS1_F; diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h index 4e4e499..7ca6bf3 100644 --- a/riscv/insns/vfslide1up_vf.h +++ b/riscv/insns/vfslide1up_vf.h @@ -23,7 +23,7 @@ if (i != 0) { } else { switch (P.VU.vsew) { case e16: - P.VU.elt<float16_t>(rd_num, 0, true) = FRS1_H; + P.VU.elt<float16_t>(rd_num, 0, true) = P.VU.altfmt ? FRS1_BF : FRS1_H; break; case e32: P.VU.elt<float32_t>(rd_num, 0, true) = FRS1_F; diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h index 86f0148..5a866e1 100644 --- a/riscv/insns/vfsqrt_v.h +++ b/riscv/insns/vfsqrt_v.h @@ -1,4 +1,6 @@ // vsqrt.v vd, vd2, vm +VI_NON_ALTFMT_INSN + VI_VFP_V_LOOP ({ vd = f16_sqrt(vs2); diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h index fc6877c..a011ae8 100644 --- a/riscv/insns/vfsub_vf.h +++ b/riscv/insns/vfsub_vf.h @@ -1,7 +1,9 @@ // vfsub.vf vd, vs2, rs1 +require_zvfbfa + VI_VFP_VF_LOOP ({ - vd = f16_sub(vs2, rs1); + vd = VFP_OP_16(sub, vs2, rs1); }, { vd = f32_sub(vs2, rs1); diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h index b0403f1..db4cd5e 100644 --- a/riscv/insns/vfsub_vv.h +++ b/riscv/insns/vfsub_vv.h @@ -1,7 +1,9 @@ // vfsub.vv vd, vs2, vs1 +require_zvfbfa + VI_VFP_VV_LOOP ({ - vd = f16_sub(vs2, vs1); + vd = VFP_OP_16(sub, vs2, vs1); }, { vd = f32_sub(vs2, vs1); diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h index 7255a50..9dd56a2 100644 --- 
a/riscv/insns/vfwadd_vv.h +++ b/riscv/insns/vfwadd_vv.h @@ -1,4 +1,7 @@ // vfwadd.vv vd, vs2, vs1 + +require_zvfbfa + VI_VFP_VV_LOOP_WIDE ({ vd = f32_add(vs2, vs1); diff --git a/riscv/insns/vfwbdot_vv.h b/riscv/insns/vfwbdot_vv.h new file mode 100644 index 0000000..b8d35a7 --- /dev/null +++ b/riscv/insns/vfwbdot_vv.h @@ -0,0 +1,15 @@ +VI_VFP_BASE; +ZVBDOT_INIT(2); + +switch (P.VU.vsew) { + case 16: { + if (P.VU.altfmt) { + require_extension(EXT_ZVFWBDOT16BF); + ZVBDOT_LOOP(uint16_t, uint16_t, float32_t, zvfwbdot16bf_dot_acc); + } else { + require(false); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h index 111a231..253b92d 100644 --- a/riscv/insns/vfwcvt_f_f_v.h +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -1,7 +1,7 @@ // vfwcvt.f.f.v vd, vs2, vm VI_VFP_WCVT_FP_TO_FP( - { vd = f16_to_f32(vs2); }, // BODY16 - { vd = f32_to_f64(vs2); }, // BODY32 - { require_extension(EXT_ZVFHMIN); }, // CHECK16 - { require_extension('D'); } // CHECK32 + { vd = P.VU.altfmt ? bf16_to_f32(vs2) : f16_to_f32(vs2); }, // BODY16 + { vd = f32_to_f64(vs2); }, // BODY32 + { require_zvfbfa_or_zvfhmin }, // CHECK16 + { require(p->get_isa().get_zvd()); } // CHECK32 ) diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h index c7678dc..76a0696 100644 --- a/riscv/insns/vfwcvt_f_x_v.h +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -1,10 +1,11 @@ // vfwcvt.f.x.v vd, vs2, vm + VI_VFP_WCVT_INT_TO_FP( - { vd = i32_to_f16(vs2); }, // BODY8 + { vd = P.VU.altfmt ? 
i32_to_bf16(vs2) : i32_to_f16(vs2); }, // BODY8 { vd = i32_to_f32(vs2); }, // BODY16 { vd = i32_to_f64(vs2); }, // BODY32 - { require(p->extension_enabled(EXT_ZVFH)); }, // CHECK8 - { require_extension('F'); }, // CHECK16 - { require_extension('D'); }, // CHECK32 + { require_zvfbfa_or_zvfh; }, // CHECK8 + { require(p->get_isa().get_zvf()); }, // CHECK64 + { require(p->get_isa().get_zvd()); }, // CHECK64 int // sign ) diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h index e3b7e9f..f322068 100644 --- a/riscv/insns/vfwcvt_f_xu_v.h +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -1,10 +1,11 @@ // vfwcvt.f.xu.v vd, vs2, vm + VI_VFP_WCVT_INT_TO_FP( - { vd = ui32_to_f16(vs2); }, // BODY8 + { vd = P.VU.altfmt ? ui32_to_bf16(vs2) : ui32_to_f16(vs2); }, // BODY8 { vd = ui32_to_f32(vs2); }, // BODY16 { vd = ui32_to_f64(vs2); }, // BODY32 - { require(p->extension_enabled(EXT_ZVFH)); }, // CHECK8 - { require_extension('F'); }, // CHECK16 - { require_extension('D'); }, // CHECK32 + { require_zvfbfa_or_zvfh; }, // CHECK8 + { require(p->get_isa().get_zvf()); }, // CHECK32 + { require(p->get_isa().get_zvd()); }, // CHECK64 uint // sign ) diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h index 9caf617..3ed454b 100644 --- a/riscv/insns/vfwcvt_rtz_x_f_v.h +++ b/riscv/insns/vfwcvt_rtz_x_f_v.h @@ -1,8 +1,10 @@ // vfwcvt.rtz.x.f.v vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_WCVT_FP_TO_INT( { vd = f16_to_i32(vs2, softfloat_round_minMag, true); }, // BODY16 { vd = f32_to_i64(vs2, softfloat_round_minMag, true); }, // BODY32 { require_extension(EXT_ZVFH); }, // CHECK16 - { require_extension('F'); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK32 int // sign ) diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h index a25d847..e0c737c 100644 --- a/riscv/insns/vfwcvt_rtz_xu_f_v.h +++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h @@ -1,8 +1,10 @@ // vfwcvt.rtz,xu.f.v vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_WCVT_FP_TO_INT( { vd = 
f16_to_ui32(vs2, softfloat_round_minMag, true); }, // BODY16 { vd = f32_to_ui64(vs2, softfloat_round_minMag, true); }, // BODY32 { require_extension(EXT_ZVFH); }, // CHECK16 - { require_extension('F'); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK32 uint // sign ) diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h index 2d536ad..b974c86 100644 --- a/riscv/insns/vfwcvt_x_f_v.h +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -1,8 +1,10 @@ // vfwcvt.x.f.v vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_WCVT_FP_TO_INT( { vd = f16_to_i32(vs2, softfloat_roundingMode, true); }, // BODY16 { vd = f32_to_i64(vs2, softfloat_roundingMode, true); }, // BODY32 { require_extension(EXT_ZVFH); }, // CHECK16 - { require_extension('F'); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK32 int // sign ) diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h index 37201f5..7cf0dab 100644 --- a/riscv/insns/vfwcvt_xu_f_v.h +++ b/riscv/insns/vfwcvt_xu_f_v.h @@ -1,8 +1,10 @@ // vfwcvt.xu.f.v vd, vs2, vm +VI_NON_ALTFMT_INSN + VI_VFP_WCVT_FP_TO_INT( { vd = f16_to_ui32(vs2, softfloat_roundingMode, true); }, // BODY16 { vd = f32_to_ui64(vs2, softfloat_roundingMode, true); }, // BODY32 { require_extension(EXT_ZVFH); }, // CHECK16 - { require_extension('F'); }, // CHECK32 + { require(p->get_isa().get_zvf()); }, // CHECK32 uint // sign ) diff --git a/riscv/insns/vfwcvtbf16_f_f_v.h b/riscv/insns/vfwcvtbf16_f_f_v.h index ee9a59c..7a4dca4 100644 --- a/riscv/insns/vfwcvtbf16_f_f_v.h +++ b/riscv/insns/vfwcvtbf16_f_f_v.h @@ -1,5 +1,7 @@ // vfwcvtbf16.f.f.v vd, vs2, vm -VI_VFP_WCVT_FP_TO_BF16( - { vd = bf16_to_f32(vs2); }, // BODY16 - { require_extension(EXT_ZVFBFMIN); } // CHECK16 +VI_VFP_WCVT_OFP8_BF16_FP( + { vd = P.VU.altfmt ? 
e5m2_to_bf16(vs2) : e4m3_to_bf16(vs2); }, // BODY8 + { vd = bf16_to_f32(vs2); }, // BODY16 + { require(p->extension_enabled(EXT_ZVFOFP8MIN)); }, // CHECK8 + { require_extension(EXT_ZVFBFMIN); } // CHECK16 ) diff --git a/riscv/insns/vfwldot_vv.h b/riscv/insns/vfwldot_vv.h new file mode 100644 index 0000000..63a4e47 --- /dev/null +++ b/riscv/insns/vfwldot_vv.h @@ -0,0 +1,15 @@ +VI_VFP_BASE; +ZVLDOT_INIT(2); + +switch (P.VU.vsew) { + case 16: { + if (P.VU.altfmt) { + require_extension(EXT_ZVFWLDOT16BF); + ZVLDOT_LOOP(uint16_t, uint16_t, float32_t, zvfwbdot16bf_dot_acc); + } else { + require(false); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h index bcbfe74..728678c 100644 --- a/riscv/insns/vghsh_vv.h +++ b/riscv/insns/vghsh_vv.h @@ -2,9 +2,13 @@ #include "zvk_ext_macros.h" +const uint32_t EGS = 4; + require_zvkg; require(P.VU.vsew == 32); require_egw_fits(128); +require(P.VU.vl->read() % EGS == 0); +VI_CHECK_SSS(true) VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h index 820b396..0d223e8 100644 --- a/riscv/insns/vgmul_vv.h +++ b/riscv/insns/vgmul_vv.h @@ -2,9 +2,13 @@ #include "zvk_ext_macros.h" +const uint32_t EGS = 4; + require_zvkg; require(P.VU.vsew == 32); require_egw_fits(128); +require(P.VU.vl->read() % EGS == 0); +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index 49c804c..00155db 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -21,23 +21,22 @@ for (reg_t i = 0; i < vl; ++i) { } } - bool use_ori = (insn.v_vm() == 0) && !do_mask; + // Bypass masked-off elements + if ((insn.v_vm() == 0) && !do_mask) + continue; + switch (sew) { case e8: - P.VU.elt<uint8_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint8_t>(rd_num, i) : cnt; + P.VU.elt<uint8_t>(rd_num, i, true) = cnt; break; case e16: - P.VU.elt<uint16_t>(rd_num, i, true) = use_ori ? 
- P.VU.elt<uint16_t>(rd_num, i) : cnt; + P.VU.elt<uint16_t>(rd_num, i, true) = cnt; break; case e32: - P.VU.elt<uint32_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint32_t>(rd_num, i) : cnt; + P.VU.elt<uint32_t>(rd_num, i, true) = cnt; break; default: - P.VU.elt<uint64_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint64_t>(rd_num, i) : cnt; + P.VU.elt<uint64_t>(rd_num, i, true) = cnt; break; } diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h index e9a87cf..49129f7 100644 --- a/riscv/insns/vmandn_mm.h +++ b/riscv/insns/vmandn_mm.h @@ -1,2 +1,2 @@ // vmandn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 & ~vs1); +VI_LOOP_MASK(vs2 & !vs1); diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index a4d7c50..adb5d0a 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -1,7 +1,7 @@ // vmfeq.vf vd, vs2, fs1 VI_VFP_VF_LOOP_CMP ({ - res = f16_eq(vs2, rs1); + res = VFP_OP_16(eq, vs2, rs1); }, { res = f32_eq(vs2, rs1); diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h index b08ce98..97b7a0d 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -1,7 +1,9 @@ // vmfeq.vv vd, vs2, vs1 +require_zvfbfa + VI_VFP_VV_LOOP_CMP ({ - res = f16_eq(vs2, vs1); + res = VFP_OP_16(eq, vs2, vs1); }, { res = f32_eq(vs2, vs1); diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index ab4df5c..eb4bdce 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,7 +1,7 @@ // vmfge.vf vd, vs2, rs1 VI_VFP_VF_LOOP_CMP ({ - res = f16_le(rs1, vs2); + res = VFP_OP_16(le, rs1, vs2); }, { res = f32_le(rs1, vs2); diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index dcc3ea3..bfcf251 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,7 +1,7 @@ // vmfgt.vf vd, vs2, rs1 VI_VFP_VF_LOOP_CMP ({ - res = f16_lt(rs1, vs2); + res = VFP_OP_16(lt, rs1, vs2); }, { res = f32_lt(rs1, vs2); diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index a942705..9415516 100644 --- a/riscv/insns/vmfle_vf.h +++ 
b/riscv/insns/vmfle_vf.h @@ -1,7 +1,7 @@ // vmfle.vf vd, vs2, rs1 VI_VFP_VF_LOOP_CMP ({ - res = f16_le(vs2, rs1); + res = VFP_OP_16(le, vs2, rs1); }, { res = f32_le(vs2, rs1); diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index dd6f81d..5fe2d44 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,7 +1,9 @@ // vmfle.vv vd, vs2, rs1 +require_zvfbfa + VI_VFP_VV_LOOP_CMP ({ - res = f16_le(vs2, vs1); + res = VFP_OP_16(le, vs2, vs1); }, { res = f32_le(vs2, vs1); diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index 110dbd1..62173bb 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,7 +1,7 @@ // vmflt.vf vd, vs2, rs1 VI_VFP_VF_LOOP_CMP ({ - res = f16_lt(vs2, rs1); + res = VFP_OP_16(lt, vs2, rs1); }, { res = f32_lt(vs2, rs1); diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index 35f8d70..b019b9c 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,7 +1,9 @@ // vmflt.vv vd, vs2, vs1 +require_zvfbfa + VI_VFP_VV_LOOP_CMP ({ - res = f16_lt(vs2, vs1); + res = VFP_OP_16(lt, vs2, vs1); }, { res = f32_lt(vs2, vs1); diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index 1b61d57..74b788f 100644 --- a/riscv/insns/vmfne_vf.h +++ b/riscv/insns/vmfne_vf.h @@ -1,7 +1,7 @@ // vmfne.vf vd, vs2, rs1 VI_VFP_VF_LOOP_CMP ({ - res = !f16_eq(vs2, rs1); + res = !VFP_OP_16(eq, vs2, rs1); }, { res = !f32_eq(vs2, rs1); diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index 4447c3c..017206c 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -1,7 +1,9 @@ // vmfne.vv vd, vs2, rs1 +require_zvfbfa + VI_VFP_VV_LOOP_CMP ({ - res = !f16_eq(vs2, vs1); + res = !VFP_OP_16(eq, vs2, vs1); }, { res = !f32_eq(vs2, vs1); diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h index 5a3ab09..4659e2f 100644 --- a/riscv/insns/vmnand_mm.h +++ b/riscv/insns/vmnand_mm.h @@ -1,2 +1,2 @@ // vmnand.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 & vs1)); +VI_LOOP_MASK(!(vs2 & vs1)); diff --git 
a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h index ab93378..37327c0 100644 --- a/riscv/insns/vmnor_mm.h +++ b/riscv/insns/vmnor_mm.h @@ -1,2 +1,2 @@ // vmnor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 | vs1)); +VI_LOOP_MASK(!(vs2 | vs1)); diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h index 23026f5..71acc05 100644 --- a/riscv/insns/vmorn_mm.h +++ b/riscv/insns/vmorn_mm.h @@ -1,2 +1,2 @@ // vmorn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 | ~vs1); +VI_LOOP_MASK(vs2 | !vs1); diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h index e861a33..273d3e8 100644 --- a/riscv/insns/vmulh_vv.h +++ b/riscv/insns/vmulh_vv.h @@ -1,4 +1,6 @@ // vmulh vd, vs2, vs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VV_LOOP ({ vd = ((int128_t)vs2 * vs1) >> sew; diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h index b6b5503..aaf591c 100644 --- a/riscv/insns/vmulh_vx.h +++ b/riscv/insns/vmulh_vx.h @@ -1,4 +1,6 @@ // vmulh vd, vs2, rs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VX_LOOP ({ vd = ((int128_t)vs2 * rs1) >> sew; diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h index e1c0ba6..3903d52 100644 --- a/riscv/insns/vmulhsu_vv.h +++ b/riscv/insns/vmulhsu_vv.h @@ -1,4 +1,6 @@ // vmulhsu.vv vd, vs2, vs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VV_SU_LOOP({ vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew; }) diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h index 4619ea8..b8210bc 100644 --- a/riscv/insns/vmulhsu_vx.h +++ b/riscv/insns/vmulhsu_vx.h @@ -1,4 +1,6 @@ // vmulhsu.vx vd, vs2, rs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VX_SU_LOOP({ vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew; }) diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h index 0ff488c..5e44aec 100644 --- a/riscv/insns/vmulhu_vv.h +++ b/riscv/insns/vmulhu_vv.h @@ -1,4 +1,6 @@ // vmulhu vd, vs2, vs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VV_ULOOP ({ vd = 
((uint128_t)vs2 * vs1) >> sew; diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h index 672ad32..35e6ed6 100644 --- a/riscv/insns/vmulhu_vx.h +++ b/riscv/insns/vmulhu_vx.h @@ -1,4 +1,6 @@ // vmulhu vd ,vs2, rs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VX_ULOOP ({ vd = ((uint128_t)vs2 * rs1) >> sew; diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h index 0736d5b..8db61c2 100644 --- a/riscv/insns/vmxnor_mm.h +++ b/riscv/insns/vmxnor_mm.h @@ -1,2 +1,2 @@ // vmnxor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 ^ vs1)); +VI_LOOP_MASK(!(vs2 ^ vs1)); diff --git a/riscv/insns/vqbdots_vv.h b/riscv/insns/vqbdots_vv.h new file mode 100644 index 0000000..55c3dd2 --- /dev/null +++ b/riscv/insns/vqbdots_vv.h @@ -0,0 +1,23 @@ +ZVBDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQBDOT8I); + if (P.VU.altfmt) { + ZVBDOT_SIMPLE_LOOP(int8_t, int8_t, uint32_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint8_t, int8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQBDOT16I); + if (P.VU.altfmt) { + ZVBDOT_SIMPLE_LOOP(int16_t, int16_t, uint64_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint16_t, int16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vqbdotu_vv.h b/riscv/insns/vqbdotu_vv.h new file mode 100644 index 0000000..a73d568 --- /dev/null +++ b/riscv/insns/vqbdotu_vv.h @@ -0,0 +1,23 @@ +ZVBDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQBDOT8I); + if (P.VU.altfmt) { + ZVBDOT_SIMPLE_LOOP(int8_t, uint8_t, uint32_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint8_t, uint8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQBDOT16I); + if (P.VU.altfmt) { + ZVBDOT_SIMPLE_LOOP(int16_t, uint16_t, uint64_t); + } else { + ZVBDOT_SIMPLE_LOOP(uint16_t, uint16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vqldots_vv.h b/riscv/insns/vqldots_vv.h new file mode 100644 index 0000000..ce6376a --- /dev/null +++ 
b/riscv/insns/vqldots_vv.h @@ -0,0 +1,23 @@ +ZVLDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQLDOT8I); + if (P.VU.altfmt) { + ZVLDOT_SIMPLE_LOOP(int8_t, int8_t, uint32_t); + } else { + ZVLDOT_SIMPLE_LOOP(uint8_t, int8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQLDOT16I); + if (P.VU.altfmt) { + ZVLDOT_SIMPLE_LOOP(int16_t, int16_t, uint64_t); + } else { + ZVLDOT_SIMPLE_LOOP(uint16_t, int16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vqldotu_vv.h b/riscv/insns/vqldotu_vv.h new file mode 100644 index 0000000..2b674b1 --- /dev/null +++ b/riscv/insns/vqldotu_vv.h @@ -0,0 +1,23 @@ +ZVLDOT_INIT(4); + +switch (P.VU.vsew) { + case 8: { + require_extension(EXT_ZVQLDOT8I); + if (P.VU.altfmt) { + ZVLDOT_SIMPLE_LOOP(int8_t, uint8_t, uint32_t); + } else { + ZVLDOT_SIMPLE_LOOP(uint8_t, uint8_t, uint32_t); + } + break; + } + case 16: { + require_extension(EXT_ZVQLDOT16I); + if (P.VU.altfmt) { + ZVLDOT_SIMPLE_LOOP(int16_t, uint16_t, uint64_t); + } else { + ZVLDOT_SIMPLE_LOOP(uint16_t, uint16_t, uint64_t); + } + break; + } + default: require(false); +} diff --git a/riscv/insns/vrev8_v.h b/riscv/insns/vrev8_v.h index f26c5a0..e39c5c0 100644 --- a/riscv/insns/vrev8_v.h +++ b/riscv/insns/vrev8_v.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; VI_V_ULOOP ({ diff --git a/riscv/insns/vrol_vv.h b/riscv/insns/vrol_vv.h index fb2e483..a2ac832 100644 --- a/riscv/insns/vrol_vv.h +++ b/riscv/insns/vrol_vv.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; // 'mask' selects the low log2(vsew) bits of the shift amount, // to limit the maximum shift to "vsew - 1" bits. 
diff --git a/riscv/insns/vrol_vx.h b/riscv/insns/vrol_vx.h index b0c89a2..8e4b41b 100644 --- a/riscv/insns/vrol_vx.h +++ b/riscv/insns/vrol_vx.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; // 'mask' selects the low log2(vsew) bits of the shift amount, // to limit the maximum shift to "vsew - 1" bits. diff --git a/riscv/insns/vror_vi.h b/riscv/insns/vror_vi.h index 1269c3d..6ae9fcd 100644 --- a/riscv/insns/vror_vi.h +++ b/riscv/insns/vror_vi.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; // 'mask' selects the low log2(vsew) bits of the shift amount, // to limit the maximum shift to "vsew - 1" bits. diff --git a/riscv/insns/vror_vv.h b/riscv/insns/vror_vv.h index c649c6d..276d7ec 100644 --- a/riscv/insns/vror_vv.h +++ b/riscv/insns/vror_vv.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; // 'mask' selects the low log2(vsew) bits of the shift amount, // to limit the maximum shift to "vsew - 1" bits. diff --git a/riscv/insns/vror_vx.h b/riscv/insns/vror_vx.h index 50c8e5c..98e1248 100644 --- a/riscv/insns/vror_vx.h +++ b/riscv/insns/vror_vx.h @@ -2,7 +2,7 @@ #include "zvk_ext_macros.h" -require_zvbb; +require_zvkb; // 'mask' selects the low log2(vsew) bits of the shift amount, // to limit the maximum shift to "vsew - 1" bits. 
diff --git a/riscv/insns/vsm3c_vi.h b/riscv/insns/vsm3c_vi.h index b3e8121..f9375a5 100644 --- a/riscv/insns/vsm3c_vi.h +++ b/riscv/insns/vsm3c_vi.h @@ -3,6 +3,7 @@ #include "zvksh_ext_macros.h" require_vsm3_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h index dd6cb52..388b79f 100644 --- a/riscv/insns/vsm3me_vv.h +++ b/riscv/insns/vsm3me_vv.h @@ -13,6 +13,7 @@ (ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6)) require_vsm3_constraints; +VI_CHECK_SSS(true) VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h index 8f52e68..dd6f67d 100644 --- a/riscv/insns/vsm4k_vi.h +++ b/riscv/insns/vsm4k_vi.h @@ -15,6 +15,7 @@ static constexpr uint32_t zvksed_ck[32] = { }; require_vsm4_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h index 44011eb..8db1050 100644 --- a/riscv/insns/vsm4r_vs.h +++ b/riscv/insns/vsm4r_vs.h @@ -3,8 +3,10 @@ #include "zvksed_ext_macros.h" require_vsm4_constraints; +require_align(insn.rd(), P.VU.vflmul); +require_vs2_align_eglmul(128); // No overlap of vd and vs2. 
-require(insn.rd() != insn.rs2()); +require_noover_eglmul(insn.rd(), insn.rs2()); VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h index 9a18cec..18afee6 100644 --- a/riscv/insns/vsm4r_vv.h +++ b/riscv/insns/vsm4r_vv.h @@ -2,7 +2,9 @@ #include "zvksed_ext_macros.h" + require_vsm4_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h index c1d0a57..bacd757 100644 --- a/riscv/insns/vsmul_vv.h +++ b/riscv/insns/vsmul_vv.h @@ -1,4 +1,6 @@ // vsmul.vv vd, vs2, vs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VV_LOOP ({ VRM xrm = P.VU.get_vround_mode(); diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h index c2e531c..62dfa7c 100644 --- a/riscv/insns/vsmul_vx.h +++ b/riscv/insns/vsmul_vx.h @@ -1,4 +1,6 @@ // vsmul.vx vd, vs2, rs1 +require(p->extension_enabled('V') || P.VU.vsew < e64); + VI_VX_LOOP ({ VRM xrm = P.VU.get_vround_mode(); diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h index 5c58927..4cf616d 100644 --- a/riscv/insns/vsra_vi.h +++ b/riscv/insns/vsra_vi.h @@ -1,5 +1,5 @@ // vsra.vi vd, vs2, zimm5 VI_VI_LOOP ({ - vd = vs2 >> (simm5 & (sew - 1) & 0x1f); + vd = vs2 >> (insn.v_zimm5() & (sew - 1)); }) diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h index cbdf47a..12f1240 100644 --- a/riscv/insns/vssra_vi.h +++ b/riscv/insns/vssra_vi.h @@ -1,8 +1,8 @@ -// vssra.vi vd, vs2, simm5 +// vssra.vi vd, vs2, zimm5 VI_VI_LOOP ({ VRM xrm = P.VU.get_vround_mode(); - int sh = simm5 & (sew - 1); + int sh = insn.v_zimm5() & (sew - 1); int128_t val = vs2; INT_ROUNDING(val, xrm, sh); diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h index 74fa37c..a2de49e 100644 --- a/riscv/insns/vssrl_vi.h +++ b/riscv/insns/vssrl_vi.h @@ -1,4 +1,4 @@ -// vssra.vi vd, vs2, simm5 +// vssra.vi vd, vs2, zimm5 VI_VI_ULOOP ({ VRM xrm = P.VU.get_vround_mode(); diff --git a/riscv/insns/vwsll_vi.h 
b/riscv/insns/vwsll_vi.h index 13b5eb4..866cd78 100644 --- a/riscv/insns/vwsll_vi.h +++ b/riscv/insns/vwsll_vi.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(false); VI_ZVK_VI_WIDENING_ULOOP({ const reg_t shift = zimm5 & ((2 * sew) - 1); diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h index 5a64c6c..180fe97 100644 --- a/riscv/insns/vwsll_vv.h +++ b/riscv/insns/vwsll_vv.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(true); VI_ZVK_VV_WIDENING_ULOOP({ const reg_t shift = (vs1 & ((2 * sew) - 1)); diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h index 5264e80..4137d39 100644 --- a/riscv/insns/vwsll_vx.h +++ b/riscv/insns/vwsll_vx.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(false); VI_ZVK_VX_WIDENING_ULOOP({ const reg_t shift = (rs1 & ((2 * sew) - 1)); diff --git a/riscv/insns/wrs_nto.h b/riscv/insns/wrs_nto.h index 710e670..7a4fe67 100644 --- a/riscv/insns/wrs_nto.h +++ b/riscv/insns/wrs_nto.h @@ -1,3 +1,5 @@ +require_extension(EXT_ZAWRS); + if (get_field(STATE.mstatus->read(), MSTATUS_TW)) { require_privilege(PRV_M); } else if (STATE.v) { diff --git a/riscv/insns/wrs_sto.h b/riscv/insns/wrs_sto.h index 4e71aa0..24d37a7 100644 --- a/riscv/insns/wrs_sto.h +++ b/riscv/insns/wrs_sto.h @@ -1 +1,3 @@ +require_extension(EXT_ZAWRS); + // WRS.STO stalls for a short duration diff --git a/riscv/interactive.cc b/riscv/interactive.cc index 9afc718..55406b8 100644 --- a/riscv/interactive.cc +++ b/riscv/interactive.cc @@ -72,6 +72,12 @@ processor_t *sim_t::get_core(const std::string& i) return get_core(p); } +static void do_write(int fd, const void* buf, size_t n) +{ + auto res = write(fd, buf, n); + (void) res; +} + static void clear_str(bool noncanonical, int fd, std::string target_str) { if (noncanonical) @@ -83,7 +89,7 @@ static void clear_str(bool noncanonical, int fd, std::string target_str) clear_motion += ' '; } clear_motion += '\r'; - (void) write(fd, 
clear_motion.c_str(), clear_motion.size() + 1); + do_write(fd, clear_motion.c_str(), clear_motion.size() + 1); } } @@ -96,7 +102,7 @@ static void send_key(bool noncanonical, int fd, keybuffer_t key_code, const int { key_motion += (char) ((key_code >> (i * BITS_PER_CHAR)) & 0xff); } - (void) write(fd, key_motion.c_str(), len); + do_write(fd, key_motion.c_str(), len); } } @@ -144,7 +150,7 @@ static std::string readline(int fd) cursor_pos--; s.erase(cursor_pos, 1); if (noncanonical) - (void) write(fd, s.c_str(), s.size() + 1); + do_write(fd, s.c_str(), s.size() + 1); // move cursor by left arrow key for (unsigned i = 0; i < s.size() - cursor_pos; i++) { send_key(noncanonical, fd, KEYCODE_LEFT, 3); @@ -176,7 +182,7 @@ static std::string readline(int fd) history_index = std::min(history_commands.size(), history_index + 1); s = history_commands[history_commands.size() - history_index]; if (noncanonical) - (void) write(fd, s.c_str(), s.size() + 1); + do_write(fd, s.c_str(), s.size() + 1); cursor_pos = s.size(); } key_buffer = 0; @@ -192,7 +198,7 @@ static std::string readline(int fd) s = history_commands[history_commands.size() - history_index]; } if (noncanonical) - (void) write(fd, s.c_str(), s.size() + 1); + do_write(fd, s.c_str(), s.size() + 1); cursor_pos = s.size(); } key_buffer = 0; @@ -221,7 +227,7 @@ static std::string readline(int fd) break; case KEYCODE_ENTER: if (noncanonical) - (void) write(fd, &ch, 1); + do_write(fd, &ch, 1); if (s.size() > initial_s_len && (history_commands.size() == 0 || s != history_commands[history_commands.size() - 1])) { history_commands.push_back(s); } @@ -236,7 +242,7 @@ static std::string readline(int fd) s.insert(cursor_pos, 1, ch); cursor_pos++; if (noncanonical) - (void) write(fd, s.c_str(), s.size() + 1); + do_write(fd, s.c_str(), s.size() + 1); // send left arrow key to move cursor for (unsigned i = 0; i < s.size() - cursor_pos; i++) { send_key(noncanonical, fd, KEYCODE_LEFT, 3); diff --git a/riscv/isa_parser.h 
b/riscv/isa_parser.h index ea64660..ff2867a 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -34,9 +34,13 @@ typedef enum { EXT_ZKR, EXT_ZMMUL, EXT_ZVFH, + EXT_ZVFBFA, EXT_ZVFHMIN, + EXT_ZVFOFP4MIN, + EXT_ZVFOFP8MIN, EXT_SMEPMP, EXT_SMSTATEEN, + EXT_SMPMPMT, EXT_SMRNMI, EXT_SSCOFPMF, EXT_SVADU, @@ -44,12 +48,16 @@ typedef enum { EXT_SVNAPOT, EXT_SVPBMT, EXT_SVINVAL, + EXT_SVUKTE, + EXT_SVRSW60T59B, EXT_ZDINX, EXT_ZFA, EXT_ZFBFMIN, EXT_ZFINX, EXT_ZHINX, EXT_ZHINXMIN, + EXT_ZIBI, + EXT_ZICCID, EXT_ZICBOM, EXT_ZICBOZ, EXT_ZICNTR, @@ -57,6 +65,7 @@ typedef enum { EXT_ZIHPM, EXT_ZILSD, EXT_ZVBB, + EXT_ZVKB, EXT_ZVBC, EXT_ZVFBFMIN, EXT_ZVFBFWMA, @@ -67,6 +76,15 @@ typedef enum { EXT_ZVKSED, EXT_ZVKSH, EXT_ZVQDOTQ, + EXT_ZVQBDOT8I, + EXT_ZVQBDOT16I, + EXT_ZVFQBDOT8F, + EXT_ZVFWBDOT16BF, + EXT_ZVFBDOT32F, + EXT_ZVQLDOT8I, + EXT_ZVQLDOT16I, + EXT_ZVFQLDOT8F, + EXT_ZVFWLDOT16BF, EXT_SSTC, EXT_ZAAMO, EXT_ZALRSC, @@ -90,16 +108,12 @@ typedef enum { EXT_SMMPM, EXT_SMNPM, EXT_SSNPM, + EXT_SMAIA, + EXT_SSAIA, NUM_ISA_EXTENSIONS } isa_extension_t; typedef enum { - IMPL_MMU_SV32, - IMPL_MMU_SV39, - IMPL_MMU_SV48, - IMPL_MMU_SV57, - IMPL_MMU_SBARE, - IMPL_MMU, IMPL_MMU_VMID, IMPL_MMU_ASID, } impl_extension_t; diff --git a/riscv/jtag_dtm.cc b/riscv/jtag_dtm.cc index 9ca38af..7f9cfe4 100644 --- a/riscv/jtag_dtm.cc +++ b/riscv/jtag_dtm.cc @@ -199,6 +199,6 @@ void jtag_dtm_t::update_dr() } D(fprintf(stderr, "dmi=0x%lx\n", dmi)); - rti_remaining = required_rti_cycles; + rti_remaining = op == DMI_OP_NOP ? 
0 : required_rti_cycles; } } diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 01017f6..6b4a571 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -6,9 +6,10 @@ #include "simif.h" #include "processor.h" #include "decode_macros.h" +#include "platform.h" -mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc) - : sim(sim), proc(proc), +mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz) + : sim(sim), proc(proc), blocksz(cache_blocksz), #ifdef RISCV_ENABLE_DUAL_ENDIAN target_big_endian(endianness == endianness_big), #endif @@ -38,6 +39,7 @@ void mmu_t::flush_tlb() memset(tlb_insn, -1, sizeof(tlb_insn)); memset(tlb_load, -1, sizeof(tlb_load)); memset(tlb_store, -1, sizeof(tlb_store)); + memset(pte_cache, -1, sizeof(pte_cache)); flush_icache(); } @@ -52,6 +54,16 @@ void throw_access_exception(bool virt, reg_t addr, access_type type) } } +[[noreturn]] void throw_page_fault_exception(bool virt, reg_t addr, access_type type) +{ + switch (type) { + case FETCH: throw trap_instruction_page_fault(virt, addr, 0, 0); + case LOAD: throw trap_load_page_fault(virt, addr, 0, 0); + case STORE: throw trap_store_page_fault(virt, addr, 0, 0); + default: abort(); + } +} + reg_t mmu_t::translate(mem_access_info_t access_info, reg_t len) { reg_t addr = access_info.transformed_vaddr; @@ -95,18 +107,29 @@ mmu_t::insn_parcel_t mmu_t::fetch_slow_path(reg_t vaddr) auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_insn, vaddr, TLB_FLAGS); auto access_info = generate_access_info(vaddr, FETCH, {}); - check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt); + + if (check_triggers_fetch) + check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt); if (!tlb_hit) { paddr = translate(access_info, sizeof(insn_parcel_t)); host_addr = (uintptr_t)sim->addr_to_mem(paddr); + if (proc->extension_enabled(EXT_ZICCID)) { + // Maintain exclusion with all store TLBs + for (auto [_, p2] : sim->get_harts()) + 
p2->mmu->flush_stlb_ppn(paddr >> PGSHIFT); + + tlb_insn_reverse_tags.insert(paddr >> PGSHIFT); + } + refill_tlb(vaddr, paddr, (char*)host_addr, FETCH); } auto res = perform_intrapage_fetch(vaddr, host_addr, paddr); - check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(res)); + if (check_triggers_fetch) + check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(res)); return res; } @@ -140,8 +163,8 @@ reg_t reg_from_bytes(size_t len, const uint8_t* bytes) bool mmu_t::mmio_ok(reg_t paddr, access_type UNUSED type) { // Disallow access to debug region when not in debug mode - static_assert(DEBUG_START == 0); - if (/* paddr >= DEBUG_START && */ paddr <= DEBUG_END && proc && !proc->state.debug_mode) + reg_t debug_start = DEBUG_START; // suppress -Wtype-limits + if (paddr >= debug_start && paddr - debug_start < DEBUG_SIZE && proc && !proc->state.debug_mode) return false; return true; @@ -229,16 +252,17 @@ void mmu_t::load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_ { reg_t vaddr = access_info.vaddr; auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_load, vaddr, TLB_FLAGS); - if (!tlb_hit || access_info.flags.is_special_access()) { + bool special = access_info.flags.is_special_access() && !access_info.flags.lr; + if (!tlb_hit || special) { paddr = translate(access_info, len); host_addr = (uintptr_t)sim->addr_to_mem(paddr); - if (!access_info.flags.is_special_access()) + if (!special) refill_tlb(vaddr, paddr, (char*)host_addr, LOAD); + } - if (access_info.flags.lr && !sim->reservable(paddr)) { - throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); - } + if (access_info.flags.lr && !sim->reservable(paddr)) { + throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); } perform_intrapage_load(vaddr, host_addr, paddr, len, bytes, access_info.flags); @@ -263,7 +287,9 @@ void mmu_t::load_slow_path(reg_t original_addr, 
reg_t len, uint8_t* bytes, xlate auto access_info = generate_access_info(original_addr, LOAD, xlate_flags); reg_t transformed_addr = access_info.transformed_vaddr; - check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt); + + if (check_triggers_load) + check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt); if ((transformed_addr & (len - 1)) == 0) { load_slow_path_intrapage(len, bytes, access_info); @@ -283,12 +309,14 @@ void mmu_t::load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate } } - while (len > sizeof(reg_t)) { - check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), bytes)); - len -= sizeof(reg_t); - bytes += sizeof(reg_t); + if (check_triggers_load) { + while (len > sizeof(reg_t)) { + check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(sizeof(reg_t), bytes)); + len -= sizeof(reg_t); + bytes += sizeof(reg_t); + } + check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(len, bytes)); } - check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(len, bytes)); if (proc && unlikely(proc->get_log_commits_enabled())) proc->state.log_mem_read.push_back(std::make_tuple(original_addr, 0, len)); @@ -315,6 +343,14 @@ void mmu_t::store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_acces paddr = translate(access_info, len); host_addr = (uintptr_t)sim->addr_to_mem(paddr); + if (proc && proc->extension_enabled(EXT_ZICCID)) { + // Maintain exclusion with all instruction TLBs + for (auto [_, p2] : sim->get_harts()) + p2->mmu->flush_itlb_ppn(paddr >> PGSHIFT); + + tlb_store_reverse_tags.insert(paddr >> PGSHIFT); + } + if (!access_info.flags.is_special_access()) refill_tlb(vaddr, paddr, (char*)host_addr, STORE); } @@ -340,7 +376,8 @@ void mmu_t::store_slow_path(reg_t original_addr, 
reg_t len, const uint8_t* bytes auto access_info = generate_access_info(original_addr, STORE, xlate_flags); reg_t transformed_addr = access_info.transformed_vaddr; - if (actually_store) { + + if (actually_store && check_triggers_store) { reg_t trig_len = len; const uint8_t* trig_bytes = bytes; while (trig_len > sizeof(reg_t)) { @@ -369,8 +406,47 @@ void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes store_slow_path_intrapage(len, bytes, access_info, actually_store); } - if (proc && unlikely(proc->get_log_commits_enabled())) - proc->state.log_mem_write.push_back(std::make_tuple(original_addr, reg_from_bytes(len, bytes), len)); + if (actually_store && proc && unlikely(proc->get_log_commits_enabled())) { + // amocas.q sends len == 16, reg_from_bytes only supports up to 8 + // bytes per conversion. Make multiple entries in the log + reg_t offset = 0; + const auto reg_size = sizeof(reg_t); + while (unlikely(len > reg_size)) { + proc->state.log_mem_write.push_back(std::make_tuple(original_addr + offset, reg_from_bytes(reg_size, bytes + offset), reg_size)); + offset += reg_size; + len -= reg_size; + } + proc->state.log_mem_write.push_back(std::make_tuple(original_addr + offset, reg_from_bytes(len, bytes + offset), len)); + } +} + +bool mmu_t::flush_tlb_ppn(reg_t ppn, dtlb_entry_t* tlb, reverse_tags_t& filter) +{ + if (!filter.contains(ppn)) + return false; + + filter.clear(); + + for (size_t i = 0; i < TLB_ENTRIES; i++) { + auto entry_ppn = tlb[i].data.target_addr >> PGSHIFT; + if (entry_ppn == ppn) + tlb[i].tag = -1; + else if (tlb[i].tag != (reg_t)-1) + filter.insert(entry_ppn); + } + + return true; +} + +void mmu_t::flush_stlb_ppn(reg_t ppn) +{ + flush_tlb_ppn(ppn, tlb_store, tlb_store_reverse_tags); +} + +void mmu_t::flush_itlb_ppn(reg_t ppn) +{ + if (flush_tlb_ppn(ppn, tlb_insn, tlb_insn_reverse_tags)) + flush_icache(); } tlb_entry_t mmu_t::refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type) @@ -414,12 +490,15 @@ bool 
mmu_t::pmp_ok(reg_t addr, reg_t len, access_type type, reg_t mode, bool hlv if (!proc || proc->n_pmp == 0) return true; + reg_t gran = reg_t(1) << proc->lg_pmp_granularity; + auto first_addr_aligned = addr & -gran; + auto last_addr_aligned = (addr + len - 1) & -gran; + for (size_t i = 0; i < proc->n_pmp; i++) { - // Check each 4-byte sector of the access + // Check each PMP-granularity sector of the access bool any_match = false; bool all_match = true; - for (reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) { - reg_t cur_addr = addr + offset; + for (reg_t cur_addr = first_addr_aligned; cur_addr <= last_addr_aligned; cur_addr += gran) { bool match = proc->state.pmpaddr[i]->match4(cur_addr); any_match |= match; all_match &= match; @@ -495,6 +574,8 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty if (pte & PTE_RSVD) { break; + } else if (!proc->extension_enabled(EXT_SVRSW60T59B) && (pte & PTE_SVRSW60T59B)) { + break; } else if (!proc->extension_enabled(EXT_SVNAPOT) && (pte & PTE_N)) { break; } else if (!pbmte && (pte & PTE_PBMT)) { @@ -523,7 +604,7 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty if ((pte & ad) != ad) { if (hade) { // set accessed and possibly dirty bits - pte_store(pte_paddr, pte | ad, gva, virt, type, vm.ptesize); + pte_store(pte_paddr, pte | ad, gva, virt, trap_type, vm.ptesize); } else { // take exception if access or possibly dirty bit is not set. 
break; @@ -549,6 +630,35 @@ reg_t mmu_t::s2xlate(reg_t gva, reg_t gpa, access_type type, access_type trap_ty } } +bool mmu_t::svukte_qualified(mem_access_info_t access_info) +{ + state_t* state = proc->get_state(); + + if (access_info.effective_priv != PRV_U) + return false; + + bool ukte = get_field(state->senvcfg->read(), SENVCFG_UKTE); + if (access_info.flags.forced_virt && state->prv == PRV_U) + ukte = get_field(state->hstatus->read(), HSTATUS_HUKTE); + + if (!ukte) + return false; + + reg_t mode_mask = proc->get_xlen() == 32 ? SATP32_MODE : SATP64_MODE; + if (get_field(proc->get_state()->satp->readvirt(access_info.effective_virt), mode_mask) == 0) + return false; + + return true; +} + +bool mmu_t::svukte_fault(reg_t addr, mem_access_info_t access_info) +{ + if (!svukte_qualified(access_info)) + return false; + + return addr >> (proc->get_xlen() - 1); +} + reg_t mmu_t::walk(mem_access_info_t access_info) { access_type type = access_info.type; @@ -571,6 +681,10 @@ reg_t mmu_t::walk(mem_access_info_t access_info) if (vm.levels == 0) return s2xlate(addr, addr & ((reg_t(2) << (proc->xlen-1))-1), type, type, virt, hlvx, false) & ~page_mask; // zero-extend from xlen + if (svukte_fault(addr, access_info)) { + throw_page_fault_exception(virt, addr, type); + } + bool s_mode = mode == PRV_S; bool sum = proc->state.sstatus->readvirt(virt) & MSTATUS_SUM; bool mxr = (proc->state.sstatus->readvirt(false) | proc->state.sstatus->readvirt(virt)) & MSTATUS_MXR; @@ -599,6 +713,8 @@ reg_t mmu_t::walk(mem_access_info_t access_info) if (pte & PTE_RSVD) { break; + } else if (!proc->extension_enabled(EXT_SVRSW60T59B) && (pte & PTE_SVRSW60T59B)) { + break; } else if (!proc->extension_enabled(EXT_SVNAPOT) && (pte & PTE_N)) { break; } else if (!pbmte && (pte & PTE_PBMT)) { @@ -661,12 +777,7 @@ reg_t mmu_t::walk(mem_access_info_t access_info) } } - switch (type) { - case FETCH: throw trap_instruction_page_fault(virt, addr, 0, 0); - case LOAD: throw trap_load_page_fault(virt, addr, 0, 0); 
- case STORE: throw trap_store_page_fault(virt, addr, 0, 0); - default: abort(); - } + throw_page_fault_exception(virt, addr, type); } void mmu_t::register_memtracer(memtracer_t* t) diff --git a/riscv/mmu.h b/riscv/mmu.h index 86f06ab..bd8bfd3 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -3,6 +3,7 @@ #ifndef _RISCV_MMU_H #define _RISCV_MMU_H +#include "bloom_filter.h" #include "decode.h" #include "trap.h" #include "common.h" @@ -18,7 +19,6 @@ // virtual memory configuration #define PGSHIFT 12 const reg_t PGSIZE = 1 << PGSHIFT; -#define MAX_PADDR_BITS 64 // observability hooks for load, store and fetch // intentionally empty not to cause runtime overhead @@ -43,7 +43,7 @@ struct insn_fetch_t struct icache_entry_t { reg_t tag; - struct icache_entry_t* next; + icache_entry_t* next; insn_fetch_t data; }; @@ -57,6 +57,11 @@ struct dtlb_entry_t { reg_t tag; }; +struct pte_cache_entry_t { + reg_t paddr; + reg_t pte; +}; + struct xlate_flags_t { const bool forced_virt : 1 {false}; const bool hlvx : 1 {false}; @@ -79,6 +84,7 @@ struct mem_access_info_t { }; void throw_access_exception(bool virt, reg_t addr, access_type type); +[[noreturn]] void throw_page_fault_exception(bool virt, reg_t addr, access_type type); // this class implements a processor's port into the virtual memory system. // an MMU and instruction cache are maintained for simulator performance. @@ -89,7 +95,7 @@ private: mem_access_info_t generate_access_info(reg_t addr, access_type type, xlate_flags_t xlate_flags); public: - mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc); + mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz); ~mmu_t(); template<typename T> @@ -129,7 +135,7 @@ public: T ss_load(reg_t addr) { if ((addr & (sizeof(T) - 1)) != 0) throw trap_store_access_fault((proc) ? 
proc->state.v : false, addr, 0, 0); - return load<T>(addr, {.forced_virt=false, .hlvx=false, .lr=false, .ss_access=true}); + return load<T>(addr, {.ss_access=true}); } template<typename T> @@ -156,7 +162,7 @@ public: void ss_store(reg_t addr, T val) { if ((addr & (sizeof(T) - 1)) != 0) throw trap_store_access_fault((proc) ? proc->state.v : false, addr, 0, 0); - store<T>(addr, val, {.forced_virt=false, .hlvx=false, .lr=false, .ss_access=true}); + store<T>(addr, val, {.ss_access=true}); } // AMO/Zicbom faults should be reported as store faults @@ -188,13 +194,9 @@ public: // for shadow stack amoswap template<typename T> T ssamoswap(reg_t addr, reg_t value) { - bool forced_virt = false; - bool hlvx = false; - bool lr = false; - bool ss_access = true; - store_slow_path(addr, sizeof(T), nullptr, {forced_virt, hlvx, lr, ss_access}, false, true); - auto data = load<T>(addr, {forced_virt, hlvx, lr, ss_access}); - store<T>(addr, value, {forced_virt, hlvx, lr, ss_access}); + store_slow_path(addr, sizeof(T), nullptr, {.ss_access=true}, false, true); + auto data = load<T>(addr, {.ss_access=true}); + store<T>(addr, value, {.ss_access=true}); return data; } @@ -209,28 +211,6 @@ public: }) } - void store_float128(reg_t addr, float128_t val) - { - if (unlikely(addr & (sizeof(float128_t)-1)) && !is_misaligned_enabled()) { - throw trap_store_address_misaligned((proc) ? proc->state.v : false, addr, 0, 0); - } - - store<uint64_t>(addr, val.v[0]); - store<uint64_t>(addr + 8, val.v[1]); - } - - float128_t load_float128(reg_t addr) - { - if (unlikely(addr & (sizeof(float128_t)-1)) && !is_misaligned_enabled()) { - throw trap_load_address_misaligned((proc) ? 
proc->state.v : false, addr, 0, 0); - } - - float128_t res; - res.v[0] = load<uint64_t>(addr); - res.v[1] = load<uint64_t>(addr + 8); - return res; - } - void cbo_zero(reg_t addr) { auto access_info = generate_access_info(addr, STORE, {}); reg_t transformed_addr = access_info.transformed_vaddr; @@ -272,7 +252,10 @@ public: store_slow_path(vaddr, size, nullptr, {}, false, true); } - reg_t paddr = translate(generate_access_info(vaddr, STORE, {}), 1); + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_store, vaddr); + if (!tlb_hit) + paddr = translate(generate_access_info(vaddr, STORE, {}), 1); + if (sim->reservable(paddr)) return load_reservation_address == paddr; else @@ -292,7 +275,7 @@ public: return have_reservation; } - static const reg_t ICACHE_ENTRIES = 1024; + static const reg_t ICACHE_ENTRIES = 4096; inline size_t icache_index(reg_t addr) { @@ -303,7 +286,7 @@ public: T ALWAYS_INLINE fetch_jump_table(reg_t addr) { T res = 0; for (size_t i = 0; i < sizeof(T) / sizeof(insn_parcel_t); i++) - res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t)); + res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t) * 8); // table accesses use data endianness, not instruction (little) endianness return target_big_endian ? 
to_be(res) : res; @@ -312,21 +295,11 @@ public: inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry) { insn_bits_t insn = fetch_insn_parcel(addr); + unsigned length = insn_length(insn); - int length = insn_length(insn); - - if (likely(length == 4)) { - insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; - } else if (length == 2) { - // entire instruction already fetched - } else if (length == 6) { - insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; - insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32; - } else { - static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t"); - insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; - insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32; - insn |= (insn_bits_t)fetch_insn_parcel(addr + 6) << 48; + for (unsigned pos = sizeof(insn_parcel_t); pos < length; pos += sizeof(insn_parcel_t)) { + insn |= (insn_bits_t)fetch_insn_parcel(addr + pos) << (8 * pos); + length = insn_length(insn); } insn_fetch_t fetch = {proc->decode_insn(insn), insn}; @@ -357,8 +330,7 @@ public: inline insn_fetch_t load_insn(reg_t addr) { - icache_entry_t entry; - return refill_icache(addr, &entry)->data; + return refill_icache(addr, &icache[icache_index(addr)])->data; } std::tuple<bool, uintptr_t, reg_t> ALWAYS_INLINE access_tlb(const dtlb_entry_t* tlb, reg_t vaddr, reg_t allowed_flags = 0, reg_t required_flags = 0) @@ -397,11 +369,6 @@ public: return target_big_endian? 
target_endian<T>::to_be(n) : target_endian<T>::to_le(n); } - void set_cache_blocksz(reg_t size) - { - blocksz = size; - } - private: simif_t* sim; processor_t* proc; @@ -424,6 +391,17 @@ private: dtlb_entry_t tlb_store[TLB_ENTRIES]; dtlb_entry_t tlb_insn[TLB_ENTRIES]; + static const reg_t PTE_CACHE_ENTRIES = 251; + pte_cache_entry_t pte_cache[PTE_CACHE_ENTRIES]; + + typedef bloom_filter_t<reg_t, simple_hash1, simple_hash2, TLB_ENTRIES * 16, 3> reverse_tags_t; + reverse_tags_t tlb_store_reverse_tags; + reverse_tags_t tlb_insn_reverse_tags; + + bool flush_tlb_ppn(reg_t ppn, dtlb_entry_t* tlb, reverse_tags_t& filter); + void flush_itlb_ppn(reg_t ppn); + void flush_stlb_ppn(reg_t ppn); + // finish translation on a TLB miss and update the TLB tlb_entry_t refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type); const char* fill_from_mmio(reg_t vaddr, reg_t paddr); @@ -453,6 +431,8 @@ private: check_triggers(operation, address, virt, address, data); } void check_triggers(triggers::operation_t operation, reg_t address, bool virt, reg_t tval, std::optional<reg_t> data); + bool svukte_qualified(mem_access_info_t access_info); + bool svukte_fault(reg_t addr, mem_access_info_t access_info); reg_t translate(mem_access_info_t access_info, reg_t len); reg_t pte_load(reg_t pte_paddr, reg_t addr, bool virt, access_type trap_type, size_t ptesize) { @@ -471,6 +451,9 @@ private: template<typename T> inline reg_t pte_load(reg_t pte_paddr, reg_t addr, bool virt, access_type trap_type) { + if (auto [hit, pte] = pte_cache_access(pte_paddr); hit) + return pte; + const size_t ptesize = sizeof(T); if (!pmp_ok(pte_paddr, ptesize, LOAD, PRV_S, false)) @@ -483,7 +466,10 @@ private: } else if (!mmio_load(pte_paddr, ptesize, (uint8_t*)&target_pte)) { throw_access_exception(virt, addr, trap_type); } - return from_target(target_pte); + + auto res = from_target(target_pte); + pte_cache_insert(pte_paddr, res); + return res; } template<typename T> inline void pte_store(reg_t pte_paddr, 
reg_t new_pte, reg_t addr, bool virt, access_type trap_type) @@ -500,6 +486,20 @@ private: } else if (!mmio_store(pte_paddr, ptesize, (uint8_t*)&target_pte)) { throw_access_exception(virt, addr, trap_type); } + + pte_cache_insert(pte_paddr, new_pte); + } + + std::tuple<bool, reg_t> pte_cache_access(reg_t key) + { + auto e = pte_cache[key % PTE_CACHE_ENTRIES]; + return std::make_tuple(e.paddr == key, e.pte); + } + + void pte_cache_insert(reg_t key, reg_t value) + { + if (value & PTE_V) + pte_cache[key % PTE_CACHE_ENTRIES] = {key, value}; } inline insn_parcel_t fetch_insn_parcel(reg_t addr) { @@ -513,7 +513,7 @@ private: { return proc != nullptr && !(proc->state.mnstatus && !get_field(proc->state.mnstatus->read(), MNSTATUS_NMIE)) - && !proc->state.debug_mode + && (!proc->state.debug_mode || get_field(proc->state.dcsr->read(), DCSR_MPRVEN)) && get_field(proc->state.mstatus->read(), MSTATUS_MPRV); } diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index 15e0873..4ae9dbe 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -93,8 +93,8 @@ void ns16550_t::update_interrupt(void) uint8_t interrupts = 0; /* Handle clear rx */ - if (lcr & UART_FCR_CLEAR_RCVR) { - lcr &= ~UART_FCR_CLEAR_RCVR; + if (fcr & UART_FCR_CLEAR_RCVR) { + fcr &= ~UART_FCR_CLEAR_RCVR; while (!rx_queue.empty()) { rx_queue.pop(); } @@ -102,8 +102,8 @@ void ns16550_t::update_interrupt(void) } /* Handle clear tx */ - if (lcr & UART_FCR_CLEAR_XMIT) { - lcr &= ~UART_FCR_CLEAR_XMIT; + if (fcr & UART_FCR_CLEAR_XMIT) { + fcr &= ~UART_FCR_CLEAR_XMIT; lsr |= UART_LSR_TEMT | UART_LSR_THRE; } @@ -361,4 +361,4 @@ ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base } } -REGISTER_DEVICE(ns16550, ns16550_parse_from_fdt, ns16550_generate_dts) +REGISTER_BUILTIN_DEVICE(ns16550, ns16550_parse_from_fdt, ns16550_generate_dts) diff --git a/riscv/opcodes.h b/riscv/opcodes.h index 065934a..2ca7332 100644 --- a/riscv/opcodes.h +++ b/riscv/opcodes.h @@ -130,6 +130,16 @@ static uint32_t csrrs(unsigned 
int rd, unsigned int rs1, unsigned int csr) { return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRS; } +static uint32_t csrrc(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused)); +static uint32_t csrrc(unsigned int rd, unsigned int rs1, unsigned int csr) { + return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRC; +} + +static uint32_t csrrw(unsigned int rd, unsigned int rs1, unsigned int csr) __attribute__ ((unused)); +static uint32_t csrrw(unsigned int rd, unsigned int rs1, unsigned int csr) { + return (csr << 20) | (rs1 << 15) | (rd << 7) | MATCH_CSRRW; +} + static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) __attribute__ ((unused)); static uint32_t fsw(unsigned int src, unsigned int base, uint16_t offset) { diff --git a/riscv/platform.h b/riscv/platform.h index c8a5bf4..5b794da 100644 --- a/riscv/platform.h +++ b/riscv/platform.h @@ -19,5 +19,7 @@ #define NS16550_INTERRUPT_ID 1 #define EXT_IO_BASE 0x40000000 #define DRAM_BASE 0x80000000 +#define DEBUG_START 0x0 +#define DEBUG_SIZE 0x1000 #endif diff --git a/riscv/plic.cc b/riscv/plic.cc index b6d204b..0310538 100644 --- a/riscv/plic.cc +++ b/riscv/plic.cc @@ -436,4 +436,4 @@ plic_t* plic_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, cons return nullptr; } -REGISTER_DEVICE(plic, plic_parse_from_fdt, plic_generate_dts) +REGISTER_BUILTIN_DEVICE(plic, plic_parse_from_fdt, plic_generate_dts) diff --git a/riscv/processor.cc b/riscv/processor.cc index 7f2603a..80a47d9 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -34,7 +34,8 @@ processor_t::processor_t(const char* isa_str, const char* priv_str, const cfg_t *cfg, simif_t* sim, uint32_t id, bool halt_on_reset, FILE* log_file, std::ostream& sout_) -: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), sim(sim), id(id), xlen(0), +: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), + sim(sim), id(id), xlen(isa.get_max_xlen()), 
histogram_enabled(false), log_commits_enabled(false), log_file(log_file), sout_(sout_.rdbuf()), halt_on_reset(halt_on_reset), in_wfi(false), check_triggers_icount(false), @@ -61,25 +62,22 @@ processor_t::processor_t(const char* isa_str, const char* priv_str, VU.vlenb = isa.get_vlen() / 8; VU.vstart_alu = 0; - register_base_instructions(); - mmu = new mmu_t(sim, cfg->endianness, this); - - disassembler = new disassembler_t(&isa); - for (auto e : isa.get_extensions()) - register_extension(find_extension(e.c_str())()); + mmu = new mmu_t(sim, cfg->endianness, this, cfg->cache_blocksz); set_pmp_granularity(cfg->pmpgranularity); set_pmp_num(cfg->pmpregions); - if (isa.get_max_xlen() == 32) - set_mmu_capability(IMPL_MMU_SV32); - else if (isa.get_max_xlen() == 64) - set_mmu_capability(IMPL_MMU_SV57); - + set_max_vaddr_bits(0); set_impl(IMPL_MMU_ASID, true); set_impl(IMPL_MMU_VMID, true); reset(); + + register_base_instructions(); + + disassembler = new disassembler_t(&isa); + for (auto e : isa.get_extensions()) + register_extension(find_extension(e.c_str())()); } processor_t::~processor_t() @@ -145,6 +143,7 @@ void processor_t::enable_log_commits() { log_commits_enabled = true; mmu->flush_tlb(); // the TLB caches this setting + build_opcode_map(); } void processor_t::reset() @@ -214,37 +213,75 @@ void processor_t::set_pmp_granularity(reg_t gran) lg_pmp_granularity = ctz(gran); } -void processor_t::set_mmu_capability(int cap) +void processor_t::set_max_vaddr_bits(unsigned n) { - switch (cap) { - case IMPL_MMU_SV32: - set_impl(IMPL_MMU_SV32, true); - set_impl(IMPL_MMU, true); + switch (n) { + case 0: break; - case IMPL_MMU_SV57: - set_impl(IMPL_MMU_SV57, true); - [[fallthrough]]; - case IMPL_MMU_SV48: - set_impl(IMPL_MMU_SV48, true); - [[fallthrough]]; - case IMPL_MMU_SV39: - set_impl(IMPL_MMU_SV39, true); - set_impl(IMPL_MMU, true); + case 32: + if (isa.get_max_xlen() != 32) + abort(); break; - default: - set_impl(IMPL_MMU_SV32, false); - set_impl(IMPL_MMU_SV39, false); - 
set_impl(IMPL_MMU_SV48, false); - set_impl(IMPL_MMU_SV57, false); - set_impl(IMPL_MMU, false); + case 39: + case 48: + case 57: + if (isa.get_max_xlen() != 64) + abort(); break; + default: + abort(); } + + max_vaddr_bits = n; +} + +reg_t processor_t::select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const +{ + // nonstandard interrupts have highest priority + if (enabled_interrupts >> (IRQ_LCOF + 1)) + enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1); + // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI + else if (enabled_interrupts & MIP_MEIP) + enabled_interrupts = MIP_MEIP; + else if (enabled_interrupts & MIP_MSIP) + enabled_interrupts = MIP_MSIP; + else if (enabled_interrupts & MIP_MTIP) + enabled_interrupts = MIP_MTIP; + else if (enabled_interrupts & MIP_SEIP) + enabled_interrupts = MIP_SEIP; + else if (enabled_interrupts & MIP_SSIP) + enabled_interrupts = MIP_SSIP; + else if (enabled_interrupts & MIP_STIP) + enabled_interrupts = MIP_STIP; + else if (enabled_interrupts & MIP_LCOFIP) + enabled_interrupts = MIP_LCOFIP; + else if (enabled_interrupts & MIP_VSEIP) + enabled_interrupts = MIP_VSEIP; + else if (enabled_interrupts & MIP_VSSIP) + enabled_interrupts = MIP_VSSIP; + else if (enabled_interrupts & MIP_VSTIP) + enabled_interrupts = MIP_VSTIP; + + return enabled_interrupts; } void processor_t::take_interrupt(reg_t pending_interrupts) { + reg_t s_pending_interrupts = 0; + reg_t vstopi = 0; + reg_t vs_pending_interrupt = 0; + + if (extension_enabled_const(EXT_SSAIA)) { + s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read(); + vstopi = state.vstopi->read(); + // Legacy VS interrupts (VSEIP/VSTIP/VSSIP) come in through pending_interrupts but are shifted + // down 1 in vstopi. AIA-extended and VTI are not shifted. Clear S bits (VS shifted down by 1). + vs_pending_interrupt = vstopi ? 
(reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0; + vs_pending_interrupt &= ~MIP_S_MASK; + } + // Do nothing if no pending interrupts - if (!pending_interrupts) { + if (!pending_interrupts && !s_pending_interrupts && !vs_pending_interrupt) { return; } @@ -260,46 +297,20 @@ void processor_t::take_interrupt(reg_t pending_interrupts) const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read(); const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE); const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie); - enabled_interrupts = pending_interrupts & deleg_to_hs & -hs_enabled; + enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled; if (state.v && enabled_interrupts == 0) { // VS-ints have least priority and can only be taken with virt enabled const reg_t deleg_to_vs = state.hideleg->read(); const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie); - enabled_interrupts = pending_interrupts & deleg_to_vs & -vs_enabled; + enabled_interrupts = ((pending_interrupts & deleg_to_vs) | vs_pending_interrupt) & -vs_enabled; } } const bool nmie = !(state.mnstatus && !get_field(state.mnstatus->read(), MNSTATUS_NMIE)); if (!state.debug_mode && nmie && enabled_interrupts) { - // nonstandard interrupts have highest priority - if (enabled_interrupts >> (IRQ_LCOF + 1)) - enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1); - // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI - else if (enabled_interrupts & MIP_MEIP) - enabled_interrupts = MIP_MEIP; - else if (enabled_interrupts & MIP_MSIP) - enabled_interrupts = MIP_MSIP; - else if (enabled_interrupts & MIP_MTIP) - enabled_interrupts = MIP_MTIP; - else if (enabled_interrupts & MIP_SEIP) - enabled_interrupts = MIP_SEIP; - else if (enabled_interrupts & MIP_SSIP) - enabled_interrupts = MIP_SSIP; - else if (enabled_interrupts & MIP_STIP) - enabled_interrupts = MIP_STIP; - 
else if (enabled_interrupts & MIP_LCOFIP) - enabled_interrupts = MIP_LCOFIP; - else if (enabled_interrupts & MIP_VSEIP) - enabled_interrupts = MIP_VSEIP; - else if (enabled_interrupts & MIP_VSSIP) - enabled_interrupts = MIP_VSSIP; - else if (enabled_interrupts & MIP_VSTIP) - enabled_interrupts = MIP_VSTIP; - else - abort(); - + reg_t selected_interrupt = select_an_interrupt_with_default_priority(enabled_interrupts); if (check_triggers_icount) TM.detect_icount_match(); - throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(enabled_interrupts)); + throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(selected_interrupt)); } } @@ -327,7 +338,7 @@ void processor_t::set_privilege(reg_t prv, bool virt) state.v_changed = state.v != state.prev_v; } -const char* processor_t::get_privilege_string() +const char* processor_t::get_privilege_string() const { if (state.debug_mode) return "D"; @@ -403,7 +414,7 @@ void processor_t::take_trap(trap_t& t, reg_t epc) bool supv_double_trap = false; if (interrupt) { vsdeleg = (curr_virt && state.prv <= PRV_S) ? state.hideleg->read() : 0; - hsdeleg = (state.prv <= PRV_S) ? state.mideleg->read() : 0; + hsdeleg = (state.prv <= PRV_S) ? (state.mideleg->read() | state.nonvirtual_sip->read()) : 0; bit &= ~((reg_t)1 << (max_xlen - 1)); } else { vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.medeleg->read() & state.hedeleg->read()) : 0; @@ -420,9 +431,17 @@ void processor_t::take_trap(trap_t& t, reg_t epc) if (supv_double_trap) vsdeleg = hsdeleg = 0; } - if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) { + bool vti = false; + if (extension_enabled_const(EXT_SSAIA)) { + const reg_t hvictl = state.csrmap[CSR_HVICTL]->read(); + const reg_t iid = get_field(hvictl, HVICTL_IID); + // It is possible that hvictl is injecting VSEIP (10) and hvictl.DPR is causing mip.VSEIP to be picked over VTI. 
+ // Check vstopi == hvictl.iid + vti = (hvictl & HVICTL_VTI) && iid != IRQ_S_EXT && iid == bit && get_field(state.vstopi->read(), MTOPI_IID) == iid; + } + if ((state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) || vti) { // Handle the trap in VS-mode - const reg_t adjusted_cause = interrupt ? bit - 1 : bit; // VSSIP -> SSIP, etc + const reg_t adjusted_cause = interrupt && bit <= IRQ_VS_EXT && !vti ? bit - 1 : bit; // VSSIP -> SSIP, etc; reg_t vector = (state.vstvec->read() & 1) && interrupt ? 4 * adjusted_cause : 0; state.pc = (state.vstvec->read() & ~(reg_t)1) + vector; state.vscause->write(adjusted_cause | (interrupt ? interrupt_bit : 0)); @@ -546,6 +565,14 @@ void processor_t::check_if_lpad_required() } } +reg_t processor_t::set_lpad_expected(reg_t pc) +{ + auto p = this; + if (ZICFILP_xLPE(state.v, state.prv)) + state.elp = elp_t::LP_EXPECTED; + return pc; +} + void processor_t::disasm(insn_t insn) { uint64_t bits = insn.bits(); @@ -581,13 +608,6 @@ void processor_t::disasm(insn_t insn) } } -int processor_t::paddr_bits() -{ - unsigned max_xlen = isa.get_max_xlen(); - assert(xlen == max_xlen); - return max_xlen == 64 ? 
50 : 34; -} - void processor_t::put_csr(int which, reg_t val) { val = zext_xlen(val); @@ -628,46 +648,55 @@ reg_t illegal_instruction(processor_t UNUSED *p, insn_t insn, reg_t UNUSED pc) throw trap_illegal_instruction(insn.bits() & 0xffffffffULL); } -insn_func_t processor_t::decode_insn(insn_t insn) +reg_t processor_t::throw_instruction_address_misaligned(reg_t pc) { - // look up opcode in hash table - size_t idx = insn.bits() % OPCODE_CACHE_SIZE; - auto [hit, desc] = opcode_cache[idx].lookup(insn.bits()); + throw trap_instruction_address_misaligned(state.v, pc, 0, 0); +} - bool rve = extension_enabled('E'); +insn_func_t processor_t::decode_insn(insn_t insn) +{ + const auto& pool = opcode_map[insn.bits() % std::size(opcode_map)]; - if (unlikely(!hit)) { - // fall back to linear search - auto matching = [insn_bits = insn.bits()](const insn_desc_t &d) { - return (insn_bits & d.mask) == d.match; - }; - auto p = std::find_if(custom_instructions.begin(), - custom_instructions.end(), matching); - if (p == custom_instructions.end()) { - p = std::find_if(instructions.begin(), instructions.end(), matching); - assert(p != instructions.end()); + for (auto p = pool.begin(); ; ++p) { + if ((insn.bits() & p->mask) == p->match) { + return p->func; } - desc = &*p; - opcode_cache[idx].replace(insn.bits(), desc); } - - return desc->func(xlen, rve, log_commits_enabled); } -void processor_t::register_insn(insn_desc_t desc, bool is_custom) { +void processor_t::register_insn(insn_desc_t desc, std::vector<insn_desc_t>& pool) { assert(desc.fast_rv32i && desc.fast_rv64i && desc.fast_rv32e && desc.fast_rv64e && desc.logged_rv32i && desc.logged_rv64i && desc.logged_rv32e && desc.logged_rv64e); - if (is_custom) - custom_instructions.push_back(desc); - else - instructions.push_back(desc); + pool.push_back(desc); } void processor_t::build_opcode_map() { - for (size_t i = 0; i < OPCODE_CACHE_SIZE; i++) - opcode_cache[i].reset(); + bool rve = extension_enabled('E'); + bool zca = 
extension_enabled(EXT_ZCA); + const size_t N = std::size(opcode_map); + + auto build_one = [&](const insn_desc_t& desc) { + auto func = desc.func(xlen, rve, log_commits_enabled); + if (!zca && insn_length(desc.match) % 4) + func = &::illegal_instruction; + + auto stride = std::min(N, size_t(1) << ctz(~desc.mask)); + for (size_t i = desc.match & (stride - 1); i < N; i += stride) { + if ((desc.match % N) == (i & desc.mask)) + opcode_map[i].push_back({desc.match, desc.mask, func}); + } + }; + + for (auto& p : opcode_map) + p.clear(); + + for (auto& d : custom_instructions) + build_one(d); + + for (auto& d : instructions) + build_one(d); } void processor_t::register_extension(extension_t *x) { diff --git a/riscv/processor.h b/riscv/processor.h index 6b611d7..18ac08f 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -61,6 +61,13 @@ struct insn_desc_t static const insn_desc_t illegal_instruction; }; +struct opcode_map_entry_t +{ + insn_bits_t match; + insn_bits_t mask; + insn_func_t func; +}; + // regnum, data typedef std::map<reg_t, freg_t> commit_log_reg_t; @@ -70,6 +77,7 @@ typedef std::vector<std::tuple<reg_t, uint64_t, uint8_t>> commit_log_mem_t; // architectural state of a RISC-V hart struct state_t { + void add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg); void reset(processor_t* const proc, reg_t max_isa); void add_csr(reg_t addr, const csr_t_p& csr); @@ -96,6 +104,8 @@ struct state_t wide_counter_csr_t_p mcycle; mie_csr_t_p mie; mip_csr_t_p mip; + csr_t_p nonvirtual_sip; + csr_t_p nonvirtual_sie; csr_t_p medeleg; csr_t_p mideleg; csr_t_p mcounteren; @@ -149,6 +159,7 @@ struct state_t bool debug_mode; mseccfg_csr_t_p mseccfg; + csr_t_p mseccfgh; static const int max_pmp = 64; pmpaddr_csr_t_p pmpaddr[max_pmp]; @@ -173,6 +184,11 @@ struct state_t csr_t_p ssp; + csr_t_p mvien; + mvip_csr_t_p mvip; + csr_t_p hvictl; + csr_t_p vstopi; + bool serialized; // whether timer CSRs are in a well-defined state // When true, 
execute a single instruction and then enter debug mode. This @@ -198,47 +214,6 @@ struct state_t void csr_init(processor_t* const proc, reg_t max_isa); }; -class opcode_cache_entry_t { - public: - opcode_cache_entry_t() - { - reset(); - } - - void reset() - { - for (size_t i = 0; i < associativity; i++) { - tag[i] = 0; - contents[i] = &insn_desc_t::illegal_instruction; - } - } - - void replace(insn_bits_t opcode, const insn_desc_t* desc) - { - for (size_t i = associativity - 1; i > 0; i--) { - tag[i] = tag[i-1]; - contents[i] = contents[i-1]; - } - - tag[0] = opcode; - contents[0] = desc; - } - - std::tuple<bool, const insn_desc_t*> lookup(insn_bits_t opcode) - { - for (size_t i = 0; i < associativity; i++) - if (tag[i] == opcode) - return std::tuple(true, contents[i]); - - return std::tuple(false, nullptr); - } - - private: - static const size_t associativity = 4; - insn_bits_t tag[associativity]; - const insn_desc_t* contents[associativity]; -}; - // this class represents one processor in a RISC-V machine. class processor_t : public abstract_device_t { @@ -249,8 +224,8 @@ public: FILE *log_file, std::ostream& sout_); // because of command line option --log and -s we need both ~processor_t(); - const isa_parser_t &get_isa() { return isa; } - const cfg_t &get_cfg() { return *cfg; } + const isa_parser_t &get_isa() const & { return isa; } + const cfg_t &get_cfg() const & { return *cfg; } void set_debug(bool value); void set_histogram(bool value); @@ -265,6 +240,7 @@ public: mmu_t* get_mmu() { return mmu; } state_t* get_state() { return &state; } unsigned get_xlen() const { return xlen; } + unsigned paddr_bits() { return isa.get_max_xlen() == 64 ? 56 : 34; } unsigned get_const_xlen() const { // Any code that assumes a const xlen should use this method to // document that assumption. 
If Spike ever changes to allow @@ -314,6 +290,9 @@ public: extension_enable_table[ext] = enable && isa.extension_enabled(ext); } void set_impl(uint8_t impl, bool val) { impl_table[impl] = val; } + bool has_mmu() const { return max_vaddr_bits != 0; } + unsigned get_max_vaddr_bits() const { return max_vaddr_bits; } + void set_max_vaddr_bits(unsigned); bool supports_impl(uint8_t impl) const { return impl_table[impl]; } @@ -321,25 +300,23 @@ public: const int ialign = extension_enabled(EXT_ZCA) ? 16 : 32; return ~(reg_t)(ialign == 16 ? 0 : 2); } - void check_pc_alignment(reg_t pc) { - if (unlikely(pc & ~pc_alignment_mask())) - throw trap_instruction_address_misaligned(state.v, pc, 0, 0); - } + reg_t throw_instruction_address_misaligned(reg_t pc); reg_t legalize_privilege(reg_t); void set_privilege(reg_t, bool); - const char* get_privilege_string(); + const char* get_privilege_string() const; void update_histogram(reg_t pc); const disassembler_t* get_disassembler() { return disassembler; } FILE *get_log_file() { return log_file; } void register_base_insn(insn_desc_t insn) { - register_insn(insn, false /* is_custom */); + register_insn(insn, instructions); } void register_custom_insn(insn_desc_t insn) { - register_insn(insn, true /* is_custom */); + register_insn(insn, custom_instructions); } void register_extension(extension_t*); + void build_opcode_map(); // MMIO slave interface bool load(reg_t addr, size_t len, uint8_t* bytes) override; @@ -349,8 +326,8 @@ public: // When true, display disassembly of each instruction that's executed. bool debug; // When true, take the slow simulation path. - bool slow_path(); - bool halted() { return state.debug_mode; } + bool slow_path() const; + bool halted() const { return state.debug_mode; } enum { HR_NONE, /* Halt request is inactive. */ HR_REGULAR, /* Regular halt request/debug interrupt. 
*/ @@ -369,6 +346,9 @@ public: bool is_waiting_for_interrupt() { return in_wfi; }; void check_if_lpad_required(); + reg_t set_lpad_expected(reg_t pc); + + reg_t select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const; private: const isa_parser_t isa; @@ -381,6 +361,7 @@ private: state_t state; uint32_t id; unsigned xlen; + unsigned max_vaddr_bits; bool histogram_enabled; bool log_commits_enabled; FILE *log_file; @@ -395,20 +376,17 @@ private: std::bitset<NUM_ISA_EXTENSIONS> extension_dynamic; mutable std::bitset<NUM_ISA_EXTENSIONS> extension_assumed_const; + std::vector<opcode_map_entry_t> opcode_map[128]; std::vector<insn_desc_t> instructions; std::vector<insn_desc_t> custom_instructions; std::unordered_map<reg_t,uint64_t> pc_histogram; - static const size_t OPCODE_CACHE_SIZE = 4095; - opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE]; - void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); } void take_interrupt(reg_t mask); // take first enabled interrupt in mask void take_trap(trap_t& t, reg_t epc); // take an exception void take_trigger_action(triggers::action_t action, reg_t breakpoint_tval, reg_t epc, bool virt); void disasm(insn_t insn); // disassemble and print an instruction - void register_insn(insn_desc_t, bool); - int paddr_bits(); + void register_insn(insn_desc_t, std::vector<insn_desc_t>& pool); void enter_debug_mode(uint8_t cause, uint8_t ext_cause); @@ -420,7 +398,6 @@ private: friend class extension_t; void parse_priv_string(const char*); - void build_opcode_map(); void register_base_instructions(); insn_func_t decode_insn(insn_t insn); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 7fd9890..8df8739 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -18,6 +18,7 @@ riscv_install_prog_srcs = \ riscv_install_hdrs = \ abstract_device.h \ abstract_interrupt_controller.h \ + bloom_filter.h \ cachesim.h \ cfg.h \ common.h \ @@ -33,11 +34,13 @@ riscv_install_hdrs = \ entropy_source.h \ 
extension.h \ isa_parser.h \ + jtag_dtm.h \ log_file.h \ memtracer.h \ mmu.h \ platform.h \ processor.h \ + remote_bitbang.h \ rocc.h \ sim.h \ simif.h \ @@ -958,6 +961,14 @@ riscv_insn_ext_zicond = \ czero_eqz \ czero_nez \ +riscv_insn_ext_zvfofp4min = \ + vfext_vf2 \ + +riscv_insn_ext_zvfofp8min = \ + vfncvt_f_f_q \ + vfncvt_sat_f_f_q \ + vfncvtbf16_sat_f_f_w \ + riscv_insn_ext_zfbfmin = \ fcvt_bf16_s \ fcvt_s_bf16 \ @@ -1072,10 +1083,29 @@ riscv_insn_ext_zvksh = \ vsm3c_vi \ vsm3me_vv \ +riscv_insn_ext_zvbdot = \ + vqbdotu_vv \ + vqbdots_vv \ + vfwbdot_vv \ + vfbdot_vv \ + vfqbdot_vv \ + vfqbdot_alt_vv \ + +riscv_insn_ext_zvldot = \ + vqldotu_vv \ + vqldots_vv \ + vfwldot_vv \ + vfqldot_vv \ + vfqldot_alt_vv \ + riscv_insn_ext_zimop = \ mop_r_N \ mop_rr_N \ +riscv_insn_ext_zibi = \ + beqi \ + bnei \ + riscv_insn_ext_zcmop = \ c_mop_N \ @@ -1111,13 +1141,15 @@ riscv_insn_list = \ $(riscv_insn_ext_b) \ $(riscv_insn_ext_a) \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ + $(riscv_insn_ext_zvfofp4min) \ + $(riscv_insn_ext_zvfofp8min) \ $(riscv_insn_ext_bf16) \ $(riscv_insn_ext_cmo) \ $(riscv_insn_ext_d_zfa) \ $(riscv_insn_ext_f_zfa) \ $(riscv_insn_ext_h) \ $(riscv_insn_ext_k) \ - $(riscv_insn_ext_q) \ + $(if $(HAVE_INT128),$(riscv_insn_ext_q),) \ $(riscv_insn_ext_q_zfa) \ $(riscv_insn_ext_zacas) \ $(riscv_insn_ext_zabha) \ @@ -1128,9 +1160,12 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ $(riscv_insn_ext_zvk) \ + $(riscv_insn_ext_zvbdot) \ + $(riscv_insn_ext_zvldot) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ + $(riscv_insn_ext_zibi) \ $(riscv_insn_ext_zimop) \ $(riscv_insn_ext_zcmop) \ $(riscv_insn_ext_zicfilp) \ diff --git a/riscv/sim.cc b/riscv/sim.cc index fd1c6fb..4eb5ed0 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -88,7 +88,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, #ifndef RISCV_ENABLE_DUAL_ENDIAN if (cfg->endianness != endianness_little) { - fputs("Big-endian support has not been prroperly 
enabled; " + fputs("Big-endian support has not been properly enabled; " "please rebuild the riscv-isa-sim project using " "\"configure --enable-dual-endian\".\n", stderr); @@ -96,7 +96,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, } #endif - debug_mmu = new mmu_t(this, cfg->endianness, NULL); + debug_mmu = new mmu_t(this, cfg->endianness, NULL, cfg->cache_blocksz); // When running without using a dtb, skip the fdt-based configuration steps if (!dtb_enabled) { @@ -118,8 +118,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, // particular, the default device tree configuration that you get without // setting the dtb_file argument has one. std::vector<device_factory_sargs_t> device_factories = { - {clint_factory, {}}, // clint must be element 0 - {plic_factory, {}}, // plic must be element 1 + {clint_factory, {}}, + {plic_factory, {}}, {ns16550_factory, {}}}; device_factories.insert(device_factories.end(), plugin_device_factories.begin(), @@ -214,16 +214,16 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, // handle mmu-type const char *mmu_type; rc = fdt_parse_mmu_type(fdt, cpu_offset, &mmu_type); + procs[cpu_idx]->set_max_vaddr_bits(0); if (rc == 0) { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SBARE); if (strncmp(mmu_type, "riscv,sv32", strlen("riscv,sv32")) == 0) { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV32); + procs[cpu_idx]->set_max_vaddr_bits(32); } else if (strncmp(mmu_type, "riscv,sv39", strlen("riscv,sv39")) == 0) { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV39); + procs[cpu_idx]->set_max_vaddr_bits(39); } else if (strncmp(mmu_type, "riscv,sv48", strlen("riscv,sv48")) == 0) { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV48); + procs[cpu_idx]->set_max_vaddr_bits(48); } else if (strncmp(mmu_type, "riscv,sv57", strlen("riscv,sv57")) == 0) { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SV57); + procs[cpu_idx]->set_max_vaddr_bits(57); } else if (strncmp(mmu_type, "riscv,sbare", strlen("riscv,sbare")) == 0) { // has been set in the beginning } 
else { @@ -233,8 +233,6 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, << mmu_type << ").\n"; exit(1); } - } else { - procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SBARE); } procs[cpu_idx]->reset(); @@ -253,10 +251,15 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, std::shared_ptr<abstract_device_t> dev_ptr(device); add_device(device_base, dev_ptr); - if (i == 0) // clint_factory + if (dynamic_cast<clint_t*>(&*dev_ptr)) { + assert(!clint); clint = std::static_pointer_cast<clint_t>(dev_ptr); - else if (i == 1) // plic_factory + } + + if (dynamic_cast<plic_t*>(&*dev_ptr)) { + assert(!plic); plic = std::static_pointer_cast<plic_t>(dev_ptr); + } } } } @@ -273,7 +276,7 @@ int sim_t::run() if (!debug && log) set_procs_debug(true); - htif_t::set_expected_xlen(harts[0]->get_isa().get_max_xlen()); + htif_t::set_expected_xlen(harts.begin()->second->get_isa().get_max_xlen()); // htif_t::run() will repeatedly call back into sim_t::idle(), each // invocation of which will advance target time @@ -337,22 +340,16 @@ void sim_t::set_procs_debug(bool value) procs[i]->set_debug(value); } -static bool paddr_ok(reg_t addr) -{ - static_assert(MAX_PADDR_BITS == 8 * sizeof(addr)); - return true; -} - bool sim_t::mmio_load(reg_t paddr, size_t len, uint8_t* bytes) { - if (paddr + len < paddr || !paddr_ok(paddr + len - 1)) + if (paddr + len < paddr) return false; return bus.load(paddr, len, bytes); } bool sim_t::mmio_store(reg_t paddr, size_t len, const uint8_t* bytes) { - if (paddr + len < paddr || !paddr_ok(paddr + len - 1)) + if (paddr + len < paddr) return false; return bus.store(paddr, len, bytes); } @@ -403,12 +400,20 @@ void sim_t::set_rom() } char* sim_t::addr_to_mem(reg_t paddr) { - if (!paddr_ok(paddr)) - return NULL; - auto desc = bus.find_device(paddr >> PGSHIFT << PGSHIFT, PGSIZE); - if (auto mem = dynamic_cast<abstract_mem_t*>(desc.second)) - return mem->contents(paddr - desc.first); - return NULL; + auto page_offset = paddr % PGSIZE; + auto page_addr = paddr - page_offset; + + if 
(auto it = addr_to_mem_cache.find(page_addr); it != addr_to_mem_cache.end()) + return it->second + page_offset; + + auto desc = bus.find_device(page_addr, PGSIZE); + if (auto mem = dynamic_cast<abstract_mem_t*>(desc.second)) { + auto res = mem->contents(page_addr - desc.first); + addr_to_mem_cache.insert({page_addr, res}); + return res + page_offset; + } + + return nullptr; } const char* sim_t::get_symbol(uint64_t paddr) diff --git a/riscv/sim.h b/riscv/sim.h index da04a88..8a96395 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -13,6 +13,7 @@ #include <fesvr/htif.h> #include <vector> #include <map> +#include <unordered_map> #include <string> #include <memory> #include <sys/types.h> @@ -73,6 +74,7 @@ private: std::vector<std::pair<reg_t, abstract_mem_t*>> mems; std::vector<processor_t*> procs; std::map<size_t, processor_t*> harts; + std::unordered_map<reg_t, char*> addr_to_mem_cache; std::pair<reg_t, reg_t> initrd_range; std::string dts; std::string dtb; diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index b6a4b92..7f5256c 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -4,6 +4,8 @@ #define _RISCV_V_EXT_MACROS_H #include "vector_unit.h" +#include "zvbdot.h" +#include <functional> // // vector: masking skip helper @@ -58,11 +60,23 @@ static inline bool is_overlapped_widen(const int astart, int asize, } } -#define VI_NARROW_CHECK_COMMON \ +#define VI_NON_ALTFMT_INSN \ + require(P.VU.altfmt == 0); \ + +#define require_zvfbfa \ + require(P.VU.altfmt == 0 || p->extension_enabled(EXT_ZVFBFA)); \ + +#define require_zvfbfa_or_zvfh \ + require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFH); \ + +#define require_zvfbfa_or_zvfhmin \ + require_extension(P.VU.altfmt ? 
EXT_ZVFBFA : EXT_ZVFHMIN); \ + +#define VI_NARROW_CHECK_COMMON(factor) \ require_vector(true); \ - require(P.VU.vflmul <= 4); \ - require(P.VU.vsew * 2 <= P.VU.ELEN); \ - require_align(insn.rs2(), P.VU.vflmul * 2); \ + require(P.VU.vflmul <= (8 / factor)); \ + require(P.VU.vsew * factor <= P.VU.ELEN); \ + require_align(insn.rs2(), P.VU.vflmul * factor); \ require_align(insn.rd(), P.VU.vflmul); \ require_vm; \ @@ -75,7 +89,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_CHECK_ST_INDEX(elt_width) \ require_vector(false); \ - require(elt_width <= P.VU.ELEN); \ + require(elt_width <= std::min(P.VU.ELEN, (reg_t)P.get_xlen())); \ float vemul = ((float)elt_width / P.VU.vsew * P.VU.vflmul); \ require(vemul >= 0.125 && vemul <= 8); \ reg_t UNUSED emul = vemul < 1 ? 1 : vemul; \ @@ -171,12 +185,19 @@ static inline bool is_overlapped_widen(const int astart, int asize, } #define VI_CHECK_SDS(is_vs1) \ - VI_NARROW_CHECK_COMMON; \ + VI_NARROW_CHECK_COMMON(2); \ if (insn.rd() != insn.rs2()) \ require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \ if (is_vs1) \ require_align(insn.rs1(), P.VU.vflmul); \ +#define VI_CHECK_SQS(is_vs1) \ + VI_NARROW_CHECK_COMMON(4); \ + if (insn.rd() != insn.rs2()) \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 4); \ + if (is_vs1) \ + require_align(insn.rs1(), P.VU.vflmul); \ + #define VI_CHECK_REDUCTION(is_wide) \ require_vector(true); \ if (is_wide) { \ @@ -200,7 +221,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, require_vector(true); \ reg_t vl = P.VU.vl->read(); \ reg_t UNUSED sew = P.VU.vsew; \ - reg_t rd_num = insn.rd(); \ + reg_t UNUSED rd_num = insn.rd(); \ reg_t UNUSED rs1_num = insn.rs1(); \ reg_t rs2_num = insn.rs2(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { @@ -336,7 +357,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_PARAMS(x) \ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, 
i, true); \ - type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ + type_sew_t<x>::type UNUSED simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); #define XV_PARAMS(x) \ @@ -435,7 +456,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VFP_VV_CMP_PARAMS(width) #define VFP_VF_CMP_PARAMS(width) \ - float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \ + float##width##_t rs1 = f##width(READ_FREG(rs1_num), P.VU.altfmt); \ float##width##_t UNUSED vs2 = P.VU.elt<float##width##_t>(rs2_num, i); #define VFP_VF_PARAMS(width) \ @@ -545,7 +566,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VX_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VI_MERGE_LOOP(BODY) \ VI_CHECK_SSS(false); \ @@ -661,7 +682,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VV_U_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VV_LOOP(BODY) \ VI_CHECK_SSS(true) \ @@ -679,7 +700,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VV_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_V_ULOOP(BODY) \ VI_CHECK_SSS(false) \ @@ -715,7 +736,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VX_U_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VX_LOOP(BODY) \ VI_CHECK_SSS(false) \ @@ -733,7 +754,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VX_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VI_ULOOP(BODY) \ VI_CHECK_SSS(false) \ @@ -751,7 +772,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VI_U_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VI_LOOP(BODY) \ VI_CHECK_SSS(false) \ @@ -769,7 +790,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VI_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END // signed unsigned operation loop 
(e.g. mulhsu) #define VI_VV_SU_LOOP(BODY) \ @@ -788,7 +809,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VV_SU_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END #define VI_VX_SU_LOOP(BODY) \ VI_CHECK_SSS(false) \ @@ -806,7 +827,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, VX_SU_PARAMS(e64); \ BODY; \ } \ - VI_LOOP_END + VI_LOOP_END // narrow operation loop #define VI_VV_LOOP_NARROW(BODY) \ @@ -1381,12 +1402,9 @@ VI_VX_ULOOP({ \ } \ P.VU.vstart->write(0); -// vector: sign/unsiged extension -#define VI_VV_EXT(div, type) \ +#define VI_EXT_CHECK(div) \ require(insn.rd() != insn.rs2()); \ require_vm; \ - reg_t from = P.VU.vsew / div; \ - require(from >= e8 && from <= e64); \ require(((float)P.VU.vflmul / div) >= 0.125 && ((float)P.VU.vflmul / div) <= 8 ); \ require_align(insn.rd(), P.VU.vflmul); \ require_align(insn.rs2(), P.VU.vflmul / div); \ @@ -1394,10 +1412,15 @@ VI_VX_ULOOP({ \ require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ } else { \ require_noover_widen(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul / div); \ - } \ + } + +// vector: sign/unsiged extension +#define VI_VV_EXT(div, type) \ + reg_t from = P.VU.vsew / div; \ + require(from >= e8 && from <= e64); \ + VI_EXT_CHECK(div); \ + VI_LOOP_BASE \ reg_t pat = (((P.VU.vsew >> 3) << 4) | from >> 3); \ - VI_GENERAL_LOOP_BASE \ - VI_LOOP_ELEMENT_SKIP(); \ switch (pat) { \ case 0x21: \ P.VU.elt<type##16_t>(rd_num, i, true) = P.VU.elt<type##8_t>(rs2_num, i); \ @@ -1420,34 +1443,37 @@ VI_VX_ULOOP({ \ default: \ break; \ } \ - VI_LOOP_END + VI_LOOP_END // // vector: vfp helper // -#define VI_VFP_COMMON \ +#define VI_VFP_BASE \ require_fp; \ - require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFH)) || \ - (P.VU.vsew == e32 && p->get_isa().get_zvf()) || \ - (P.VU.vsew == e64 && p->get_isa().get_zvd())); \ require_vector(true); \ - require(STATE.frm->read() < 0x5); \ reg_t UNUSED vl = P.VU.vl->read(); \ reg_t UNUSED rd_num = 
insn.rd(); \ reg_t UNUSED rs1_num = insn.rs1(); \ reg_t UNUSED rs2_num = insn.rs2(); \ - softfloat_roundingMode = STATE.frm->read(); + softfloat_roundingMode = VFP_RM + +#define VI_VFP_COMMON \ + VI_VFP_BASE; \ + require((P.VU.vsew == e16 && p->extension_enabled(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFH)) || \ + (P.VU.vsew == e32 && p->get_isa().get_zvf()) || \ + (P.VU.vsew == e64 && p->get_isa().get_zvd())); \ + +// for now only support the divisor of two +#define VI_VF_EXT(div, BODY) \ + require(div == 2 && P.VU.vsew == 8); \ + VI_EXT_CHECK(div); \ + VI_LOOP_BASE \ + BODY; \ + VI_LOOP_END #define VI_VFP_BF16_COMMON \ - require_fp; \ + VI_VFP_BASE; \ require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFBFWMA))); \ - require_vector(true); \ - require(STATE.frm->read() < 0x5); \ - reg_t UNUSED vl = P.VU.vl->read(); \ - reg_t UNUSED rd_num = insn.rd(); \ - reg_t UNUSED rs1_num = insn.rs1(); \ - reg_t UNUSED rs2_num = insn.rs2(); \ - softfloat_roundingMode = STATE.frm->read(); #define VI_VFP_LOOP_BASE \ VI_VFP_COMMON \ @@ -1608,6 +1634,7 @@ VI_VX_ULOOP({ \ #define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ VI_CHECK_REDUCTION(false) \ + VI_NON_ALTFMT_INSN \ VI_VFP_COMMON \ switch (P.VU.vsew) { \ case e16: { \ @@ -1638,6 +1665,7 @@ VI_VX_ULOOP({ \ #define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \ VI_CHECK_REDUCTION(true) \ + VI_NON_ALTFMT_INSN \ VI_VFP_COMMON \ require((P.VU.vsew == e16 && p->get_isa().get_zvf()) || \ (P.VU.vsew == e32 && p->get_isa().get_zvd())); \ @@ -1670,6 +1698,12 @@ VI_VX_ULOOP({ \ break; \ }; \ +#define VFP_OP_16(op, rs1, vs2) \ + (P.VU.altfmt ? bf16_##op(rs1, vs2) : f16_##op(rs1, vs2)) + +#define VFP_MULADD_16(rs1, vs2, vd) \ + (P.VU.altfmt ? 
bf16_mulAdd(rs1, vs2, vd) : f16_mulAdd(rs1, vs2, vd)) + #define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(false); \ VI_VFP_LOOP_BASE \ @@ -1732,6 +1766,7 @@ VI_VX_ULOOP({ \ VI_VFP_LOOP_CMP_BASE \ switch (P.VU.vsew) { \ case e16: { \ + require_zvfbfa_or_zvfh; \ VFP_VF_CMP_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ @@ -1755,14 +1790,24 @@ VI_VX_ULOOP({ \ }; \ VI_VFP_LOOP_CMP_END \ +#define VI_CHECK_VFP_WIDE \ + if (P.VU.vsew == e16) \ + require(p->get_isa().get_zvf()); \ + else if (P.VU.vsew == e32) \ + require(p->get_isa().get_zvd()); \ + #define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(false); \ + VI_CHECK_VFP_WIDE \ VI_VFP_LOOP_BASE \ switch (P.VU.vsew) { \ case e16: { \ + require_zvfbfa_or_zvfh; \ float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \ - float32_t rs1 = f16_to_f32(FRS1_H); \ + float32_t vs2 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)) \ + : f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \ + float32_t rs1 = P.VU.altfmt ? bf16_to_f32(FRS1_BF) \ + : f16_to_f32(FRS1_H); \ BODY16; \ set_fp_exceptions; \ break; \ @@ -1803,12 +1848,16 @@ VI_VX_ULOOP({ \ #define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(true); \ + VI_CHECK_VFP_WIDE \ VI_VFP_LOOP_BASE \ switch (P.VU.vsew) { \ case e16: { \ + require_zvfbfa; \ float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \ - float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \ + float32_t vs2 = P.VU.altfmt ? bf16_to_f32(P.VU.elt<float16_t>(rs2_num, i)) \ + : f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \ + float32_t vs1 = P.VU.altfmt ? 
bf16_to_f32(P.VU.elt<float16_t>(rs1_num, i)) \ + : f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \ BODY16; \ set_fp_exceptions; \ break; \ @@ -1849,12 +1898,15 @@ VI_VX_ULOOP({ \ #define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(false); \ + VI_CHECK_VFP_WIDE \ VI_VFP_LOOP_BASE \ switch (P.VU.vsew) { \ case e16: { \ + require_zvfbfa; \ float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ - float32_t rs1 = f16_to_f32(FRS1_H); \ + float32_t rs1 = P.VU.altfmt ? bf16_to_f32(FRS1_BF) \ + : f16_to_f32(FRS1_H); \ BODY16; \ set_fp_exceptions; \ break; \ @@ -1875,12 +1927,14 @@ VI_VX_ULOOP({ \ #define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(true); \ + VI_CHECK_VFP_WIDE \ VI_VFP_LOOP_BASE \ switch (P.VU.vsew) { \ case e16: { \ float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ - float32_t vs1 = f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \ + float32_t vs1 = P.VU.altfmt ? 
bf16_to_f32(P.VU.elt<bfloat16_t>(rs1_num, i)) \ + : f16_to_f32(P.VU.elt<float16_t>(rs1_num, i)); \ BODY16; \ set_fp_exceptions; \ break; \ @@ -1900,14 +1954,7 @@ VI_VX_ULOOP({ \ VI_VFP_LOOP_END #define VI_VFP_LOOP_SCALE_BASE \ - require_fp; \ - require_vector(true); \ - require(STATE.frm->read() < 0x5); \ - reg_t vl = P.VU.vl->read(); \ - reg_t rd_num = insn.rd(); \ - reg_t UNUSED rs1_num = insn.rs1(); \ - reg_t rs2_num = insn.rs2(); \ - softfloat_roundingMode = STATE.frm->read(); \ + VI_VFP_BASE; \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); @@ -1921,6 +1968,7 @@ VI_VX_ULOOP({ \ #define VI_VFP_CVT_INT_TO_FP(BODY16, BODY32, BODY64, sign) \ VI_CHECK_SSS(false); \ + VI_NON_ALTFMT_INSN \ VI_VFP_COMMON \ switch (P.VU.vsew) { \ case e16: \ @@ -1945,6 +1993,7 @@ VI_VX_ULOOP({ \ #define VI_VFP_CVT_FP_TO_INT(BODY16, BODY32, BODY64, sign) \ VI_CHECK_SSS(false); \ + VI_NON_ALTFMT_INSN \ VI_VFP_COMMON \ switch (P.VU.vsew) { \ case e16: \ @@ -1982,17 +2031,32 @@ VI_VX_ULOOP({ \ break; \ } -#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \ +// FIXME +#define VI_VFP_WCVT_OFP8_BF16_FP(BODY8, BODY16, CHECK8, CHECK16) \ VI_CHECK_DSS(false); \ switch (P.VU.vsew) { \ + case e8: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(8, 16), CHECK8, BODY8); } \ + break; \ case e16: \ - { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK16, BODY16); } \ break; \ default: \ require(0); \ break; \ } +#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \ +VI_CHECK_DSS(false); \ +switch (P.VU.vsew) { \ +case e16: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \ + break; \ +default: \ + require(0); \ + break; \ +} + #define VI_VFP_WCVT_INT_TO_FP(BODY8, BODY16, BODY32, \ CHECK8, CHECK16, CHECK32, \ sign) \ @@ -2043,6 +2107,42 @@ VI_VX_ULOOP({ \ break; \ } +#define VI_VFP_NCVT_FP_TO_OFP8(BODY, CHECK) \ + VI_CHECK_SQS(false); \ + switch (P.VU.vsew) { \ + case e8: \ + { 
VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 8), CHECK, BODY); } \ + break; \ + default: \ + require(0); \ + break; \ + } + +#define VI_VFP_NCVT_FP_BF16_OFP8(BODY16, BODY32, CHECK16, CHECK32) \ + VI_CHECK_SDS(false); \ + switch (P.VU.vsew) { \ + case e8: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 8), CHECK16, BODY16); } \ + break; \ + case e16: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK32, BODY32); } \ + break; \ + default: \ + require(0); \ + break; \ + } + +#define VI_VFP_NCVT_SAT_BF16_TO_OFP8(BODY, CHECK) \ + VI_CHECK_SDS(false); \ + switch (P.VU.vsew) { \ + case e8: \ + { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 8), CHECK, BODY); } \ + break; \ + default: \ + require(0); \ + break; \ + } + #define VI_VFP_NCVT_BF16_TO_FP(BODY, CHECK) \ VI_CHECK_SDS(false); \ switch (P.VU.vsew) { \ @@ -2089,6 +2189,78 @@ VI_VX_ULOOP({ \ break; \ } +#define ZVLDOT_INIT(widen) \ + require_vector(true); \ + require(P.VU.vstart->read() == 0); \ + require_align(insn.rs1(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_vm; \ + require_noover(insn.rd(), 1, insn.rs1(), P.VU.vflmul); \ + require_noover(insn.rd(), 1, insn.rs2(), P.VU.vflmul) + +#define ZVBDOT_INIT(widen) \ + require_vector(true); \ + unsigned vd_eew = P.VU.vsew * (widen); \ + unsigned vd_emul = std::max(1U, unsigned((8 * vd_eew) / P.VU.VLEN)); \ + unsigned vs2 = insn.rs2() & ~7; \ + unsigned ci = (insn.rs2() & 7) * 8; \ + require(P.VU.vstart->read() == 0); \ + require(P.VU.vflmul == 1); \ + require(ci * vd_eew < P.VU.VLEN); \ + require_align(insn.rd(), vd_emul); \ + require_vm; \ + require_noover(insn.rd(), vd_emul, insn.rs1(), 1); \ + require_noover(insn.rd(), vd_emul, vs2, 8) + +template<typename a_t, typename b_t, typename c_t> +c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_t c, std::function<c_t(a_t, b_t, c_t)> macc) +{ + for (size_t i = 0; i < a.size(); i++) + c = macc(a[i], b[i], c); + return c; +} + +#define ZVLDOT_LOOP(a_t, b_t, c_t, dot) \ + 
std::vector<a_t> a(P.VU.vl->read(), a_t()); \ + std::vector<b_t> b(P.VU.vl->read(), b_t()); \ + for (reg_t i = 0; i < a.size(); i++) { \ + VI_LOOP_ELEMENT_SKIP(); \ + a[i] = P.VU.elt<a_t>(insn.rs1(), i); \ + b[i] = P.VU.elt<b_t>(insn.rs2(), i); \ + } \ + auto& acc = P.VU.elt<c_t>(insn.rd(), 0, true); \ + acc = dot(a, b, acc) + +#define ZVLDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) \ + auto dot = std::bind(generic_dot_product<a_t, b_t, c_t>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, macc); \ + ZVLDOT_LOOP(a_t, b_t, c_t, dot) + +#define ZVLDOT_SIMPLE_LOOP(a_t, b_t, c_t) \ + auto macc = [](auto a, auto b, auto c) { return c + decltype(c)(a) * decltype(c)(b); }; \ + ZVLDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) + +#define ZVBDOT_LOOP(a_t, b_t, c_t, dot) \ + for (reg_t idx = 0; idx < 8; idx++) { \ + reg_t i = ci + idx; \ + VI_LOOP_ELEMENT_SKIP(); \ + std::vector<a_t> a(P.VU.vl->read(), a_t()); \ + std::vector<b_t> b(P.VU.vl->read(), b_t()); \ + for (reg_t k = 0; k < a.size(); k++) { \ + a[k] = P.VU.elt<a_t>(insn.rs1(), k); \ + b[k] = P.VU.elt<b_t>(vs2 + idx, k); \ + } \ + auto& acc = P.VU.elt<c_t>(insn.rd(), i, true); \ + acc = dot(a, b, acc); \ + } + +#define ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) \ + auto dot = std::bind(generic_dot_product<a_t, b_t, c_t>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, macc); \ + ZVBDOT_LOOP(a_t, b_t, c_t, dot) + +#define ZVBDOT_SIMPLE_LOOP(a_t, b_t, c_t) \ + auto macc = [](auto a, auto b, auto c) { return c + decltype(c)(a) * decltype(c)(b); }; \ + ZVBDOT_GENERIC_LOOP(a_t, b_t, c_t, macc) + #define P_SET_OV(ov) \ if (ov) P.VU.vxsat->write(1); diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index 7c6633c..a7ba018 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -38,10 +38,36 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new vlmax = (VLEN/vsew) * vflmul; vta = extract64(newType, 6, 1); vma = extract64(newType, 7, 1); + altfmt = 
extract64(newType, 8, 1); + + bool ill_altfmt = true; + if (altfmt) { + if (p->extension_enabled(EXT_ZVQBDOT8I) && vsew == 8) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVQBDOT16I) && vsew == 16) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFQBDOT8F) && vsew == 8) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFWBDOT16BF) && vsew == 16) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVQLDOT8I) && vsew == 8) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVQLDOT16I) && vsew == 16) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFQLDOT8F) && vsew == 8) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFWLDOT16BF) && vsew == 16) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFBFA) && (vsew == 16 || vsew == 8)) + ill_altfmt = false; + else if (p->extension_enabled(EXT_ZVFOFP8MIN) && vsew == 8) + ill_altfmt = false; + } vill = !(vflmul >= 0.125 && vflmul <= 8) || vsew > std::min(vflmul, 1.0f) * ELEN - || (newType >> 8) != 0 + || (newType >> 9) != 0 + || (altfmt && ill_altfmt) || (rd == 0 && rs1 == 0 && old_vlmax != vlmax); if (vill) { @@ -64,94 +90,10 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new } vstart->write_raw(0); - setvl_count++; return vl->read(); } -template<class T> T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write) { - assert(vsew != 0); - assert((VLEN >> 3)/sizeof(T) > 0); - reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T)); - vReg += n / elts_per_reg; - n = n % elts_per_reg; -#ifdef WORDS_BIGENDIAN - // "V" spec 0.7.1 requires lower indices to map to lower significant - // bits when changing SEW, thus we need to index from the end on BE. 
- n ^= elts_per_reg - 1; -#endif - reg_referenced[vReg] = 1; - - if (unlikely(p->get_log_commits_enabled() && is_write)) +void vectorUnit_t::log_elt_write_if_needed(reg_t vReg) const { + if (unlikely(p->get_log_commits_enabled())) p->get_state()->log_reg_write[((vReg) << 4) | 2] = {0, 0}; - - T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3)); - return regStart[n]; -} - -// The logic differences between 'elt()' and 'elt_group()' come from -// the fact that, while 'elt()' requires that the element is fully -// contained in a single vector register, the element group may span -// multiple registers in a single register group (LMUL>1). -// -// Notes: -// - We do NOT check that a single element - i.e., the T in the element -// group type std::array<T, N> - fits within a single register, or that -// T is smaller or equal to VSEW. Implementations of the instructions -// sometimes use a different T than what the specification suggests. -// Instructon implementations should 'require()' what the specification -// dictates. -// - We do NOT check that 'vReg' is a valid register group, or that -// 'n+1' element groups fit in the register group 'vReg'. It is -// the responsibility of the caller to validate those preconditions. -template<typename EG> EG& -vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) { -#ifdef WORDS_BIGENDIAN - fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n", - stderr); - abort(); -#endif - using T = typename EG::value_type; - constexpr std::size_t N = std::tuple_size<EG>::value; - assert(N > 0); - - assert(vsew != 0); - constexpr reg_t elt_group_size = N * sizeof(T); - const reg_t reg_group_size = (VLEN >> 3) * vflmul; - assert(((n + 1) * elt_group_size) <= reg_group_size); - - const reg_t start_byte = n * elt_group_size; - const reg_t bytes_per_reg = VLEN >> 3; - - // Inclusive first/last register indices. 
- const reg_t reg_first = vReg + start_byte / bytes_per_reg; - const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg; - - // Element groups per register groups - for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) { - reg_referenced[vidx] = 1; - - if (unlikely(p->get_log_commits_enabled() && is_write)) { - p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0}; - } - } - - return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte); } - -template signed char& vectorUnit_t::elt<signed char>(reg_t, reg_t, bool); -template short& vectorUnit_t::elt<short>(reg_t, reg_t, bool); -template int& vectorUnit_t::elt<int>(reg_t, reg_t, bool); -template long& vectorUnit_t::elt<long>(reg_t, reg_t, bool); -template long long& vectorUnit_t::elt<long long>(reg_t, reg_t, bool); -template uint8_t& vectorUnit_t::elt<uint8_t>(reg_t, reg_t, bool); -template uint16_t& vectorUnit_t::elt<uint16_t>(reg_t, reg_t, bool); -template uint32_t& vectorUnit_t::elt<uint32_t>(reg_t, reg_t, bool); -template uint64_t& vectorUnit_t::elt<uint64_t>(reg_t, reg_t, bool); -template float16_t& vectorUnit_t::elt<float16_t>(reg_t, reg_t, bool); -template float32_t& vectorUnit_t::elt<float32_t>(reg_t, reg_t, bool); -template float64_t& vectorUnit_t::elt<float64_t>(reg_t, reg_t, bool); - -template EGU32x4_t& vectorUnit_t::elt_group<EGU32x4_t>(reg_t, reg_t, bool); -template EGU32x8_t& vectorUnit_t::elt_group<EGU32x8_t>(reg_t, reg_t, bool); -template EGU64x4_t& vectorUnit_t::elt_group<EGU64x4_t>(reg_t, reg_t, bool); -template EGU8x16_t& vectorUnit_t::elt_group<EGU8x16_t>(reg_t, reg_t, bool); diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h index 0e80618..88d4399 100644 --- a/riscv/vector_unit.h +++ b/riscv/vector_unit.h @@ -87,26 +87,87 @@ using EGU8x16_t = std::array<uint8_t, 16>; class vectorUnit_t { public: - processor_t* p; - void *reg_file; - char reg_referenced[NVPR]; - int setvl_count; - reg_t vlmax; - reg_t vlenb; - csr_t_p vxsat; - vector_csr_t_p vxrm, vstart, vl, 
vtype; - reg_t vma, vta; - reg_t vsew; - float vflmul; - reg_t ELEN, VLEN; - bool vill; - bool vstart_alu; + processor_t* p = nullptr; + void *reg_file = nullptr; + int setvl_count = 0; + reg_t vlmax = 0; + reg_t vlenb = 0; + csr_t_p vxsat = 0; + vector_csr_t_p vxrm = 0, vstart = 0, vl = 0, vtype = 0; + reg_t vma = 0, vta = 0; + reg_t vsew = 0; + float vflmul = 0; + reg_t altfmt = 0; + reg_t ELEN = 0, VLEN = 0; + bool vill = false; + bool vstart_alu = false; // vector element for various SEW - template<class T> T& elt(reg_t vReg, reg_t n, bool is_write = false); + template<typename T> T& elt(reg_t vReg, reg_t n, bool is_write = false) { + assert(vsew != 0); + assert((VLEN >> 3)/sizeof(T) > 0); + reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T)); + vReg += n / elts_per_reg; + n = n % elts_per_reg; +#ifdef WORDS_BIGENDIAN + // "V" spec 0.7.1 requires lower indices to map to lower significant + // bits when changing SEW, thus we need to index from the end on BE. + n ^= elts_per_reg - 1; +#endif + if (is_write) + log_elt_write_if_needed(vReg); + + T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3)); + return regStart[n]; + } + // vector element group access, where EG is a std::array<T, N>. + // The logic differences between 'elt()' and 'elt_group()' come from + // the fact that, while 'elt()' requires that the element is fully + // contained in a single vector register, the element group may span + // multiple registers in a single register group (LMUL>1). + // + // Notes: + // - We do NOT check that a single element - i.e., the T in the element + // group type std::array<T, N> - fits within a single register, or that + // T is smaller or equal to VSEW. Implementations of the instructions + // sometimes use a different T than what the specification suggests. + // Instructon implementations should 'require()' what the specification + // dictates. 
+ // - We do NOT check that 'vReg' is a valid register group, or that + // 'n+1' element groups fit in the register group 'vReg'. It is + // the responsibility of the caller to validate those preconditions. + template<typename EG> EG& - elt_group(reg_t vReg, reg_t n, bool is_write = false); + elt_group(reg_t vReg, reg_t n, bool is_write = false) { +#ifdef WORDS_BIGENDIAN + fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n", + stderr); + abort(); +#endif + using T = typename EG::value_type; + constexpr std::size_t N = std::tuple_size<EG>::value; + assert(N > 0); + + assert(vsew != 0); + constexpr reg_t elt_group_size = N * sizeof(T); + const reg_t reg_group_size = (VLEN >> 3) * vflmul; + assert(((n + 1) * elt_group_size) <= reg_group_size); + + const reg_t start_byte = n * elt_group_size; + const reg_t bytes_per_reg = VLEN >> 3; + + // Inclusive first/last register indices. + const reg_t reg_first = vReg + start_byte / bytes_per_reg; + const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg; + + // Element groups per register groups + for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) + if (is_write) + log_elt_write_if_needed(vidx); + + return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte); + } bool mask_elt(reg_t vReg, reg_t n) { @@ -119,31 +180,15 @@ public: e = (e & ~(1U << (n % 8))) | (value << (n % 8)); } +private: + + void log_elt_write_if_needed(reg_t vReg) const; + public: void reset(); - vectorUnit_t(): - p(0), - reg_file(0), - reg_referenced{0}, - setvl_count(0), - vlmax(0), - vlenb(0), - vxsat(0), - vxrm(0), - vstart(0), - vl(0), - vtype(0), - vma(0), - vta(0), - vsew(0), - vflmul(0), - ELEN(0), - VLEN(0), - vill(false), - vstart_alu(false) { - } + vectorUnit_t() {} ~vectorUnit_t() { free(reg_file); diff --git a/riscv/zvbdot.h b/riscv/zvbdot.h new file mode 100644 index 0000000..67a204b --- /dev/null +++ b/riscv/zvbdot.h @@ -0,0 +1,59 @@ +#ifndef _RISCV_ZVBDOT_H +#define _RISCV_ZVBDOT_H + 
+#include "bulknormdot.h" +#include <vector> +#include <algorithm> + +static inline float32_t f32_add_odd(float32_t a, float32_t b) +{ + auto rm = softfloat_roundingMode; + auto flags = softfloat_exceptionFlags; + + softfloat_roundingMode = softfloat_round_odd; + softfloat_exceptionFlags = 0; + + auto res = f32_add(a, b); + + if (softfloat_exceptionFlags & softfloat_flag_overflow) { + res.v++; // FLT_MAX -> INF + } + + auto new_flags = softfloat_exceptionFlags & (softfloat_flag_overflow | softfloat_flag_invalid); + + softfloat_roundingMode = rm; + softfloat_exceptionFlags = flags | new_flags; + + return res; +} + +static inline float32_t zvfwbdot16bf_dot_acc(const std::vector<uint16_t>& a, const std::vector<uint16_t>& b, float32_t c) +{ + std::vector<bf16_t> fa(a.size()); + std::transform(a.begin(), a.end(), fa.begin(), [](auto f) { return f; }); + + std::vector<bf16_t> fb(b.size()); + std::transform(b.begin(), b.end(), fb.begin(), [](auto f) { return f; }); + + DotConfig cfg(a.size(), int_log2(a.size()) + ((a.size() & (a.size() - 1)) != 0)); + auto res = bulk_norm_dot_bf16(cfg, &fa[0], &fb[0]); + softfloat_exceptionFlags |= res.flags; + return f32_add_odd(f32(res.out), c); +} + +template<typename A, typename B> +float32_t zvfqbdot8f_dot_acc(const std::vector<uint8_t>& a, const std::vector<uint8_t>& b, float32_t c) +{ + std::vector<A> fa(a.size()); + std::transform(a.begin(), a.end(), fa.begin(), [](auto f) { return f; }); + + std::vector<B> fb(b.size()); + std::transform(b.begin(), b.end(), fb.begin(), [](auto f) { return f; }); + + DotConfig cfg(a.size(), int_log2(a.size()) + ((a.size() & (a.size() - 1)) != 0)); + auto res = bulk_norm_dot_ofp8(cfg, &fa[0], &fb[0]); + softfloat_exceptionFlags |= res.flags; + return f32_add_odd(f32(res.out), c); +} + +#endif diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index f094629..e96e0a8 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -13,6 +13,14 @@ // Predicate Macros // +// Ensures that 
the ZVKB extension (vector crypto bitmanip subset) is present, +// and the vector unit is enabled and in a valid state. +#define require_zvkb \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKB); \ + } while (0) + // Ensures that the ZVBB extension (vector crypto bitmanip) is present, // and the vector unit is enabled and in a valid state. #define require_zvbb \ @@ -86,6 +94,32 @@ // (LMUL * VLEN) <= EGW #define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul)) +// Ensures that a register index is aligned to EMUL +// evaluated as EGW / VLEN. +// The check is only enabled if this value is greater +// than one (no index alignment check required for fractional EMUL) +#define require_vreg_align_eglmul(EGW, VREG_NUM) \ + do { \ + float vfeglmul = EGW / P.VU.VLEN; \ + if (vfeglmul > 1) { \ + require_align(VREG_NUM, vfeglmul); \ + }\ + } while (0) + +#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2()) + +// ensure that rs2 and rd do not overlap, assuming rd encodes an LMUL wide +// vector register group and rs2 encodes an vs2_EMUL=ceil(EGW / VLEN) vector register +// group. +// Assumption: LMUL >= vs2_EMUL which is enforced independently through require_egw_fits. +#define require_noover_eglmul(vd, vs2) \ + do { \ + int vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \ + int aligned_vd = vd / vd_emul; \ + int aligned_vs2 = vs2 / vd_emul; \ + require(aligned_vd != aligned_vs2); \ + } while (0) + // Checks that the vector unit state (vtype and vl) can be interpreted // as element groups with EEW=32, EGS=4 (four 32-bits elements per group), // for an effective element group width of EGW=128 bits. diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h index db705c7..d94ddc2 100644 --- a/riscv/zvkned_ext_macros.h +++ b/riscv/zvkned_ext_macros.h @@ -2,6 +2,7 @@ // the RISC-V Zvkned extension (vector AES single round). 
#include "insns/aes_common.h" +#include "zvk_ext_macros.h" #ifndef RISCV_ZVKNED_EXT_MACROS_H_ #define RISCV_ZVKNED_EXT_MACROS_H_ @@ -9,16 +10,22 @@ // vaes*.vs instruction constraints: // - Zvkned is enabled // - EGW (128) <= LMUL * VLEN +// - vd is LMUL aligned +// - vs2 is ceil(EGW / VLEN) aligned // - vd and vs2 cannot overlap // // The constraint that vstart and vl are both EGS (4) aligned // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vaes_vs_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ - require(insn.rd() != insn.rs2()); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vs2_align_eglmul(128); \ + require_noover_eglmul(insn.rd(), insn.rs2()); \ } while (false) // vaes*.vv instruction constraints. Those are the same as the .vs ones, @@ -30,17 +37,24 @@ // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vaes_vv_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + VI_CHECK_SSS(false) \ } while (false) // vaeskf*.vi instruction constraints. Those are the same as the .vv ones. #define require_vaeskf_vi_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + VI_CHECK_SSS(false) \ } while (false) #define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0)) diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h index b50818b..98236b0 100644 --- a/riscv/zvknh_ext_macros.h +++ b/riscv/zvknh_ext_macros.h @@ -15,6 +15,7 @@ // macros. 
#define require_vsha2_common_constraints \ do { \ + VI_CHECK_SSS(true) \ require(P.VU.vsew == 32 || P.VU.vsew == 64); \ require(insn.rd() != insn.rs1()); \ require(insn.rd() != insn.rs2()); \ diff --git a/riscv/zvksed_ext_macros.h b/riscv/zvksed_ext_macros.h index 46e399b..3ffa272 100644 --- a/riscv/zvksed_ext_macros.h +++ b/riscv/zvksed_ext_macros.h @@ -16,9 +16,12 @@ // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vsm4_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvksed; \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ } while (false) // Returns a uint32_t value constructed from the 4 bytes (uint8_t) diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h index 71c5a09..c4549da 100644 --- a/riscv/zvksh_ext_macros.h +++ b/riscv/zvksh_ext_macros.h @@ -16,9 +16,12 @@ // is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros. #define require_vsm3_constraints \ do { \ + const uint32_t EGS = 8; \ require_zvksh; \ require(P.VU.vsew == 32); \ require_egw_fits(256); \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ require(insn.rd() != insn.rs2()); \ } while (false) diff --git a/softfloat/bf16_to_e4m3.c b/softfloat/bf16_to_e4m3.c new file mode 100644 index 0000000..032f21b --- /dev/null +++ b/softfloat/bf16_to_e4m3.c @@ -0,0 +1,48 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +e4m3_t bf16_to_e4m3( bfloat16_t a, bool saturationMode ) +{ + return f32_to_e4m3( bf16_to_f32( a ), saturationMode ); +} + diff --git a/softfloat/bf16_to_e5m2.c b/softfloat/bf16_to_e5m2.c new file mode 100644 index 0000000..3d30ec9 --- /dev/null +++ b/softfloat/bf16_to_e5m2.c @@ -0,0 +1,48 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +e5m2_t bf16_to_e5m2( bfloat16_t a, bool saturationMode ) +{ + return f32_to_e5m2( bf16_to_f32( a ), saturationMode ); +} + diff --git a/softfloat/e4m3_to_bf16.c b/softfloat/e4m3_to_bf16.c new file mode 100644 index 0000000..d1ca770 --- /dev/null +++ b/softfloat/e4m3_to_bf16.c @@ -0,0 +1,92 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float16_t e4m3_to_bf16( float8_t a ) +{ + union ui8_f8 uA; + uint_fast8_t uiA; + bool sign; + int_fast8_t exp; + uint_fast8_t frac; + struct commonNaN commonNaN; + uint_fast16_t uiZ; + struct exp8_sig8 normExpSig; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signE4M3UI( uiA ); + exp = expE4M3UI( uiA ); + frac = fracE4M3UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( (exp == 0xF) && (frac == 0x7) ) { + /* NaN */ + softfloat_E4M3UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToBF16UI( &commonNaN ); + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! exp ) { + if ( ! 
frac ) { + /* zero */ + uiZ = packToBF16UI( sign, 0, 0 ); + goto uiZ; + } + /* subnormal */ + normExpSig = softfloat_normSubnormalE4M3Sig( frac ); + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToBF16UI( sign, exp + 0x78, (uint_fast16_t) frac<<4); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/e4m3_to_f16.c b/softfloat/e4m3_to_f16.c new file mode 100644 index 0000000..d086b3c --- /dev/null +++ b/softfloat/e4m3_to_f16.c @@ -0,0 +1,92 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float16_t e4m3_to_f16( float8_t a ) +{ + union ui8_f8 uA; + uint_fast8_t uiA; + bool sign; + int_fast8_t exp; + uint_fast8_t frac; + struct commonNaN commonNaN; + uint_fast16_t uiZ; + struct exp8_sig8 normExpSig; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signE4M3UI( uiA ); + exp = expE4M3UI( uiA ); + frac = fracE4M3UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( (exp == 0xF) && (frac == 0x7) ) { + /* NaN */ + softfloat_E4M3UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToF16UI( &commonNaN ); + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! exp ) { + if ( ! 
frac ) { + /* zero */ + uiZ = packToF16UI( sign, 0, 0 ); + goto uiZ; + } + /* subnormal */ + normExpSig = softfloat_normSubnormalE4M3Sig( frac ); + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToF16UI( sign, exp + 0x8, (uint_fast16_t) frac<<7); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/e5m2_to_bf16.c b/softfloat/e5m2_to_bf16.c new file mode 100644 index 0000000..6b4db1f --- /dev/null +++ b/softfloat/e5m2_to_bf16.c @@ -0,0 +1,98 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float16_t e5m2_to_bf16( float8_t a ) +{ + union ui8_f8 uA; + uint_fast8_t uiA; + bool sign; + int_fast8_t exp; + uint_fast8_t frac; + struct commonNaN commonNaN; + uint_fast16_t uiZ; + struct exp8_sig8 normExpSig; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signE5M2UI( uiA ); + exp = expE5M2UI( uiA ); + frac = fracE5M2UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0x1F ) { + if ( frac ) { + /* NaN */ + softfloat_E5M2UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToBF16UI( &commonNaN ); + } else { + /* Inf */ + uiZ = packToBF16UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! exp ) { + if ( ! 
frac ) { + /* zero */ + uiZ = packToBF16UI( sign, 0, 0 ); + goto uiZ; + } + /* subnormal */ + normExpSig = softfloat_normSubnormalE5M2Sig( frac ); + //TODO: might be unnecessary to implement a dedicated function + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToBF16UI( sign, exp + 0x70, (uint_fast16_t) frac<<5); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/e5m2_to_f16.c b/softfloat/e5m2_to_f16.c new file mode 100644 index 0000000..d0d9acf --- /dev/null +++ b/softfloat/e5m2_to_f16.c @@ -0,0 +1,98 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float16_t e5m2_to_f16( float8_t a ) +{ + union ui8_f8 uA; + uint_fast8_t uiA; + bool sign; + int_fast8_t exp; + uint_fast8_t frac; + struct commonNaN commonNaN; + uint_fast16_t uiZ; + struct exp8_sig8 normExpSig; + union ui16_f16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signE5M2UI( uiA ); + exp = expE5M2UI( uiA ); + frac = fracE5M2UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xF ) { + if ( frac ) { + /* NaN */ + softfloat_E5M2UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToF16UI( &commonNaN ); + } else { + /* Inf */ + uiZ = packToF16UI( sign, 0x1F, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( ! exp ) { + if ( ! 
frac ) { + /* zero */ + uiZ = packToF16UI( sign, 0, 0 ); + goto uiZ; + } + /* subnormal */ + normExpSig = softfloat_normSubnormalE5M2Sig( frac ); + //TODO: might be unnecessary to implement a dedicated function + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uiZ = packToF16UI( sign, exp, (uint_fast16_t) frac<<8); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/f16_to_e4m3.c b/softfloat/f16_to_e4m3.c new file mode 100644 index 0000000..d050d83 --- /dev/null +++ b/softfloat/f16_to_e4m3.c @@ -0,0 +1,89 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +e4m3_t f16_to_e4m3( float16_t a, bool saturationMode ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + bool sign; + int_fast8_t exp; + uint_fast16_t frac, frac8; + struct commonNaN commonNaN; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF16UI( uiA ); + exp = expF16UI( uiA ); + frac = fracF16UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0x1F ) { + if ( frac ) { // nan + softfloat_f16UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToE4M3UI( &commonNaN ); + } else { + uiZ = saturationMode + ? packToE4M3UI( sign, 0xF, 0x6 ) + : softfloat_commonNaNToE4M3UI(&commonNaN); + } + goto uiZ; + } + /* Use additional 4 bits for rounding. 
We will have 3+4 bits including the sticky bit*/ + frac8 = frac>>3 | ((frac & 0x7) != 0); + if ( !(exp | frac8) ) { + uiZ = packToE4M3UI( sign, 0, 0 ); + goto uiZ; + } + + /* Add the implicit leading 1 to the fraction and shift exp by (15-7)+1 */ + return softfloat_roundPackToE4M3( sign, exp - 0x9, frac8 | 0x80, saturationMode); +uiZ: + uZ.ui = uiZ; + return uZ.f; +} + diff --git a/softfloat/f16_to_e5m2.c b/softfloat/f16_to_e5m2.c new file mode 100644 index 0000000..deee4a0 --- /dev/null +++ b/softfloat/f16_to_e5m2.c @@ -0,0 +1,89 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +e5m2_t f16_to_e5m2( float16_t a, bool saturationMode ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + bool sign; + int_fast8_t exp; + uint_fast16_t frac, frac8; + struct commonNaN commonNaN; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF16UI( uiA ); + exp = expF16UI( uiA ); + frac = fracF16UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0x1F ) { + if ( frac ) { // nan + softfloat_f16UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToE5M2UI( &commonNaN ); + } else { + /* If saturation mode is enabled, convert Inf to the max value of E5M2, otherwise Inf */ + uiZ = saturationMode + ? packToE5M2UI( sign, 0x1D, 0x3 ) + : packToE5M2UI( sign, 0x1F, 0x0 ); + } + goto uiZ; + } + /* Use additional 4 bits for rounding. 
We will have 2+4 bits including the sticky bit*/ + frac8 = frac>>4 | ((frac & 0xF) != 0); + if ( !(exp | frac8) ) { + uiZ = packToE5M2UI( sign, 0, 0 ); + goto uiZ; + } + + /* Add the implicit leading 1 to the fraction and shift exp by 1 */ + return softfloat_roundPackToE5M2( sign, exp - 1, frac8 | 0x40, saturationMode); +uiZ: + uZ.ui = uiZ; + return uZ.f; +} + diff --git a/softfloat/f32_to_e4m3.c b/softfloat/f32_to_e4m3.c new file mode 100644 index 0000000..2b96c66 --- /dev/null +++ b/softfloat/f32_to_e4m3.c @@ -0,0 +1,90 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float8_t f32_to_e4m3( float32_t a, bool saturationMode ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + uint_fast16_t frac8; + struct commonNaN commonNaN; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF32UI( uiA ); + exp = expF32UI( uiA ); + frac = fracF32UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_f32UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToE4M3UI( &commonNaN ); + } else { + uiZ = saturationMode + ? packToE4M3UI( sign, 0xF, 0x6 ) + : softfloat_commonNaNToE4M3UI(&commonNaN); + } + goto uiZ; + } + /* Use additional 4 bits for rounding. 
We will have 3+4 bits including the sticky bit*/ + frac8 = frac>>16 | ((frac & 0xFFFF) != 0); + if ( !(exp | frac8) ) { + uiZ = packToE4M3UI( sign, 0, 0 ); + goto uiZ; + } + + /* Add the implicit leading 1 to the fraction and shift exp by (127-7)+1 */ + return softfloat_roundPackToE4M3( sign, exp - 121, frac8 | 0x80, saturationMode); +uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/f32_to_e5m2.c b/softfloat/f32_to_e5m2.c new file mode 100644 index 0000000..b03cf53 --- /dev/null +++ b/softfloat/f32_to_e5m2.c @@ -0,0 +1,91 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float8_t f32_to_e5m2( float32_t a, bool saturationMode ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + uint_fast16_t frac8; + struct commonNaN commonNaN; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF32UI( uiA ); + exp = expF32UI( uiA ); + frac = fracF32UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( exp == 0xFF ) { + if ( frac ) { // nan + softfloat_f32UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToE5M2UI( &commonNaN ); + } else { + /* If saturation mode is enabled, convert Inf to the max value of E5M2, otherwise Inf */ + uiZ = saturationMode + ? packToE5M2UI( sign, 0x1E, 0x3 ) + : packToE5M2UI( sign, 0x1F, 0x0 ); + } + goto uiZ; + } + /* Use additional 4 bits for rounding. 
We will have 2+4 bits including the sticky bit*/ + frac8 = frac>>17 | ((frac & 0x1FFFF) != 0); + if ( !(exp | frac8) ) { + uiZ = packToE5M2UI( sign, 0, 0 ); + goto uiZ; + } + + /* Add the implicit leading 1 to the fraction and shift exp by (127-15)+1 */ + return softfloat_roundPackToE5M2( sign, exp - 113, frac8 | 0x40, saturationMode); +uiZ: + uZ.ui = uiZ; + return uZ.f; +} + diff --git a/softfloat/f32_to_i8.c b/softfloat/f32_to_i8.c new file mode 100644 index 0000000..2b7fb0e --- /dev/null +++ b/softfloat/f32_to_i8.c @@ -0,0 +1,58 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "specialize.h" +#include "softfloat.h" + +int_fast8_t f32_to_i8( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + + + if (sig32 > INT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromPosOverflow; + } else if (sig32 < INT8_MIN) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return i8_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_ui8.c b/softfloat/f32_to_ui8.c new file mode 100644 index 0000000..8fdc8fc --- /dev/null +++ b/softfloat/f32_to_ui8.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "specialize.h" +#include "softfloat.h" + +uint_fast8_t f32_to_ui8( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t old_flags = softfloat_exceptionFlags; + + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT8_MAX) { + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; + return ui8_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/internals.h b/softfloat/internals.h index f397ce5..e01da60 100644 --- a/softfloat/internals.h +++ b/softfloat/internals.h @@ -4,7 +4,7 @@ This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
/*----------------------------------------------------------------------------
| OCP 8-bit floating-point field accessors.
|
|   E4M3  |sign(1)|exp(4)|frac(3)|
|   E5M2  |sign(1)|exp(5)|frac(2)|
|
| `a' is the raw 8-bit encoding.  The packTo*UI macros add the three fields
| together, so a significand that still carries its leading 1 one bit above
| the fraction field carries into the exponent.
*----------------------------------------------------------------------------*/
#define signF8UI( a ) ((bool) ((uint8_t) (a)>>7))
#define signE4M3UI( a ) signF8UI( a )
#define signE5M2UI( a ) signF8UI( a )
#define expE4M3UI( a ) ((int_fast8_t) ((a)>>3) & 0xF)
#define fracE4M3UI( a ) ((a) & 0x7)
#define packToE4M3UI( sign, exp, sig ) (((uint8_t) (sign)<<7) + ((uint16_t) (exp)<<3) + (sig))
#define expE5M2UI( a ) ((int_fast8_t) ((a)>>2) & 0x1F)
#define fracE5M2UI( a ) ((a) & 0x3)
#define packToE5M2UI( sign, exp, sig ) (((uint8_t) (sign)<<7) + ((uint16_t) (exp)<<2) + (sig))

/*----------------------------------------------------------------------------
| Special-value classification.
|
|   |     | E4M3       | E5M2                 |
|   | Inf | N/A        | S.11111.00           |
|   | NaN | S.1111.111 | S.11111.{01, 10, 11} |
|
| The E5M2 exponent field is bits 6..2 (mask 0x7C) and the fraction field is
| bits 1..0 (mask 0x03).  Earlier revisions tested only four exponent bits
| (0x78) and used 0x11 as the fraction mask, which classified the maximum
| finite value and infinity as NaN and NaNs as infinity.
*----------------------------------------------------------------------------*/
#define isNaNE4M3UI( a ) ((~(a) & 0x7F) == 0)
#define isInfE5M2UI( a ) (((a) & 0x7F) == 0x7C)
#define isNaNE5M2UI( a ) (((~(a) & 0x7C) == 0) && (((a) & 0x03) != 0))
); +float8_t softfloat_roundPackToE5M2( bool, int_fast16_t, uint_fast16_t, bool ); +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ #define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15)) #define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF) #define fracBF16UI( a ) ((a) & 0x07F) @@ -169,6 +205,28 @@ float64_t /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ +/*this is for bf16 + *b16 |sign(1)|exp(8)|frac(7)| + *f32 |sign(1)|exp(8)|fraction(23 bits)| + */ +#define F32_EXP_BITS 8 +#define F32_EXP_BIAS ((1 << (F32_EXP_BITS - 1)) - 1) +#define F32_SIG_BITS 23 +#define F32_EXP_MASK (((uint32_t)1 << F32_EXP_BITS) - 1) +#define F32_SIG_MASK (((uint32_t)1 << F32_SIG_BITS) - 1) +#define BF16_SIG_BITS 7 +#define BF16_EXP_BIAS F32_EXP_BIAS +#define BF16_IMPLICIT_ONE (1 << BF16_SIG_BITS) + +#define SIG(n) (n.v & 0x7F) +#define EXP_T(n) ((n.v >>7) & (F32_EXP_MASK)) +#define SIGN(n) (n.v >> 15) +#define SPECIAL(n) (EXP_T(n) == F32_EXP_MASK) +#define INF(n) (SPECIAL(n) && SIG(n)==0) +#define NAN_T(n) (SPECIAL(n) && SIG(n)!=0) + + + struct exp32_sig64 { int_fast32_t exp; uint64_t sig; }; struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t ); diff --git a/softfloat/s_normSubnormalE4M3Sig.c b/softfloat/s_normSubnormalE4M3Sig.c new file mode 100644 index 0000000..7475202 --- /dev/null +++ b/softfloat/s_normSubnormalE4M3Sig.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "internals.h" + +struct exp8_sig8 softfloat_normSubnormalE4M3Sig( uint_fast8_t sig ) +{ + int_fast8_t shiftDist; + struct exp8_sig8 z; + + shiftDist = softfloat_countLeadingZeros8[(uint8_t) sig] - 4; + z.exp = 1 - shiftDist; + z.sig = sig<<shiftDist; + return z; + +} + diff --git a/softfloat/s_normSubnormalE5M2Sig.c b/softfloat/s_normSubnormalE5M2Sig.c new file mode 100644 index 0000000..af1316d --- /dev/null +++ b/softfloat/s_normSubnormalE5M2Sig.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "internals.h" + +struct exp8_sig8 softfloat_normSubnormalE5M2Sig( uint_fast8_t sig ) +{ + int_fast8_t shiftDist; + struct exp8_sig8 z; + + shiftDist = ( sig & 0x2 ) ? 1 : 2; + z.exp = 1 - shiftDist; + z.sig = sig<<shiftDist; + return z; + +} diff --git a/softfloat/s_roundPackToE4M3.c b/softfloat/s_roundPackToE4M3.c new file mode 100644 index 0000000..01b4fdb --- /dev/null +++ b/softfloat/s_roundPackToE4M3.c @@ -0,0 +1,119 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "softfloat.h" +#include "specialize.h" + +float8_t softfloat_roundPackToE4M3( bool sign, int_fast16_t exp, uint_fast16_t sig, bool saturationMode ) +{ /* Rounds sig according to softfloat_roundingMode and packs sign, exp and the rounded sig into an 8-bit E4M3 pattern. The low 4 bits of sig are the rounding bits. On overflow the result is packToE4M3UI( sign, 0xF, 0x6 ) when saturationMode is set or the rounding increment is zero, and the default E4M3 NaN otherwise. */ + uint_fast8_t roundingMode; + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + bool isTiny; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundingMode = softfloat_roundingMode; + roundNearEven = (roundingMode == softfloat_round_near_even); + roundIncrement = 0x8; /* increment used for near_even and near_maxMag; every other mode is resolved just below */ + if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { + roundIncrement = + (roundingMode + == (sign ? softfloat_round_min : softfloat_round_max)) + ? 
0xF + : 0; /* 0xF when the mode is round_min with sign set or round_max with sign clear, otherwise 0 */ + } + roundBits = sig & 0xF; /* the 4 bits that the final right shift by 4 discards */ + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0xE <= (unsigned int) exp ) { /* the unsigned cast makes a negative exp compare as a large value, so it also enters this branch */ + /* Here we use the outer if condition to cover both overflow and underflow */ + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + isTiny = + (softfloat_detectTininess == softfloat_tininess_beforeRounding) + || (exp < -1) || (sig + roundIncrement < 0x100); + sig = softfloat_shiftRightJam32( sig, -exp ); /* exp is negative here, so the shift distance passed is positive */ + exp = 0; + roundBits = sig & 0xF; /* recomputed from the shifted significand */ + if ( isTiny && roundBits ) { + softfloat_raiseFlags( softfloat_flag_underflow ); + } + } else if ( 0xE < exp || (sig + roundIncrement >= (0xF0 + roundNearEven)) ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + /* roundNearEven needs a further check of tiebreaker since the threshold is odd */ + softfloat_raiseFlags( + softfloat_flag_overflow | softfloat_flag_inexact ); + /* If saturation mode is enabled, convert to the max value of E4M3, otherwise NaN. */ + uiZ = (saturationMode || !roundIncrement) + ? packToE4M3UI( sign, 0xF, 0x6 ) + : softfloat_commonNaNToE4M3UI(&commonNaN); /* NOTE(review): no local named commonNaN is declared in this function; presumably this only compiles because softfloat_commonNaNToE4M3UI is a macro that ignores its argument, TODO confirm */ + goto uiZ; + + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>4; /* apply the increment, then drop the 4 rounding bits */ + if ( roundBits ) { + softfloat_exceptionFlags |= softfloat_flag_inexact; +#ifdef SOFTFLOAT_ROUND_ODD + if ( roundingMode == softfloat_round_odd ) { + sig |= 1; + goto packReturn; + } +#endif + } + sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven); /* clears the low bit only when roundBits == 8 and the mode is near_even */ + if ( ! 
sig ) exp = 0; /* a rounded significand of zero is packed with exponent field 0 */ + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + packReturn: + uiZ = packToE4M3UI( sign, exp, sig ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/s_roundPackToE5M2.c b/softfloat/s_roundPackToE5M2.c new file mode 100644 index 0000000..731c012 --- /dev/null +++ b/softfloat/s_roundPackToE5M2.c @@ -0,0 +1,117 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2025 The Regents of the University of California. All rights +reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "softfloat.h" + +float8_t softfloat_roundPackToE5M2( bool sign, int_fast16_t exp, uint_fast16_t sig, bool saturationMode ) +{ /* Rounds sig according to softfloat_roundingMode and packs sign, exp and the rounded sig into an 8-bit E5M2 pattern. The low 4 bits of sig are the rounding bits. On overflow the result is packToE5M2UI( sign, 0x1E, 0x3 ) when saturationMode is set; otherwise it is packToE5M2UI( sign, 0x1F, 0x0 ), reduced by 1 when the rounding increment is zero. */ + uint_fast8_t roundingMode; + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + bool isTiny; + uint_fast8_t uiZ; + union ui8_f8 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundingMode = softfloat_roundingMode; + roundNearEven = (roundingMode == softfloat_round_near_even); + roundIncrement = 0x8; /* increment used for near_even and near_maxMag; every other mode is resolved just below */ + if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { + roundIncrement = + (roundingMode + == (sign ? softfloat_round_min : softfloat_round_max)) + ? 
0xF + : 0; /* 0xF when the mode is round_min with sign set or round_max with sign clear, otherwise 0 */ + } + roundBits = sig & 0xF; /* the 4 bits that the final right shift by 4 discards */ + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0x1D <= (unsigned int) exp ) { /* the unsigned cast makes a negative exp compare as a large value, so it also enters this branch */ + /* Here we use the outer if condition to cover both overflow and underflow */ + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + isTiny = + (softfloat_detectTininess == softfloat_tininess_beforeRounding) + || (exp < -1) || (sig + roundIncrement < 0x80); + sig = softfloat_shiftRightJam32( sig, -exp ); /* exp is negative here, so the shift distance passed is positive */ + exp = 0; + roundBits = sig & 0xF; /* recomputed from the shifted significand */ + if ( isTiny && roundBits ) { + softfloat_raiseFlags( softfloat_flag_underflow ); + } + } else if ( (0x1D < exp) || (0x80 <= sig + roundIncrement) ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + softfloat_raiseFlags( + softfloat_flag_overflow | softfloat_flag_inexact ); + /* If saturation mode is enabled, convert to the max value of E5M2, otherwise Inf */ + uiZ = saturationMode + ? packToE5M2UI( sign, 0x1E, 0x3 ) + : packToE5M2UI( sign, 0x1F, 0x0 ) - ! roundIncrement; /* when roundIncrement is zero, 1 is subtracted from the packed value; presumably this steps infinity down to the largest finite magnitude, TODO confirm against packToE5M2UI */ + goto uiZ; + + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>4; /* apply the increment, then drop the 4 rounding bits */ + if ( roundBits ) { + softfloat_exceptionFlags |= softfloat_flag_inexact; +#ifdef SOFTFLOAT_ROUND_ODD + if ( roundingMode == softfloat_round_odd ) { + sig |= 1; + goto packReturn; + } +#endif + } + sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven); /* clears the low bit only when roundBits == 8 and the mode is near_even */ + if ( ! 
sig ) exp = 0; /* a rounded significand of zero is packed with exponent field 0 */ + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + packReturn: + uiZ = packToE5M2UI( sign, exp, sig ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index 9c57404..269434b 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -4,7 +4,7 @@ This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. -Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2025 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -122,6 +122,10 @@ float128_t ui64_to_f128( uint64_t ); #endif void ui64_to_extF80M( uint64_t, extFloat80_t * ); void ui64_to_f128M( uint64_t, float128_t * ); +bfloat16_t e4m3_to_bf16( e4m3_t ); +float16_t e4m3_to_f16( e4m3_t ); +float16_t e5m2_to_f16( e5m2_t ); +bfloat16_t e5m2_to_bf16( e5m2_t ); bfloat16_t i32_to_bf16( int32_t ); float16_t i32_to_f16( int32_t ); float32_t i32_to_f32( int32_t ); @@ -194,6 +198,8 @@ uint_fast8_t bf16_to_ui8( bfloat16_t, uint_fast8_t, bool ); uint_fast32_t bf16_to_ui32( bfloat16_t, uint_fast8_t, bool ); int_fast8_t bf16_to_i8( bfloat16_t, uint_fast8_t, bool ); int_fast32_t bf16_to_i32( bfloat16_t, uint_fast8_t, bool ); +e4m3_t bf16_to_e4m3( bfloat16_t, bool ); +e5m2_t bf16_to_e5m2( bfloat16_t, bool ); float32_t bf16_to_f32( bfloat16_t ); float64_t bf16_to_f64( bfloat16_t ); bfloat16_t bf16_add( bfloat16_t, bfloat16_t ); @@ -225,6 +231,8 @@ uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool ); int_fast32_t f32_to_i32_r_minMag( float32_t, bool ); int_fast64_t f32_to_i64_r_minMag( float32_t, bool ); +e4m3_t f32_to_e4m3( float32_t, bool ); +e5m2_t f32_to_e5m2( float32_t, bool ); bfloat16_t 
f32_to_bf16( float32_t ); float16_t f32_to_f16( float32_t ); float64_t f32_to_f64( float32_t ); diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index 899f00a..44a5b81 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -1,6 +1,10 @@ softfloat_subproject_deps = softfloat_c_srcs = \ + e4m3_to_f16.c \ + e5m2_to_f16.c \ + e4m3_to_bf16.c \ + e5m2_to_bf16.c \ bf16_add.c \ bf16_div.c \ bf16_mul.c \ @@ -9,6 +13,8 @@ softfloat_c_srcs = \ bf16_sub.c \ bf16_cmp.c \ bf16_classify.c \ + bf16_to_e4m3.c \ + bf16_to_e5m2.c \ bf16_to_f32.c \ bf16_to_f64.c \ bf16_to_i8.c \ @@ -61,6 +67,8 @@ softfloat_c_srcs = \ f16_to_f128.c \ f16_to_f32.c \ f16_to_f64.c \ + f16_to_e4m3.c \ + f16_to_e5m2.c \ f16_to_i8.c \ f16_to_i16.c \ f16_to_i32.c \ @@ -89,6 +97,8 @@ softfloat_c_srcs = \ f32_roundToInt.c \ f32_sqrt.c \ f32_sub.c \ + f32_to_e4m3.c \ + f32_to_e5m2.c \ f32_to_bf16.c \ f32_to_f128.c \ f32_to_f16.c \ @@ -103,6 +113,8 @@ softfloat_c_srcs = \ f32_to_ui32_r_minMag.c \ f32_to_ui64.c \ f32_to_ui64_r_minMag.c \ + f32_to_i8.c \ + f32_to_ui8.c \ f64_add.c \ f64_classify.c \ f64_div.c \ @@ -185,6 +197,8 @@ softfloat_c_srcs = \ s_normRoundPackToF32.c \ s_normRoundPackToF64.c \ s_normSubnormalF128Sig.c \ + s_normSubnormalE4M3Sig.c \ + s_normSubnormalE5M2Sig.c \ s_normSubnormalF16Sig.c \ s_normSubnormalF32Sig.c \ s_normSubnormalF64Sig.c \ @@ -199,6 +213,8 @@ softfloat_c_srcs = \ s_roundMToUI64.c \ s_roundPackMToI64.c \ s_roundPackMToUI64.c \ + s_roundPackToE4M3.c \ + s_roundPackToE5M2.c \ s_roundPackToBF16.c \ s_roundPackToF128.c \ s_roundPackToF16.c \ diff --git a/softfloat/softfloat_types.h b/softfloat/softfloat_types.h index 34c518f..9c69623 100644 --- a/softfloat/softfloat_types.h +++ b/softfloat/softfloat_types.h @@ -4,8 +4,8 @@ This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. -Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of -California. All rights reserved. 
+Copyright 2011, 2012, 2013, 2014, 2015, 2017, 2025 The Regents of the University +of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -40,19 +40,27 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <stdint.h> /*---------------------------------------------------------------------------- -| Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point -| arguments and results to/from functions. These types must be exactly +| Types used to pass 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit floating-point +| arguments and results to/from functions. These types must be exactly 8 bits, | 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively. Where a | platform has "native" support for IEEE-Standard floating-point formats, | the types below may, if desired, be defined as aliases for the native types | (typically 'float' and 'double', and possibly 'long double'). *----------------------------------------------------------------------------*/ +typedef struct { uint8_t v; } float8_t; typedef struct { uint16_t v; } float16_t; typedef float16_t bfloat16_t; typedef struct { uint32_t v; } float32_t; typedef struct { uint64_t v; } float64_t; typedef struct { uint64_t v[2]; } float128_t; + +/*---------------------------------------------------------------------------- +| OCP 8-bit floating-point (OFP8) types. +*----------------------------------------------------------------------------*/ +typedef float8_t e4m3_t; +typedef float8_t e5m2_t; + /*---------------------------------------------------------------------------- | The format of an 80-bit extended floating-point number in memory. 
This | structure must contain a 16-bit field named 'signExp' and a 64-bit field diff --git a/softfloat/specialize.h b/softfloat/specialize.h index adbc081..7236720 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -4,8 +4,8 @@ This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. -Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of -California. All rights reserved. +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2025 The Regents of the University +of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -94,6 +94,12 @@ extern "C" { struct commonNaN { char _unused; }; /*---------------------------------------------------------------------------- +| The bit pattern for a default generated 8-bit floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNE4M3 0x7F +#define defaultNaNE5M2 0x7F + +/*---------------------------------------------------------------------------- | The bit pattern for a default generated 16-bit floating-point NaN. *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0x7E00 @@ -118,6 +124,14 @@ struct commonNaN { char _unused; }; #define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF)) /*---------------------------------------------------------------------------- +| Assuming `uiA' has the bit pattern of an 8-bit floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by `zPtr'. 
+*----------------------------------------------------------------------------*/ +#define softfloat_E4M3UIToCommonNaN( uiA, zPtr ) (void) (uiA), (void) (zPtr) +#define softfloat_E5M2UIToCommonNaN( uiA, zPtr ) (void) (uiA), (void) (zPtr) + +/*---------------------------------------------------------------------------- | Assuming `uiA' has the bit pattern of a 16-bit floating-point NaN, converts | this NaN to the common NaN form, and stores the resulting common NaN at the | location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid @@ -137,6 +151,13 @@ struct commonNaN { char _unused; }; | Converts the common NaN pointed to by `aPtr' into a binary 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. *----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToE4M3UI( aPtr ) ((uint_fast8_t) defaultNaNE4M3) +#define softfloat_commonNaNToE5M2UI( aPtr ) ((uint_fast8_t) defaultNaNE5M2) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by `aPtr' into a binary 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ #define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) /*---------------------------------------------------------------------------- @@ -146,6 +167,11 @@ struct commonNaN { char _unused; }; #define softfloat_commonNaNToF16UI( aPtr ) ((uint_fast16_t) defaultNaNF16UI) /*---------------------------------------------------------------------------- +| The bit pattern for a default generated BF16 NaN. 
+*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + +/*---------------------------------------------------------------------------- | Interpreting `uiA' and `uiB' as the bit patterns of two 16-bit floating- | point values, at least one of which is a NaN, returns the bit pattern of | the combined NaN result. If either `uiA' or `uiB' has the pattern of a diff --git a/spike_main/spike.cc b/spike_main/spike.cc index b8a1b5c..5617a82 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -71,6 +71,7 @@ static void help(int exit_code = 1) fprintf(stderr, " --real-time-clint Increment clint time at real-time rate\n"); fprintf(stderr, " --triggers=<n> Number of supported triggers [default 4]\n"); fprintf(stderr, " --dm-progsize=<words> Progsize for the debug module [default 2]\n"); + fprintf(stderr, " --dm-datacount=<n> Number of data registers available for the debug module [default 2]\n"); fprintf(stderr, " --dm-sba=<bits> Debug system bus access supports up to " "<bits> wide accesses [default 0]\n"); fprintf(stderr, " --dm-auth Debug module requires debugger to authenticate\n"); @@ -83,6 +84,7 @@ static void help(int exit_code = 1) fprintf(stderr, " --dm-no-abstract-fpr Debug module won't support abstract FPR access\n"); fprintf(stderr, " --dm-no-halt-groups Debug module won't support halt groups\n"); fprintf(stderr, " --dm-no-impebreak Debug module won't support implicit ebreak in program buffer\n"); + fprintf(stderr, " --dm-no-abstractauto Debug module won't support the abstractauto register\n"); fprintf(stderr, " --blocksz=<size> Cache block size (B) for CMO operations(powers of 2) [default 64]\n"); fprintf(stderr, " --instructions=<n> Stop after n instructions\n"); @@ -413,6 +415,8 @@ int main(int argc, char** argv) }); parser.option(0, "dm-progsize", 1, [&](const char* s){dm_config.progbufsize = atoul_safe(s);}); + parser.option(0, "dm-datacount", 1, + [&](const char* s){dm_config.datacount = 
atoul_safe(s);}); parser.option(0, "dm-no-impebreak", 0, [&](const char UNUSED *s){dm_config.support_impebreak = false;}); parser.option(0, "dm-sba", 1, @@ -431,6 +435,8 @@ int main(int argc, char** argv) [&](const char UNUSED *s){dm_config.support_abstract_fpr_access = false;}); parser.option(0, "dm-no-halt-groups", 0, [&](const char UNUSED *s){dm_config.support_haltgroups = false;}); + parser.option(0, "dm-no-abstractauto", 0, + [&](const char UNUSED *s){dm_config.support_abstractauto = false;}); parser.option(0, "log-commits", 0, [&](const char UNUSED *s){log_commits = true;}); parser.option(0, "log", 1, @@ -451,6 +457,7 @@ int main(int argc, char** argv) min_blocksz, max_blocksz); exit(-1); } + cfg.cache_blocksz = blocksz; }); parser.option(0, "instructions", 1, [&](const char* s){ instructions = strtoull(s, 0, 0); @@ -541,7 +548,6 @@ int main(int argc, char** argv) if (dc) s.get_core(i)->get_mmu()->register_memtracer(&*dc); for (auto e : extensions) s.get_core(i)->register_extension(e()); - s.get_core(i)->get_mmu()->set_cache_blocksz(blocksz); } s.set_debug(debug); |
