87 files changed, 1606 insertions, 720 deletions
diff --git a/Makefile.in b/Makefile.in
index d2d40e1..66d087b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -93,7 +93,7 @@ VPATH := $(addprefix $(src_dir)/, $(sprojs_enabled))
 # These all appear on the command line, from lowest precedence to
 # highest.
-default-CFLAGS := -DPREFIX=\"$(prefix)\" -Wall -Wno-unused -Wno-nonportable-include-path -g -O2 -fPIC
+default-CFLAGS := -DPREFIX=\"$(prefix)\" -Wall -Wno-nonportable-include-path -g -O2 -fPIC
 default-CXXFLAGS := $(default-CFLAGS) -std=c++2a
 mcppbs-CPPFLAGS := @CPPFLAGS@
@@ -78,6 +78,7 @@ Spike supports the following RISC-V ISA features:
   - Zicond extension, v1.0
   - Zilsd extension, v1.0
   - Zclsd extension, v1.0
+  - Zimop extension, v1.0
 Versioning and APIs
 -------------------
diff --git a/ci-tests/atomics.c b/ci-tests/atomics.c
new file mode 100644
index 0000000..ece5a38
--- /dev/null
+++ b/ci-tests/atomics.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdatomic.h>
+
+atomic_int acnt = 0;
+atomic_int bcnt = 0;
+
+int foo() {
+  for(int n = 0; n < 1000; ++n) {
+    ++acnt;
+    if(acnt % 10 == 0)
+      ++bcnt;
+  }
+  return acnt;
+}
+
+int main(void) {
+  int acnt = foo();
+  printf("First atomic counter is %u, second is %u\n", acnt, bcnt);
+  return 0;
+}
diff --git a/ci-tests/build-spike b/ci-tests/build-spike
index 8774b5e..0a1b315 100755
--- a/ci-tests/build-spike
+++ b/ci-tests/build-spike
@@ -8,7 +8,7 @@ rm -rf build
 mkdir build
 cd build
 mkdir install
-CXXFLAGS="-Wnon-virtual-dtor" CFLAGS="-Werror -Wignored-qualifiers -Wunused-function -Wunused-parameter -Wunused-variable" $DIR/../configure --prefix=`pwd`/install
+CXXFLAGS="-Wnon-virtual-dtor" CFLAGS="-Werror -Wall -Wextra -Wvla" $DIR/../configure --prefix=`pwd`/install
 make -j"$(nproc 2> /dev/null || sysctl -n hw.ncpu)"
 make check
 make install install-hdrs-list.h
diff --git a/ci-tests/create-ci-binary-tarball b/ci-tests/create-ci-binary-tarball
index 73a549e..1080d0a 100755
--- a/ci-tests/create-ci-binary-tarball
+++ b/ci-tests/create-ci-binary-tarball
@@ -20,10 +20,16 @@ mkdir -p build/dummycsr && cd "$_"
 riscv64-unknown-elf-gcc -O2 -o customcsr `git rev-parse --show-toplevel`/ci-tests/customcsr.c
 cd -
+mkdir -p build/atomics && cd "$_"
+riscv64-unknown-elf-gcc -O2 -o atomics `git rev-parse --show-toplevel`/ci-tests/atomics.c
+cd -
+
+
 mv build/pk/pk .
 mv build/hello/hello .
 mv build/dummy-slliuw/dummy-slliuw .
 mv build/dummycsr/customcsr .
-tar -cf spike-ci.tar pk hello dummy-slliuw customcsr
+mv build/atomics/atomics .
+tar -cf spike-ci.tar pk hello dummy-slliuw customcsr atomics
-rm pk hello dummy-slliuw customcsr
+rm pk hello dummy-slliuw customcsr atomics
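For context on the output that the updated ci-tests/test-spike later greps for: foo() bumps acnt once per iteration (1000 total) and bcnt on every tenth value of acnt, so the expected counters are 1000 and 100. A host-side C++ sketch of the same counting logic (illustrative only, not part of the change):

    // Illustrative only: the counting logic of ci-tests/atomics.c in host C++.
    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<int> acnt{0}, bcnt{0};
      for (int n = 0; n < 1000; ++n) {
        ++acnt;                 // incremented every iteration -> 1000
        if (acnt % 10 == 0)
          ++bcnt;               // hit on every tenth value    -> 100
      }
      assert(acnt == 1000 && bcnt == 100);
      return 0;
    }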
diff --git a/ci-tests/custom-csr.cc b/ci-tests/custom-csr.cc
index 90fef01..857c9c3 100644
--- a/ci-tests/custom-csr.cc
+++ b/ci-tests/custom-csr.cc
@@ -20,15 +20,15 @@ class dummycsr_t: public csr_t {
 // dummy extension with dummy CSRs. Nice.
 struct xdummycsr_t : public extension_t {
-  const char *name() { return "dummycsr"; }
+  const char *name() const override { return "dummycsr"; }
   xdummycsr_t() {}
-  std::vector<insn_desc_t> get_instructions() override {
+  std::vector<insn_desc_t> get_instructions(const processor_t &) override {
     return {};
   }
-  std::vector<disasm_insn_t *> get_disasms() override {
+  std::vector<disasm_insn_t *> get_disasms(const processor_t *) override {
     return {};
   }
diff --git a/ci-tests/test-customext.cc b/ci-tests/test-customext.cc
index acbb066..77c739f 100644
--- a/ci-tests/test-customext.cc
+++ b/ci-tests/test-customext.cc
@@ -26,11 +26,11 @@ static reg_t do_nop4([[maybe_unused]] processor_t *p,
 // dummy extension that uses the same prefix as standard zba extension
 struct xslliuw_dummy_t : public extension_t {
-  const char *name() { return "dummyslliuw"; }
+  const char *name() const { return "dummyslliuw"; }
   xslliuw_dummy_t() {}
-  std::vector<insn_desc_t> get_instructions() {
+  std::vector<insn_desc_t> get_instructions(const processor_t &) {
     std::vector<insn_desc_t> insns;
     insns.push_back(insn_desc_t{MATCH_SLLI_UW, MASK_SLLI_UW, do_nop4, do_nop4, do_nop4, do_nop4, do_nop4, do_nop4, do_nop4,
@@ -38,7 +38,7 @@ struct xslliuw_dummy_t : public extension_t {
     return insns;
   }
-  std::vector<disasm_insn_t *> get_disasms() {
+  std::vector<disasm_insn_t *> get_disasms(const processor_t *) {
     std::vector<disasm_insn_t *> insns;
     insns.push_back(new disasm_insn_t("dummy_slliuw", MATCH_SLLI_UW, MASK_SLLI_UW, {&xrd, &xrs1, &shamt}));
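The custom-extension updates above (custom-csr.cc, test-customext.cc, and cflush.cc/dummy_rocc.cc below) all track the same interface change: extension_t::name() is now const, and get_instructions()/get_disasms() receive the processor they are being registered on. A minimal sketch of an out-of-tree extension written against the updated signatures; the class name is a placeholder and, like the xdummycsr_t test above, it simply returns empty tables:

    // Sketch only: mirrors the new virtual signatures shown in the hunks above.
    #include "extension.h"   // header providing extension_t, insn_desc_t, disasm_insn_t

    struct xexample_t : public extension_t {
      const char *name() const override { return "xexample"; }

      // Now receives the processor it is registered on, so a decode table
      // could in principle vary per hart.
      std::vector<insn_desc_t> get_instructions(const processor_t &) override {
        return {};
      }

      // Likewise now takes a processor pointer.
      std::vector<disasm_insn_t *> get_disasms(const processor_t *) override {
        return {};
      }
    };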
diff --git a/ci-tests/test-spike b/ci-tests/test-spike
index 36b748a..6fe5bdb 100755
--- a/ci-tests/test-spike
+++ b/ci-tests/test-spike
@@ -11,6 +11,7 @@ cd run
 wget https://github.com/riscv-software-src/riscv-isa-sim/releases/download/dummy-tag-for-ci-storage/spike-ci.tar
 tar xf spike-ci.tar
 time ../install/bin/spike --isa=rv64gc pk hello | grep "Hello, world! Pi is approximately 3.141588."
+../install/bin/spike --log-commits --isa=rv64gc pk atomics | grep "First atomic counter is 1000, second is 100"
 # check that including sim.h in an external project works
 g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o test-libriscv
diff --git a/customext/cflush.cc b/customext/cflush.cc
index 485716a..c090e88 100644
--- a/customext/cflush.cc
+++ b/customext/cflush.cc
@@ -19,23 +19,23 @@ static reg_t custom_cflush(processor_t* p, insn_t insn, reg_t pc)
 class cflush_t : public extension_t
 {
  public:
-  const char* name() { return "cflush"; }
+  const char* name() const override { return "cflush"; }
   cflush_t() {}
-  std::vector<insn_desc_t> get_instructions() {
-    std::vector<insn_desc_t> insns;
-    insns.push_back((insn_desc_t){0xFC000073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush});
-    insns.push_back((insn_desc_t){0xFC200073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush});
-    insns.push_back((insn_desc_t){0xFC100073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush});
+  std::vector<insn_desc_t> get_instructions(const processor_t &) override {
+    std::vector<insn_desc_t> insns = {
+      {0xFC000073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush},
+      {0xFC200073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush},
+      {0xFC100073, 0xFFF07FFF, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush, custom_cflush}};
     return insns;
   }
-  std::vector<disasm_insn_t*> get_disasms() {
-    std::vector<disasm_insn_t*> insns;
-    insns.push_back(new disasm_insn_t("cflush.d.l1", 0xFC000073, 0xFFF07FFF, {&xrs1}));
-    insns.push_back(new disasm_insn_t("cdiscard.d.l1", 0xFC200073, 0xFFF07FFF, {&xrs1}));
-    insns.push_back(new disasm_insn_t("cflush.i.l1", 0xFC100073, 0xFFF07FFF, {&xrs1}));
+  std::vector<disasm_insn_t *> get_disasms(const processor_t *) override {
+    std::vector<disasm_insn_t*> insns = {
+      new disasm_insn_t("cflush.d.l1", 0xFC000073, 0xFFF07FFF, {&xrs1}),
+      new disasm_insn_t("cdiscard.d.l1", 0xFC200073, 0xFFF07FFF, {&xrs1}),
+      new disasm_insn_t("cflush.i.l1", 0xFC100073, 0xFFF07FFF, {&xrs1})};
     return insns;
   }
 };
diff --git a/customext/dummy_rocc.cc b/customext/dummy_rocc.cc
index 8c051fa..6669887 100644
--- a/customext/dummy_rocc.cc
+++ b/customext/dummy_rocc.cc
@@ -5,14 +5,14 @@ class dummy_rocc_t : public rocc_t
 {
  public:
-  const char* name() { return "dummy_rocc"; }
+  const char* name() const { return "dummy_rocc"; }
-  reg_t custom0(rocc_insn_t insn, reg_t xs1, reg_t UNUSED xs2)
+  reg_t custom0(processor_t *p, rocc_insn_t insn, reg_t xs1, reg_t UNUSED xs2)
   {
     reg_t prev_acc = acc[insn.rs2];
     if (insn.rs2 >= num_acc)
-      illegal_instruction();
+      illegal_instruction(*p);
     switch (insn.funct)
     {
@@ -28,7 +28,7 @@ class dummy_rocc_t : public rocc_t
       acc[insn.rs2] += xs1;
       break;
     default:
-      illegal_instruction();
+      illegal_instruction(*p);
     }
     return prev_acc; // in all cases, xd <- previous value of acc[rs2]
diff --git a/disasm/disasm.cc b/disasm/disasm.cc
index 63fd12a..49f2794 100644
--- a/disasm/disasm.cc
+++ b/disasm/disasm.cc
@@ -706,26 +706,6 @@ static void NOINLINE add_sfence_insn(disassembler_t* d, const char* name, uint32
 d->add_insn(new disasm_insn_t(name, match, mask, {&xrs1, &xrs2}));
 }
-static void NOINLINE add_pitype3_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
-{
-  d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs1, &p_imm3}));
-}
-
-static void NOINLINE add_pitype4_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
-{
-  d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs1, &p_imm4}));
-}
-
-static void NOINLINE add_pitype5_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
-{
-  d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs1, &p_imm5}));
-}
-
-static void NOINLINE add_pitype6_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
-{
-  d->add_insn(new disasm_insn_t(name, match, mask, {&xrd, &xrs1, &p_imm6}));
-}
-
 static void NOINLINE add_vector_v_insn(disassembler_t* d, const char* name, uint32_t match, uint32_t mask)
 {
   d->add_insn(new disasm_insn_t(name, match, mask, {&vd, &vs2, opt, &vm}));
diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc
index bd2bbc9..24eb5f2 100644
--- a/disasm/isa_parser.cc
+++ b/disasm/isa_parser.cc
@@ -1,11 +1,12 @@
 #include "isa_parser.h"
+#include <cstring>
 #include <stdexcept>
 static std::string strtolower(const char* str)
 {
-  std::string res;
-  for (const char *r = str; *r; r++)
-    res += std::tolower(*r);
+  std::string res(str);
+  for (char &c : res)
+    c = std::tolower(c);
   return res;
 }
@@ -69,7 +70,7 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
       // G = IMAFD_Zicsr_Zifencei, but Spike includes the latter two
      // unconditionally, so they need not be explicitly added here.
      isa_string = isa_string.substr(0, 4) + "imafd" + isa_string.substr(5);
-      // Fall through
+      [[fallthrough]];
    case 'i':
      extension_table['I'] = true;
      break;
@@ -97,9 +98,9 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
    switch (*p) {
      case 'v': vlen = 128; elen = 64; zvf = true; zvd = true;
-        // even rv32iv implies double float
+        [[fallthrough]];
      case 'q': extension_table['D'] = true;
-        // Fall through
+        [[fallthrough]];
      case 'd': extension_table['F'] = true;
    }
    extension_table[toupper(*p)] = true;
@@ -139,6 +140,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
      // HINTs encoded in base-ISA instructions are always present.
    } else if (ext_str == "zihintntl") {
      // HINTs encoded in base-ISA instructions are always present.
+    } else if (ext_str == "ziccid") {
+      extension_table[EXT_ZICCID] = true;
+    } else if (ext_str == "ziccif") {
+      // aligned instruction fetch is always atomic in Spike
    } else if (ext_str == "zaamo") {
      extension_table[EXT_ZAAMO] = true;
    } else if (ext_str == "zalrsc") {
@@ -330,6 +335,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
      extension_table[EXT_SSCSRIND] = true;
    } else if (ext_str == "smcntrpmf") {
      extension_table[EXT_SMCNTRPMF] = true;
+    } else if (ext_str == "smcdeleg") {
+      extension_table[EXT_SMCDELEG] = true;
+    } else if (ext_str == "ssccfg") {
+      extension_table[EXT_SSCCFG] = true;
    } else if (ext_str == "zimop") {
      extension_table[EXT_ZIMOP] = true;
    } else if (ext_str == "zcmop") {
@@ -381,6 +390,14 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
      extension_table[EXT_SSDBLTRP] = true;
    } else if (ext_str == "smdbltrp") {
      extension_table[EXT_SMDBLTRP] = true;
+    } else if (ext_str == "smaia") {
+      extension_table[EXT_SMAIA] = true;
+      extension_table[EXT_SSAIA] = true;
+      extension_table[EXT_SMCSRIND] = true;
+      extension_table[EXT_SSCSRIND] = true;
+    } else if (ext_str == "ssaia") {
+      extension_table[EXT_SSAIA] = true;
+      extension_table[EXT_SSCSRIND] = true;
    } else if (ext_str[0] == 'x') {
      extension_table['X'] = true;
      if (ext_str.size() == 1) {
diff --git a/fdt/fdt.mk.in b/fdt/fdt.mk.in
index 64d06ac..accc080 100644
--- a/fdt/fdt.mk.in
+++ b/fdt/fdt.mk.in
@@ -16,4 +16,4 @@ fdt_c_srcs = \
 fdt_addresses.c \
 fdt_overlay.c \
-fdt_CFLAGS = -I$(src_dir)/fdt
+fdt_CFLAGS = -I$(src_dir)/fdt -Wno-sign-compare
diff --git a/fesvr/byteorder.h b/fesvr/byteorder.h
index 71ce515..2147f96 100644
--- a/fesvr/byteorder.h
+++ b/fesvr/byteorder.h
@@ -16,8 +16,8 @@ static inline int32_t swap(int32_t n) { return int32_t(swap(uint32_t(n))); }
 static inline int64_t swap(int64_t n) { return int64_t(swap(uint64_t(n))); }
 #ifdef HAVE_INT128
-typedef __int128 int128_t;
-typedef unsigned __int128 uint128_t;
+__extension__ typedef __int128 int128_t;
+__extension__ typedef unsigned __int128 uint128_t;
 static inline uint128_t swap(uint128_t n) { return (uint128_t(swap(uint64_t(n))) << 64) | swap(uint64_t(n >> 64)); }
 static inline int128_t swap(int128_t n) { return int128_t(swap(uint128_t(n))); }
 #endif
diff --git a/fesvr/dtm.cc b/fesvr/dtm.cc
index 0f810e7..a0c3254 100644
--- a/fesvr/dtm.cc
+++ b/fesvr/dtm.cc
@@ -51,6 +51,9 @@
   } \
 }
+#define MAX_DATA_WORDS (1 << DM_ABSTRACTCS_DATACOUNT_LENGTH)
+#define MAX_PROG_WORDS (1 << DM_ABSTRACTCS_PROGBUFSIZE_LENGTH)
+
 uint32_t dtm_t::do_command(dtm_t::req r)
 {
   req_buf = r;
@@ -61,17 +64,20 @@ uint32_t dtm_t::do_command(dtm_t::req r)
 uint32_t dtm_t::read(uint32_t addr)
 {
-  return do_command((req){addr, 1, 0});
+  req r = {addr, 1, 0};
+  return do_command(r);
 }
 uint32_t dtm_t::write(uint32_t addr, uint32_t data)
 {
-  return do_command((req){addr, 2, data});
+  req r = {addr, 2, data};
+  return do_command(r);
 }
 void dtm_t::nop()
 {
-  do_command((req){0, 0, 0});
+  req r = {0, 0, 0};
+  do_command(r);
 }
 void dtm_t::select_hart(int hartsel) {
@@ -104,7 +110,7 @@ void dtm_t::halt(int hartsel)
     read(DM_DMSTATUS);
   }
-  int dmcontrol = DM_DMCONTROL_HALTREQ | DM_DMCONTROL_DMACTIVE;
+  reg_t dmcontrol = DM_DMCONTROL_HALTREQ | DM_DMCONTROL_DMACTIVE;
   dmcontrol = set_field(dmcontrol, DM_DMCONTROL_HASEL, hartsel);
   write(DM_DMCONTROL, dmcontrol);
   int dmstatus;
@@ -142,7 +148,7 @@ void dtm_t::resume(int hartsel)
 uint64_t dtm_t::save_reg(unsigned regno)
 {
-  uint32_t data[xlen/(8*4)];
+  uint32_t data[MAX_DATA_WORDS];
   uint32_t command = AC_ACCESS_REGISTER_TRANSFER |
    AC_AR_SIZE(xlen) | AC_AR_REGNO(regno);
   RUN_AC_OR_DIE(command, 0, 0, data, xlen / (8*4));
@@ -155,7 +161,7 @@ uint64_t dtm_t::save_reg(unsigned regno)
 void dtm_t::restore_reg(unsigned regno, uint64_t val)
 {
-  uint32_t data[xlen/(8*4)];
+  uint32_t data[MAX_DATA_WORDS];
   data[0] = (uint32_t) val;
   if (xlen > 32) {
     data[1] = (uint32_t) (val >> 32);
@@ -174,8 +180,8 @@ uint32_t dtm_t::run_abstract_command(uint32_t command,
     const uint32_t program[], size_t program_n,
     uint32_t data[], size_t data_n)
 {
-  assert(program_n <= ram_words);
-  assert(data_n <= data_words);
+  assert(program_n <= MAX_PROG_WORDS);
+  assert(data_n <= MAX_DATA_WORDS);
   for (size_t i = 0; i < program_n; i++) {
     write(DM_PROGBUF0 + i, program[i]);
@@ -214,8 +220,8 @@ size_t dtm_t::chunk_align()
 void dtm_t::read_chunk(uint64_t taddr, size_t len, void* dst)
 {
-  uint32_t prog[ram_words];
-  uint32_t data[data_words];
+  uint32_t prog[MAX_PROG_WORDS];
+  uint32_t data[MAX_DATA_WORDS];
   uint8_t * curr = (uint8_t*) dst;
@@ -267,8 +273,8 @@ void dtm_t::read_chunk(uint64_t taddr, size_t len, void* dst)
 void dtm_t::write_chunk(uint64_t taddr, size_t len, const void* src)
 {
-  uint32_t prog[ram_words];
-  uint32_t data[data_words];
+  uint32_t prog[MAX_PROG_WORDS];
+  uint32_t data[MAX_DATA_WORDS];
   const uint8_t * curr = (const uint8_t*) src;
@@ -362,8 +368,8 @@ void dtm_t::die(uint32_t cmderr)
 void dtm_t::clear_chunk(uint64_t taddr, size_t len)
 {
-  uint32_t prog[ram_words];
-  uint32_t data[data_words];
+  uint32_t prog[MAX_PROG_WORDS];
+  uint32_t data[MAX_DATA_WORDS];
   halt(current_hart);
   uint64_t s0 = save_reg(S0);
@@ -477,8 +483,8 @@ uint32_t dtm_t::get_xlen()
   uint32_t command = AC_ACCESS_REGISTER_TRANSFER | AC_AR_REGNO(S0);
   uint32_t cmderr;
-  const uint32_t prog[] = {};
-  uint32_t data[] = {};
+  const uint32_t prog[1] = {};
+  uint32_t data[1] = {};
   cmderr = run_abstract_command(command | AC_AR_SIZE(128), prog, 0, data, 0);
   if (cmderr == 0){
@@ -560,11 +566,6 @@ void dtm_t::producer_thread()
   // Poll until the debugger agrees it's enabled.
   while ((read(DM_DMCONTROL) & DM_DMCONTROL_DMACTIVE) == 0) ;
-  // These are checked every time we run an abstract command.
-  uint32_t abstractcs = read(DM_ABSTRACTCS);
-  ram_words = get_field(abstractcs, DM_ABSTRACTCS_PROGBUFSIZE);
-  data_words = get_field(abstractcs, DM_ABSTRACTCS_DATACOUNT);
-
  // These things are only needed for the 'modify_csr' function.
  // That could be re-written to not use these at some performance
  // overhead.
diff --git a/fesvr/dtm.h b/fesvr/dtm.h
index f47b648..03c2f79 100644
--- a/fesvr/dtm.h
+++ b/fesvr/dtm.h
@@ -109,8 +109,6 @@ class dtm_t : public htif_t
   static const int max_idle_cycles = 10000;
-  size_t ram_words;
-  size_t data_words;
   int num_harts;
   int current_hart;
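The dtm.cc/dtm.h hunks above, and the htif.cc/memif.cc hunks that follow, are driven by the new -Wvla/-Wextra CI flags: runtime-sized stack arrays are replaced either by a compile-time bound (MAX_PROG_WORDS/MAX_DATA_WORDS, guarded by asserts) or by a heap-backed std::vector. A schematic sketch of the two patterns, with placeholder names:

    // Illustrative pattern only; kMaxWords and the function names are placeholders.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr size_t kMaxWords = 1 << 4;     // compile-time bound, like MAX_DATA_WORDS

    void fixed_bound_buffer(size_t n_words) {
      assert(n_words <= kMaxWords);          // runtime check replaces the VLA's exact size
      uint32_t data[kMaxWords] = {};
      // ... fill data[0..n_words) ...
      (void)data;
    }

    void heap_backed_buffer(size_t n_bytes) {
      std::vector<uint8_t> chunk(n_bytes, 0); // what htif.cc and memif.cc switch to
      // ... pass chunk.data() / &chunk[0] where a raw pointer is needed ...
      (void)chunk;
    }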
diff --git a/fesvr/htif.cc b/fesvr/htif.cc
index a2477c1..15f79bf 100644
--- a/fesvr/htif.cc
+++ b/fesvr/htif.cc
@@ -65,14 +65,14 @@ htif_t::htif_t(int argc, char** argv) : htif_t()
 htif_t::htif_t(const std::vector<std::string>& args) : htif_t()
 {
   int argc = args.size() + 1;
-  char * argv[argc];
+  std::vector<char*>argv(argc);
   argv[0] = (char *) "htif";
   for (unsigned int i = 0; i < args.size(); i++) {
     argv[i+1] = (char *) args[i].c_str();
   }
   //Set line size as 16 by default.
   line_size = 16;
-  parse_arguments(argc, argv);
+  parse_arguments(argc, &argv[0]);
   register_devices();
 }
@@ -158,11 +158,9 @@ void htif_t::load_symbols(std::map<std::string, uint64_t>& symbols)
 {
   class nop_memif_t : public memif_t {
    public:
-    nop_memif_t(htif_t* htif) : memif_t(htif), htif(htif) {}
+    nop_memif_t(htif_t* htif) : memif_t(htif) {}
     void read(addr_t UNUSED addr, size_t UNUSED len, void UNUSED *bytes) override {}
     void write(addr_t UNUSED taddr, size_t UNUSED len, const void UNUSED *src) override {}
-   private:
-    htif_t* htif;
   } nop_memif(this);
   reg_t nop_entry;
@@ -253,11 +251,10 @@ void htif_t::stop()
 void htif_t::clear_chunk(addr_t taddr, size_t len)
 {
-  char zeros[chunk_max_size()];
-  memset(zeros, 0, chunk_max_size());
+  std::vector<uint8_t> zeros(chunk_max_size(), 0);
   for (size_t pos = 0; pos < len; pos += chunk_max_size())
-    write_chunk(taddr + pos, std::min(len - pos, chunk_max_size()), zeros);
+    write_chunk(taddr + pos, std::min(len - pos, chunk_max_size()), &zeros[0]);
 }
 int htif_t::run()
diff --git a/fesvr/memif.cc b/fesvr/memif.cc
index e56bd94..59938b9 100644
--- a/fesvr/memif.cc
+++ b/fesvr/memif.cc
@@ -12,10 +12,10 @@ void memif_t::read(addr_t addr, size_t len, void* bytes)
   if (len && (addr & (align-1)))
   {
     size_t this_len = std::min(len, align - size_t(addr & (align-1)));
-    uint8_t chunk[align];
+    std::vector<uint8_t> chunk(align);
-    cmemif->read_chunk(addr & ~(align-1), align, chunk);
-    memcpy(bytes, chunk + (addr & (align-1)), this_len);
+    cmemif->read_chunk(addr & ~(align-1), align, &chunk[0]);
+    memcpy(bytes, &chunk[addr & (align-1)], this_len);
     bytes = (char*)bytes + this_len;
     addr += this_len;
@@ -26,10 +26,10 @@ void memif_t::read(addr_t addr, size_t len, void* bytes)
   {
     size_t this_len = len & (align-1);
     size_t start = len - this_len;
-    uint8_t chunk[align];
+    std::vector<uint8_t> chunk(align);
-    cmemif->read_chunk(addr + start, align, chunk);
-    memcpy((char*)bytes + start, chunk, this_len);
+    cmemif->read_chunk(addr + start, align, &chunk[0]);
+    memcpy((char*)bytes + start, &chunk[0], this_len);
     len -= this_len;
   }
@@ -45,11 +45,11 @@ void memif_t::write(addr_t addr, size_t len, const void* bytes)
   if (len && (addr & (align-1)))
   {
     size_t this_len = std::min(len, align - size_t(addr & (align-1)));
-    uint8_t chunk[align];
+    std::vector<uint8_t> chunk(align);
-    cmemif->read_chunk(addr & ~(align-1), align, chunk);
-    memcpy(chunk + (addr & (align-1)), bytes, this_len);
-    cmemif->write_chunk(addr & ~(align-1), align, chunk);
+    cmemif->read_chunk(addr & ~(align-1), align, &chunk[0]);
+    memcpy(&chunk[addr & (align-1)], bytes, this_len);
+    cmemif->write_chunk(addr & ~(align-1), align, &chunk[0]);
     bytes = (char*)bytes + this_len;
     addr += this_len;
@@ -60,11 +60,11 @@ void memif_t::write(addr_t addr, size_t len, const void* bytes)
   {
     size_t this_len = len & (align-1);
     size_t start = len - this_len;
-    uint8_t chunk[align];
+    std::vector<uint8_t> chunk(align);
-    cmemif->read_chunk(addr + start, align, chunk);
-    memcpy(chunk, (char*)bytes + start, this_len);
-    cmemif->write_chunk(addr + start, align, chunk);
+    cmemif->read_chunk(addr + start, align, &chunk[0]);
+    memcpy(&chunk[0], (char*)bytes + start, this_len);
+    cmemif->write_chunk(addr + start, align, &chunk[0]);
     len -= this_len;
   }
diff --git a/fesvr/syscall.cc b/fesvr/syscall.cc
index f848126..014a468 100644
--- a/fesvr/syscall.cc
+++ b/fesvr/syscall.cc
@@ -114,19 +114,23 @@ struct riscv_statx
   attributes_mask(htif->to_target<uint64_t>(s.stx_attributes_mask)),
   atime {
     htif->to_target<int64_t>(s.stx_atime.tv_sec),
-    htif->to_target<uint32_t>(s.stx_atime.tv_nsec)
+    htif->to_target<uint32_t>(s.stx_atime.tv_nsec),
+    htif->to_target<int32_t>(0)
   },
   btime {
     htif->to_target<int64_t>(s.stx_btime.tv_sec),
-    htif->to_target<uint32_t>(s.stx_btime.tv_nsec)
+    htif->to_target<uint32_t>(s.stx_btime.tv_nsec),
+    htif->to_target<int32_t>(0)
   },
   ctime {
     htif->to_target<int64_t>(s.stx_ctime.tv_sec),
-    htif->to_target<uint32_t>(s.stx_ctime.tv_nsec)
+    htif->to_target<uint32_t>(s.stx_ctime.tv_nsec),
+    htif->to_target<int32_t>(0)
   },
   mtime {
     htif->to_target<int64_t>(s.stx_mtime.tv_sec),
-    htif->to_target<uint32_t>(s.stx_mtime.tv_nsec)
+    htif->to_target<uint32_t>(s.stx_mtime.tv_nsec),
+    htif->to_target<int32_t>(0)
   },
   rdev_major(htif->to_target<uint32_t>(s.stx_rdev_major)),
   rdev_minor(htif->to_target<uint32_t>(s.stx_rdev_minor)),
diff --git a/riscv/abstract_device.h b/riscv/abstract_device.h
index 0726cd7..d8ddbab 100644
--- a/riscv/abstract_device.h
+++ b/riscv/abstract_device.h
@@ -16,6 +16,7 @@ class abstract_device_t {
  public:
   virtual bool load(reg_t addr, size_t len, uint8_t* bytes) = 0;
   virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) = 0;
+  virtual reg_t size() = 0;
   virtual ~abstract_device_t() {}
   virtual void tick(reg_t UNUSED rtc_ticks) {}
 };
diff --git a/riscv/cfg.cc b/riscv/cfg.cc
index 2f9a229..cc39a54 100644
--- a/riscv/cfg.cc
+++ b/riscv/cfg.cc
@@ -47,4 +47,5 @@ cfg_t::cfg_t()
   explicit_hartids = false;
   real_time_clint = false;
   trigger_count = 4;
+  cache_blocksz = 64;
 }
diff --git a/riscv/cfg.h b/riscv/cfg.h
index c972f03..8032856 100644
--- a/riscv/cfg.h
+++ b/riscv/cfg.h
@@ -6,6 +6,7 @@
 #include <vector>
 #include "decode.h"
 #include <cassert>
+class abstract_sim_if_t;
 typedef enum {
   endianness_little,
@@ -77,6 +78,8 @@ public:
   bool explicit_hartids;
   bool real_time_clint;
   reg_t trigger_count;
+  reg_t cache_blocksz;
+  std::optional<abstract_sim_if_t*> external_simulator;
   size_t nprocs() const { return hartids.size(); }
   size_t max_hartid() const { return hartids.back(); }
diff --git a/riscv/clint.cc b/riscv/clint.cc
index 208ea0e..3d5c984 100644
--- a/riscv/clint.cc
+++ b/riscv/clint.cc
@@ -39,7 +39,8 @@ bool clint_t::load(reg_t addr, size_t len, uint8_t* bytes)
   tick(0);
-  if (addr >= MSIP_BASE && addr < MTIMECMP_BASE) {
+  static_assert(MSIP_BASE == 0);
+  if (/* addr >= MSIP_BASE && */ addr < MTIMECMP_BASE) {
     if (len == 8) {
       // Implement double-word loads as a pair of word loads
       return load(addr, 4, bytes) && load(addr + 4, 4, bytes + 4);
@@ -68,7 +69,8 @@ bool clint_t::store(reg_t addr, size_t len, const uint8_t* bytes)
   if (len > 8)
     return false;
-  if (addr >= MSIP_BASE && addr < MTIMECMP_BASE) {
+  static_assert(MSIP_BASE == 0);
+  if (/* addr >= MSIP_BASE && */ addr < MTIMECMP_BASE) {
     if (len == 8) {
       // Implement double-word stores as a pair of word stores
       return store(addr, 4, bytes) && store(addr + 4, 4, bytes + 4);
@@ -117,7 +119,7 @@ void clint_t::tick(reg_t rtc_ticks)
 }
 clint_t* clint_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base,
-                              const std::vector<std::string>& UNUSED sargs) {
+                              const std::vector<std::string>& sargs UNUSED) {
   if (fdt_parse_clint(fdt, base, "riscv,clint0") == 0 ||
       fdt_parse_clint(fdt, base, "sifive,clint0") == 0)
     return new clint_t(sim, sim->CPU_HZ / sim->INSNS_PER_RTC_TICK,
@@ -126,7 +128,7 @@ clint_t* clint_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base,
   return nullptr;
 }
-std::string clint_generate_dts(const sim_t* sim, const std::vector<std::string>& UNUSED sargs) {
+std::string clint_generate_dts(const sim_t* sim, const
std::vector<std::string>& sargs UNUSED) {
   std::stringstream s;
   s << std::hex << " clint@" << CLINT_BASE << " {\n"
diff --git a/riscv/csr_init.cc b/riscv/csr_init.cc
index a03d188..0acd1c7 100644
--- a/riscv/csr_init.cc
+++ b/riscv/csr_init.cc
@@ -12,6 +12,24 @@ void state_t::add_csr(reg_t addr, const csr_t_p& csr)
 #define add_supervisor_csr(addr, csr) add_const_ext_csr('S', addr, csr)
 #define add_hypervisor_csr(addr, csr) add_ext_csr('H', addr, csr)
+void state_t::add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg)
+{
+  // This assumes xlen is always max_xlen, which is true today (see
+  // mstatus_csr_t::unlogged_write()):
+  auto xlen = proc->get_isa().get_max_xlen();
+
+  const reg_t iprio0_addr = 0x30;
+  for (int i=0; i<16; i+=2) {
+    csr_t_p iprio = std::make_shared<aia_csr_t>(proc, iprio0_addr + i, 0, 0);
+    if (xlen == 32) {
+      ireg->add_ireg_proxy(iprio0_addr + i, std::make_shared<rv32_low_csr_t>(proc, iprio0_addr + i, iprio));
+      ireg->add_ireg_proxy(iprio0_addr + i + 1, std::make_shared<rv32_high_csr_t>(proc, iprio0_addr + i + 1, iprio));
+    } else {
+      ireg->add_ireg_proxy(iprio0_addr + i, iprio);
+    }
+  }
+}
+
 void state_t::csr_init(processor_t* const proc, reg_t max_isa)
 {
   // This assumes xlen is always max_xlen, which is true today (see
@@ -87,8 +105,17 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     }
   }
   add_const_ext_csr(EXT_SSCOFPMF, CSR_SCOUNTOVF, std::make_shared<scountovf_csr_t>(proc, CSR_SCOUNTOVF));
-  add_csr(CSR_MIE, mie = std::make_shared<mie_csr_t>(proc, CSR_MIE));
-  add_csr(CSR_MIP, mip = std::make_shared<mip_csr_t>(proc, CSR_MIP));
+  mie = std::make_shared<mie_csr_t>(proc, CSR_MIE);
+  mip = std::make_shared<mip_csr_t>(proc, CSR_MIP);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+    add_csr(CSR_MIE, std::make_shared<rv32_low_csr_t>(proc, CSR_MIE, mie));
+    add_csr(CSR_MIEH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIEH, mie));
+    add_csr(CSR_MIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MIP, mip));
+    add_csr(CSR_MIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIPH, mip));
+  } else {
+    add_csr(CSR_MIE, mie);
+    add_csr(CSR_MIP, mip);
+  }
   auto sip_sie_accr = std::make_shared<generic_int_accessor_t>(
     this,
     ~MIP_HS_MASK,  // read_mask
@@ -116,21 +143,49 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     1              // shiftamt
   );
-  auto nonvirtual_sip = std::make_shared<mip_proxy_csr_t>(proc, CSR_SIP, sip_sie_accr);
+  nonvirtual_sip = std::make_shared<sip_csr_t>(proc, CSR_SIP, sip_sie_accr);
   auto vsip = std::make_shared<mip_proxy_csr_t>(proc, CSR_VSIP, vsip_vsie_accr);
-  add_hypervisor_csr(CSR_VSIP, vsip);
-  add_supervisor_csr(CSR_SIP, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip));
+  auto sip = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_VSIP, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIP, vsip));
+    add_hypervisor_csr(CSR_VSIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIPH, vsip));
+    add_supervisor_csr(CSR_SIP, std::make_shared<rv32_low_csr_t>(proc, CSR_SIP, sip));
+    add_supervisor_csr(CSR_SIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIPH, sip));
+  } else {
+    add_hypervisor_csr(CSR_VSIP, vsip);
+    add_supervisor_csr(CSR_SIP, sip);
+  }
   add_hypervisor_csr(CSR_HIP, std::make_shared<mip_proxy_csr_t>(proc, CSR_HIP, hip_hie_accr));
-  add_hypervisor_csr(CSR_HVIP, hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0));
+  hvip = std::make_shared<hvip_csr_t>(proc,
CSR_HVIP, 0);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_HVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIP, hvip));
+    add_hypervisor_csr(CSR_HVIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HVIPH, hvip));
+  } else {
+    add_hypervisor_csr(CSR_HVIP, hvip);
+  }
-  auto nonvirtual_sie = std::make_shared<mie_proxy_csr_t>(proc, CSR_SIE, sip_sie_accr);
+  nonvirtual_sie = std::make_shared<sie_csr_t>(proc, CSR_SIE, sip_sie_accr);
   auto vsie = std::make_shared<mie_proxy_csr_t>(proc, CSR_VSIE, vsip_vsie_accr);
-  add_hypervisor_csr(CSR_VSIE, vsie);
-  add_supervisor_csr(CSR_SIE, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie));
+  auto sie = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_VSIE, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIE, vsie));
+    add_hypervisor_csr(CSR_VSIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIEH, vsie));
+    add_supervisor_csr(CSR_SIE, std::make_shared<rv32_low_csr_t>(proc, CSR_SIE, sie));
+    add_supervisor_csr(CSR_SIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIEH, sie));
+  } else {
+    add_hypervisor_csr(CSR_VSIE, vsie);
+    add_supervisor_csr(CSR_SIE, sie);
+  }
   add_hypervisor_csr(CSR_HIE, std::make_shared<mie_proxy_csr_t>(proc, CSR_HIE, hip_hie_accr));
   add_supervisor_csr(CSR_MEDELEG, medeleg = std::make_shared<medeleg_csr_t>(proc, CSR_MEDELEG));
-  add_supervisor_csr(CSR_MIDELEG, mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG));
+  mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+    add_supervisor_csr(CSR_MIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_MIDELEG, mideleg));
+    add_supervisor_csr(CSR_MIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_MIDELEGH, mideleg));
+  } else {
+    add_supervisor_csr(CSR_MIDELEG, mideleg);
+  }
   const reg_t counteren_mask = (proc->extension_enabled_const(EXT_ZICNTR) ? 0x7UL : 0x0) | (proc->extension_enabled_const(EXT_ZIHPM) ? 0xfffffff8ULL : 0x0);
   add_user_csr(CSR_MCOUNTEREN, mcounteren = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTEREN, counteren_mask, 0));
   add_csr(CSR_MCOUNTINHIBIT, mcountinhibit = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTINHIBIT, counteren_mask & (~MCOUNTEREN_TIME), 0));
@@ -162,7 +217,13 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   add_hypervisor_csr(CSR_HSTATUS, hstatus = std::make_shared<hstatus_csr_t>(proc, CSR_HSTATUS));
   add_hypervisor_csr(CSR_HGEIE, std::make_shared<const_csr_t>(proc, CSR_HGEIE, 0));
   add_hypervisor_csr(CSR_HGEIP, std::make_shared<const_csr_t>(proc, CSR_HGEIP, 0));
-  add_hypervisor_csr(CSR_HIDELEG, hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg));
+  hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_HIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_HIDELEG, hideleg));
+    add_hypervisor_csr(CSR_HIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HIDELEGH, hideleg));
+  } else {
+    add_hypervisor_csr(CSR_HIDELEG, hideleg);
+  }
   const reg_t hedeleg_mask =
     (1 << CAUSE_MISALIGNED_FETCH) |
     (1 << CAUSE_FETCH_ACCESS) |
@@ -250,7 +311,8 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     (proc->extension_enabled(EXT_SSTC) ? MENVCFG_STCE : 0) |
     (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) |
     (proc->extension_enabled(EXT_ZICFISS) ?
MENVCFG_SSE : 0) |
-    (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0);
+    (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0) |
+    (proc->extension_enabled(EXT_SMCDELEG) ? MENVCFG_CDE : 0);
   menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0);
   if (xlen == 32) {
     add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg));
@@ -284,7 +346,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   const reg_t sstateen0_mask = (proc->extension_enabled(EXT_ZFINX) ? SSTATEEN0_FCSR : 0) |
                                (proc->extension_enabled(EXT_ZCMT) ? SSTATEEN0_JVT : 0) |
                                SSTATEEN0_CS;
-  const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
+  const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_CSRIND | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
   const reg_t mstateen0_mask = hstateen0_mask | (proc->extension_enabled(EXT_SSQOSID) ? MSTATEEN0_PRIV114 : 0);
   for (int i = 0; i < 4; i++) {
     const reg_t mstateen_mask = i == 0 ? mstateen0_mask : MSTATEEN_HSTATEEN;
@@ -320,7 +382,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   if (proc->extension_enabled_const(EXT_SSTC)) {
     stimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_STIMECMP, MIP_STIP);
     vstimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_VSTIMECMP, MIP_VSTIP);
-    auto virtualized_stimecmp = std::make_shared<virtualized_stimecmp_csr_t>(proc, stimecmp, vstimecmp);
+    auto virtualized_stimecmp = std::make_shared<virtualized_with_special_permission_csr_t>(proc, stimecmp, vstimecmp);
     if (xlen == 32) {
       add_supervisor_csr(CSR_STIMECMP, std::make_shared<rv32_low_csr_t>(proc, CSR_STIMECMP, virtualized_stimecmp));
       add_supervisor_csr(CSR_STIMECMPH, std::make_shared<rv32_high_csr_t>(proc, CSR_STIMECMPH, virtualized_stimecmp));
@@ -337,32 +399,99 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   const reg_t ssp_mask = -reg_t(xlen / 8);
   add_ext_csr(EXT_ZICFISS, CSR_SSP, ssp = std::make_shared<ssp_csr_t>(proc, CSR_SSP, ssp_mask, 0));
+  // Smcdeleg
+  if (proc->extension_enabled_const(EXT_SMCDELEG) || proc->extension_enabled_const(EXT_SSCCFG)) {
+    add_supervisor_csr(CSR_SCOUNTINHIBIT, scountinhibit = std::make_shared<scntinhibit_csr_t>(proc, CSR_SCOUNTINHIBIT, mcountinhibit));
+  }
   // Smcsrind / Sscsrind
   if (proc->extension_enabled_const(EXT_SMCSRIND)) {
     csr_t_p miselect = std::make_shared<basic_csr_t>(proc, CSR_MISELECT, 0);
     add_csr(CSR_MISELECT, miselect);
-    const reg_t mireg_csrs[] = { CSR_MIREG, CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
+    sscsrind_reg_csr_t::sscsrind_reg_csr_t_p mireg;
+    add_csr(CSR_MIREG, mireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_MIREG, miselect));
+    add_ireg_proxy(proc, mireg);
+    const reg_t mireg_csrs[] = { CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
     for (auto csr : mireg_csrs)
       add_csr(csr, std::make_shared<sscsrind_reg_csr_t>(proc, csr, miselect));
   }
   if (proc->extension_enabled_const(EXT_SSCSRIND)) {
-    csr_t_p vsiselect = std::make_shared<basic_csr_t>(proc, CSR_VSISELECT, 0);
+    csr_t_p vsiselect = std::make_shared<siselect_csr_t>(proc, CSR_VSISELECT, 0);
     add_hypervisor_csr(CSR_VSISELECT, vsiselect);
-    csr_t_p siselect = std::make_shared<basic_csr_t>(proc, CSR_SISELECT, 0);
-    add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_csr_t>(proc, siselect, vsiselect));
+    csr_t_p siselect = std::make_shared<siselect_csr_t>(proc, CSR_SISELECT, 0);
+    add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_with_special_permission_csr_t>(proc, siselect, vsiselect));
-    const reg_t vsireg_csrs[] = { CSR_VSIREG, CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
-    const reg_t sireg_csrs[] = { CSR_SIREG, CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
+    auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_VSIREG, vsiselect);
+    add_hypervisor_csr(CSR_VSIREG, vsireg);
+
+    auto sireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_SIREG, siselect);
+    add_ireg_proxy(proc, sireg);
+    add_supervisor_csr(CSR_SIREG, std::make_shared<virtualized_indirect_csr_t>(proc, sireg, vsireg));
+
+    const reg_t vsireg_csrs[] = { CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
+    const reg_t sireg_csrs[] = { CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
     for (size_t i = 0; i < std::size(vsireg_csrs); i++) {
       auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, vsireg_csrs[i], vsiselect);
       add_hypervisor_csr(vsireg_csrs[i], vsireg);
       auto sireg = std::make_shared<sscsrind_reg_csr_t>(proc, sireg_csrs[i], siselect);
       add_supervisor_csr(sireg_csrs[i], std::make_shared<virtualized_indirect_csr_t>(proc, sireg, vsireg));
+
+      // Smcdeleg
+      if (proc->extension_enabled(EXT_SSCCFG) || proc->extension_enabled(EXT_SMCDELEG)) {
+        switch (sireg_csrs[i]) {
+          case CSR_SIREG:
+            if (proc->extension_enabled_const(EXT_ZICNTR)) {
+              sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle);
+              sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret);
+            }
+            if (proc->extension_enabled_const(EXT_ZIHPM)) {
+              for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++)
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]);
+            }
+            break;
+          case CSR_SIREG4:
+            if (xlen == 32) {
+              if (proc->extension_enabled_const(EXT_ZICNTR)) {
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, csrmap[CSR_CYCLEH]);
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, csrmap[CSR_INSTRETH]);
+              }
+              if (proc->extension_enabled_const(EXT_ZIHPM)) {
+                for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++)
+                  sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3H + j]);
+              }
+            }
+            break;
+          case CSR_SIREG2:
+            if (proc->extension_enabled_const(EXT_ZICNTR)) {
+              sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcyclecfg);
+              sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRETCFG, minstretcfg);
+            }
+            if (proc->extension_enabled_const(EXT_ZIHPM)) {
+              for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3 + 1); j++)
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMEVENT_3 + j, csrmap[CSR_MHPMEVENT3H + j]);
+            }
+            break;
+          case CSR_SIREG5:
+            if (xlen == 32) {
+              if (proc->extension_enabled_const(EXT_ZICNTR)) {
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_START, mcycle);
+                sireg->add_ireg_proxy(SISELECT_SMCDELEG_INSTRET, minstret);
+              }
+              if (proc->extension_enabled_const(EXT_ZIHPM)) {
+                for (size_t j = 0; j < (SISELECT_SMCDELEG_END - SISELECT_SMCDELEG_HPMEVENT_3); j++)
+                  sireg->add_ireg_proxy(SISELECT_SMCDELEG_HPMCOUNTER_3 + j, csrmap[CSR_HPMCOUNTER3 + j]);
+              }
+            }
+          case CSR_SIREG3:
+          case CSR_SIREG6:
+          default:
+            break;
+        }
+      }
     }
   }
@@ -380,4 +509,44 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   const reg_t srmcfg_mask = SRMCFG_MCID | SRMCFG_RCID;
   add_const_ext_csr(EXT_SSQOSID, CSR_SRMCFG, std::make_shared<srmcfg_csr_t>(proc, CSR_SRMCFG, srmcfg_mask, 0));
+
+  mvien = std::make_shared<masked_csr_t>(proc, CSR_MVIEN, MIP_SEIP | MIP_SSIP, 0);
+  mvip = std::make_shared<mvip_csr_t>(proc, CSR_MVIP, 0);
+  if (proc->extension_enabled_const(EXT_SMAIA)) {
+    add_csr(CSR_MTOPI, std::make_shared<mtopi_csr_t>(proc, CSR_MTOPI));
+    if (xlen == 32) {
+      add_supervisor_csr(CSR_MVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIEN, mvien));
+      add_supervisor_csr(CSR_MVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIENH, mvien));
+      add_supervisor_csr(CSR_MVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIP, mvip));
+      add_supervisor_csr(CSR_MVIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIPH, mvip));
+    } else {
+      add_supervisor_csr(CSR_MVIEN, mvien);
+      add_supervisor_csr(CSR_MVIP, mvip);
+    }
+  }
+
+  hvictl = std::make_shared<aia_csr_t>(proc, CSR_HVICTL, HVICTL_VTI | HVICTL_IID | HVICTL_DPR | HVICTL_IPRIOM | HVICTL_IPRIO, 0);
+  vstopi = std::make_shared<vstopi_csr_t>(proc, CSR_VSTOPI);
+  if (proc->extension_enabled_const(EXT_SSAIA)) { // Included by EXT_SMAIA
+    csr_t_p nonvirtual_stopi = std::make_shared<nonvirtual_stopi_csr_t>(proc, CSR_STOPI);
+    add_supervisor_csr(CSR_STOPI, std::make_shared<virtualized_with_special_permission_csr_t>(proc, nonvirtual_stopi, vstopi));
+    add_supervisor_csr(CSR_STOPEI, std::make_shared<inaccessible_csr_t>(proc, CSR_STOPEI));
+    auto hvien = std::make_shared<aia_csr_t>(proc, CSR_HVIEN, 0, 0);
+    auto hviprio1 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO1, 0, 0);
+    auto hviprio2 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO2, 0, 0);
+    if (xlen == 32) {
+      add_hypervisor_csr(CSR_HVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIEN, hvien));
+      add_hypervisor_csr(CSR_HVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIENH, hvien));
+      add_hypervisor_csr(CSR_HVIPRIO1, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO1, hviprio1));
+      add_hypervisor_csr(CSR_HVIPRIO1H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO1H, hviprio1));
+      add_hypervisor_csr(CSR_HVIPRIO2, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO2, hviprio2));
+      add_hypervisor_csr(CSR_HVIPRIO2H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO2H, hviprio2));
+    } else {
+      add_hypervisor_csr(CSR_HVIEN, hvien);
+      add_hypervisor_csr(CSR_HVIPRIO1, hviprio1);
+      add_hypervisor_csr(CSR_HVIPRIO2, hviprio2);
+    }
+    add_hypervisor_csr(CSR_HVICTL, hvictl);
+    add_hypervisor_csr(CSR_VSTOPI, vstopi);
+  }
 }
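A recurring pattern in the csr_init.cc hunks above: when Smaia/Ssaia is enabled on RV32, each 64-bit interrupt CSR (mie/mip, mideleg, hvip, hideleg, mvien/mvip, hvien, ...) is registered twice, as a low-half view at the base address plus a *H high-half view, both backed by the same CSR object. A simplified stand-alone sketch of that idea; these structs are stand-ins, not Spike's rv32_low_csr_t/rv32_high_csr_t classes:

    // Schematic only: one shared 64-bit backing register, two 32-bit views.
    #include <cassert>
    #include <cstdint>
    #include <memory>

    struct backing_csr {                 // stand-in for the shared 64-bit CSR object
      uint64_t value = 0;
    };

    struct low_half_view {               // stand-in for rv32_low_csr_t
      std::shared_ptr<backing_csr> orig;
      uint32_t read() const { return uint32_t(orig->value); }
      void write(uint32_t v) { orig->value = (orig->value & ~uint64_t(0xffffffffu)) | v; }
    };

    struct high_half_view {              // stand-in for rv32_high_csr_t
      std::shared_ptr<backing_csr> orig;
      uint32_t read() const { return uint32_t(orig->value >> 32); }
      void write(uint32_t v) { orig->value = (orig->value & 0xffffffffu) | (uint64_t(v) << 32); }
    };

    int main() {
      auto reg = std::make_shared<backing_csr>();   // e.g. what mideleg would share
      low_half_view lo{reg};
      high_half_view hi{reg};                       // e.g. what MIDELEGH would map to
      lo.write(0x222);
      hi.write(0x1);
      assert(reg->value == 0x100000222ull);
      return 0;
    }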
diff --git a/riscv/csrs.cc b/riscv/csrs.cc
index 3dbac7b..49717e5 100644
--- a/riscv/csrs.cc
+++ b/riscv/csrs.cc
@@ -15,6 +15,8 @@
 #include "insn_macros.h"
 // For CSR_DCSR_V:
 #include "debug_defines.h"
+// For ctz:
+#include "arith.h"
 // STATE macro used by require_privilege() macro:
 #undef STATE
@@ -313,31 +315,31 @@ bool mseccfg_csr_t::get_sseed() const noexcept {
 }
 bool mseccfg_csr_t::unlogged_write(const reg_t val) noexcept {
-  if (proc->n_pmp == 0)
-    return false;
-
-  // pmpcfg.L is 1 in any rule or entry (including disabled entries)
-  const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
-          [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
   reg_t new_val = read();
-  // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
-  // Otherwise set the RLB bit according val
-  if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
-    new_val &= ~MSECCFG_RLB;
-    new_val |= (val & MSECCFG_RLB);
-  }
+  if (proc->n_pmp != 0) {
+    // pmpcfg.L is 1 in any rule or entry (including disabled entries)
+    const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
+            [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
-  new_val |= (val & MSECCFG_MMWP); //MMWP is sticky
-  new_val |= (val & MSECCFG_MML); //MML is sticky
+    // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
+    // Otherwise set the RLB bit according val
+    if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
+      new_val &= ~MSECCFG_RLB;
+      new_val |= (val & MSECCFG_RLB);
+    }
+
+    new_val |= (val & MSECCFG_MMWP); //MMWP is sticky
+    new_val |= (val & MSECCFG_MML); //MML is sticky
+
+    proc->get_mmu()->flush_tlb();
+  }
   if (proc->extension_enabled(EXT_ZKR)) {
     uint64_t mask = MSECCFG_USEED | MSECCFG_SSEED;
     new_val = (new_val & ~mask) | (val & mask);
   }
-  proc->get_mmu()->flush_tlb();
-
   if (proc->extension_enabled(EXT_ZICFILP)) {
     new_val &= ~MSECCFG_MLPE;
     new_val |= (val & MSECCFG_MLPE);
@@ -639,6 +641,22 @@ reg_t rv32_high_csr_t::written_value() const noexcept {
   return (orig->written_value() >> 32) & 0xffffffffU;
 }
+aia_rv32_high_csr_t::aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig):
+  rv32_high_csr_t(proc, addr, orig) {
+}
+
+void aia_rv32_high_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  rv32_high_csr_t::verify_permissions(insn, write);
+}
+
 // implement class sstatus_csr_t
 sstatus_csr_t::sstatus_csr_t(processor_t* const proc, sstatus_proxy_csr_t_p orig, vsstatus_csr_t_p virt):
   virtualized_csr_t(proc, orig, virt),
@@ -781,8 +799,14 @@ mip_csr_t::mip_csr_t(processor_t* const proc, const reg_t addr):
   mip_or_mie_csr_t(proc, addr) {
 }
+void mip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept {
+  if (!(state->mvien->read() & MIP_SEIP) && (mask & MIP_SEIP))
+    state->mvip->write_with_mask(MIP_SEIP, val); // mvip.SEIP is an alias of mip.SEIP when mvien.SEIP=0
+  mip_or_mie_csr_t::write_with_mask(mask & ~MIP_SEIP, val);
+}
+
 reg_t mip_csr_t::read() const noexcept {
-  return val | state->hvip->basic_csr_t::read();
+  return val | state->hvip->basic_csr_t::read() | ((state->mvien->read() & MIP_SEIP) ?
0 : (state->mvip->basic_csr_t::read() & MIP_SEIP));
 }
 void mip_csr_t::backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept {
@@ -864,6 +888,15 @@ mip_proxy_csr_t::mip_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
   accr(accr) {
 }
+void mip_proxy_csr_t::verify_permissions(insn_t insn, bool write) const {
+  csr_t::verify_permissions(insn, write);
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) &&
+        proc->extension_enabled('S') && state->v)
+      throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sip when hvictl.VTI=1
+  }
+}
+
 reg_t mip_proxy_csr_t::read() const noexcept {
   return accr->ip_read();
 }
@@ -879,6 +912,15 @@ mie_proxy_csr_t::mie_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
   accr(accr) {
 }
+void mie_proxy_csr_t::verify_permissions(insn_t insn, bool write) const {
+  csr_t::verify_permissions(insn, write);
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) &&
+        proc->extension_enabled('S') && state->v)
+      throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sie when hvictl.VTI=1
+  }
+}
+
 reg_t mie_proxy_csr_t::read() const noexcept {
   return accr->ie_read();
 }
@@ -956,6 +998,38 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept {
   return basic_csr_t::unlogged_write((read() & ~mask) | (val & mask));
 }
+sip_csr_t::sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr):
+  mip_proxy_csr_t(proc, addr, accr) {
+}
+
+reg_t sip_csr_t::read() const noexcept {
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read();
+  return (mip_proxy_csr_t::read() & ~mask) | (state->mvip->read() & mask);
+}
+
+bool sip_csr_t::unlogged_write(const reg_t val) noexcept {
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read();
+  state->mvip->write_with_mask(mask & accr->get_ip_write_mask(), val);
+  return mip_proxy_csr_t::unlogged_write(val & ~mask);
+}
+
+sie_csr_t::sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr):
+  mie_proxy_csr_t(proc, addr, accr),
+  val(0) {
+}
+
+reg_t sie_csr_t::read() const noexcept {
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read();
+  return (mie_proxy_csr_t::read() & ~mask) | (val & mask);
+}
+
+bool sie_csr_t::unlogged_write(const reg_t val) noexcept {
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read();
+  this->val = (this->val & ~mask) | (val & mask);
+  mie_proxy_csr_t::unlogged_write(val & ~mask);
+  return true;
+}
+
 // implement class masked_csr_t
 masked_csr_t::masked_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init):
   basic_csr_t(proc, addr, init),
@@ -1075,6 +1149,7 @@ bool virtualized_satp_csr_t::unlogged_write(const reg_t val) noexcept {
 wide_counter_csr_t::wide_counter_csr_t(processor_t* const proc, const reg_t addr, smcntrpmf_csr_t_p config_csr):
   csr_t(proc, addr),
   val(0),
+  written(false),
   config_csr(config_csr) {
 }
@@ -1083,7 +1158,15 @@ reg_t wide_counter_csr_t::read() const noexcept {
 }
 void wide_counter_csr_t::bump(const reg_t howmuch) noexcept {
-  if (is_counting_enabled()) {
+  if (written) {
+    // Because writing a CSR serializes the simulator, howmuch should
+    // reflect exactly one instruction: the explicit CSR write.
+    // If counting is disabled, though, howmuch will be zero.
+    assert(howmuch <= 1);
+    // The ISA mandates that explicit writes to instret take precedence
+    // over the instret, so simply skip the increment.
+    written = false;
+  } else if (is_counting_enabled()) {
     val += howmuch;
     // to keep log reasonable size, don't log every bump
   }
   // Clear cached value
@@ -1091,23 +1174,15 @@
 }
 bool wide_counter_csr_t::unlogged_write(const reg_t val) noexcept {
+  // Because writing a CSR serializes the simulator and is followed by a
+  // bump, back-to-back writes with no intervening bump should never occur.
+  assert(!written);
+  written = true;
+
   this->val = val;
-  // The ISA mandates that if an instruction writes instret, the write
-  // takes precedence over the increment to instret. However, Spike
-  // unconditionally increments instret after executing an instruction.
-  // Correct for this artifact by decrementing instret here.
-  // Ensure that Smctrpmf hasn't disabled counting.
-  if (is_counting_enabled()) {
-    this->val--;
-  }
   return true;
 }
-reg_t wide_counter_csr_t::written_value() const noexcept {
-  // Re-adjust for upcoming bump()
-  return this->val + 1;
-}
-
 // Returns true if counting is not inhibited by Smcntrpmf.
 // Note that minstretcfg / mcyclecfg / mhpmevent* share the same inhibit bits.
 bool wide_counter_csr_t::is_counting_enabled() const noexcept {
@@ -1230,7 +1305,7 @@ hideleg_csr_t::hideleg_csr_t(processor_t* const proc, const reg_t addr, csr_t_p
 reg_t hideleg_csr_t::read() const noexcept {
   return masked_csr_t::read() & mideleg->read();
-};
+}
 hgatp_csr_t::hgatp_csr_t(processor_t* const proc, const reg_t addr):
   basic_csr_t(proc, addr, 0) {
 }
@@ -1644,10 +1719,6 @@ bool stimecmp_csr_t::unlogged_write(const reg_t val) noexcept {
   return basic_csr_t::unlogged_write(val);
 }
-virtualized_stimecmp_csr_t::virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
-  virtualized_csr_t(proc, orig, virt) {
-}
-
 void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
   if (!(state->menvcfg->read() & MENVCFG_STCE)) {
     // access to (v)stimecmp with MENVCFG.STCE = 0
@@ -1663,9 +1734,18 @@ void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
   }
   basic_csr_t::verify_permissions(insn, write);
+
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && state->v && write)
+      throw trap_virtual_instruction(insn.bits());
+  }
+}
+
+virtualized_with_special_permission_csr_t::virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
+  virtualized_csr_t(proc, orig, virt) {
 }
-void virtualized_stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
+void virtualized_with_special_permission_csr_t::verify_permissions(insn_t insn, bool write) const {
   orig_csr->verify_permissions(insn, write);
 }
@@ -1676,6 +1756,14 @@ scountovf_csr_t::scountovf_csr_t(processor_t* const proc, const reg_t addr):
 void scountovf_csr_t::verify_permissions(insn_t insn, bool write) const {
   if (!proc->extension_enabled(EXT_SSCOFPMF))
     throw trap_illegal_instruction(insn.bits());
+
+  if (proc->extension_enabled('H') &&
+      (proc->extension_enabled_const(EXT_SMCDELEG) || proc->extension_enabled(EXT_SSCCFG))
+     ) {
+    if (state->v && (state->menvcfg->read() & MENVCFG_CDE)) {
+      throw trap_virtual_instruction(insn.bits());
+    }
+  }
   csr_t::verify_permissions(insn, write);
 }
@@ -1745,10 +1833,74 @@ sscsrind_reg_csr_t::sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr
 }
 void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
+      throw trap_illegal_instruction(insn.bits());
+  }
+
   // Don't call base verify_permission for VS registers remapped to S-mode
   if (insn.csr() == address)
     csr_t::verify_permissions(insn, write);
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  if (proc->extension_enabled(EXT_SMCDELEG)) {
+    if (insn.csr() >= CSR_VSIREG && insn.csr() <= CSR_VSIREG6) {
+      if (!state->v) {
+        // An attempt to access any vsireg* from M or S mode raises an illegal instruction exception.
+        throw trap_illegal_instruction(insn.bits());
+      } else {
+        if (state->prv == PRV_S) {
+          // An attempt from VS-mode to access any vsireg raises an illegal instruction
+          // exception if menvcfg.CDE = 0, or a virtual instruction exception if menvcfg.CDE = 1
+          if ((state->menvcfg->read() & MENVCFG_CDE) != MENVCFG_CDE) {
+            throw trap_illegal_instruction(insn.bits());
+          } else {
+            throw trap_virtual_instruction(insn.bits());
+          }
+        } else {
+          throw trap_virtual_instruction(insn.bits());
+        }
+      }
+    }
+    if (insn.csr() >= CSR_SIREG && insn.csr() <= CSR_SIREG6) {
+      // attempts to access any sireg* when menvcfg.CDE = 0;
+      if ((state->menvcfg->read() & MENVCFG_CDE) != MENVCFG_CDE) {
+        if (!state->v) {
+          throw trap_illegal_instruction(insn.bits());
+        } else {
+          if (state->prv == PRV_S) {
+            // An attempt from VS-mode to access any sireg* causes illegal instruction exception if menvcfg.CDE = 0
+            throw trap_illegal_instruction(insn.bits());
+          } else {
+            throw trap_virtual_instruction(insn.bits());
+          }
+        }
+      } else {
+        // menvcfg.CDE = 1;
+        if (state->v) {
+          // An attempt from VS-mode to access any sireg* causes a virtual instruction exception if menvcfg.CDE = 1
+          throw trap_virtual_instruction(insn.bits());
+        }
+        // counter selected by siselect is not delegated to S-mode (the corresponding bit in mcounteren = 0).
+        auto iselect_addr = iselect->read();
+        if (iselect_addr >= SISELECT_SMCDELEG_START && iselect_addr <= SISELECT_SMCDELEG_END) {
+          reg_t counter_id_offset = iselect_addr - SISELECT_SMCDELEG_START;
+          if (!(state->mcounteren->read() & (1U << counter_id_offset))) {
+            if (!state->v) {
+              throw trap_illegal_instruction(insn.bits());
+            } else {
+              throw trap_virtual_instruction(insn.bits());
+            }
+          }
+        }
+      }
+    }
+  }
+
   csr_t_p proxy_csr = get_reg();
   if (proxy_csr == nullptr) {
     if (!state->v) {
@@ -1810,7 +1962,7 @@ srmcfg_csr_t::srmcfg_csr_t(processor_t* const proc, const reg_t addr, const reg_
   masked_csr_t(proc, addr, mask, init) {
 }
-void srmcfg_csr_t::verify_permissions(insn_t insn, bool write) const {
+void srmcfg_csr_t::verify_permissions(insn_t insn, bool write UNUSED) const {
   if (!proc->extension_enabled(EXT_SSQOSID))
     throw trap_illegal_instruction(insn.bits());
@@ -1879,3 +2031,199 @@ bool hstatus_csr_t::unlogged_write(const reg_t val) noexcept {
   proc->get_mmu()->flush_tlb();
   return basic_csr_t::unlogged_write(new_hstatus);
 }
+
+scntinhibit_csr_t::scntinhibit_csr_t(processor_t* const proc, const reg_t addr, csr_t_p mcountinhibit):
+  basic_csr_t(proc, addr, mcountinhibit->read()) {
+}
+
+void scntinhibit_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (insn.csr() == address) {
+    csr_t::verify_permissions(insn, write);
+  }
+
+  if ((state->menvcfg->read() & MENVCFG_CDE) != MENVCFG_CDE) {
+    throw trap_illegal_instruction(insn.bits());
+  }
+}
+
+bool scntinhibit_csr_t::unlogged_write(const reg_t val) noexcept {
+  state->mcountinhibit->write(state->mcounteren->read() & val);
+  return true;
+}
+
+reg_t scntinhibit_csr_t::read() const noexcept {
+  return state->mcounteren->read() & state->mcountinhibit->read();
+}
+
+mtopi_csr_t::mtopi_csr_t(processor_t* const proc, const reg_t addr):
+  csr_t(proc, addr) {
+}
+
+reg_t mtopi_csr_t::read() const noexcept {
+  reg_t enabled_interrupts = state->mip->read() & state->mie->read() & ~state->mideleg->read();
+  if (!enabled_interrupts)
+    return 0; // no enabled pending interrupt to M-mode
+
+  reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+  reg_t identity = ctz(selected_interrupt);
+  return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0
+}
+
+bool mtopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+  return false;
+}
+
+mvip_csr_t::mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init):
+  basic_csr_t(proc, addr, init) {
+}
+
+reg_t mvip_csr_t::read() const noexcept {
+  const reg_t val = basic_csr_t::read();
+  const reg_t mvien = state->mvien->read();
+  const reg_t mip = state->mip->read();
+  const reg_t menvcfg = state->menvcfg->read();
+  return 0
+    | (val & MIP_SEIP)
+    | ((menvcfg & MENVCFG_STCE) ? 0 : (mip & MIP_STIP))
+    | (((mvien & MIP_SSIP) ? val : mip) & MIP_SSIP)
+    ;
+}
+
+bool mvip_csr_t::unlogged_write(const reg_t val) noexcept {
+  if (!(state->menvcfg->read() & MENVCFG_STCE))
+    state->mip->write_with_mask(MIP_STIP, val); // mvip.STIP is an alias of mip.STIP when mip.STIP is writable
+  if (!(state->mvien->read() & MIP_SSIP))
+    state->mip->write_with_mask(MIP_SSIP, val); // mvip.SSIP is an alias of mip.SSIP when mvien.SSIP=0
+
+  const reg_t new_val = (val & MIP_SEIP) | (((state->mvien->read() & MIP_SSIP) ?
val : basic_csr_t::read()) & MIP_SSIP); + return basic_csr_t::unlogged_write(new_val); +} + +void mvip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { + basic_csr_t::unlogged_write((basic_csr_t::read() & ~mask) | (val & mask)); + log_write(); +} + +nonvirtual_stopi_csr_t::nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void nonvirtual_stopi_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + csr_t::verify_permissions(insn, write); +} + +reg_t nonvirtual_stopi_csr_t::read() const noexcept { + reg_t enabled_interrupts = state->nonvirtual_sip->read() & state->nonvirtual_sie->read() & ~state->hideleg->read(); + if (!enabled_interrupts) + return 0; // no enabled pending interrupt to S-mode + + reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts); + reg_t identity = ctz(selected_interrupt); + return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0 +} + +bool nonvirtual_stopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept { + return false; +} + +inaccessible_csr_t::inaccessible_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void inaccessible_csr_t::verify_permissions(insn_t insn, bool write) const { + if (state->v) + throw trap_virtual_instruction(insn.bits()); + else + throw trap_illegal_instruction(insn.bits()); +} + +vstopi_csr_t::vstopi_csr_t(processor_t* const proc, const reg_t addr): + csr_t(proc, addr) { +} + +void vstopi_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + csr_t::verify_permissions(insn, write); +} + +reg_t vstopi_csr_t::read() const noexcept { + reg_t hvictl = state->hvictl->read(); + bool vti = hvictl & HVICTL_VTI; + reg_t iid = get_field(hvictl, HVICTL_IID); + bool dpr = hvictl & HVICTL_DPR; + bool ipriom = hvictl & HVICTL_IPRIOM; + reg_t iprio = get_field(hvictl, HVICTL_IPRIO); + + reg_t enabled_interrupts = state->mip->read() & state->mie->read() & state->hideleg->read(); + enabled_interrupts >>= 1; // VSSIP -> SSIP, etc + reg_t vgein = get_field(state->hstatus->read(), HSTATUS_VGEIN); + reg_t virtual_sei_priority = (vgein == 0 && iid == IRQ_S_EXT && iprio != 0) ? iprio : 255; // vstopi.IPRIO is 255 for priority number 256 + + reg_t identity, priority; + if (vti) { + if (!(enabled_interrupts & MIP_SEIP) && iid == IRQ_S_EXT) + return 0; + + identity = ((enabled_interrupts & MIP_SEIP) && (iid == IRQ_S_EXT || dpr)) ? IRQ_S_EXT : iid; + priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : ((iprio != 0 || !dpr) ? iprio : 255); + } else { + if (!enabled_interrupts) + return 0; // no enabled pending interrupt to VS-mode + + reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts); + identity = ctz(selected_interrupt); + priority = (identity == IRQ_S_EXT) ? 
virtual_sei_priority : 255; // vstopi.IPRIO is 255 for interrupt with default priority lower than VSEI + } + return set_field((reg_t)(ipriom ? priority : 1), MTOPI_IID, identity); +} + +bool vstopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept { + return false; +} + +siselect_csr_t::siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init): + basic_csr_t(proc, addr, init) { +} + +void siselect_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND)) + throw trap_virtual_instruction(insn.bits()); + } + + basic_csr_t::verify_permissions(insn, write); +} + +aia_csr_t::aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init): + masked_csr_t(proc, addr, mask, init) { +} + +void aia_csr_t::verify_permissions(insn_t insn, bool write) const { + if (proc->extension_enabled(EXT_SMSTATEEN)) { + if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA)) + throw trap_illegal_instruction(insn.bits()); + + if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA)) + throw trap_virtual_instruction(insn.bits()); + } + + basic_csr_t::verify_permissions(insn, write); +} diff --git a/riscv/csrs.h b/riscv/csrs.h index 278bdb3..97fd0f1 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -301,6 +301,12 @@ class rv32_high_csr_t: public csr_t { csr_t_p orig; }; +class aia_rv32_high_csr_t: public rv32_high_csr_t { + public: + aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; + // sstatus.sdt is read_only 0 when menvcfg.dte = 0 class sstatus_proxy_csr_t final: public base_status_csr_t { public: @@ -356,7 +362,7 @@ class mip_or_mie_csr_t: public csr_t { mip_or_mie_csr_t(processor_t* const proc, const reg_t addr); virtual reg_t read() const noexcept override; - void write_with_mask(const reg_t mask, const reg_t val) noexcept; + virtual void write_with_mask(const reg_t mask, const reg_t val) noexcept; protected: virtual bool unlogged_write(const reg_t val) noexcept override final; @@ -371,6 +377,8 @@ class mip_csr_t: public mip_or_mie_csr_t { mip_csr_t(processor_t* const proc, const reg_t addr); virtual reg_t read() const noexcept override final; + void write_with_mask(const reg_t mask, const reg_t val) noexcept override; + // Does not log. Used by external things (clint) that wiggle bits in mip. 
void backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept; private: @@ -406,6 +414,7 @@ class generic_int_accessor_t { void ip_write(const reg_t val) noexcept; reg_t ie_read() const noexcept; void ie_write(const reg_t val) noexcept; + reg_t get_ip_write_mask() { return ip_write_mask; } private: state_t* const state; const reg_t read_mask; @@ -423,10 +432,10 @@ typedef std::shared_ptr<generic_int_accessor_t> generic_int_accessor_t_p; class mip_proxy_csr_t: public csr_t { public: mip_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual void verify_permissions(insn_t insn, bool write) const override; virtual reg_t read() const noexcept override; protected: virtual bool unlogged_write(const reg_t val) noexcept override; - private: generic_int_accessor_t_p accr; }; @@ -434,6 +443,7 @@ class mip_proxy_csr_t: public csr_t { class mie_proxy_csr_t: public csr_t { public: mie_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual void verify_permissions(insn_t insn, bool write) const override; virtual reg_t read() const noexcept override; protected: virtual bool unlogged_write(const reg_t val) noexcept override; @@ -460,6 +470,24 @@ class medeleg_csr_t: public basic_csr_t { const reg_t hypervisor_exceptions; }; +class sip_csr_t: public mip_proxy_csr_t { + public: + sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual reg_t read() const noexcept override; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + +class sie_csr_t: public mie_proxy_csr_t { + public: + sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr); + virtual reg_t read() const noexcept override; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; + private: + reg_t val; +}; + // For CSRs with certain bits hardwired class masked_csr_t: public basic_csr_t { public: @@ -542,10 +570,10 @@ class wide_counter_csr_t: public csr_t { void bump(const reg_t howmuch) noexcept; protected: virtual bool unlogged_write(const reg_t val) noexcept override; - virtual reg_t written_value() const noexcept override; private: bool is_counting_enabled() const noexcept; reg_t val; + bool written; smcntrpmf_csr_t_p config_csr; }; @@ -805,9 +833,9 @@ class stimecmp_csr_t: public basic_csr_t { reg_t intr_mask; }; -class virtualized_stimecmp_csr_t: public virtualized_csr_t { +class virtualized_with_special_permission_csr_t: public virtualized_csr_t { public: - virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt); + virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt); virtual void verify_permissions(insn_t insn, bool write) const override; }; @@ -899,4 +927,73 @@ class hstatus_csr_t final: public basic_csr_t { protected: virtual bool unlogged_write(const reg_t val) noexcept override; }; + +class scntinhibit_csr_t: public basic_csr_t { + public: + scntinhibit_csr_t(processor_t* const proc, const reg_t addr, csr_t_p mcountinhibit); + reg_t read() const noexcept override; + virtual void verify_permissions(insn_t insn, bool write) const override; + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + +class mtopi_csr_t: public csr_t { + public: + mtopi_csr_t(processor_t* const proc, const reg_t addr); + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class 
mvip_csr_t : public basic_csr_t { + public: + mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init); + reg_t read() const noexcept override; + + void write_with_mask(const reg_t mask, const reg_t val) noexcept; + + protected: + virtual bool unlogged_write(const reg_t val) noexcept override; +}; + +typedef std::shared_ptr<mvip_csr_t> mvip_csr_t_p; + +class nonvirtual_stopi_csr_t: public csr_t { + public: + nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class inaccessible_csr_t: public csr_t { + public: + inaccessible_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + reg_t read() const noexcept override { return 0; } + protected: + bool unlogged_write(const reg_t UNUSED val) noexcept override { return false; } +}; + +class vstopi_csr_t: public csr_t { + public: + vstopi_csr_t(processor_t* const proc, const reg_t addr); + virtual void verify_permissions(insn_t insn, bool write) const override; + virtual reg_t read() const noexcept override; + protected: + bool unlogged_write(const reg_t val) noexcept override; +}; + +class siselect_csr_t: public basic_csr_t { + public: + siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; + +class aia_csr_t: public masked_csr_t { + public: + aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init); + virtual void verify_permissions(insn_t insn, bool write) const override; +}; #endif diff --git a/riscv/debug_module.cc b/riscv/debug_module.cc index 7c59744..a89a4ff 100644 --- a/riscv/debug_module.cc +++ b/riscv/debug_module.cc @@ -13,7 +13,7 @@ #if 0 # define D(x) x #else -# define D(x) +# define D(x) (void) 0 #endif // Return the number of bits wide that a field has to be to encode up to n @@ -249,6 +249,11 @@ bool debug_module_t::store(reg_t addr, size_t len, const uint8_t* bytes) return false; } +reg_t debug_module_t::size() +{ + return PGSIZE; +} + void debug_module_t::write32(uint8_t *memory, unsigned int index, uint32_t value) { uint8_t* base = memory + index * 4; @@ -445,7 +450,6 @@ bool debug_module_t::dmi_read(unsigned address, uint32_t *value) } else { dmstatus.allresumeack = false; } - auto hart = sim->get_harts().at(hart_id); if (!hart_available(hart_id)) { dmstatus.allrunning = false; dmstatus.allhalted = false; diff --git a/riscv/debug_module.h b/riscv/debug_module.h index 3771489..904f03e 100644 --- a/riscv/debug_module.h +++ b/riscv/debug_module.h @@ -114,8 +114,9 @@ class debug_module_t : public abstract_device_t debug_module_t(simif_t *sim, const debug_module_config_t &config); ~debug_module_t(); - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + reg_t size() override; // Debug Module Interface that the debugger (in our case through JTAG DTM) // uses to access the DM. 
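Note on the device-interface change above: load(), store(), and size() are now declared as overrides of abstract_device_t, and size() reports each device's MMIO footprint (PGSIZE for the debug module) so that bus_t can route accesses by extent and reject overlapping registrations. Below is a minimal sketch of a device written against this updated interface; the class name, the 4 KiB footprint, the backing register array, and the "abstract_device.h" include are illustrative assumptions, not part of this commit.

#include <cstdint>
#include <cstring>
#include "abstract_device.h"  // assumed location of abstract_device_t and reg_t in this tree

// Hypothetical scratch-memory device: every access method is an override, and
// size() advertises the footprint that bus_t's overflow/overlap checks rely on.
class scratch_device_t : public abstract_device_t {
 public:
  bool load(reg_t addr, size_t len, uint8_t* bytes) override {
    if (addr + len > sizeof(regs)) return false;  // out-of-range reads fail
    std::memcpy(bytes, regs + addr, len);
    return true;
  }
  bool store(reg_t addr, size_t len, const uint8_t* bytes) override {
    if (addr + len > sizeof(regs)) return false;  // out-of-range writes fail
    std::memcpy(regs + addr, bytes, len);
    return true;
  }
  reg_t size() override { return sizeof(regs); }  // 4 KiB, an arbitrary example size
 private:
  uint8_t regs[4096] = {};
};

Registered the usual way (bus.add_device(base, &dev)), the device's [base, base + size()) range is what the reworked bus_t::add_device later in this diff checks for address-space overflow and overlap with previously added devices.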
diff --git a/riscv/decode.h b/riscv/decode.h index f36c04e..51ecbeb 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -79,6 +79,10 @@ public: insn_t(insn_bits_t bits) : b(bits) {} insn_bits_t bits() { return b; } int length() { return insn_length(b); } + [[maybe_unused]] int64_t opcode() { return x(0, 7); } + [[maybe_unused]] int64_t funct7() { return x(25, 7); } + [[maybe_unused]] int64_t funct3() { return x(12, 3); } + [[maybe_unused]] int64_t funct2() { return x(25, 2); } int64_t i_imm() { return xs(20, 12); } int64_t shamt() { return x(20, 6); } int64_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); } @@ -95,6 +99,7 @@ public: uint64_t bs() { return x(30, 2); } // Crypto ISE - SM4/AES32 byte select. uint64_t rcon() { return x(20, 4); } // Crypto ISE - AES64 round const. + [[maybe_unused]] int64_t rvc_opcode() { return x(0, 2); } int64_t rvc_imm() { return x(2, 5) + (xs(12, 1) << 5); } int64_t rvc_zimm() { return x(2, 5) + (x(12, 1) << 5); } int64_t rvc_addi4spn_imm() { return (x(6, 1) << 2) + (x(5, 1) << 3) + (x(11, 2) << 4) + (x(7, 4) << 6); } @@ -170,23 +175,29 @@ public: switch (rvc_rlist()) { case 15: stack_adj_base += 16; + [[fallthrough]]; case 14: if (xlen == 64) stack_adj_base += 16; + [[fallthrough]]; case 13: case 12: stack_adj_base += 16; + [[fallthrough]]; case 11: case 10: if (xlen == 64) stack_adj_base += 16; + [[fallthrough]]; case 9: case 8: stack_adj_base += 16; + [[fallthrough]]; case 7: case 6: if (xlen == 64) stack_adj_base += 16; + [[fallthrough]]; case 5: case 4: stack_adj_base += 16; diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 807ad98..892515f 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -337,10 +337,10 @@ inline long double to_f(float128_t f) { long double r; memcpy(&r, &f, sizeof(r)) #define DEBUG_RVV_FMA_VF \ printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old)); #else -#define DEBUG_RVV_FP_VV 0 -#define DEBUG_RVV_FP_VF 0 -#define DEBUG_RVV_FMA_VV 0 -#define DEBUG_RVV_FMA_VF 0 +#define DEBUG_RVV_FP_VV (void) 0 +#define DEBUG_RVV_FP_VF (void) 0 +#define DEBUG_RVV_FMA_VV (void) 0 +#define DEBUG_RVV_FMA_VF (void) 0 #endif #define DECLARE_XENVCFG_VARS(field) \ diff --git a/riscv/devices.cc b/riscv/devices.cc index 2c06f78..b816ca1 100644 --- a/riscv/devices.cc +++ b/riscv/devices.cc @@ -8,53 +8,92 @@ mmio_device_map_t& mmio_device_map() return device_map; } +static auto empty_device = rom_device_t(std::vector<char>()); + +bus_t::bus_t() + : bus_t(&empty_device) +{ +} + +bus_t::bus_t(abstract_device_t* fallback) + : fallback(fallback) +{ +} + void bus_t::add_device(reg_t addr, abstract_device_t* dev) { - // Searching devices via lower_bound/upper_bound - // implicitly relies on the underlying std::map - // container to sort the keys and provide ordered - // iteration over this sort, which it does. 
(python's - SortedDict is a good analogy) + // Allow empty devices by omitting them + auto size = dev->size(); + if (size == 0) + return; + + // Reject devices that overflow address size + if (addr + size - 1 < addr) { + fprintf(stderr, "device at [%" PRIx64 ", %" PRIx64 ") overflows address size\n", + addr, addr + size); + abort(); + } + + // Reject devices that overlap other devices + if (auto it = devices.upper_bound(addr); + (it != devices.end() && addr + size - 1 >= it->first) || + (it != devices.begin() && (it--, it->first + it->second->size() - 1 >= addr))) { + fprintf(stderr, "devices at [%" PRIx64 ", %" PRIx64 ") and [%" PRIx64 ", %" PRIx64 ") overlap\n", + it->first, it->first + it->second->size(), addr, addr + size); + abort(); + } + devices[addr] = dev; } bool bus_t::load(reg_t addr, size_t len, uint8_t* bytes) { - // Find the device with the base address closest to but - // less than addr (price-is-right search) - auto it = devices.upper_bound(addr); - if (devices.empty() || it == devices.begin()) { - // Either the bus is empty, or there weren't - // any items with a base address <= addr - return false; - } - // Found at least one item with base address <= addr - // The iterator points to the device after this, so - // go back by one item. - it--; - return it->second->load(addr - it->first, len, bytes); + if (auto [base, dev] = find_device(addr, len); dev) + return dev->load(addr - base, len, bytes); + return false; } bool bus_t::store(reg_t addr, size_t len, const uint8_t* bytes) { - // See comments in bus_t::load - auto it = devices.upper_bound(addr); - if (devices.empty() || it == devices.begin()) { - return false; - } - it--; - return it->second->store(addr - it->first, len, bytes); + if (auto [base, dev] = find_device(addr, len); dev) + return dev->store(addr - base, len, bytes); + return false; } -std::pair<reg_t, abstract_device_t*> bus_t::find_device(reg_t addr) +reg_t bus_t::size() { - // See comments in bus_t::load - auto it = devices.upper_bound(addr); - if (devices.empty() || it == devices.begin()) { - return std::make_pair((reg_t)0, (abstract_device_t*)NULL); + if (auto last = devices.rbegin(); last != devices.rend()) + return last->first + last->second->size(); + return 0; +} + +std::pair<reg_t, abstract_device_t*> bus_t::find_device(reg_t addr, size_t len) +{ + if (unlikely(!len || addr + len - 1 < addr)) + return std::make_pair(0, nullptr); + + // Obtain iterator to device immediately after the one that might match + auto it_after = devices.upper_bound(addr); + reg_t base, size; + if (likely(it_after != devices.begin())) { + // Obtain iterator to device that might match + auto it = std::prev(it_after); + base = it->first; + size = it->second->size(); + if (likely(addr - base + len - 1 < size)) { + // it fully contains [addr, addr + len) + return std::make_pair(it->first, it->second); + } } - it--; - return std::make_pair(it->first, it->second); + + if (unlikely((it_after != devices.end() && addr + len - 1 >= it_after->first) + || (it_after != devices.begin() && addr - base < size))) { + // it_after or it contains part of, but not all of, [addr, addr + len) + return std::make_pair(0, nullptr); + } + + // No matching device + return std::make_pair(0, fallback); } mem_t::mem_t(reg_t size) @@ -116,3 +155,25 @@ void mem_t::dump(std::ostream& o) { } } } + +external_sim_device_t::external_sim_device_t(abstract_sim_if_t* sim) + : external_simulator(sim) {} + +void external_sim_device_t::set_simulator(abstract_sim_if_t* sim) { + external_simulator = sim; +} + +bool
external_sim_device_t::load(reg_t addr, size_t len, uint8_t* bytes) { + if (unlikely(external_simulator == nullptr)) return false; + return external_simulator->load(addr, len, bytes); +} + +bool external_sim_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) { + if (unlikely(external_simulator == nullptr)) return false; + return external_simulator->store(addr, len, bytes); +} + +reg_t external_sim_device_t::size() { + if (unlikely(external_simulator == nullptr)) return 0; + return PGSIZE; // TODO: proper size +} diff --git a/riscv/devices.h b/riscv/devices.h index 6ef32e9..ccb5c9b 100644 --- a/riscv/devices.h +++ b/riscv/devices.h @@ -16,14 +16,21 @@ class simif_t; class bus_t : public abstract_device_t { public: + bus_t(); + + // the fallback device owns all addresses not owned by other devices + bus_t(abstract_device_t* fallback); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + reg_t size() override; void add_device(reg_t addr, abstract_device_t* dev); - std::pair<reg_t, abstract_device_t*> find_device(reg_t addr); + std::pair<reg_t, abstract_device_t*> find_device(reg_t addr, size_t len); private: std::map<reg_t, abstract_device_t*> devices; + abstract_device_t* fallback; }; class rom_device_t : public abstract_device_t { @@ -31,6 +38,7 @@ class rom_device_t : public abstract_device_t { rom_device_t(std::vector<char> data); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + reg_t size() override { return data.size(); } const std::vector<char>& contents() { return data; } private: std::vector<char> data; @@ -41,7 +49,6 @@ class abstract_mem_t : public abstract_device_t { virtual ~abstract_mem_t() = default; virtual char* contents(reg_t addr) = 0; - virtual reg_t size() = 0; virtual void dump(std::ostream& o) = 0; }; @@ -64,12 +71,31 @@ class mem_t : public abstract_mem_t { reg_t sz; }; +class abstract_sim_if_t { +public: + virtual ~abstract_sim_if_t() = default; + virtual bool load(reg_t addr, size_t len, uint8_t* bytes) = 0; + virtual bool store(reg_t addr, size_t len, const uint8_t* bytes) = 0; +}; + +class external_sim_device_t : public abstract_device_t { +public: + external_sim_device_t(abstract_sim_if_t* sim); + void set_simulator(abstract_sim_if_t* sim); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + reg_t size() override; + +private: + abstract_sim_if_t* external_simulator; +}; + class clint_t : public abstract_device_t { public: clint_t(const simif_t*, uint64_t freq_hz, bool real_time); bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; - size_t size() { return CLINT_SIZE; } + reg_t size() override { return CLINT_SIZE; } void tick(reg_t rtc_ticks) override; uint64_t get_mtimecmp(reg_t hartid) { return mtimecmp[hartid]; } uint64_t get_mtime() { return mtime; } @@ -109,7 +135,7 @@ class plic_t : public abstract_device_t, public abstract_interrupt_controller_t bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void set_interrupt_level(uint32_t id, int lvl) override; - size_t size() { return PLIC_SIZE; } + reg_t size() override { return PLIC_SIZE; } private: std::vector<plic_context_t> contexts; uint32_t num_ids; @@ -140,7 +166,7 @@ class ns16550_t : public abstract_device_t { 
bool load(reg_t addr, size_t len, uint8_t* bytes) override; bool store(reg_t addr, size_t len, const uint8_t* bytes) override; void tick(reg_t rtc_ticks) override; - size_t size() { return NS16550_SIZE; } + reg_t size() override { return NS16550_SIZE; } private: abstract_interrupt_controller_t *intctrl; uint32_t interrupt_id; diff --git a/riscv/dts.cc b/riscv/dts.cc index 9751ffe..5be9d57 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -424,7 +424,6 @@ int fdt_parse_isa(const void *fdt, int cpu_offset, const char **isa) int fdt_parse_hartid(const void *fdt, int cpu_offset, uint32_t *hartid) { int len, rc; - const void *prop; const fdt32_t *val; if ((rc = check_cpu_node(fdt, cpu_offset)) < 0) diff --git a/riscv/execute.cc b/riscv/execute.cc index 1fa6111..1b572a7 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -40,13 +40,12 @@ static void commit_log_print_value(FILE *log_file, int width, const void *data) fprintf(log_file, "0x%016" PRIx64, *(const uint64_t *)data); break; default: - // max lengh of vector - if (((width - 1) & width) == 0) { - const uint64_t *arr = (const uint64_t *)data; + if (width % 8 == 0) { + const uint8_t *arr = (const uint8_t *)data; fprintf(log_file, "0x"); - for (int idx = width / 64 - 1; idx >= 0; --idx) { - fprintf(log_file, "%016" PRIx64, arr[idx]); + for (int idx = width / 8 - 1; idx >= 0; --idx) { + fprintf(log_file, "%02" PRIx8, arr[idx]); } } else { abort(); @@ -202,7 +201,7 @@ static inline reg_t execute_insn_logged(processor_t* p, reg_t pc, insn_fetch_t f return npc; } -bool processor_t::slow_path() +bool processor_t::slow_path() const { return debug || state.single_step != state.STEP_NONE || state.debug_mode || log_commits_enabled || histogram_enabled || in_wfi || check_triggers_icount; @@ -211,6 +210,8 @@ bool processor_t::slow_path() // fetch/decode/execute loop void processor_t::step(size_t n) { + mmu_t* _mmu = mmu; + if (!state.debug_mode) { if (halt_request == HR_REGULAR) { enter_debug_mode(DCSR_CAUSE_DEBUGINT, 0); @@ -222,10 +223,18 @@ void processor_t::step(size_t n) } } + if (extension_enabled(EXT_ZICCID)) { + // Ziccid requires stores eventually become visible to instruction fetch, + // so periodically flush the I$ + if (ziccid_flush_count-- == 0) { + ziccid_flush_count += ZICCID_FLUSH_PERIOD; + _mmu->flush_icache(); + } + } + while (n > 0) { size_t instret = 0; reg_t pc = state.pc; - mmu_t* _mmu = mmu; state.prv_changed = false; state.v_changed = false; @@ -343,10 +352,6 @@ void processor_t::step(size_t n) } catch (triggers::matched_t& t) { - if (mmu->matched_trigger) { - delete mmu->matched_trigger; - mmu->matched_trigger = NULL; - } take_trigger_action(t.action, t.address, pc, t.gva); } catch(trap_debug_mode&) @@ -365,12 +370,10 @@ void processor_t::step(size_t n) in_wfi = true; } - if (!(state.mcountinhibit->read() & MCOUNTINHIBIT_IR)) - state.minstret->bump(instret); + state.minstret->bump((state.mcountinhibit->read() & MCOUNTINHIBIT_IR) ? 0 : instret); // Model a hart whose CPI is 1. - if (!(state.mcountinhibit->read() & MCOUNTINHIBIT_CY)) - state.mcycle->bump(instret); + state.mcycle->bump((state.mcountinhibit->read() & MCOUNTINHIBIT_CY) ? 
0 : instret); n -= instret; } diff --git a/riscv/extension.cc b/riscv/extension.cc index 520c2ed..195eea1 100644 --- a/riscv/extension.cc +++ b/riscv/extension.cc @@ -3,21 +3,15 @@ #include "extension.h" #include "trap.h" -extension_t::~extension_t() -{ -} - -void extension_t::illegal_instruction() +void extension_t::illegal_instruction([[maybe_unused]] processor_t &proc) { throw trap_illegal_instruction(0); } -void extension_t::raise_interrupt() +void extension_t::raise_interrupt([[maybe_unused]] processor_t &proc) { - p->take_interrupt((reg_t)1 << IRQ_COP); // must not return + proc.take_interrupt((reg_t)1 << IRQ_COP); // must not return throw std::logic_error("a COP exception was posted, but interrupts are disabled!"); } -void extension_t::clear_interrupt() -{ -} +void extension_t::clear_interrupt([[maybe_unused]] processor_t &proc) {} diff --git a/riscv/extension.h b/riscv/extension.h index 991da7e..411e65b 100644 --- a/riscv/extension.h +++ b/riscv/extension.h @@ -11,21 +11,18 @@ class extension_t { public: - virtual std::vector<insn_desc_t> get_instructions() = 0; - virtual std::vector<disasm_insn_t*> get_disasms() = 0; - virtual std::vector<csr_t_p> get_csrs ([[maybe_unused]] processor_t &proc) const { return {}; }; - virtual const char* name() = 0; - virtual void reset() {}; - virtual void set_debug(bool UNUSED value) {} - virtual ~extension_t(); + virtual std::vector<insn_desc_t> get_instructions(const processor_t &proc) = 0; + virtual std::vector<disasm_insn_t*> get_disasms(const processor_t *proc = nullptr) = 0; + virtual std::vector<csr_t_p> get_csrs(processor_t &) const { return {}; }; + virtual const char* name() const = 0; + virtual void reset(processor_t &) {}; + virtual void set_debug(bool UNUSED value, const processor_t &) {} + virtual ~extension_t() = default; - void set_processor(processor_t* _p) { p = _p; } protected: - processor_t* p; - - void illegal_instruction(); - void raise_interrupt(); - void clear_interrupt(); + void illegal_instruction(processor_t &proc); + void raise_interrupt(processor_t &proc); + void clear_interrupt(processor_t &proc); }; std::function<extension_t*()> find_extension(const char* name); diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h index a1969de..6624d8b 100644 --- a/riscv/insns/vcompress_vm.h +++ b/riscv/insns/vcompress_vm.h @@ -9,11 +9,7 @@ require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1); reg_t pos = 0; VI_GENERAL_LOOP_BASE - const int midx = i / 64; - const int mpos = i % 64; - - bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1; - if (do_mask) { + if (P.VU.mask_elt(rs1_num, i)) { switch (sew) { case e8: P.VU.elt<uint8_t>(rd_num, pos, true) = P.VU.elt<uint8_t>(rs2_num, i); diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h index f909311..26a1276 100644 --- a/riscv/insns/vcpop_m.h +++ b/riscv/insns/vcpop_m.h @@ -6,15 +6,7 @@ reg_t rs2_num = insn.rs2(); require(P.VU.vstart->read() == 0); reg_t popcount = 0; for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { - const int midx = i / 32; - const int mpos = i % 32; - - bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - if (insn.v_vm() == 1) { - popcount += vs2_lsb; - } else { - bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1; - popcount += (vs2_lsb && do_mask); - } + bool vs2_bit = P.VU.mask_elt(rs2_num, i); + popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i)); } WRITE_RD(popcount); diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h index a130e5d..e3f5263 100644 --- a/riscv/insns/vfirst_m.h +++ 
b/riscv/insns/vfirst_m.h @@ -8,8 +8,7 @@ reg_t pos = -1; for (reg_t i=P.VU.vstart->read(); i < vl; ++i) { VI_LOOP_ELEMENT_SKIP() - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - if (vs2_lsb) { + if (P.VU.mask_elt(rs2_num, i)) { pos = i; break; } diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h index bcbfe74..728678c 100644 --- a/riscv/insns/vghsh_vv.h +++ b/riscv/insns/vghsh_vv.h @@ -2,9 +2,13 @@ #include "zvk_ext_macros.h" +const uint32_t EGS = 4; + require_zvkg; require(P.VU.vsew == 32); require_egw_fits(128); +require(P.VU.vl->read() % EGS == 0); +VI_CHECK_SSS(true) VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h index 820b396..0d223e8 100644 --- a/riscv/insns/vgmul_vv.h +++ b/riscv/insns/vgmul_vv.h @@ -2,9 +2,13 @@ #include "zvk_ext_macros.h" +const uint32_t EGS = 4; + require_zvkg; require(P.VU.vsew == 32); require_egw_fits(128); +require(P.VU.vl->read() % EGS == 0); +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index 1ee9229..00155db 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -12,36 +12,31 @@ require_noover(rd_num, P.VU.vflmul, rs2_num, 1); int cnt = 0; for (reg_t i = 0; i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool do_mask = P.VU.mask_elt(0, i); bool has_one = false; if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - if (vs2_lsb) { + if (P.VU.mask_elt(rs2_num, i)) { has_one = true; } } - bool use_ori = (insn.v_vm() == 0) && !do_mask; + // Bypass masked-off elements + if ((insn.v_vm() == 0) && !do_mask) + continue; + switch (sew) { case e8: - P.VU.elt<uint8_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint8_t>(rd_num, i) : cnt; + P.VU.elt<uint8_t>(rd_num, i, true) = cnt; break; case e16: - P.VU.elt<uint16_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint16_t>(rd_num, i) : cnt; + P.VU.elt<uint16_t>(rd_num, i, true) = cnt; break; case e32: - P.VU.elt<uint32_t>(rd_num, i, true) = use_ori ? - P.VU.elt<uint32_t>(rd_num, i) : cnt; + P.VU.elt<uint32_t>(rd_num, i, true) = cnt; break; default: - P.VU.elt<uint64_t>(rd_num, i, true) = use_ori ? 
- P.VU.elt<uint64_t>(rd_num, i) : cnt; + P.VU.elt<uint64_t>(rd_num, i, true) = cnt; break; } diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h index e9a87cf..49129f7 100644 --- a/riscv/insns/vmandn_mm.h +++ b/riscv/insns/vmandn_mm.h @@ -1,2 +1,2 @@ // vmandn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 & ~vs1); +VI_LOOP_MASK(vs2 & !vs1); diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h index 5a3ab09..4659e2f 100644 --- a/riscv/insns/vmnand_mm.h +++ b/riscv/insns/vmnand_mm.h @@ -1,2 +1,2 @@ // vmnand.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 & vs1)); +VI_LOOP_MASK(!(vs2 & vs1)); diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h index ab93378..37327c0 100644 --- a/riscv/insns/vmnor_mm.h +++ b/riscv/insns/vmnor_mm.h @@ -1,2 +1,2 @@ // vmnor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 | vs1)); +VI_LOOP_MASK(!(vs2 | vs1)); diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h index 23026f5..71acc05 100644 --- a/riscv/insns/vmorn_mm.h +++ b/riscv/insns/vmorn_mm.h @@ -1,2 +1,2 @@ // vmorn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 | ~vs1); +VI_LOOP_MASK(vs2 | !vs1); diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h index 1275872..3f736e0 100644 --- a/riscv/insns/vmsbf_m.h +++ b/riscv/insns/vmsbf_m.h @@ -11,22 +11,17 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && !vs2_lsb) { - res = 1; + res = true; } else if (!has_one && vs2_lsb) { has_one = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h index cbcbc2a..b029327 100644 --- a/riscv/insns/vmsif_m.h +++ b/riscv/insns/vmsif_m.h @@ -11,22 +11,18 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && !vs2_lsb) { - res = 1; + res = true; } else if (!has_one && vs2_lsb) { has_one = true; - res = 1; + res = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h index 9bd4f0c..5753dbf 100644 --- a/riscv/insns/vmsof_m.h +++ b/riscv/insns/vmsof_m.h @@ -11,20 +11,16 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = 
P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && vs2_lsb) { has_one = true; - res = 1; + res = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h index 0736d5b..8db61c2 100644 --- a/riscv/insns/vmxnor_mm.h +++ b/riscv/insns/vmxnor_mm.h @@ -1,2 +1,2 @@ // vmnxor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 ^ vs1)); +VI_LOOP_MASK(!(vs2 ^ vs1)); diff --git a/riscv/insns/vsm3c_vi.h b/riscv/insns/vsm3c_vi.h index b3e8121..f9375a5 100644 --- a/riscv/insns/vsm3c_vi.h +++ b/riscv/insns/vsm3c_vi.h @@ -3,6 +3,7 @@ #include "zvksh_ext_macros.h" require_vsm3_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h index dd6cb52..388b79f 100644 --- a/riscv/insns/vsm3me_vv.h +++ b/riscv/insns/vsm3me_vv.h @@ -13,6 +13,7 @@ (ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6)) require_vsm3_constraints; +VI_CHECK_SSS(true) VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h index 8f52e68..dd6f67d 100644 --- a/riscv/insns/vsm4k_vi.h +++ b/riscv/insns/vsm4k_vi.h @@ -15,6 +15,7 @@ static constexpr uint32_t zvksed_ck[32] = { }; require_vsm4_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h index 44011eb..8db1050 100644 --- a/riscv/insns/vsm4r_vs.h +++ b/riscv/insns/vsm4r_vs.h @@ -3,8 +3,10 @@ #include "zvksed_ext_macros.h" require_vsm4_constraints; +require_align(insn.rd(), P.VU.vflmul); +require_vs2_align_eglmul(128); // No overlap of vd and vs2. 
-require(insn.rd() != insn.rs2()); +require_noover_eglmul(insn.rd(), insn.rs2()); VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h index 9a18cec..18afee6 100644 --- a/riscv/insns/vsm4r_vv.h +++ b/riscv/insns/vsm4r_vv.h @@ -2,7 +2,9 @@ #include "zvksed_ext_macros.h" + require_vsm4_constraints; +VI_CHECK_SSS(false) VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( {}, diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h index 5c58927..4cf616d 100644 --- a/riscv/insns/vsra_vi.h +++ b/riscv/insns/vsra_vi.h @@ -1,5 +1,5 @@ // vsra.vi vd, vs2, zimm5 VI_VI_LOOP ({ - vd = vs2 >> (simm5 & (sew - 1) & 0x1f); + vd = vs2 >> (insn.v_zimm5() & (sew - 1)); }) diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h index cbdf47a..12f1240 100644 --- a/riscv/insns/vssra_vi.h +++ b/riscv/insns/vssra_vi.h @@ -1,8 +1,8 @@ -// vssra.vi vd, vs2, simm5 +// vssra.vi vd, vs2, zimm5 VI_VI_LOOP ({ VRM xrm = P.VU.get_vround_mode(); - int sh = simm5 & (sew - 1); + int sh = insn.v_zimm5() & (sew - 1); int128_t val = vs2; INT_ROUNDING(val, xrm, sh); diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h index 74fa37c..a2de49e 100644 --- a/riscv/insns/vssrl_vi.h +++ b/riscv/insns/vssrl_vi.h @@ -1,4 +1,4 @@ -// vssra.vi vd, vs2, simm5 +// vssra.vi vd, vs2, zimm5 VI_VI_ULOOP ({ VRM xrm = P.VU.get_vround_mode(); diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h index 13b5eb4..866cd78 100644 --- a/riscv/insns/vwsll_vi.h +++ b/riscv/insns/vwsll_vi.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(false); VI_ZVK_VI_WIDENING_ULOOP({ const reg_t shift = zimm5 & ((2 * sew) - 1); diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h index 5a64c6c..180fe97 100644 --- a/riscv/insns/vwsll_vv.h +++ b/riscv/insns/vwsll_vv.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(true); VI_ZVK_VV_WIDENING_ULOOP({ const reg_t shift = (vs1 & ((2 * sew) - 1)); diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h index 5264e80..4137d39 100644 --- a/riscv/insns/vwsll_vx.h +++ b/riscv/insns/vwsll_vx.h @@ -3,6 +3,7 @@ #include "zvk_ext_macros.h" require_zvbb; +VI_CHECK_DSS(false); VI_ZVK_VX_WIDENING_ULOOP({ const reg_t shift = (rs1 & ((2 * sew) - 1)); diff --git a/riscv/interactive.cc b/riscv/interactive.cc index 2701f49..9afc718 100644 --- a/riscv/interactive.cc +++ b/riscv/interactive.cc @@ -83,8 +83,7 @@ static void clear_str(bool noncanonical, int fd, std::string target_str) clear_motion += ' '; } clear_motion += '\r'; - if (write(fd, clear_motion.c_str(), clear_motion.size() + 1)) - ; // shut up gcc + (void) write(fd, clear_motion.c_str(), clear_motion.size() + 1); } } @@ -97,8 +96,7 @@ static void send_key(bool noncanonical, int fd, keybuffer_t key_code, const int { key_motion += (char) ((key_code >> (i * BITS_PER_CHAR)) & 0xff); } - if (write(fd, key_motion.c_str(), len) != len) - ; // shut up gcc + (void) write(fd, key_motion.c_str(), len); } } @@ -145,8 +143,8 @@ static std::string readline(int fd) clear_str(noncanonical, fd, s); cursor_pos--; s.erase(cursor_pos, 1); - if (noncanonical && write(fd, s.c_str(), s.size() + 1) != 1) - ; // shut up gcc + if (noncanonical) + (void) write(fd, s.c_str(), s.size() + 1); // move cursor by left arrow key for (unsigned i = 0; i < s.size() - cursor_pos; i++) { send_key(noncanonical, fd, KEYCODE_LEFT, 3); @@ -177,8 +175,8 @@ static std::string readline(int fd) clear_str(noncanonical, fd, s); history_index = std::min(history_commands.size(), history_index + 1); s = 
history_commands[history_commands.size() - history_index]; - if (noncanonical && write(fd, s.c_str(), s.size() + 1)) - ; // shut up gcc + if (noncanonical) + (void) write(fd, s.c_str(), s.size() + 1); cursor_pos = s.size(); } key_buffer = 0; @@ -193,8 +191,8 @@ static std::string readline(int fd) } else { s = history_commands[history_commands.size() - history_index]; } - if (noncanonical && write(fd, s.c_str(), s.size() + 1)) - ; // shut up gcc + if (noncanonical) + (void) write(fd, s.c_str(), s.size() + 1); cursor_pos = s.size(); } key_buffer = 0; @@ -222,14 +220,13 @@ static std::string readline(int fd) key_buffer = 0; break; case KEYCODE_ENTER: - if (noncanonical && write(fd, &ch, 1) != 1) - ; // shut up gcc + if (noncanonical) + (void) write(fd, &ch, 1); if (s.size() > initial_s_len && (history_commands.size() == 0 || s != history_commands[history_commands.size() - 1])) { history_commands.push_back(s); } return s.substr(initial_s_len); default: - DEFAULT_KEY: // unknown buffered key, do nothing if (key_buffer != 0) { key_buffer = 0; @@ -238,8 +235,8 @@ static std::string readline(int fd) clear_str(noncanonical, fd, s); s.insert(cursor_pos, 1, ch); cursor_pos++; - if (noncanonical && write(fd, s.c_str(), s.size() + 1) != 1) - ; // shut up gcc + if (noncanonical) + (void) write(fd, s.c_str(), s.size() + 1); // send left arrow key to move cursor for (unsigned i = 0; i < s.size() - cursor_pos; i++) { send_key(noncanonical, fd, KEYCODE_LEFT, 3); diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 61ba5a8..e99f720 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -50,6 +50,7 @@ typedef enum { EXT_ZFINX, EXT_ZHINX, EXT_ZHINXMIN, + EXT_ZICCID, EXT_ZICBOM, EXT_ZICBOZ, EXT_ZICNTR, @@ -76,6 +77,8 @@ typedef enum { EXT_INTERNAL_ZFH_MOVE, EXT_SMCSRIND, EXT_SSCSRIND, + EXT_SMCDELEG, + EXT_SSCCFG, EXT_SMCNTRPMF, EXT_ZIMOP, EXT_ZCMOP, @@ -88,6 +91,8 @@ typedef enum { EXT_SMMPM, EXT_SMNPM, EXT_SSNPM, + EXT_SMAIA, + EXT_SSAIA, NUM_ISA_EXTENSIONS } isa_extension_t; diff --git a/riscv/mmu.cc b/riscv/mmu.cc index d950146..30fc47a 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -7,15 +7,14 @@ #include "processor.h" #include "decode_macros.h" -mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc) - : sim(sim), proc(proc), +mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz) + : sim(sim), proc(proc), blocksz(cache_blocksz), #ifdef RISCV_ENABLE_DUAL_ENDIAN target_big_endian(endianness == endianness_big), #endif check_triggers_fetch(false), check_triggers_load(false), - check_triggers_store(false), - matched_trigger(NULL) + check_triggers_store(false) { #ifndef RISCV_ENABLE_DUAL_ENDIAN assert(endianness == endianness_little); @@ -36,9 +35,9 @@ void mmu_t::flush_icache() void mmu_t::flush_tlb() { - memset(tlb_insn_tag, -1, sizeof(tlb_insn_tag)); - memset(tlb_load_tag, -1, sizeof(tlb_load_tag)); - memset(tlb_store_tag, -1, sizeof(tlb_store_tag)); + memset(tlb_insn, -1, sizeof(tlb_insn)); + memset(tlb_load, -1, sizeof(tlb_load)); + memset(tlb_store, -1, sizeof(tlb_store)); flush_icache(); } @@ -69,29 +68,47 @@ reg_t mmu_t::translate(mem_access_info_t access_info, reg_t len) return paddr; } -tlb_entry_t mmu_t::fetch_slow_path(reg_t vaddr) +inline mmu_t::insn_parcel_t mmu_t::perform_intrapage_fetch(reg_t vaddr, uintptr_t host_addr, reg_t paddr) { + insn_parcel_t res; + + if (host_addr) + memcpy(&res, (char*)host_addr, sizeof(res)); + else if (!mmio_fetch(paddr, sizeof(res), (uint8_t*)&res)) + throw trap_instruction_access_fault(proc->state.v, 
vaddr, 0, 0); + + return res; +} + +mmu_t::insn_parcel_t mmu_t::fetch_slow_path(reg_t vaddr) +{ + if (matched_trigger) { + auto trig = matched_trigger.value(); + matched_trigger.reset(); + throw trig; + } + + if (auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_insn, vaddr, TLB_FLAGS & ~TLB_CHECK_TRIGGERS); tlb_hit) { + // Fast path for simple cases + return perform_intrapage_fetch(vaddr, host_addr, paddr); + } + + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_insn, vaddr, TLB_FLAGS); auto access_info = generate_access_info(vaddr, FETCH, {}); check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt); - tlb_entry_t result; - reg_t vpn = vaddr >> PGSHIFT; - if (unlikely(tlb_insn_tag[vpn % TLB_ENTRIES] != (vpn | TLB_CHECK_TRIGGERS))) { - reg_t paddr = translate(access_info, sizeof(fetch_temp)); - if (auto host_addr = sim->addr_to_mem(paddr)) { - result = refill_tlb(vaddr, paddr, host_addr, FETCH); - } else { - if (!mmio_fetch(paddr, sizeof fetch_temp, (uint8_t*)&fetch_temp)) - throw trap_instruction_access_fault(proc->state.v, vaddr, 0, 0); - result = {(char*)&fetch_temp - vaddr, paddr - vaddr}; - } - } else { - result = tlb_data[vpn % TLB_ENTRIES]; + if (!tlb_hit) { + paddr = translate(access_info, sizeof(insn_parcel_t)); + host_addr = (uintptr_t)sim->addr_to_mem(paddr); + + refill_tlb(vaddr, paddr, (char*)host_addr, FETCH); } - check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(*(const uint16_t*)(result.host_offset + vaddr))); + auto res = perform_intrapage_fetch(vaddr, host_addr, paddr); - return result; + check_triggers(triggers::OPERATION_EXECUTE, vaddr, access_info.effective_virt, from_le(res)); + + return res; } reg_t reg_from_bytes(size_t len, const uint8_t* bytes) @@ -123,7 +140,8 @@ reg_t reg_from_bytes(size_t len, const uint8_t* bytes) bool mmu_t::mmio_ok(reg_t paddr, access_type UNUSED type) { // Disallow access to debug region when not in debug mode - if (paddr >= DEBUG_START && paddr <= DEBUG_END && proc && !proc->state.debug_mode) + static_assert(DEBUG_START == 0); + if (/* paddr >= DEBUG_START && */ paddr <= DEBUG_END && proc && !proc->state.debug_mode) return false; return true; @@ -184,42 +202,47 @@ void mmu_t::check_triggers(triggers::operation_t operation, reg_t address, bool case triggers::TIMING_AFTER: // We want to take this exception on the next instruction. We check - // whether to do so in the I$ refill path, so flush the I$. - flush_icache(); - matched_trigger = new triggers::matched_t(operation, tval, match->action, virt); + // whether to do so in the I$ refill slow path, which we can force by + // flushing the TLB. 
+ flush_tlb(); + matched_trigger = triggers::matched_t(operation, tval, match->action, virt); } } -void mmu_t::load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_t access_info) +inline void mmu_t::perform_intrapage_load(reg_t vaddr, uintptr_t host_addr, reg_t paddr, reg_t len, uint8_t* bytes, xlate_flags_t xlate_flags) { - reg_t addr = access_info.vaddr; - reg_t transformed_addr = access_info.transformed_vaddr; - reg_t vpn = transformed_addr >> PGSHIFT; - if (!access_info.flags.is_special_access() && vpn == (tlb_load_tag[vpn % TLB_ENTRIES] & ~TLB_CHECK_TRIGGERS)) { - auto host_addr = tlb_data[vpn % TLB_ENTRIES].host_offset + transformed_addr; - memcpy(bytes, host_addr, len); - return; + if (host_addr) { + memcpy(bytes, (char*)host_addr, len); + } else if (!mmio_load(paddr, len, bytes)) { + auto access_info = generate_access_info(vaddr, LOAD, xlate_flags); + if (access_info.flags.ss_access) + throw trap_store_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); + else + throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); } - reg_t paddr = translate(access_info, len); + if (tracer.interested_in_range(paddr, paddr + len, LOAD)) + tracer.trace(paddr, len, LOAD); +} - if (access_info.flags.lr && !sim->reservable(paddr)) { - throw trap_load_access_fault(access_info.effective_virt, transformed_addr, 0, 0); - } +void mmu_t::load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_t access_info) +{ + reg_t vaddr = access_info.vaddr; + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_load, vaddr, TLB_FLAGS); + if (!tlb_hit || access_info.flags.is_special_access()) { + paddr = translate(access_info, len); + host_addr = (uintptr_t)sim->addr_to_mem(paddr); - if (auto host_addr = sim->addr_to_mem(paddr)) { - memcpy(bytes, host_addr, len); - if (tracer.interested_in_range(paddr, paddr + PGSIZE, LOAD)) - tracer.trace(paddr, len, LOAD); - else if (!access_info.flags.is_special_access()) - refill_tlb(addr, paddr, host_addr, LOAD); + if (!access_info.flags.is_special_access()) + refill_tlb(vaddr, paddr, (char*)host_addr, LOAD); - } else if (!mmio_load(paddr, len, bytes)) { - (access_info.flags.ss_access)? 
- throw trap_store_access_fault(access_info.effective_virt, transformed_addr, 0, 0) : - throw trap_load_access_fault(access_info.effective_virt, transformed_addr, 0, 0); + if (access_info.flags.lr && !sim->reservable(paddr)) { + throw trap_load_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); + } } + perform_intrapage_load(vaddr, host_addr, paddr, len, bytes, access_info.flags); + if (access_info.flags.lr) { load_reservation_address = paddr; } @@ -227,6 +250,17 @@ void mmu_t::load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_ void mmu_t::load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate_flags_t xlate_flags) { + if (likely(!xlate_flags.is_special_access())) { + // Fast path for simple cases + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_load, original_addr, TLB_FLAGS & ~TLB_CHECK_TRIGGERS); + bool intrapage = (original_addr % PGSIZE) + len <= PGSIZE; + bool aligned = (original_addr & (len - 1)) == 0; + + if (likely(tlb_hit && (aligned || (intrapage && is_misaligned_enabled())))) { + return perform_intrapage_load(original_addr, host_addr, paddr, len, bytes, xlate_flags); + } + } + auto access_info = generate_access_info(original_addr, LOAD, xlate_flags); reg_t transformed_addr = access_info.transformed_vaddr; check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt); @@ -255,38 +289,55 @@ void mmu_t::load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate bytes += sizeof(reg_t); } check_triggers(triggers::OPERATION_LOAD, transformed_addr, access_info.effective_virt, reg_from_bytes(len, bytes)); + + if (proc && unlikely(proc->get_log_commits_enabled())) + proc->state.log_mem_read.push_back(std::make_tuple(original_addr, 0, len)); } -void mmu_t::store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_access_info_t access_info, bool actually_store) +inline void mmu_t::perform_intrapage_store(reg_t vaddr, uintptr_t host_addr, reg_t paddr, reg_t len, const uint8_t* bytes, xlate_flags_t xlate_flags) { - reg_t addr = access_info.vaddr; - reg_t transformed_addr = access_info.transformed_vaddr; - reg_t vpn = transformed_addr >> PGSHIFT; - if (!access_info.flags.is_special_access() && vpn == (tlb_store_tag[vpn % TLB_ENTRIES] & ~TLB_CHECK_TRIGGERS)) { - if (actually_store) { - auto host_addr = tlb_data[vpn % TLB_ENTRIES].host_offset + transformed_addr; - memcpy(host_addr, bytes, len); - } - return; + if (host_addr) { + memcpy((char*)host_addr, bytes, len); + } else if (!mmio_store(paddr, len, bytes)) { + auto access_info = generate_access_info(vaddr, STORE, xlate_flags); + throw trap_store_access_fault(access_info.effective_virt, access_info.transformed_vaddr, 0, 0); } - reg_t paddr = translate(access_info, len); + if (tracer.interested_in_range(paddr, paddr + len, STORE)) + tracer.trace(paddr, len, STORE); +} - if (actually_store) { - if (auto host_addr = sim->addr_to_mem(paddr)) { - memcpy(host_addr, bytes, len); - if (tracer.interested_in_range(paddr, paddr + PGSIZE, STORE)) - tracer.trace(paddr, len, STORE); - else if (!access_info.flags.is_special_access()) - refill_tlb(addr, paddr, host_addr, STORE); - } else if (!mmio_store(paddr, len, bytes)) { - throw trap_store_access_fault(access_info.effective_virt, transformed_addr, 0, 0); - } +void mmu_t::store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_access_info_t access_info, bool actually_store) +{ + reg_t vaddr = access_info.vaddr; + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_store, vaddr, TLB_FLAGS); + if 
(!tlb_hit || access_info.flags.is_special_access()) { + paddr = translate(access_info, len); + host_addr = (uintptr_t)sim->addr_to_mem(paddr); + + if (!access_info.flags.is_special_access()) + refill_tlb(vaddr, paddr, (char*)host_addr, STORE); } + + if (actually_store) + perform_intrapage_store(vaddr, host_addr, paddr, len, bytes, access_info.flags); } void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes, xlate_flags_t xlate_flags, bool actually_store, bool UNUSED require_alignment) { + if (likely(!xlate_flags.is_special_access())) { + // Fast path for simple cases + auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_store, original_addr, TLB_FLAGS & ~TLB_CHECK_TRIGGERS); + bool intrapage = (original_addr % PGSIZE) + len <= PGSIZE; + bool aligned = (original_addr & (len - 1)) == 0; + + if (likely(tlb_hit && (aligned || (intrapage && is_misaligned_enabled())))) { + if (actually_store) + perform_intrapage_store(original_addr, host_addr, paddr, len, bytes, xlate_flags); + return; + } + } + auto access_info = generate_access_info(original_addr, STORE, xlate_flags); reg_t transformed_addr = access_info.transformed_vaddr; if (actually_store) { @@ -317,37 +368,44 @@ void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes } else { store_slow_path_intrapage(len, bytes, access_info, actually_store); } + + if (actually_store && proc && unlikely(proc->get_log_commits_enabled())) + proc->state.log_mem_write.push_back(std::make_tuple(original_addr, reg_from_bytes(len, bytes), len)); } tlb_entry_t mmu_t::refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type) { reg_t idx = (vaddr >> PGSHIFT) % TLB_ENTRIES; reg_t expected_tag = vaddr >> PGSHIFT; + reg_t base_paddr = paddr & ~reg_t(PGSIZE - 1); - tlb_entry_t entry = {host_addr - vaddr, paddr - vaddr}; + tlb_entry_t entry = {uintptr_t(host_addr) - (vaddr % PGSIZE), paddr - (vaddr % PGSIZE)}; - if (in_mprv()) + if (in_mprv() + || !pmp_homogeneous(base_paddr, PGSIZE) + || (proc && proc->get_log_commits_enabled())) return entry; - if ((tlb_load_tag[idx] & ~TLB_CHECK_TRIGGERS) != expected_tag) - tlb_load_tag[idx] = -1; - if ((tlb_store_tag[idx] & ~TLB_CHECK_TRIGGERS) != expected_tag) - tlb_store_tag[idx] = -1; - if ((tlb_insn_tag[idx] & ~TLB_CHECK_TRIGGERS) != expected_tag) - tlb_insn_tag[idx] = -1; - - if ((check_triggers_fetch && type == FETCH) || - (check_triggers_load && type == LOAD) || - (check_triggers_store && type == STORE)) - expected_tag |= TLB_CHECK_TRIGGERS; - - if (pmp_homogeneous(paddr & ~reg_t(PGSIZE - 1), PGSIZE)) { - if (type == FETCH) tlb_insn_tag[idx] = expected_tag; - else if (type == STORE) tlb_store_tag[idx] = expected_tag; - else tlb_load_tag[idx] = expected_tag; + auto trace_flag = tracer.interested_in_range(base_paddr, base_paddr + PGSIZE, type) ? TLB_CHECK_TRACER : 0; + auto mmio_flag = host_addr ? 0 : TLB_MMIO; + + switch (type) { + case FETCH: + tlb_insn[idx].data = entry; + tlb_insn[idx].tag = expected_tag | (check_triggers_fetch ? TLB_CHECK_TRIGGERS : 0) | trace_flag | mmio_flag; + break; + case LOAD: + tlb_load[idx].data = entry; + tlb_load[idx].tag = expected_tag | (check_triggers_load ? TLB_CHECK_TRIGGERS : 0) | trace_flag | mmio_flag; + break; + case STORE: + tlb_store[idx].data = entry; + tlb_store[idx].tag = expected_tag | (check_triggers_store ? 
TLB_CHECK_TRIGGERS : 0) | trace_flag | mmio_flag; + break; + default: + abort(); } - tlb_data[idx] = entry; return entry; } @@ -618,12 +676,14 @@ void mmu_t::register_memtracer(memtracer_t* t) } reg_t mmu_t::get_pmlen(bool effective_virt, reg_t effective_priv, xlate_flags_t flags) const { - if (!proc || proc->get_xlen() != 64 || ((proc->state.sstatus->readvirt(false) | proc->state.sstatus->readvirt(effective_virt)) & MSTATUS_MXR) || flags.hlvx) + if (!proc || proc->get_xlen() != 64 || flags.hlvx) return 0; reg_t pmm = 0; if (effective_priv == PRV_M) pmm = get_field(proc->state.mseccfg->read(), MSECCFG_PMM); + else if ((proc->state.sstatus->readvirt(false) | proc->state.sstatus->readvirt(effective_virt)) & MSTATUS_MXR) + pmm = 0; else if (!effective_virt && (effective_priv == PRV_S || (!proc->extension_enabled('S') && effective_priv == PRV_U))) pmm = get_field(proc->state.menvcfg->read(), MENVCFG_PMM); else if (effective_virt && effective_priv == PRV_S) diff --git a/riscv/mmu.h b/riscv/mmu.h index 3a12355..305d502 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -18,9 +18,23 @@ // virtual memory configuration #define PGSHIFT 12 const reg_t PGSIZE = 1 << PGSHIFT; -const reg_t PGMASK = ~(PGSIZE-1); #define MAX_PADDR_BITS 64 +// observability hooks for load, store and fetch +// intentionally empty not to cause runtime overhead +// can be redefined if needed +#ifndef MMU_OBSERVE_FETCH +#define MMU_OBSERVE_FETCH(addr, insn, length) +#endif + +#ifndef MMU_OBSERVE_LOAD +#define MMU_OBSERVE_LOAD(addr, data, length) +#endif + +#ifndef MMU_OBSERVE_STORE +#define MMU_OBSERVE_STORE(addr, data, length) +#endif + struct insn_fetch_t { insn_func_t func; @@ -34,8 +48,13 @@ struct icache_entry_t { }; struct tlb_entry_t { - char* host_offset; - reg_t target_offset; + uintptr_t host_addr; + reg_t target_addr; +}; + +struct dtlb_entry_t { + tlb_entry_t data; + reg_t tag; }; struct xlate_flags_t { @@ -66,31 +85,26 @@ void throw_access_exception(bool virt, reg_t addr, access_type type); class mmu_t { private: - std::map<reg_t, reg_t> alloc_cache; - std::vector<std::pair<reg_t, reg_t >> addr_tbl; - reg_t get_pmlen(bool effective_virt, reg_t effective_priv, xlate_flags_t flags) const; mem_access_info_t generate_access_info(reg_t addr, access_type type, xlate_flags_t xlate_flags); public: - mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc); + mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz); ~mmu_t(); template<typename T> T ALWAYS_INLINE load(reg_t addr, xlate_flags_t xlate_flags = {}) { target_endian<T> res; - reg_t vpn = addr >> PGSHIFT; bool aligned = (addr & (sizeof(T) - 1)) == 0; - bool tlb_hit = tlb_load_tag[vpn % TLB_ENTRIES] == vpn; + auto [tlb_hit, host_addr, _] = access_tlb(tlb_load, addr); if (likely(!xlate_flags.is_special_access() && aligned && tlb_hit)) { - res = *(target_endian<T>*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr); + res = *(target_endian<T>*)host_addr; } else { load_slow_path(addr, sizeof(T), (uint8_t*)&res, xlate_flags); } - if (unlikely(proc && proc->get_log_commits_enabled())) - proc->state.log_mem_read.push_back(std::make_tuple(addr, 0, sizeof(T))); + MMU_OBSERVE_LOAD(addr,from_target(res),sizeof(T)); return from_target(res); } @@ -120,19 +134,16 @@ public: template<typename T> void ALWAYS_INLINE store(reg_t addr, T val, xlate_flags_t xlate_flags = {}) { - reg_t vpn = addr >> PGSHIFT; + MMU_OBSERVE_STORE(addr, val, sizeof(T)); bool aligned = (addr & (sizeof(T) - 1)) == 0; - bool tlb_hit = tlb_store_tag[vpn % TLB_ENTRIES] == vpn; + 
auto [tlb_hit, host_addr, _] = access_tlb(tlb_store, addr); if (!xlate_flags.is_special_access() && likely(aligned && tlb_hit)) { - *(target_endian<T>*)(tlb_data[vpn % TLB_ENTRIES].host_offset + addr) = to_target(val); + *(target_endian<T>*)host_addr = to_target(val); } else { target_endian<T> target_val = to_target(val); store_slow_path(addr, sizeof(T), (const uint8_t*)&target_val, xlate_flags, true, false); } - - if (unlikely(proc && proc->get_log_commits_enabled())) - proc->state.log_mem_write.push_back(std::make_tuple(addr, val, sizeof(T))); } template<typename T> @@ -214,7 +225,10 @@ public: throw trap_load_address_misaligned((proc) ? proc->state.v : false, addr, 0, 0); } - return (float128_t){load<uint64_t>(addr), load<uint64_t>(addr + 8)}; + float128_t res; + res.v[0] = load<uint64_t>(addr); + res.v[1] = load<uint64_t>(addr + 8); + return res; } void cbo_zero(reg_t addr) { @@ -287,31 +301,32 @@ public: template<typename T> T ALWAYS_INLINE fetch_jump_table(reg_t addr) { - auto tlb_entry = translate_insn_addr(addr); - return from_target(*(target_endian<T>*)(tlb_entry.host_offset + addr)); + T res = 0; + for (size_t i = 0; i < sizeof(T) / sizeof(insn_parcel_t); i++) + res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t) * 8); + + // table accesses use data endianness, not instruction (little) endianness + return target_big_endian ? to_be(res) : res; } inline icache_entry_t* refill_icache(reg_t addr, icache_entry_t* entry) { - if (matched_trigger) - throw *matched_trigger; + insn_bits_t insn = fetch_insn_parcel(addr); - auto tlb_entry = translate_insn_addr(addr); - insn_bits_t insn = from_le(*(uint16_t*)(tlb_entry.host_offset + addr)); int length = insn_length(insn); if (likely(length == 4)) { - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; } else if (length == 2) { // entire instruction already fetched } else if (length == 6) { - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32; } else { static_assert(sizeof(insn_bits_t) == 8, "insn_bits_t must be uint64_t"); - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 2)) << 16; - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 4)) << 32; - insn |= (insn_bits_t)from_le(*(const uint16_t*)translate_insn_addr_to_host(addr + 6)) << 48; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 2) << 16; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 4) << 32; + insn |= (insn_bits_t)fetch_insn_parcel(addr + 6) << 48; } insn_fetch_t fetch = {proc->decode_insn(insn), insn}; @@ -319,19 +334,24 @@ public: entry->next = &icache[icache_index(addr + length)]; entry->data = fetch; - reg_t paddr = tlb_entry.target_offset + addr;; - if (tracer.interested_in_range(paddr, paddr + 1, FETCH)) { - entry->tag = -1; - tracer.trace(paddr, length, FETCH); + auto [check_tracer, _, paddr] = access_tlb(tlb_insn, addr, TLB_FLAGS, TLB_CHECK_TRACER); + if (unlikely(check_tracer)) { + if (tracer.interested_in_range(paddr, paddr + 1, FETCH)) { + entry->tag = -1; + tracer.trace(paddr, paddr + length, FETCH); + } } + MMU_OBSERVE_FETCH(addr, insn, length); return entry; } inline icache_entry_t* access_icache(reg_t addr) { 
icache_entry_t* entry = &icache[icache_index(addr)]; - if (likely(entry->tag == addr)) + if (likely(entry->tag == addr)){ + MMU_OBSERVE_FETCH(addr, entry->data.insn, insn_length(entry->data.insn.bits())); return entry; + } return refill_icache(addr, entry); } @@ -341,6 +361,17 @@ public: return refill_icache(addr, &entry)->data; } + std::tuple<bool, uintptr_t, reg_t> ALWAYS_INLINE access_tlb(const dtlb_entry_t* tlb, reg_t vaddr, reg_t allowed_flags = 0, reg_t required_flags = 0) + { + auto vpn = vaddr / PGSIZE, pgoff = vaddr % PGSIZE; + auto& entry = tlb[vpn % TLB_ENTRIES]; + auto hit = likely((entry.tag & (~allowed_flags | required_flags)) == (vpn | required_flags)); + bool mmio = allowed_flags & TLB_MMIO & entry.tag; + auto host_addr = mmio ? 0 : entry.data.host_addr + pgoff; + auto paddr = entry.data.target_addr + pgoff; + return std::make_tuple(hit, host_addr, paddr); + } + void flush_tlb(); void flush_icache(); @@ -366,17 +397,11 @@ public: return target_big_endian? target_endian<T>::to_be(n) : target_endian<T>::to_le(n); } - void set_cache_blocksz(reg_t size) - { - blocksz = size; - } - private: simif_t* sim; processor_t* proc; memtracer_list_t tracer; reg_t load_reservation_address; - uint16_t fetch_temp; reg_t blocksz; // implement an instruction cache for simulator performance @@ -387,10 +412,12 @@ private: // If a TLB tag has TLB_CHECK_TRIGGERS set, then the MMU must check for a // trigger match before completing an access. static const reg_t TLB_CHECK_TRIGGERS = reg_t(1) << 63; - tlb_entry_t tlb_data[TLB_ENTRIES]; - reg_t tlb_insn_tag[TLB_ENTRIES]; - reg_t tlb_load_tag[TLB_ENTRIES]; - reg_t tlb_store_tag[TLB_ENTRIES]; + static const reg_t TLB_CHECK_TRACER = reg_t(1) << 62; + static const reg_t TLB_MMIO = reg_t(1) << 61; + static const reg_t TLB_FLAGS = TLB_CHECK_TRIGGERS | TLB_CHECK_TRACER | TLB_MMIO; + dtlb_entry_t tlb_load[TLB_ENTRIES]; + dtlb_entry_t tlb_store[TLB_ENTRIES]; + dtlb_entry_t tlb_insn[TLB_ENTRIES]; // finish translation on a TLB miss and update the TLB tlb_entry_t refill_tlb(reg_t vaddr, reg_t paddr, char* host_addr, access_type type); @@ -403,11 +430,15 @@ private: reg_t walk(mem_access_info_t access_info); // handle uncommon cases: TLB misses, page faults, MMIO - tlb_entry_t fetch_slow_path(reg_t addr); + typedef uint16_t insn_parcel_t; + insn_parcel_t fetch_slow_path(reg_t addr); + insn_parcel_t perform_intrapage_fetch(reg_t vaddr, uintptr_t host_addr, reg_t paddr); void load_slow_path(reg_t original_addr, reg_t len, uint8_t* bytes, xlate_flags_t xlate_flags); void load_slow_path_intrapage(reg_t len, uint8_t* bytes, mem_access_info_t access_info); + void perform_intrapage_load(reg_t vaddr, uintptr_t host_addr, reg_t paddr, reg_t len, uint8_t* bytes, xlate_flags_t xlate_flags); void store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes, xlate_flags_t xlate_flags, bool actually_store, bool require_alignment); void store_slow_path_intrapage(reg_t len, const uint8_t* bytes, mem_access_info_t access_info, bool actually_store); + void perform_intrapage_store(reg_t vaddr, uintptr_t host_addr, reg_t paddr, reg_t len, const uint8_t* bytes, xlate_flags_t xlate_flags); bool mmio_fetch(reg_t paddr, size_t len, uint8_t* bytes); bool mmio_load(reg_t paddr, size_t len, uint8_t* bytes); bool mmio_store(reg_t paddr, size_t len, const uint8_t* bytes); @@ -466,16 +497,11 @@ private: } } - // ITLB lookup - inline tlb_entry_t translate_insn_addr(reg_t addr) { - reg_t vpn = addr >> PGSHIFT; - if (likely(tlb_insn_tag[vpn % TLB_ENTRIES] == vpn)) - return tlb_data[vpn % 
TLB_ENTRIES]; - return fetch_slow_path(addr); - } + inline insn_parcel_t fetch_insn_parcel(reg_t addr) { + if (auto [tlb_hit, host_addr, paddr] = access_tlb(tlb_insn, addr); tlb_hit) + return from_le(*(insn_parcel_t*)host_addr); - inline const uint16_t* translate_insn_addr_to_host(reg_t addr) { - return (uint16_t*)(translate_insn_addr(addr).host_offset + addr); + return from_le(fetch_slow_path(addr)); } inline bool in_mprv() const @@ -497,8 +523,7 @@ private: bool check_triggers_fetch; bool check_triggers_load; bool check_triggers_store; - // The exception describing a matched trigger, or NULL. - triggers::matched_t *matched_trigger; + std::optional<triggers::matched_t> matched_trigger; friend class processor_t; }; diff --git a/riscv/ns16550.cc b/riscv/ns16550.cc index 2805fd8..15e0873 100644 --- a/riscv/ns16550.cc +++ b/riscv/ns16550.cc @@ -328,7 +328,7 @@ void ns16550_t::tick(reg_t UNUSED rtc_ticks) update_interrupt(); } -std::string ns16550_generate_dts(const sim_t* sim, const std::vector<std::string>& UNUSED sargs) +std::string ns16550_generate_dts(const sim_t* sim, const std::vector<std::string>& sargs UNUSED) { std::stringstream s; s << std::hex @@ -348,7 +348,7 @@ std::string ns16550_generate_dts(const sim_t* sim, const std::vector<std::string return s.str(); } -ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& UNUSED sargs) +ns16550_t* ns16550_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& sargs UNUSED) { uint32_t ns16550_shift, ns16550_io_width, ns16550_int_id; if (fdt_parse_ns16550(fdt, base, diff --git a/riscv/plic.cc b/riscv/plic.cc index 14de6df..b6d204b 100644 --- a/riscv/plic.cc +++ b/riscv/plic.cc @@ -343,7 +343,8 @@ bool plic_t::load(reg_t addr, size_t len, uint8_t* bytes) return false; } - if (PRIORITY_BASE <= addr && addr < PENDING_BASE) { + static_assert(PRIORITY_BASE == 0); + if (/* PRIORITY_BASE <= addr && */ addr < PENDING_BASE) { ret = priority_read(addr, &val); } else if (PENDING_BASE <= addr && addr < ENABLE_BASE) { ret = pending_read(addr - PENDING_BASE, &val); @@ -384,7 +385,8 @@ bool plic_t::store(reg_t addr, size_t len, const uint8_t* bytes) write_little_endian_reg(&val, addr, len, bytes); - if (PRIORITY_BASE <= addr && addr < ENABLE_BASE) { + static_assert(PRIORITY_BASE == 0); + if (/* PRIORITY_BASE <= addr && */ addr < ENABLE_BASE) { ret = priority_write(addr, val); } else if (ENABLE_BASE <= addr && addr < CONTEXT_BASE) { uint32_t cntx = (addr - ENABLE_BASE) / ENABLE_PER_HART; @@ -401,7 +403,7 @@ bool plic_t::store(reg_t addr, size_t len, const uint8_t* bytes) return ret; } -std::string plic_generate_dts(const sim_t* sim, const std::vector<std::string>& UNUSED sargs) +std::string plic_generate_dts(const sim_t* sim, const std::vector<std::string>& sargs UNUSED) { std::stringstream s; s << std::hex @@ -424,7 +426,7 @@ std::string plic_generate_dts(const sim_t* sim, const std::vector<std::string>& return s.str(); } -plic_t* plic_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& UNUSED sargs) +plic_t* plic_parse_from_fdt(const void* fdt, const sim_t* sim, reg_t* base, const std::vector<std::string>& sargs UNUSED) { uint32_t plic_ndev; if (fdt_parse_plic(fdt, base, &plic_ndev, "riscv,plic0") == 0 || diff --git a/riscv/processor.cc b/riscv/processor.cc index 2917153..6fe64ab 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -34,7 +34,8 @@ processor_t::processor_t(const char* isa_str, const char* 
priv_str, const cfg_t *cfg, simif_t* sim, uint32_t id, bool halt_on_reset, FILE* log_file, std::ostream& sout_) -: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), sim(sim), id(id), xlen(0), +: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), + sim(sim), id(id), xlen(isa.get_max_xlen()), histogram_enabled(false), log_commits_enabled(false), log_file(log_file), sout_(sout_.rdbuf()), halt_on_reset(halt_on_reset), in_wfi(false), check_triggers_icount(false), @@ -62,7 +63,7 @@ processor_t::processor_t(const char* isa_str, const char* priv_str, VU.vstart_alu = 0; register_base_instructions(); - mmu = new mmu_t(sim, cfg->endianness, this); + mmu = new mmu_t(sim, cfg->endianness, this, cfg->cache_blocksz); disassembler = new disassembler_t(&isa); for (auto e : isa.get_extensions()) @@ -99,42 +100,6 @@ processor_t::~processor_t() delete disassembler; } -static void bad_option_string(const char *option, const char *value, - const char *msg) -{ - fprintf(stderr, "error: bad %s option '%s'. %s\n", option, value, msg); - abort(); -} - -static std::string get_string_token(std::string str, const char delimiter, size_t& pos) -{ - size_t _pos = pos; - while (pos < str.length() && str[pos] != delimiter) ++pos; - return str.substr(_pos, pos - _pos); -} - -static bool check_pow2(int val) -{ - return ((val & (val - 1))) == 0; -} - -static std::string strtolower(const char* str) -{ - std::string res; - for (const char *r = str; *r; r++) - res += std::tolower(*r); - return res; -} - -static int xlen_to_uxl(int xlen) -{ - if (xlen == 32) - return 1; - if (xlen == 64) - return 2; - abort(); -} - void state_t::reset(processor_t* const proc, reg_t max_isa) { pc = DEFAULT_RSTVEC; @@ -169,7 +134,7 @@ void processor_t::set_debug(bool value) debug = value; for (auto e : custom_extensions) - e.second->set_debug(value); + e.second->set_debug(value, *this); } void processor_t::set_histogram(bool value) @@ -180,6 +145,7 @@ void processor_t::set_histogram(bool value) void processor_t::enable_log_commits() { log_commits_enabled = true; + mmu->flush_tlb(); // the TLB caches this setting } void processor_t::reset() @@ -200,7 +166,7 @@ void processor_t::reset() for (auto e : custom_extensions) { // reset any extensions for (auto &csr: e.second->get_csrs(*this)) state.add_csr(csr->address, csr); - e.second->reset(); + e.second->reset(*this); } if (sim) @@ -258,10 +224,10 @@ void processor_t::set_mmu_capability(int cap) break; case IMPL_MMU_SV57: set_impl(IMPL_MMU_SV57, true); - // Fall through + [[fallthrough]]; case IMPL_MMU_SV48: set_impl(IMPL_MMU_SV48, true); - // Fall through + [[fallthrough]]; case IMPL_MMU_SV39: set_impl(IMPL_MMU_SV39, true); set_impl(IMPL_MMU, true); @@ -276,10 +242,79 @@ void processor_t::set_mmu_capability(int cap) } } +reg_t processor_t::select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const +{ + // nonstandard interrupts have highest priority + if (enabled_interrupts >> (IRQ_LCOF + 1)) + enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1); + // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI + else if (enabled_interrupts & MIP_MEIP) + enabled_interrupts = MIP_MEIP; + else if (enabled_interrupts & MIP_MSIP) + enabled_interrupts = MIP_MSIP; + else if (enabled_interrupts & MIP_MTIP) + enabled_interrupts = MIP_MTIP; + else if (enabled_interrupts & MIP_SEIP) + enabled_interrupts = MIP_SEIP; + else if (enabled_interrupts & MIP_SSIP) + enabled_interrupts = MIP_SSIP; + else if (enabled_interrupts & MIP_STIP) + 
enabled_interrupts = MIP_STIP; + else if (enabled_interrupts & MIP_LCOFIP) + enabled_interrupts = MIP_LCOFIP; + else if (enabled_interrupts & MIP_VSEIP) + enabled_interrupts = MIP_VSEIP; + else if (enabled_interrupts & MIP_VSSIP) + enabled_interrupts = MIP_VSSIP; + else if (enabled_interrupts & MIP_VSTIP) + enabled_interrupts = MIP_VSTIP; + + return enabled_interrupts; +} + +bool processor_t::is_handled_in_vs() +{ + reg_t pending_interrupts = state.mip->read() & state.mie->read(); + + const reg_t s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read(); + const reg_t vstopi = state.vstopi->read(); + const reg_t vs_pending_interrupt = vstopi ? (reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0; // SSIP -> VSSIP, etc + + // M-ints have higher priority over HS-ints and VS-ints + const reg_t mie = get_field(state.mstatus->read(), MSTATUS_MIE); + const reg_t m_enabled = state.prv < PRV_M || (state.prv == PRV_M && mie); + reg_t enabled_interrupts = pending_interrupts & ~state.mideleg->read() & -m_enabled; + if (enabled_interrupts == 0) { + // HS-ints have higher priority over VS-ints + const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read(); + const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE); + const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie); + enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled; + if (state.v && enabled_interrupts == 0) { + // VS-ints have least priority and can only be taken with virt enabled + const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie); + enabled_interrupts = vs_pending_interrupt & -vs_enabled; + if (enabled_interrupts) + return true; + } + } + return false; +} + void processor_t::take_interrupt(reg_t pending_interrupts) { + reg_t s_pending_interrupts = 0; + reg_t vstopi = 0; + reg_t vs_pending_interrupt = 0; + + if (extension_enable_table[EXT_SSAIA]) { + s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read(); + vstopi = state.vstopi->read(); + vs_pending_interrupt = vstopi ? 
(reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0; + } + // Do nothing if no pending interrupts - if (!pending_interrupts) { + if (!pending_interrupts && !s_pending_interrupts && !vs_pending_interrupt) { return; } @@ -295,46 +330,20 @@ void processor_t::take_interrupt(reg_t pending_interrupts) const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read(); const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE); const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie); - enabled_interrupts = pending_interrupts & deleg_to_hs & -hs_enabled; + enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled; if (state.v && enabled_interrupts == 0) { // VS-ints have least priority and can only be taken with virt enabled - const reg_t deleg_to_vs = state.hideleg->read(); const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie); - enabled_interrupts = pending_interrupts & deleg_to_vs & -vs_enabled; + enabled_interrupts = vs_pending_interrupt & -vs_enabled; } } const bool nmie = !(state.mnstatus && !get_field(state.mnstatus->read(), MNSTATUS_NMIE)); if (!state.debug_mode && nmie && enabled_interrupts) { - // nonstandard interrupts have highest priority - if (enabled_interrupts >> (IRQ_LCOF + 1)) - enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1); - // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI - else if (enabled_interrupts & MIP_MEIP) - enabled_interrupts = MIP_MEIP; - else if (enabled_interrupts & MIP_MSIP) - enabled_interrupts = MIP_MSIP; - else if (enabled_interrupts & MIP_MTIP) - enabled_interrupts = MIP_MTIP; - else if (enabled_interrupts & MIP_SEIP) - enabled_interrupts = MIP_SEIP; - else if (enabled_interrupts & MIP_SSIP) - enabled_interrupts = MIP_SSIP; - else if (enabled_interrupts & MIP_STIP) - enabled_interrupts = MIP_STIP; - else if (enabled_interrupts & MIP_LCOFIP) - enabled_interrupts = MIP_LCOFIP; - else if (enabled_interrupts & MIP_VSEIP) - enabled_interrupts = MIP_VSEIP; - else if (enabled_interrupts & MIP_VSSIP) - enabled_interrupts = MIP_VSSIP; - else if (enabled_interrupts & MIP_VSTIP) - enabled_interrupts = MIP_VSTIP; - else - abort(); + reg_t selected_interrupt = select_an_interrupt_with_default_priority(enabled_interrupts); if (check_triggers_icount) TM.detect_icount_match(); - throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(enabled_interrupts)); + throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(selected_interrupt)); } } @@ -362,7 +371,7 @@ void processor_t::set_privilege(reg_t prv, bool virt) state.v_changed = state.v != state.prev_v; } -const char* processor_t::get_privilege_string() +const char* processor_t::get_privilege_string() const { if (state.debug_mode) return "D"; @@ -438,7 +447,8 @@ void processor_t::take_trap(trap_t& t, reg_t epc) bool supv_double_trap = false; if (interrupt) { vsdeleg = (curr_virt && state.prv <= PRV_S) ? state.hideleg->read() : 0; - hsdeleg = (state.prv <= PRV_S) ? state.mideleg->read() : 0; + vsdeleg >>= 1; + hsdeleg = (state.prv <= PRV_S) ? (state.mideleg->read() | state.nonvirtual_sip->read()) : 0; bit &= ~((reg_t)1 << (max_xlen - 1)); } else { vsdeleg = (curr_virt && state.prv <= PRV_S) ? 
(state.medeleg->read() & state.hedeleg->read()) : 0; @@ -447,14 +457,17 @@ void processor_t::take_trap(trap_t& t, reg_t epc) // An unexpected trap - a trap when SDT is 1 - traps to M-mode if ((state.prv <= PRV_S && bit < max_xlen) && (((vsdeleg >> bit) & 1) || ((hsdeleg >> bit) & 1))) { - reg_t s = state.sstatus->read(); + // Trap is handled in VS-mode or HS-mode. Read the sstatus of the + // mode that will handle the trap based on the delegation control + reg_t s = (((vsdeleg >> bit) & 1)) ? state.sstatus->read() : + state.nonvirtual_sstatus->read(); supv_double_trap = get_field(s, MSTATUS_SDT); if (supv_double_trap) vsdeleg = hsdeleg = 0; } - if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) { + if ((state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) || (state.v && interrupt && is_handled_in_vs())) { // Handle the trap in VS-mode - const reg_t adjusted_cause = interrupt ? bit - 1 : bit; // VSSIP -> SSIP, etc + const reg_t adjusted_cause = bit; reg_t vector = (state.vstvec->read() & 1) && interrupt ? 4 * adjusted_cause : 0; state.pc = (state.vstvec->read() & ~(reg_t)1) + vector; state.vscause->write(adjusted_cause | (interrupt ? interrupt_bit : 0)); @@ -703,18 +716,17 @@ void processor_t::build_opcode_map() } void processor_t::register_extension(extension_t *x) { - for (auto insn : x->get_instructions()) + for (auto insn : x->get_instructions(*this)) register_custom_insn(insn); build_opcode_map(); - for (auto disasm_insn : x->get_disasms()) + for (auto disasm_insn : x->get_disasms(this)) disassembler->add_insn(disasm_insn); if (!custom_extensions.insert(std::make_pair(x->name(), x)).second) { fprintf(stderr, "extensions must have unique names (got two named \"%s\"!)\n", x->name()); abort(); } - x->set_processor(this); } void processor_t::register_base_instructions() @@ -739,21 +751,27 @@ void processor_t::register_base_instructions() #include "insn_list.h" #undef DEFINE_INSN + #define DEFINE_INSN_UNCOND(name) { \ + insn_desc_t insn = { \ + name##_match, \ + name##_mask, \ + fast_rv32i_##name, \ + fast_rv64i_##name, \ + fast_rv32e_##name, \ + fast_rv64e_##name, \ + logged_rv32i_##name, \ + logged_rv64i_##name, \ + logged_rv32e_##name, \ + logged_rv64e_##name \ + }; \ + register_base_insn(insn); \ + } + // add overlapping instructions first, in order #define DECLARE_OVERLAP_INSN(name, ext) \ name##_overlapping = true; \ if (isa.extension_enabled(ext)) \ - register_base_insn((insn_desc_t) { \ - name##_match, \ - name##_mask, \ - fast_rv32i_##name, \ - fast_rv64i_##name, \ - fast_rv32e_##name, \ - fast_rv64e_##name, \ - logged_rv32i_##name, \ - logged_rv64i_##name, \ - logged_rv32e_##name, \ - logged_rv64e_##name}); + DEFINE_INSN_UNCOND(name); #include "overlap_list.h" #undef DECLARE_OVERLAP_INSN @@ -762,19 +780,10 @@ void processor_t::register_base_instructions() // appear earlier to improve search time on opcode_cache misses. 
#define DEFINE_INSN(name) \ if (!name##_overlapping) \ - register_base_insn((insn_desc_t) { \ - name##_match, \ - name##_mask, \ - fast_rv32i_##name, \ - fast_rv64i_##name, \ - fast_rv32e_##name, \ - fast_rv64e_##name, \ - logged_rv32i_##name, \ - logged_rv64i_##name, \ - logged_rv32e_##name, \ - logged_rv64e_##name}); + DEFINE_INSN_UNCOND(name); #include "insn_list.h" #undef DEFINE_INSN + #undef DEFINE_INSN_UNCOND // terminate instruction list with a catch-all register_base_insn(insn_desc_t::illegal_instruction); @@ -813,6 +822,11 @@ bool processor_t::store(reg_t addr, size_t len, const uint8_t* bytes) return false; } +reg_t processor_t::size() +{ + return PGSIZE; +} + void processor_t::trigger_updated(const std::vector<triggers::trigger_t *> &triggers) { mmu->flush_tlb(); diff --git a/riscv/processor.h b/riscv/processor.h index 4f22cbd..a6e9eeb 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -70,6 +70,7 @@ typedef std::vector<std::tuple<reg_t, uint64_t, uint8_t>> commit_log_mem_t; // architectural state of a RISC-V hart struct state_t { + void add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg); void reset(processor_t* const proc, reg_t max_isa); void add_csr(reg_t addr, const csr_t_p& csr); @@ -96,6 +97,8 @@ struct state_t wide_counter_csr_t_p mcycle; mie_csr_t_p mie; mip_csr_t_p mip; + csr_t_p nonvirtual_sip; + csr_t_p nonvirtual_sie; csr_t_p medeleg; csr_t_p mideleg; csr_t_p mcounteren; @@ -109,6 +112,7 @@ struct state_t csr_t_p stvec; virtualized_csr_t_p satp; csr_t_p scause; + csr_t_p scountinhibit; // When taking a trap into HS-mode, we must access the nonvirtualized HS-mode CSRs directly: csr_t_p nonvirtual_stvec; @@ -172,6 +176,11 @@ struct state_t csr_t_p ssp; + csr_t_p mvien; + mvip_csr_t_p mvip; + csr_t_p hvictl; + csr_t_p vstopi; + bool serialized; // whether timer CSRs are in a well-defined state // When true, execute a single instruction and then enter debug mode. This @@ -248,8 +257,8 @@ public: FILE *log_file, std::ostream& sout_); // because of command line option --log and -s we need both ~processor_t(); - const isa_parser_t &get_isa() { return isa; } - const cfg_t &get_cfg() { return *cfg; } + const isa_parser_t &get_isa() const & { return isa; } + const cfg_t &get_cfg() const & { return *cfg; } void set_debug(bool value); void set_histogram(bool value); @@ -326,7 +335,7 @@ public: } reg_t legalize_privilege(reg_t); void set_privilege(reg_t, bool); - const char* get_privilege_string(); + const char* get_privilege_string() const; void update_histogram(reg_t pc); const disassembler_t* get_disassembler() { return disassembler; } @@ -341,14 +350,15 @@ public: void register_extension(extension_t*); // MMIO slave interface - bool load(reg_t addr, size_t len, uint8_t* bytes); - bool store(reg_t addr, size_t len, const uint8_t* bytes); + bool load(reg_t addr, size_t len, uint8_t* bytes) override; + bool store(reg_t addr, size_t len, const uint8_t* bytes) override; + reg_t size() override; // When true, display disassembly of each instruction that's executed. bool debug; // When true, take the slow simulation path. - bool slow_path(); - bool halted() { return state.debug_mode; } + bool slow_path() const; + bool halted() const { return state.debug_mode; } enum { HR_NONE, /* Halt request is inactive. */ HR_REGULAR, /* Regular halt request/debug interrupt. 
*/ @@ -368,6 +378,8 @@ public: void check_if_lpad_required(); + reg_t select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const; + private: const isa_parser_t isa; const cfg_t * const cfg; @@ -400,6 +412,10 @@ private: static const size_t OPCODE_CACHE_SIZE = 4095; opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE]; + unsigned ziccid_flush_count = 0; + static const unsigned ZICCID_FLUSH_PERIOD = 10; + + bool is_handled_in_vs(); void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); } void take_interrupt(reg_t mask); // take first enabled interrupt in mask void take_trap(trap_t& t, reg_t epc); // take an exception diff --git a/riscv/rocc.cc b/riscv/rocc.cc index 53ee051..9ba4fc1 100644 --- a/riscv/rocc.cc +++ b/riscv/rocc.cc @@ -14,15 +14,15 @@ u.i = insn; \ reg_t xs1 = u.r.xs1 ? RS1 : -1; \ reg_t xs2 = u.r.xs2 ? RS2 : -1; \ - reg_t xd = rocc->custom##n(u.r, xs1, xs2); \ + reg_t xd = rocc->custom##n(p, u.r, xs1, xs2); \ if (u.r.xd) \ WRITE_RD(xd); \ return pc+4; \ } \ \ - reg_t rocc_t::custom##n(rocc_insn_t UNUSED insn, reg_t UNUSED xs1, reg_t UNUSED xs2) \ + reg_t rocc_t::custom##n(processor_t *p, rocc_insn_t UNUSED insn, reg_t UNUSED xs1, reg_t UNUSED xs2) \ { \ - illegal_instruction(); \ + illegal_instruction(*p); \ return 0; \ } @@ -31,25 +31,17 @@ customX(1) customX(2) customX(3) -std::vector<insn_desc_t> rocc_t::get_instructions() +std::vector<insn_desc_t> rocc_t::get_instructions(const processor_t &) { - std::vector<insn_desc_t> insns; - insns.push_back((insn_desc_t){0x0b, 0x7f, - &::illegal_instruction, c0, &::illegal_instruction, c0, - &::illegal_instruction, c0, &::illegal_instruction, c0}); - insns.push_back((insn_desc_t){0x2b, 0x7f, - &::illegal_instruction, c1, &::illegal_instruction, c1, - &::illegal_instruction, c1, &::illegal_instruction, c1}); - insns.push_back((insn_desc_t){0x5b, 0x7f, - &::illegal_instruction, c2, &::illegal_instruction, c2, - &::illegal_instruction, c2, &::illegal_instruction, c2}); - insns.push_back((insn_desc_t){0x7b, 0x7f, - &::illegal_instruction, c3, &::illegal_instruction, c3, - &::illegal_instruction, c0, &::illegal_instruction, c3}); + std::vector<insn_desc_t> insns = { + {0x0b, 0x7f, &::illegal_instruction, c0, &::illegal_instruction, c0, &::illegal_instruction, c0, &::illegal_instruction, c0}, + {0x2b, 0x7f, &::illegal_instruction, c1, &::illegal_instruction, c1, &::illegal_instruction, c1, &::illegal_instruction, c1}, + {0x5b, 0x7f, &::illegal_instruction, c2, &::illegal_instruction, c2, &::illegal_instruction, c2, &::illegal_instruction, c2}, + {0x7b, 0x7f, &::illegal_instruction, c3, &::illegal_instruction, c3, &::illegal_instruction, c0, &::illegal_instruction, c3}}; return insns; } -std::vector<disasm_insn_t*> rocc_t::get_disasms() +std::vector<disasm_insn_t *> rocc_t::get_disasms(const processor_t *) { std::vector<disasm_insn_t*> insns; return insns; diff --git a/riscv/rocc.h b/riscv/rocc.h index d65ec97..d7fee26 100644 --- a/riscv/rocc.h +++ b/riscv/rocc.h @@ -24,12 +24,12 @@ union rocc_insn_union_t class rocc_t : public extension_t { public: - virtual reg_t custom0(rocc_insn_t insn, reg_t xs1, reg_t xs2); - virtual reg_t custom1(rocc_insn_t insn, reg_t xs1, reg_t xs2); - virtual reg_t custom2(rocc_insn_t insn, reg_t xs1, reg_t xs2); - virtual reg_t custom3(rocc_insn_t insn, reg_t xs1, reg_t xs2); - std::vector<insn_desc_t> get_instructions(); - std::vector<disasm_insn_t*> get_disasms(); + virtual reg_t custom0(processor_t *, rocc_insn_t insn, reg_t xs1, reg_t xs2); + virtual reg_t 
custom1(processor_t *, rocc_insn_t insn, reg_t xs1, reg_t xs2); + virtual reg_t custom2(processor_t *, rocc_insn_t insn, reg_t xs1, reg_t xs2); + virtual reg_t custom3(processor_t *, rocc_insn_t insn, reg_t xs1, reg_t xs2); + std::vector<insn_desc_t> get_instructions(const processor_t &proc) override; + std::vector<disasm_insn_t *> get_disasms(const processor_t *proc = nullptr) override; }; #define define_custom_func(type_name, ext_name_str, func_name, method_name) \ diff --git a/riscv/sim.cc b/riscv/sim.cc index 81acb1c..388d729 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -96,7 +96,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, } #endif - debug_mmu = new mmu_t(this, cfg->endianness, NULL); + debug_mmu = new mmu_t(this, cfg->endianness, NULL, cfg->cache_blocksz); // When running without using a dtb, skip the fdt-based configuration steps if (!dtb_enabled) { @@ -137,7 +137,6 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, dtb = strstream.str(); dts = dtb_to_dts(dtb); } else { - std::pair<reg_t, reg_t> initrd_bounds = cfg->initrd_bounds; std::string device_nodes; for (const device_factory_sargs_t& factory_sargs: device_factories) { const device_factory_t* factory = factory_sargs.first; @@ -238,6 +237,8 @@ sim_t::sim_t(const cfg_t *cfg, bool halted, procs[cpu_idx]->set_mmu_capability(IMPL_MMU_SBARE); } + procs[cpu_idx]->reset(); + cpu_idx++; } @@ -404,10 +405,9 @@ void sim_t::set_rom() char* sim_t::addr_to_mem(reg_t paddr) { if (!paddr_ok(paddr)) return NULL; - auto desc = bus.find_device(paddr); + auto desc = bus.find_device(paddr >> PGSHIFT << PGSHIFT, PGSIZE); if (auto mem = dynamic_cast<abstract_mem_t*>(desc.second)) - if (paddr - desc.first < mem->size()) - return mem->contents(paddr - desc.first); + return mem->contents(paddr - desc.first); return NULL; } diff --git a/riscv/sim.h b/riscv/sim.h index ce5dc6f..da04a88 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -100,8 +100,13 @@ private: remote_bitbang_t* remote_bitbang; std::optional<std::function<void()>> next_interactive_action; - // memory-mapped I/O routines + // If padd corresponds to memory (as opposed to an I/O device), return a + // host pointer corresponding to paddr. + // For these purposes, only memories that include the entire base page + // surrounding paddr are considered; smaller memories are treated as I/O. virtual char* addr_to_mem(reg_t paddr) override; + + // memory-mapped I/O routines virtual bool mmio_load(reg_t paddr, size_t len, uint8_t* bytes) override; virtual bool mmio_store(reg_t paddr, size_t len, const uint8_t* bytes) override; void set_rom(); diff --git a/riscv/triggers.cc b/riscv/triggers.cc index e130a87..9c21330 100644 --- a/riscv/triggers.cc +++ b/riscv/triggers.cc @@ -52,7 +52,7 @@ void trigger_t::tdata3_write(processor_t * const proc, const reg_t val) noexcept mhselect = get_field(val, CSR_TEXTRA_MHSELECT(xlen)); sbytemask = get_field(val, CSR_TEXTRA_SBYTEMASK(xlen)); svalue = proc->extension_enabled_const('S') ? get_field(val, CSR_TEXTRA_SVALUE(xlen)) : 0; - sselect = (sselect_t)((proc->extension_enabled_const('S') && get_field(val, CSR_TEXTRA_SSELECT(xlen)) <= SSELECT_MAXVAL) ? get_field(val, CSR_TEXTRA_SSELECT(xlen)) : SSELECT_IGNORE); + sselect = (sselect_t)((proc->extension_enabled_const('S') && get_field(val, CSR_TEXTRA_SSELECT(xlen)) <= SSELECT_MAXVAL) ? 
get_field(val, CSR_TEXTRA_SSELECT(xlen)) : (reg_t)SSELECT_IGNORE); } static reg_t tcontrol_value(const state_t * state) { @@ -274,7 +274,10 @@ std::optional<match_result_t> mcontrol_common_t::detect_memory_access_match(proc mcontrol_common_t::match_t mcontrol_common_t::legalize_match(reg_t val, reg_t maskmax) noexcept { switch (val) { - case MATCH_NAPOT: if (maskmax == 0) return MATCH_EQUAL; + case MATCH_NAPOT: + if (maskmax == 0) + return MATCH_EQUAL; + [[fallthrough]]; case MATCH_EQUAL: case MATCH_GE: case MATCH_LT: @@ -675,4 +678,4 @@ reg_t module_t::tinfo_read(unsigned UNUSED index) const noexcept (CSR_TINFO_VERSION_1 << CSR_TINFO_VERSION_OFFSET); } -}; +} diff --git a/riscv/triggers.h b/riscv/triggers.h index 3f1e86f..60ee5ca 100644 --- a/riscv/triggers.h +++ b/riscv/triggers.h @@ -301,6 +301,6 @@ private: std::vector<trigger_t *> triggers; }; -}; +} #endif diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 3e7dc45..1e33232 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -8,16 +8,10 @@ // // vector: masking skip helper // -#define VI_MASK_VARS \ - const int midx = i / 64; \ - const int mpos = i % 64; - #define VI_LOOP_ELEMENT_SKIP(BODY) \ - VI_MASK_VARS \ if (insn.v_vm() == 0) { \ BODY; \ - bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \ - if (skip) { \ + if (!P.VU.mask_elt(0, i)) { \ continue; \ } \ } @@ -206,7 +200,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, require_vector(true); \ reg_t vl = P.VU.vl->read(); \ reg_t UNUSED sew = P.VU.vsew; \ - reg_t rd_num = insn.rd(); \ + reg_t UNUSED rd_num = insn.rd(); \ reg_t UNUSED rs1_num = insn.rs1(); \ reg_t rs2_num = insn.rs2(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { @@ -231,24 +225,18 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_LOOP_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ - VI_MASK_VARS \ - auto v0 = P.VU.elt<uint64_t>(0, midx); \ - const uint64_t mmask = UINT64_C(1) << mpos; \ const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \ - uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; \ - uint128_t res = 0; \ - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); + uint64_t carry = insn.v_vm() == 0 ? 
P.VU.mask_elt(0, i) : 0; \ + bool res = false; #define VI_LOOP_CARRY_END \ - vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ } \ P.VU.vstart->write(0); #define VI_LOOP_WITH_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ - VI_MASK_VARS \ - auto &v0 = P.VU.elt<uint64_t>(0, midx); \ const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \ - uint64_t carry = (v0 >> mpos) & 0x1; + uint64_t carry = P.VU.mask_elt(0, i); #define VI_LOOP_CMP_BASE \ require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ @@ -260,12 +248,10 @@ static inline bool is_overlapped_widen(const int astart, int asize, reg_t rs2_num = insn.rs2(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ - uint64_t res = 0; + bool res = false; #define VI_LOOP_CMP_END \ - vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ } \ P.VU.vstart->write(0); @@ -274,13 +260,9 @@ static inline bool is_overlapped_widen(const int astart, int asize, require_vector(true); \ reg_t vl = P.VU.vl->read(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ - int midx = i / 64; \ - int mpos = i % 64; \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \ - uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \ - uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ - res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + bool vs2 = P.VU.mask_elt(insn.rs2(), i); \ + bool vs1 = P.VU.mask_elt(insn.rs1(), i); \ + P.VU.set_mask_elt(insn.rd(), i, (op)); \ } \ P.VU.vstart->write(0); @@ -354,7 +336,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_PARAMS(x) \ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \ - type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ + type_sew_t<x>::type UNUSED simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); #define XV_PARAMS(x) \ @@ -454,7 +436,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VFP_VF_CMP_PARAMS(width) \ float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \ - float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + float##width##_t UNUSED vs2 = P.VU.elt<float##width##_t>(rs2_num, i); #define VFP_VF_PARAMS(width) \ float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ @@ -523,8 +505,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, // merge and copy loop #define VI_MERGE_VARS \ - VI_MASK_VARS \ - bool UNUSED use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool UNUSED use_first = P.VU.mask_elt(0, i); #define VI_MERGE_LOOP_BASE \ VI_GENERAL_LOOP_BASE \ @@ -1181,25 +1162,6 @@ VI_VX_ULOOP({ \ #define VI_STRIP(inx) \ reg_t vreg_inx = inx; -#define VI_DUPLICATE_VREG(reg_num, idx_sew) \ -reg_t index[P.VU.vlmax]; \ - for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl->read() != 0; ++i) { \ - switch (idx_sew) { \ - case e8: \ - index[i] = P.VU.elt<uint8_t>(reg_num, i); \ - break; \ - case e16: \ - index[i] = P.VU.elt<uint16_t>(reg_num, i); \ - break; \ - case e32: \ - index[i] = P.VU.elt<uint32_t>(reg_num, i); \ - break; \ - case e64: \ - index[i] = P.VU.elt<uint64_t>(reg_num, i); \ - break; \ - } \ -} - #define VI_LD(stride, offset, elt_width, is_mask_ldst) \ const reg_t nf = insn.v_nf() + 1; \ VI_CHECK_LOAD(elt_width, is_mask_ldst); \ @@ -1218,6 
+1180,23 @@ reg_t index[P.VU.vlmax]; \ } \ P.VU.vstart->write(0); +#define VI_LDST_GET_INDEX(elt_width) \ + reg_t index; \ + switch (elt_width) { \ + case e8: \ + index = P.VU.elt<uint8_t>(insn.rs2(), i); \ + break; \ + case e16: \ + index = P.VU.elt<uint16_t>(insn.rs2(), i); \ + break; \ + case e32: \ + index = P.VU.elt<uint32_t>(insn.rs2(), i); \ + break; \ + case e64: \ + index = P.VU.elt<uint64_t>(insn.rs2(), i); \ + break; \ + } \ + #define VI_LD_INDEX(elt_width, is_seg) \ const reg_t nf = insn.v_nf() + 1; \ VI_CHECK_LD_INDEX(elt_width); \ @@ -1226,8 +1205,8 @@ reg_t index[P.VU.vlmax]; \ const reg_t vd = insn.rd(); \ if (!is_seg) \ require(nf == 1); \ - VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ for (reg_t i = 0; i < vl; ++i) { \ + VI_LDST_GET_INDEX(elt_width); \ VI_ELEMENT_SKIP; \ VI_STRIP(i); \ P.VU.vstart->write(i); \ @@ -1235,19 +1214,19 @@ reg_t index[P.VU.vlmax]; \ switch (P.VU.vsew) { \ case e8: \ P.VU.elt<uint8_t>(vd + fn * flmul, vreg_inx, true) = \ - MMU.load<uint8_t>(baseAddr + index[i] + fn * 1); \ + MMU.load<uint8_t>(baseAddr + index + fn * 1); \ break; \ case e16: \ P.VU.elt<uint16_t>(vd + fn * flmul, vreg_inx, true) = \ - MMU.load<uint16_t>(baseAddr + index[i] + fn * 2); \ + MMU.load<uint16_t>(baseAddr + index + fn * 2); \ break; \ case e32: \ P.VU.elt<uint32_t>(vd + fn * flmul, vreg_inx, true) = \ - MMU.load<uint32_t>(baseAddr + index[i] + fn * 4); \ + MMU.load<uint32_t>(baseAddr + index + fn * 4); \ break; \ default: \ P.VU.elt<uint64_t>(vd + fn * flmul, vreg_inx, true) = \ - MMU.load<uint64_t>(baseAddr + index[i] + fn * 8); \ + MMU.load<uint64_t>(baseAddr + index + fn * 8); \ break; \ } \ } \ @@ -1280,27 +1259,27 @@ reg_t index[P.VU.vlmax]; \ const reg_t vs3 = insn.rd(); \ if (!is_seg) \ require(nf == 1); \ - VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ for (reg_t i = 0; i < vl; ++i) { \ + VI_LDST_GET_INDEX(elt_width); \ VI_STRIP(i) \ VI_ELEMENT_SKIP; \ P.VU.vstart->write(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ switch (P.VU.vsew) { \ case e8: \ - MMU.store<uint8_t>(baseAddr + index[i] + fn * 1, \ + MMU.store<uint8_t>(baseAddr + index + fn * 1, \ P.VU.elt<uint8_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ case e16: \ - MMU.store<uint16_t>(baseAddr + index[i] + fn * 2, \ + MMU.store<uint16_t>(baseAddr + index + fn * 2, \ P.VU.elt<uint16_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ case e32: \ - MMU.store<uint32_t>(baseAddr + index[i] + fn * 4, \ + MMU.store<uint32_t>(baseAddr + index + fn * 4, \ P.VU.elt<uint32_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ default: \ - MMU.store<uint64_t>(baseAddr + index[i] + fn * 8, \ + MMU.store<uint64_t>(baseAddr + index + fn * 8, \ P.VU.elt<uint64_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ } \ @@ -1484,9 +1463,7 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \ - uint64_t res = 0; + bool res = false; #define VI_VFP_LOOP_REDUCTION_BASE(width) \ float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \ @@ -1564,7 +1541,7 @@ reg_t index[P.VU.vlmax]; \ case e16: \ case e32: \ case e64: { \ - vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ break; \ } \ default: \ diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index a5c35f8..7c6633c 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -29,10 +29,11 @@ void vectorUnit_t::vectorUnit_t::reset() reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, 
int rs1, reg_t reqVL, reg_t newType) { - int new_vlmul = 0; if (vtype->read() != newType) { + int new_vlmul = int8_t(extract64(newType, 0, 3) << 5) >> 5; + auto old_vlmax = vlmax; + vsew = 1 << (extract64(newType, 3, 3) + 3); - new_vlmul = int8_t(extract64(newType, 0, 3) << 5) >> 5; vflmul = new_vlmul >= 0 ? 1 << new_vlmul : 1.0 / (1 << -new_vlmul); vlmax = (VLEN/vsew) * vflmul; vta = extract64(newType, 6, 1); @@ -40,7 +41,8 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new vill = !(vflmul >= 0.125 && vflmul <= 8) || vsew > std::min(vflmul, 1.0f) * ELEN - || (newType >> 8) != 0; + || (newType >> 8) != 0 + || (rd == 0 && rs1 == 0 && old_vlmax != vlmax); if (vill) { vlmax = 0; @@ -54,7 +56,7 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new if (vlmax == 0) { vl->write_raw(0); } else if (rd == 0 && rs1 == 0) { - vl->write_raw(std::min(vl->read(), vlmax)); + ; // retain current VL } else if (rd != 0 && rs1 == 0) { vl->write_raw(vlmax); } else if (rs1 != 0) { diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h index a057c62..0e80618 100644 --- a/riscv/vector_unit.h +++ b/riscv/vector_unit.h @@ -108,6 +108,17 @@ public: template<typename EG> EG& elt_group(reg_t vReg, reg_t n, bool is_write = false); + bool mask_elt(reg_t vReg, reg_t n) + { + return (elt<uint8_t>(vReg, n / 8) >> (n % 8)) & 1; + } + + void set_mask_elt(reg_t vReg, reg_t n, bool value) + { + auto& e = elt<uint8_t>(vReg, n / 8, true); + e = (e & ~(1U << (n % 8))) | (value << (n % 8)); + } + public: void reset(); diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index f094629..702ad91 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -86,6 +86,32 @@ // (LMUL * VLEN) <= EGW #define require_egw_fits(EGW) require((EGW) <= (P.VU.VLEN * P.VU.vflmul)) +// Ensures that a register index is aligned to EMUL +// evaluated as EGW / VLEN. +// The check is only enabled if this value is greater +// than one (no index alignment check required for fractional EMUL) +#define require_vreg_align_eglmul(EGW, VREG_NUM) \ + do { \ + float vfeglmul = EGW / P.VU.VLEN; \ + if (vfeglmul > 1) { \ + require_align(VREG_NUM, vfeglmul); \ + }\ + } while (0) + +#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2()) + +// ensure that rs2 and rd do not overlap, assuming rd encodes an LMUL wide +// vector register group and rs2 encodes an vs2_EMUL=ceil(EGW / VLEN) vector register +// group. +// Assumption: LMUL >= vs2_EMUL which is enforced independently through require_egw_fits. +#define require_noover_eglmul(vd, vs2) \ + do { \ + int vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \ + int aligned_vd = vd / vd_emul; \ + int aligned_vs2 = vs2 / vd_emul; \ + require(aligned_vd != aligned_vs2); \ + } while (0) + // Checks that the vector unit state (vtype and vl) can be interpreted // as element groups with EEW=32, EGS=4 (four 32-bits elements per group), // for an effective element group width of EGW=128 bits. diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h index db705c7..d94ddc2 100644 --- a/riscv/zvkned_ext_macros.h +++ b/riscv/zvkned_ext_macros.h @@ -2,6 +2,7 @@ // the RISC-V Zvkned extension (vector AES single round). 
#include "insns/aes_common.h" +#include "zvk_ext_macros.h" #ifndef RISCV_ZVKNED_EXT_MACROS_H_ #define RISCV_ZVKNED_EXT_MACROS_H_ @@ -9,16 +10,22 @@ // vaes*.vs instruction constraints: // - Zvkned is enabled // - EGW (128) <= LMUL * VLEN +// - vd is LMUL aligned +// - vs2 is ceil(EGW / VLEN) aligned // - vd and vs2 cannot overlap // // The constraint that vstart and vl are both EGS (4) aligned // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vaes_vs_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ - require(insn.rd() != insn.rs2()); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vs2_align_eglmul(128); \ + require_noover_eglmul(insn.rd(), insn.rs2()); \ } while (false) // vaes*.vv instruction constraints. Those are the same as the .vs ones, @@ -30,17 +37,24 @@ // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vaes_vv_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + VI_CHECK_SSS(false) \ } while (false) // vaeskf*.vi instruction constraints. Those are the same as the .vv ones. #define require_vaeskf_vi_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvkned; \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + VI_CHECK_SSS(false) \ } while (false) #define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0)) diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h index b50818b..98236b0 100644 --- a/riscv/zvknh_ext_macros.h +++ b/riscv/zvknh_ext_macros.h @@ -15,6 +15,7 @@ // macros. #define require_vsha2_common_constraints \ do { \ + VI_CHECK_SSS(true) \ require(P.VU.vsew == 32 || P.VU.vsew == 64); \ require(insn.rd() != insn.rs1()); \ require(insn.rd() != insn.rs2()); \ diff --git a/riscv/zvksed_ext_macros.h b/riscv/zvksed_ext_macros.h index 46e399b..3ffa272 100644 --- a/riscv/zvksed_ext_macros.h +++ b/riscv/zvksed_ext_macros.h @@ -16,9 +16,12 @@ // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. #define require_vsm4_constraints \ do { \ + const uint32_t EGS = 4; \ require_zvksed; \ require(P.VU.vsew == 32); \ require_egw_fits(128); \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ } while (false) // Returns a uint32_t value constructed from the 4 bytes (uint8_t) diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h index 71c5a09..c4549da 100644 --- a/riscv/zvksh_ext_macros.h +++ b/riscv/zvksh_ext_macros.h @@ -16,9 +16,12 @@ // is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros. 
#define require_vsm3_constraints \ do { \ + const uint32_t EGS = 8; \ require_zvksh; \ require(P.VU.vsew == 32); \ require_egw_fits(256); \ + require(P.VU.vstart->read() % EGS == 0); \ + require(P.VU.vl->read() % EGS == 0); \ require(insn.rd() != insn.rs2()); \ } while (false) diff --git a/softfloat/fall_maxmin.c b/softfloat/fall_maxmin.c index 8d1196e..f1efa87 100644 --- a/softfloat/fall_maxmin.c +++ b/softfloat/fall_maxmin.c @@ -72,13 +72,13 @@ float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _ } \ } -COMPARE_MAX(a, b, 16); -COMPARE_MAX(a, b, 32); -COMPARE_MAX(a, b, 64); +COMPARE_MAX(a, b, 16) +COMPARE_MAX(a, b, 32) +COMPARE_MAX(a, b, 64) -COMPARE_MIN(a, b, 16); -COMPARE_MIN(a, b, 32); -COMPARE_MIN(a, b, 64); +COMPARE_MIN(a, b, 16) +COMPARE_MIN(a, b, 32) +COMPARE_MIN(a, b, 64) bfloat16_t bf16_max( bfloat16_t a, bfloat16_t b ) { diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index 0a38404..899f00a 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -252,3 +252,5 @@ softfloat_test_srcs = softfloat_install_hdrs = \ softfloat.h \ softfloat_types.h \ + +softfloat_CFLAGS = -Wno-sign-compare -Wno-implicit-fallthrough diff --git a/spike_main/spike-log-parser.cc b/spike_main/spike-log-parser.cc index 2c9a543..21166ad 100644 --- a/spike_main/spike-log-parser.cc +++ b/spike_main/spike-log-parser.cc @@ -38,7 +38,6 @@ int main(int UNUSED argc, char** argv) std::regex reg("^core\\s+\\d+:\\s+0x[0-9a-f]+\\s+\\(0x([0-9a-f]+)\\)", std::regex_constants::icase); std::smatch m; - std::ssub_match sm ; while (getline(cin,s)){ if (regex_search(s, m, reg)){ diff --git a/spike_main/spike.cc b/spike_main/spike.cc index b8a1b5c..3b0e004 100644 --- a/spike_main/spike.cc +++ b/spike_main/spike.cc @@ -451,6 +451,7 @@ int main(int argc, char** argv) min_blocksz, max_blocksz); exit(-1); } + cfg.cache_blocksz = blocksz; }); parser.option(0, "instructions", 1, [&](const char* s){ instructions = strtoull(s, 0, 0); @@ -541,7 +542,6 @@ int main(int argc, char** argv) if (dc) s.get_core(i)->get_mmu()->register_memtracer(&*dc); for (auto e : extensions) s.get_core(i)->register_extension(e()); - s.get_core(i)->get_mmu()->set_cache_blocksz(blocksz); } s.set_debug(debug);
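
Note on the restructured data TLB (riscv/mmu.h hunks above): each of tlb_load, tlb_store and tlb_insn now pairs a tlb_entry_t with a tag whose high bits (TLB_CHECK_TRIGGERS, TLB_CHECK_TRACER, TLB_MMIO) are folded into the hit comparison, so a page that needs trigger checks, tracing, or MMIO handling simply misses the fast path unless the caller passes those bits as allowed_flags (or demands them via required_flags). The following is a minimal standalone sketch of that lookup, not Spike code; the PGSIZE/TLB_ENTRIES values and the main() driver are illustrative assumptions, while the names dtlb_entry_t, access_tlb and the flag constants mirror the patch.

// Standalone sketch of the tagged-TLB lookup introduced in riscv/mmu.h.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <tuple>

using reg_t = uint64_t;

static const reg_t PGSIZE = 4096;
static const size_t TLB_ENTRIES = 256;

// Same flag layout as the patch: one tag bit each for triggers, tracer and MMIO.
static const reg_t TLB_CHECK_TRIGGERS = reg_t(1) << 63;
static const reg_t TLB_CHECK_TRACER   = reg_t(1) << 62;
static const reg_t TLB_MMIO           = reg_t(1) << 61;
static const reg_t TLB_FLAGS = TLB_CHECK_TRIGGERS | TLB_CHECK_TRACER | TLB_MMIO;

struct tlb_entry_t { uintptr_t host_addr; reg_t target_addr; };
struct dtlb_entry_t { tlb_entry_t data; reg_t tag; };

// Mirrors mmu_t::access_tlb(): a hit requires the VPN to match and any flag bit
// not listed in allowed_flags (or demanded via required_flags) to line up;
// otherwise the caller falls back to the slow path.
std::tuple<bool, uintptr_t, reg_t>
access_tlb(const dtlb_entry_t* tlb, reg_t vaddr,
           reg_t allowed_flags = 0, reg_t required_flags = 0)
{
  reg_t vpn = vaddr / PGSIZE, pgoff = vaddr % PGSIZE;
  const dtlb_entry_t& entry = tlb[vpn % TLB_ENTRIES];
  bool hit = (entry.tag & (~allowed_flags | required_flags)) == (vpn | required_flags);
  bool mmio = allowed_flags & TLB_MMIO & entry.tag;
  uintptr_t host_addr = mmio ? 0 : entry.data.host_addr + pgoff;
  reg_t paddr = entry.data.target_addr + pgoff;
  return std::make_tuple(hit, host_addr, paddr);
}

int main()
{
  static char page[PGSIZE];             // stands in for host memory backing one page
  dtlb_entry_t tlb[TLB_ENTRIES] = {};

  reg_t vaddr = 0x1000, paddr_base = 0x8000'0000;
  reg_t vpn = vaddr / PGSIZE;

  // Refill as refill_tlb() would: per-page base addresses, flag bits stamped into the tag.
  tlb[vpn % TLB_ENTRIES].data = { uintptr_t(page), paddr_base };
  tlb[vpn % TLB_ENTRIES].tag = vpn | TLB_CHECK_TRIGGERS;

  // Plain fast-path probe: no flags allowed, so the trigger bit forces a miss.
  bool fast_hit = std::get<0>(access_tlb(tlb, vaddr + 8));
  // Flag-tolerant probe (as the slow paths use): the same entry now hits.
  auto [slow_hit, host, pa] = access_tlb(tlb, vaddr + 8, TLB_FLAGS);

  printf("fast-path hit=%d, flag-tolerant hit=%d, host=%p, paddr=0x%llx\n",
         int(fast_hit), int(slow_hit), (void*)host, (unsigned long long)pa);
  return 0;
}

Encoding the special cases in the tag is what lets the load/store templates and the new fast path in store_slow_path get by with a single comparison before dereferencing host_addr.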
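Note on the vector mask changes (riscv/v_ext_macros.h and riscv/vector_unit.h above): the open-coded midx/mpos arithmetic on 64-bit chunks of v0 is replaced by byte-granular helpers, mask_elt() and set_mask_elt(), added to vectorUnit_t. A minimal standalone sketch of that bit addressing follows, not Spike code; the mask_reg_sketch type, the std::vector backing store and VLEN=128 are illustrative assumptions, while the shift/mask logic mirrors the new helpers.

// Standalone sketch of the byte-granular mask-element helpers from vector_unit.h.
#include <cstdint>
#include <cstdio>
#include <vector>

using reg_t = uint64_t;

struct mask_reg_sketch {
  std::vector<uint8_t> bytes;       // one bit per vector element, LSB-first within each byte
  explicit mask_reg_sketch(size_t vlen_bits) : bytes(vlen_bits / 8, 0) {}

  // Equivalent of vectorUnit_t::mask_elt(vReg, n) for a single register.
  bool mask_elt(reg_t n) const {
    return (bytes[n / 8] >> (n % 8)) & 1;
  }

  // Equivalent of vectorUnit_t::set_mask_elt(vReg, n, value).
  void set_mask_elt(reg_t n, bool value) {
    uint8_t& e = bytes[n / 8];
    e = (e & ~(1U << (n % 8))) | (uint8_t(value) << (n % 8));
  }
};

int main()
{
  mask_reg_sketch v0(128);          // assume VLEN = 128 bits for the example
  v0.set_mask_elt(5, true);
  v0.set_mask_elt(64, true);
  printf("elt5=%d elt6=%d elt64=%d\n",
         int(v0.mask_elt(5)), int(v0.mask_elt(6)), int(v0.mask_elt(64)));
  return 0;
}

Working at byte granularity removes the per-iteration midx/mpos/mmask bookkeeping from the loop macros (VI_LOOP_CMP_*, VI_LOOP_CARRY_*, VI_MERGE_VARS), which now read and write one mask bit per element directly.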