46 files changed, 766 insertions, 136 deletions
diff --git a/README.md b/README.md
index b930631..d857cc0 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,7 @@ Spike supports the following RISC-V ISA features:
   - Zicond extension, v1.0
   - Zilsd extension, v1.0
   - Zclsd extension, v1.0
+  - Zimop extension, v1.0
 
 Versioning and APIs
 -------------------
diff --git a/ci-tests/atomics.c b/ci-tests/atomics.c
new file mode 100644
index 0000000..ece5a38
--- /dev/null
+++ b/ci-tests/atomics.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdatomic.h>
+
+atomic_int acnt = 0;
+atomic_int bcnt = 0;
+
+int foo(void) { /* bumps acnt 1000 times and bcnt each time acnt is a multiple of 10 */
+  for(int n = 0; n < 1000; ++n) {
+    ++acnt; /* acnt is an atomic_int, so this is an atomic increment */
+    if(acnt % 10 == 0)
+      ++bcnt;
+  }
+  return acnt; /* current value of the first counter */
+}
+
+int main(void) { /* runs foo() once and prints both counters */
+  int total = foo(); /* distinct name: does not shadow the global atomic acnt */
+  printf("First atomic counter is %d, second is %d\n", total, bcnt); /* both arguments are int, hence %d rather than %u */
+  return 0;
+}
diff --git a/ci-tests/create-ci-binary-tarball b/ci-tests/create-ci-binary-tarball
index 73a549e..1080d0a 100755
--- a/ci-tests/create-ci-binary-tarball
+++ b/ci-tests/create-ci-binary-tarball
@@ -20,10 +20,16 @@ mkdir -p build/dummycsr && cd "$_"
 riscv64-unknown-elf-gcc -O2 -o customcsr `git rev-parse --show-toplevel`/ci-tests/customcsr.c
 cd -
 
+mkdir -p build/atomics && cd "$_"
+riscv64-unknown-elf-gcc -O2 -o atomics `git rev-parse --show-toplevel`/ci-tests/atomics.c
+cd -
+
+
 mv build/pk/pk .
 mv build/hello/hello .
 mv build/dummy-slliuw/dummy-slliuw .
 mv build/dummycsr/customcsr .
-tar -cf spike-ci.tar pk hello dummy-slliuw customcsr
+mv build/atomics/atomics .
+tar -cf spike-ci.tar pk hello dummy-slliuw customcsr atomics
 
-rm pk hello dummy-slliuw customcsr
+rm pk hello dummy-slliuw customcsr atomics
diff --git a/ci-tests/test-spike b/ci-tests/test-spike
index 36b748a..6fe5bdb 100755
--- a/ci-tests/test-spike
+++ b/ci-tests/test-spike
@@ -11,6 +11,7 @@ cd run
 wget https://github.com/riscv-software-src/riscv-isa-sim/releases/download/dummy-tag-for-ci-storage/spike-ci.tar
 tar xf spike-ci.tar
 time ../install/bin/spike --isa=rv64gc pk hello | grep "Hello, world!  Pi is approximately 3.141588."
+../install/bin/spike --log-commits --isa=rv64gc pk atomics | grep "First atomic counter is 1000, second is 100"
 
 # check that including sim.h in an external project works
 g++ -std=c++2a -I../install/include -L../install/lib $DIR/testlib.cc -lriscv -o test-libriscv
diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc
index baedc3f..24eb5f2 100644
--- a/disasm/isa_parser.cc
+++ b/disasm/isa_parser.cc
@@ -140,6 +140,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
       // HINTs encoded in base-ISA instructions are always present.
     } else if (ext_str == "zihintntl") {
       // HINTs encoded in base-ISA instructions are always present.
+    } else if (ext_str == "ziccid") {
+      extension_table[EXT_ZICCID] = true;
+    } else if (ext_str == "ziccif") {
+      // aligned instruction fetch is always atomic in Spike
     } else if (ext_str == "zaamo") {
       extension_table[EXT_ZAAMO] = true;
     } else if (ext_str == "zalrsc") {
@@ -386,6 +390,14 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
       extension_table[EXT_SSDBLTRP] = true;
     } else if (ext_str == "smdbltrp") {
       extension_table[EXT_SMDBLTRP] = true;
+    } else if (ext_str == "smaia") {
+      extension_table[EXT_SMAIA] = true;
+      extension_table[EXT_SSAIA] = true;
+      extension_table[EXT_SMCSRIND] = true;
+      extension_table[EXT_SSCSRIND] = true;
+    } else if (ext_str == "ssaia") {
+      extension_table[EXT_SSAIA] = true;
+      extension_table[EXT_SSCSRIND] = true;
     } else if (ext_str[0] == 'x') {
       extension_table['X'] = true;
       if (ext_str.size() == 1) {
diff --git a/riscv/cfg.cc b/riscv/cfg.cc
index 2f9a229..cc39a54 100644
--- a/riscv/cfg.cc
+++ b/riscv/cfg.cc
@@ -47,4 +47,5 @@ cfg_t::cfg_t()
   explicit_hartids = false;
   real_time_clint  = false;
   trigger_count    = 4;
+  cache_blocksz    = 64;
 }
diff --git a/riscv/cfg.h b/riscv/cfg.h
index 388030b..8032856 100644
--- a/riscv/cfg.h
+++ b/riscv/cfg.h
@@ -78,6 +78,7 @@ public:
   bool                    explicit_hartids;
   bool                    real_time_clint;
   reg_t                   trigger_count;
+  reg_t                   cache_blocksz;
   std::optional<abstract_sim_if_t*> external_simulator;
 
   size_t nprocs() const { return hartids.size(); }
diff --git a/riscv/csr_init.cc b/riscv/csr_init.cc
index cabb7c2..0acd1c7 100644
--- a/riscv/csr_init.cc
+++ b/riscv/csr_init.cc
@@ -12,6 +12,24 @@ void state_t::add_csr(reg_t addr, const csr_t_p& csr)
 #define add_supervisor_csr(addr, csr) add_const_ext_csr('S', addr, csr)
 #define add_hypervisor_csr(addr, csr) add_ext_csr('H', addr, csr)
 
+void state_t::add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg)
+{ // Registers eight iprio backing CSRs with ireg, keyed from 0x30 upwards
+  // This assumes xlen is always max_xlen, which is true today (see
+  // mstatus_csr_t::unlogged_write()):
+  auto xlen = proc->get_isa().get_max_xlen();
+
+  const reg_t iprio0_addr = 0x30; // presumably the *iselect value of iprio0 - confirm against the AIA spec
+  for (int i=0; i<16; i+=2) { // one backing CSR per pair of consecutive keys
+    csr_t_p iprio = std::make_shared<aia_csr_t>(proc, iprio0_addr + i, 0, 0); // mask 0, init 0
+    if (xlen == 32) { // RV32: expose the backing CSR as a low half at key i and a high half at key i+1
+      ireg->add_ireg_proxy(iprio0_addr + i, std::make_shared<rv32_low_csr_t>(proc, iprio0_addr + i, iprio));
+      ireg->add_ireg_proxy(iprio0_addr + i + 1, std::make_shared<rv32_high_csr_t>(proc, iprio0_addr + i + 1, iprio));
+    } else { // otherwise only the even key is registered, with the whole CSR
+      ireg->add_ireg_proxy(iprio0_addr + i, iprio);
+    }
+  }
+}
+
 void state_t::csr_init(processor_t* const proc, reg_t max_isa)
 {
   // This assumes xlen is always max_xlen, which is true today (see
@@ -87,8 +105,17 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     }
   }
   add_const_ext_csr(EXT_SSCOFPMF, CSR_SCOUNTOVF, std::make_shared<scountovf_csr_t>(proc, CSR_SCOUNTOVF));
-  add_csr(CSR_MIE, mie = std::make_shared<mie_csr_t>(proc, CSR_MIE));
-  add_csr(CSR_MIP, mip = std::make_shared<mip_csr_t>(proc, CSR_MIP));
+  mie = std::make_shared<mie_csr_t>(proc, CSR_MIE);
+  mip = std::make_shared<mip_csr_t>(proc, CSR_MIP);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+    add_csr(CSR_MIE, std::make_shared<rv32_low_csr_t>(proc, CSR_MIE, mie));
+    add_csr(CSR_MIEH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIEH, mie));
+    add_csr(CSR_MIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MIP, mip));
+    add_csr(CSR_MIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MIPH, mip));
+  } else {
+    add_csr(CSR_MIE, mie);
+    add_csr(CSR_MIP, mip);
+  }
   auto sip_sie_accr = std::make_shared<generic_int_accessor_t>(
     this,
     ~MIP_HS_MASK,  // read_mask
@@ -116,21 +143,49 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     1              // shiftamt
   );
 
-  auto nonvirtual_sip = std::make_shared<mip_proxy_csr_t>(proc, CSR_SIP, sip_sie_accr);
+  nonvirtual_sip = std::make_shared<sip_csr_t>(proc, CSR_SIP, sip_sie_accr);
   auto vsip = std::make_shared<mip_proxy_csr_t>(proc, CSR_VSIP, vsip_vsie_accr);
-  add_hypervisor_csr(CSR_VSIP, vsip);
-  add_supervisor_csr(CSR_SIP, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip));
+  auto sip = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sip, vsip);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_VSIP, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIP, vsip));
+    add_hypervisor_csr(CSR_VSIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIPH, vsip));
+    add_supervisor_csr(CSR_SIP, std::make_shared<rv32_low_csr_t>(proc, CSR_SIP, sip));
+    add_supervisor_csr(CSR_SIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIPH, sip));
+  } else {
+    add_hypervisor_csr(CSR_VSIP, vsip);
+    add_supervisor_csr(CSR_SIP, sip);
+  }
   add_hypervisor_csr(CSR_HIP, std::make_shared<mip_proxy_csr_t>(proc, CSR_HIP, hip_hie_accr));
-  add_hypervisor_csr(CSR_HVIP, hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0));
+  hvip = std::make_shared<hvip_csr_t>(proc, CSR_HVIP, 0);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_HVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIP, hvip));
+    add_hypervisor_csr(CSR_HVIPH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HVIPH, hvip));
+  } else {
+    add_hypervisor_csr(CSR_HVIP, hvip);
+  }
 
-  auto nonvirtual_sie = std::make_shared<mie_proxy_csr_t>(proc, CSR_SIE, sip_sie_accr);
+  nonvirtual_sie = std::make_shared<sie_csr_t>(proc, CSR_SIE, sip_sie_accr);
   auto vsie = std::make_shared<mie_proxy_csr_t>(proc, CSR_VSIE, vsip_vsie_accr);
-  add_hypervisor_csr(CSR_VSIE, vsie);
-  add_supervisor_csr(CSR_SIE, std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie));
+  auto sie = std::make_shared<virtualized_csr_t>(proc, nonvirtual_sie, vsie);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_VSIE, std::make_shared<rv32_low_csr_t>(proc, CSR_VSIE, vsie));
+    add_hypervisor_csr(CSR_VSIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_VSIEH, vsie));
+    add_supervisor_csr(CSR_SIE, std::make_shared<rv32_low_csr_t>(proc, CSR_SIE, sie));
+    add_supervisor_csr(CSR_SIEH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_SIEH, sie));
+  } else {
+    add_hypervisor_csr(CSR_VSIE, vsie);
+    add_supervisor_csr(CSR_SIE, sie);
+  }
   add_hypervisor_csr(CSR_HIE, std::make_shared<mie_proxy_csr_t>(proc, CSR_HIE, hip_hie_accr));
 
   add_supervisor_csr(CSR_MEDELEG, medeleg = std::make_shared<medeleg_csr_t>(proc, CSR_MEDELEG));
-  add_supervisor_csr(CSR_MIDELEG, mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG));
+  mideleg = std::make_shared<mideleg_csr_t>(proc, CSR_MIDELEG);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SMAIA)) {
+    add_supervisor_csr(CSR_MIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_MIDELEG, mideleg));
+    add_supervisor_csr(CSR_MIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_MIDELEGH, mideleg));
+  } else {
+    add_supervisor_csr(CSR_MIDELEG, mideleg);
+  }
   const reg_t counteren_mask = (proc->extension_enabled_const(EXT_ZICNTR) ? 0x7UL : 0x0) | (proc->extension_enabled_const(EXT_ZIHPM) ? 0xfffffff8ULL : 0x0);
   add_user_csr(CSR_MCOUNTEREN, mcounteren = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTEREN, counteren_mask, 0));
   add_csr(CSR_MCOUNTINHIBIT, mcountinhibit = std::make_shared<masked_csr_t>(proc, CSR_MCOUNTINHIBIT, counteren_mask & (~MCOUNTEREN_TIME), 0));
@@ -162,7 +217,13 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   add_hypervisor_csr(CSR_HSTATUS, hstatus = std::make_shared<hstatus_csr_t>(proc, CSR_HSTATUS));
   add_hypervisor_csr(CSR_HGEIE, std::make_shared<const_csr_t>(proc, CSR_HGEIE, 0));
   add_hypervisor_csr(CSR_HGEIP, std::make_shared<const_csr_t>(proc, CSR_HGEIP, 0));
-  add_hypervisor_csr(CSR_HIDELEG, hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg));
+  hideleg = std::make_shared<hideleg_csr_t>(proc, CSR_HIDELEG, mideleg);
+  if (xlen == 32 && proc->extension_enabled_const(EXT_SSAIA)) {
+    add_hypervisor_csr(CSR_HIDELEG, std::make_shared<rv32_low_csr_t>(proc, CSR_HIDELEG, hideleg));
+    add_hypervisor_csr(CSR_HIDELEGH, std::make_shared<aia_rv32_high_csr_t>(proc, CSR_HIDELEGH, hideleg));
+  } else {
+    add_hypervisor_csr(CSR_HIDELEG, hideleg);
+  }
   const reg_t hedeleg_mask =
     (1 << CAUSE_MISALIGNED_FETCH) |
     (1 << CAUSE_FETCH_ACCESS) |
@@ -251,7 +312,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
                             (proc->extension_enabled(EXT_ZICFILP) ? MENVCFG_LPE : 0) |
                             (proc->extension_enabled(EXT_ZICFISS) ? MENVCFG_SSE : 0) |
                             (proc->extension_enabled(EXT_SSDBLTRP) ? MENVCFG_DTE : 0)|
-                            (proc->extension_enabled(EXT_SMCSRIND) ? MENVCFG_CDE : 0);
+                            (proc->extension_enabled(EXT_SMCDELEG) ? MENVCFG_CDE : 0);
   menvcfg = std::make_shared<envcfg_csr_t>(proc, CSR_MENVCFG, menvcfg_mask, 0);
   if (xlen == 32) {
     add_user_csr(CSR_MENVCFG, std::make_shared<rv32_low_csr_t>(proc, CSR_MENVCFG, menvcfg));
@@ -285,7 +346,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     const reg_t sstateen0_mask = (proc->extension_enabled(EXT_ZFINX) ? SSTATEEN0_FCSR : 0) |
                                  (proc->extension_enabled(EXT_ZCMT) ? SSTATEEN0_JVT : 0) |
                                  SSTATEEN0_CS;
-    const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
+    const reg_t hstateen0_mask = sstateen0_mask | HSTATEEN0_CSRIND | HSTATEEN0_SENVCFG | HSTATEEN_SSTATEEN;
     const reg_t mstateen0_mask = hstateen0_mask | (proc->extension_enabled(EXT_SSQOSID) ?  MSTATEEN0_PRIV114 : 0);
     for (int i = 0; i < 4; i++) {
       const reg_t mstateen_mask = i == 0 ? mstateen0_mask : MSTATEEN_HSTATEEN;
@@ -321,7 +382,7 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
   if (proc->extension_enabled_const(EXT_SSTC)) {
     stimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_STIMECMP, MIP_STIP);
     vstimecmp = std::make_shared<stimecmp_csr_t>(proc, CSR_VSTIMECMP, MIP_VSTIP);
-    auto virtualized_stimecmp = std::make_shared<virtualized_stimecmp_csr_t>(proc, stimecmp, vstimecmp);
+    auto virtualized_stimecmp = std::make_shared<virtualized_with_special_permission_csr_t>(proc, stimecmp, vstimecmp);
     if (xlen == 32) {
       add_supervisor_csr(CSR_STIMECMP, std::make_shared<rv32_low_csr_t>(proc, CSR_STIMECMP, virtualized_stimecmp));
       add_supervisor_csr(CSR_STIMECMPH, std::make_shared<rv32_high_csr_t>(proc, CSR_STIMECMPH, virtualized_stimecmp));
@@ -348,20 +409,30 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
     csr_t_p miselect = std::make_shared<basic_csr_t>(proc, CSR_MISELECT, 0);
     add_csr(CSR_MISELECT, miselect);
 
-    const reg_t mireg_csrs[] = { CSR_MIREG, CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
+    sscsrind_reg_csr_t::sscsrind_reg_csr_t_p mireg;
+    add_csr(CSR_MIREG, mireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_MIREG, miselect));
+    add_ireg_proxy(proc, mireg);
+    const reg_t mireg_csrs[] = { CSR_MIREG2, CSR_MIREG3, CSR_MIREG4, CSR_MIREG5, CSR_MIREG6 };
     for (auto csr : mireg_csrs)
       add_csr(csr, std::make_shared<sscsrind_reg_csr_t>(proc, csr, miselect));
   }
 
   if (proc->extension_enabled_const(EXT_SSCSRIND)) {
-    csr_t_p vsiselect = std::make_shared<basic_csr_t>(proc, CSR_VSISELECT, 0);
+    csr_t_p vsiselect = std::make_shared<siselect_csr_t>(proc, CSR_VSISELECT, 0);
     add_hypervisor_csr(CSR_VSISELECT, vsiselect);
 
-    csr_t_p siselect = std::make_shared<basic_csr_t>(proc, CSR_SISELECT, 0);
-    add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_csr_t>(proc, siselect, vsiselect));
+    csr_t_p siselect = std::make_shared<siselect_csr_t>(proc, CSR_SISELECT, 0);
+    add_supervisor_csr(CSR_SISELECT, std::make_shared<virtualized_with_special_permission_csr_t>(proc, siselect, vsiselect));
 
-    const reg_t vsireg_csrs[] = { CSR_VSIREG, CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
-    const reg_t sireg_csrs[] = { CSR_SIREG, CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
+    auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_VSIREG, vsiselect);
+    add_hypervisor_csr(CSR_VSIREG, vsireg);
+
+    auto sireg = std::make_shared<sscsrind_reg_csr_t>(proc, CSR_SIREG, siselect);
+    add_ireg_proxy(proc, sireg);
+    add_supervisor_csr(CSR_SIREG, std::make_shared<virtualized_indirect_csr_t>(proc, sireg, vsireg));
+
+    const reg_t vsireg_csrs[] = { CSR_VSIREG2, CSR_VSIREG3, CSR_VSIREG4, CSR_VSIREG5, CSR_VSIREG6 };
+    const reg_t sireg_csrs[] = { CSR_SIREG2, CSR_SIREG3, CSR_SIREG4, CSR_SIREG5, CSR_SIREG6 };
     for (size_t i = 0; i < std::size(vsireg_csrs); i++) {
       auto vsireg = std::make_shared<sscsrind_reg_csr_t>(proc, vsireg_csrs[i], vsiselect);
       add_hypervisor_csr(vsireg_csrs[i], vsireg);
@@ -438,4 +509,44 @@ void state_t::csr_init(processor_t* const proc, reg_t max_isa)
 
   const reg_t srmcfg_mask = SRMCFG_MCID | SRMCFG_RCID;
   add_const_ext_csr(EXT_SSQOSID, CSR_SRMCFG, std::make_shared<srmcfg_csr_t>(proc, CSR_SRMCFG, srmcfg_mask, 0));
+
+  mvien = std::make_shared<masked_csr_t>(proc, CSR_MVIEN, MIP_SEIP | MIP_SSIP, 0);
+  mvip = std::make_shared<mvip_csr_t>(proc, CSR_MVIP, 0);
+  if (proc->extension_enabled_const(EXT_SMAIA)) {
+    add_csr(CSR_MTOPI, std::make_shared<mtopi_csr_t>(proc, CSR_MTOPI));
+    if (xlen == 32) {
+      add_supervisor_csr(CSR_MVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIEN, mvien));
+      add_supervisor_csr(CSR_MVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIENH, mvien));
+      add_supervisor_csr(CSR_MVIP, std::make_shared<rv32_low_csr_t>(proc, CSR_MVIP, mvip));
+      add_supervisor_csr(CSR_MVIPH, std::make_shared<rv32_high_csr_t>(proc, CSR_MVIPH, mvip));
+    } else {
+      add_supervisor_csr(CSR_MVIEN, mvien);
+      add_supervisor_csr(CSR_MVIP, mvip);
+    }
+  }
+
+  hvictl = std::make_shared<aia_csr_t>(proc, CSR_HVICTL, HVICTL_VTI | HVICTL_IID | HVICTL_DPR | HVICTL_IPRIOM | HVICTL_IPRIO, 0);
+  vstopi = std::make_shared<vstopi_csr_t>(proc, CSR_VSTOPI);
+  if (proc->extension_enabled_const(EXT_SSAIA)) { // Included by EXT_SMAIA
+    csr_t_p nonvirtual_stopi = std::make_shared<nonvirtual_stopi_csr_t>(proc, CSR_STOPI);
+    add_supervisor_csr(CSR_STOPI, std::make_shared<virtualized_with_special_permission_csr_t>(proc, nonvirtual_stopi, vstopi));
+    add_supervisor_csr(CSR_STOPEI, std::make_shared<inaccessible_csr_t>(proc, CSR_STOPEI));
+    auto hvien = std::make_shared<aia_csr_t>(proc, CSR_HVIEN, 0, 0);
+    auto hviprio1 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO1, 0, 0);
+    auto hviprio2 = std::make_shared<aia_csr_t>(proc, CSR_HVIPRIO2, 0, 0);
+    if (xlen == 32) {
+      add_hypervisor_csr(CSR_HVIEN, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIEN, hvien));
+      add_hypervisor_csr(CSR_HVIENH, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIENH, hvien));
+      add_hypervisor_csr(CSR_HVIPRIO1, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO1, hviprio1));
+      add_hypervisor_csr(CSR_HVIPRIO1H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO1H, hviprio1));
+      add_hypervisor_csr(CSR_HVIPRIO2, std::make_shared<rv32_low_csr_t>(proc, CSR_HVIPRIO2, hviprio2));
+      add_hypervisor_csr(CSR_HVIPRIO2H, std::make_shared<rv32_high_csr_t>(proc, CSR_HVIPRIO2H, hviprio2));
+    } else {
+      add_hypervisor_csr(CSR_HVIEN, hvien);
+      add_hypervisor_csr(CSR_HVIPRIO1, hviprio1);
+      add_hypervisor_csr(CSR_HVIPRIO2, hviprio2);
+    }
+    add_hypervisor_csr(CSR_HVICTL, hvictl);
+    add_hypervisor_csr(CSR_VSTOPI, vstopi);
+  }
 }
diff --git a/riscv/csrs.cc b/riscv/csrs.cc
index 1873f7e..49717e5 100644
--- a/riscv/csrs.cc
+++ b/riscv/csrs.cc
@@ -15,6 +15,8 @@
 #include "insn_macros.h"
 // For CSR_DCSR_V:
 #include "debug_defines.h"
+// For ctz:
+#include "arith.h"
 
 // STATE macro used by require_privilege() macro:
 #undef STATE
@@ -313,31 +315,31 @@ bool mseccfg_csr_t::get_sseed() const noexcept {
 }
 
 bool mseccfg_csr_t::unlogged_write(const reg_t val) noexcept {
-  if (proc->n_pmp == 0)
-    return false;
-
-  // pmpcfg.L is 1 in any rule or entry (including disabled entries)
-  const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
-          [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
   reg_t new_val = read();
 
-  // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
-  // Otherwise set the RLB bit according val
-  if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
-    new_val &= ~MSECCFG_RLB;
-    new_val |= (val & MSECCFG_RLB);
-  }
+  if (proc->n_pmp != 0) {
+    // pmpcfg.L is 1 in any rule or entry (including disabled entries)
+    const bool pmplock_recorded = std::any_of(state->pmpaddr, state->pmpaddr + proc->n_pmp,
+        [](const pmpaddr_csr_t_p & c) { return c->is_locked(); } );
+
+    // When RLB is 0 and pmplock_recorded, RLB is locked to 0.
+    // Otherwise set the RLB bit according to val
+    if (!(pmplock_recorded && (read() & MSECCFG_RLB) == 0)) {
+      new_val &= ~MSECCFG_RLB;
+      new_val |= (val & MSECCFG_RLB);
+    }
 
-  new_val |= (val & MSECCFG_MMWP);  //MMWP is sticky
-  new_val |= (val & MSECCFG_MML);   //MML is sticky
+    new_val |= (val & MSECCFG_MMWP);  //MMWP is sticky
+    new_val |= (val & MSECCFG_MML);   //MML is sticky
+
+    proc->get_mmu()->flush_tlb();
+  }
 
   if (proc->extension_enabled(EXT_ZKR)) {
     uint64_t mask = MSECCFG_USEED | MSECCFG_SSEED;
     new_val = (new_val & ~mask) | (val & mask);
   }
 
-  proc->get_mmu()->flush_tlb();
-
   if (proc->extension_enabled(EXT_ZICFILP)) {
     new_val &= ~MSECCFG_MLPE;
     new_val |= (val & MSECCFG_MLPE);
@@ -639,6 +641,22 @@ reg_t rv32_high_csr_t::written_value() const noexcept {
   return (orig->written_value() >> 32) & 0xffffffffU;
 }
 
+aia_rv32_high_csr_t::aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig):
+  rv32_high_csr_t(proc, addr, orig) {
+}
+
+void aia_rv32_high_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  rv32_high_csr_t::verify_permissions(insn, write);
+}
+
 // implement class sstatus_csr_t
 sstatus_csr_t::sstatus_csr_t(processor_t* const proc, sstatus_proxy_csr_t_p orig, vsstatus_csr_t_p virt):
   virtualized_csr_t(proc, orig, virt),
@@ -781,8 +799,14 @@ mip_csr_t::mip_csr_t(processor_t* const proc, const reg_t addr):
   mip_or_mie_csr_t(proc, addr) {
 }
 
+void mip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { // masked write; SEIP is routed to mvip when mvien.SEIP=0
+  if (!(state->mvien->read() & MIP_SEIP) && (mask & MIP_SEIP))
+    state->mvip->write_with_mask(MIP_SEIP, val); // mvip.SEIP is an alias of mip.SEIP when mvien.SEIP=0
+  mip_or_mie_csr_t::write_with_mask(mask & ~MIP_SEIP, val); // SEIP is always stripped from the mask handed to the base class
+}
+
 reg_t mip_csr_t::read() const noexcept {
-  return val | state->hvip->basic_csr_t::read();
+  return val | state->hvip->basic_csr_t::read() | ((state->mvien->read() & MIP_SEIP) ? 0 : (state->mvip->basic_csr_t::read() & MIP_SEIP));
 }
 
 void mip_csr_t::backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept {
@@ -864,6 +888,15 @@ mip_proxy_csr_t::mip_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
   accr(accr) {
 }
 
+void mip_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { // base checks, then the hvictl.VTI trap for V=1
+  csr_t::verify_permissions(insn, write);
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->hvictl->read() & HVICTL_VTI) && // use the state_t member directly: no csrmap lookup, no risk of inserting a null entry
+        proc->extension_enabled('S') && state->v)
+      throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sip when hvictl.VTI=1
+  }
+}
+
 reg_t mip_proxy_csr_t::read() const noexcept {
   return accr->ip_read();
 }
@@ -879,6 +912,15 @@ mie_proxy_csr_t::mie_proxy_csr_t(processor_t* const proc, const reg_t addr, gene
   accr(accr) {
 }
 
+void mie_proxy_csr_t::verify_permissions(insn_t insn, bool write) const { // base checks, then the hvictl.VTI trap for V=1
+  csr_t::verify_permissions(insn, write);
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->hvictl->read() & HVICTL_VTI) && // use the state_t member directly: no csrmap lookup, no risk of inserting a null entry
+        proc->extension_enabled('S') && state->v)
+      throw trap_virtual_instruction(insn.bits()); // VS-mode attempts to access sie when hvictl.VTI=1
+  }
+}
+
 reg_t mie_proxy_csr_t::read() const noexcept {
   return accr->ie_read();
 }
@@ -956,6 +998,38 @@ bool medeleg_csr_t::unlogged_write(const reg_t val) noexcept {
   return basic_csr_t::unlogged_write((read() & ~mask) | (val & mask));
 }
 
+sip_csr_t::sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr):
+  mip_proxy_csr_t(proc, addr, accr) {
+}
+
+reg_t sip_csr_t::read() const noexcept { // merges the proxied value with mvip
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits clear in mideleg but set in mvien
+  return (mip_proxy_csr_t::read() & ~mask) | (state->mvip->read() & mask); // masked bits come from mvip, the rest from the base proxy
+}
+
+bool sip_csr_t::unlogged_write(const reg_t val) noexcept { // splits the write between mvip and the base proxy
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits clear in mideleg but set in mvien
+  state->mvip->write_with_mask(mask & accr->get_ip_write_mask(), val); // masked bits, limited to the accessor's ip write mask, go to mvip
+  return mip_proxy_csr_t::unlogged_write(val & ~mask); // remaining bits go through the base proxy
+}
+
+sie_csr_t::sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr):
+  mie_proxy_csr_t(proc, addr, accr),
+  val(0) {
+}
+
+reg_t sie_csr_t::read() const noexcept { // merges the proxied value with this object's own val
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits clear in mideleg but set in mvien
+  return (mie_proxy_csr_t::read() & ~mask) | (val & mask); // masked bits come from the local val, the rest from the base proxy
+}
+
+bool sie_csr_t::unlogged_write(const reg_t val) noexcept { // splits the write between the local val and the base proxy
+  const reg_t mask = ~state->mideleg->read() & state->mvien->read(); // bits clear in mideleg but set in mvien
+  this->val = (this->val & ~mask) | (val & mask); // masked bits are kept locally
+  mie_proxy_csr_t::unlogged_write(val & ~mask); // remaining bits go through the base proxy; its result is ignored
+  return true; // always reports true
+}
+
 // implement class masked_csr_t
 masked_csr_t::masked_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init):
   basic_csr_t(proc, addr, init),
@@ -1645,10 +1719,6 @@ bool stimecmp_csr_t::unlogged_write(const reg_t val) noexcept {
   return basic_csr_t::unlogged_write(val);
 }
 
-virtualized_stimecmp_csr_t::virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
-  virtualized_csr_t(proc, orig, virt) {
-}
-
 void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
   if (!(state->menvcfg->read() & MENVCFG_STCE)) {
     // access to (v)stimecmp with MENVCFG.STCE = 0
@@ -1664,9 +1734,18 @@ void stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
   }
 
   basic_csr_t::verify_permissions(insn, write);
+
+  if (proc->extension_enabled_const(EXT_SSAIA) && proc->extension_enabled('H')) {
+    if ((state->csrmap[CSR_HVICTL]->read() & HVICTL_VTI) && state->v && write)
+      throw trap_virtual_instruction(insn.bits());
+  }
 }
 
-void virtualized_stimecmp_csr_t::verify_permissions(insn_t insn, bool write) const {
+virtualized_with_special_permission_csr_t::virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt):
+  virtualized_csr_t(proc, orig, virt) {
+}
+
+void virtualized_with_special_permission_csr_t::verify_permissions(insn_t insn, bool write) const {
   orig_csr->verify_permissions(insn, write);
 }
 
@@ -1754,14 +1833,16 @@ sscsrind_reg_csr_t::sscsrind_reg_csr_t(processor_t* const proc, const reg_t addr
 }
 
 void sscsrind_reg_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
+      throw trap_illegal_instruction(insn.bits());
+  }
+
   // Don't call base verify_permission for VS registers remapped to S-mode
   if (insn.csr() == address)
     csr_t::verify_permissions(insn, write);
 
   if (proc->extension_enabled(EXT_SMSTATEEN)) {
-    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
-      throw trap_illegal_instruction(insn.bits());
-
     if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND))
       throw trap_virtual_instruction(insn.bits());
   }
@@ -1973,3 +2054,176 @@ bool scntinhibit_csr_t::unlogged_write(const reg_t val) noexcept {
 reg_t scntinhibit_csr_t::read() const noexcept {
   return state->mcounteren->read() & state->mcountinhibit->read();
 }
+
+mtopi_csr_t::mtopi_csr_t(processor_t* const proc, const reg_t addr):
+  csr_t(proc, addr) {
+}
+
+reg_t mtopi_csr_t::read() const noexcept { // builds the mtopi value from mip, mie and mideleg
+  reg_t enabled_interrupts = state->mip->read() & state->mie->read() & ~state->mideleg->read(); // set in mip and mie, clear in mideleg
+  if (!enabled_interrupts)
+    return 0; // no enabled pending interrupt to M-mode
+
+  reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+  reg_t identity = ctz(selected_interrupt); // assumes the helper returns a one-hot mask - TODO confirm
+  return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0
+}
+
+bool mtopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+  return false;
+}
+
+mvip_csr_t::mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init):
+  basic_csr_t(proc, addr, init) {
+}
+
+reg_t mvip_csr_t::read() const noexcept {
+  const reg_t val = basic_csr_t::read();
+  const reg_t mvien = state->mvien->read();
+  const reg_t mip = state->mip->read();
+  const reg_t menvcfg = state->menvcfg->read();
+  return 0
+    | (val & MIP_SEIP)
+    | ((menvcfg & MENVCFG_STCE) ? 0 : (mip & MIP_STIP))
+    | (((mvien & MIP_SSIP) ? val : mip) & MIP_SSIP)
+    ;
+}
+
+bool mvip_csr_t::unlogged_write(const reg_t val) noexcept {
+  if (!(state->menvcfg->read() & MENVCFG_STCE))
+    state->mip->write_with_mask(MIP_STIP, val); // mvip.STIP is an alias of mip.STIP when mip.STIP is writable
+  if (!(state->mvien->read() & MIP_SSIP))
+    state->mip->write_with_mask(MIP_SSIP, val); // mvip.SSIP is an alias of mip.SSIP when mvien.SSIP=0
+
+  const reg_t new_val = (val & MIP_SEIP) | (((state->mvien->read() & MIP_SSIP) ? val : basic_csr_t::read()) & MIP_SSIP);
+  return basic_csr_t::unlogged_write(new_val);
+}
+
+void mvip_csr_t::write_with_mask(const reg_t mask, const reg_t val) noexcept { // masked write to the stored value
+  basic_csr_t::unlogged_write((basic_csr_t::read() & ~mask) | (val & mask)); // calls the basic_csr_t versions, not mvip_csr_t's own read/unlogged_write
+  log_write(); // called unconditionally; the unlogged_write result is not checked
+}
+
+nonvirtual_stopi_csr_t::nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr):
+  csr_t(proc, addr) {
+}
+
+void nonvirtual_stopi_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  csr_t::verify_permissions(insn, write);
+}
+
+reg_t nonvirtual_stopi_csr_t::read() const noexcept {
+  reg_t enabled_interrupts = state->nonvirtual_sip->read() & state->nonvirtual_sie->read() & ~state->hideleg->read();
+  if (!enabled_interrupts)
+    return 0; // no enabled pending interrupt to S-mode
+
+  reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+  reg_t identity = ctz(selected_interrupt);
+  return set_field((reg_t)1, MTOPI_IID, identity); // IPRIO always 1 if iprio array is RO0
+}
+
+bool nonvirtual_stopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+  return false;
+}
+
+inaccessible_csr_t::inaccessible_csr_t(processor_t* const proc, const reg_t addr):
+  csr_t(proc, addr) {
+}
+
+void inaccessible_csr_t::verify_permissions(insn_t insn, bool UNUSED write) const { // always throws; write is ignored
+  if (state->v)
+    throw trap_virtual_instruction(insn.bits()); // state->v set
+  else
+    throw trap_illegal_instruction(insn.bits()); // state->v clear
+}
+
+vstopi_csr_t::vstopi_csr_t(processor_t* const proc, const reg_t addr):
+  csr_t(proc, addr) {
+}
+
+void vstopi_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  csr_t::verify_permissions(insn, write);
+}
+
+reg_t vstopi_csr_t::read() const noexcept { // builds the vstopi value from hvictl, hstatus.VGEIN, mip, mie and hideleg
+  reg_t hvictl = state->hvictl->read(); // the hvictl fields are decoded once here
+  bool vti = hvictl & HVICTL_VTI;
+  reg_t iid = get_field(hvictl, HVICTL_IID);
+  bool dpr = hvictl & HVICTL_DPR;
+  bool ipriom = hvictl & HVICTL_IPRIOM;
+  reg_t iprio = get_field(hvictl, HVICTL_IPRIO);
+
+  reg_t enabled_interrupts = state->mip->read() & state->mie->read() & state->hideleg->read(); // set in mip, mie and hideleg
+  enabled_interrupts >>= 1; // VSSIP -> SSIP, etc
+  reg_t vgein = get_field(state->hstatus->read(), HSTATUS_VGEIN);
+  reg_t virtual_sei_priority = (vgein == 0 && iid == IRQ_S_EXT && iprio != 0) ? iprio : 255; // vstopi.IPRIO is 255 for priority number 256
+
+  reg_t identity, priority;
+  if (vti) { // hvictl.VTI set: identity and priority are derived from the hvictl fields
+    if (!(enabled_interrupts & MIP_SEIP) && iid == IRQ_S_EXT) // IID names S_EXT but the shifted SEIP bit is clear
+      return 0;
+
+    identity = ((enabled_interrupts & MIP_SEIP) && (iid == IRQ_S_EXT || dpr)) ? IRQ_S_EXT : iid;
+    priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : ((iprio != 0 || !dpr) ? iprio : 255);
+  } else { // hvictl.VTI clear: pick from the shifted enabled set
+    if (!enabled_interrupts)
+      return 0; // no enabled pending interrupt to VS-mode
+
+    reg_t selected_interrupt = proc->select_an_interrupt_with_default_priority(enabled_interrupts);
+    identity = ctz(selected_interrupt);
+    priority = (identity == IRQ_S_EXT) ? virtual_sei_priority : 255; // vstopi.IPRIO is 255 for interrupt with default priority lower than VSEI
+  }
+  return set_field((reg_t)(ipriom ? priority : 1), MTOPI_IID, identity); // base value is priority when hvictl.IPRIOM is set, else 1; identity goes into the MTOPI_IID field
+}
+
+bool vstopi_csr_t::unlogged_write(const reg_t UNUSED val) noexcept {
+  return false;
+}
+
+siselect_csr_t::siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init):
+  basic_csr_t(proc, addr, init) {
+}
+
+void siselect_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_CSRIND))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_CSRIND))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  basic_csr_t::verify_permissions(insn, write);
+}
+
+aia_csr_t::aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init):
+  masked_csr_t(proc, addr, mask, init) {
+}
+
+void aia_csr_t::verify_permissions(insn_t insn, bool write) const {
+  if (proc->extension_enabled(EXT_SMSTATEEN)) {
+    if ((state->prv < PRV_M) && !(state->mstateen[0]->read() & MSTATEEN0_AIA))
+      throw trap_illegal_instruction(insn.bits());
+
+    if (state->v && !(state->hstateen[0]->read() & HSTATEEN0_AIA))
+      throw trap_virtual_instruction(insn.bits());
+  }
+
+  basic_csr_t::verify_permissions(insn, write);
+}
diff --git a/riscv/csrs.h b/riscv/csrs.h
index 33ac33e..97fd0f1 100644
--- a/riscv/csrs.h
+++ b/riscv/csrs.h
@@ -301,6 +301,12 @@ class rv32_high_csr_t: public csr_t {
   csr_t_p orig;
 };
 
+class aia_rv32_high_csr_t: public rv32_high_csr_t { // rv32_high_csr_t variant with its own permission check
+ public:
+  aia_rv32_high_csr_t(processor_t* const proc, const reg_t addr, csr_t_p orig); // same parameter list shape as a proxy over orig
+  virtual void verify_permissions(insn_t insn, bool write) const override; // declared here, defined outside this header
+};
+
 // sstatus.sdt is read_only 0 when menvcfg.dte = 0
 class sstatus_proxy_csr_t final: public base_status_csr_t {
  public:
@@ -356,7 +362,7 @@ class mip_or_mie_csr_t: public csr_t {
   mip_or_mie_csr_t(processor_t* const proc, const reg_t addr);
   virtual reg_t read() const noexcept override;
 
-  void write_with_mask(const reg_t mask, const reg_t val) noexcept;
+  virtual void write_with_mask(const reg_t mask, const reg_t val) noexcept;
 
  protected:
   virtual bool unlogged_write(const reg_t val) noexcept override final;
@@ -371,6 +377,8 @@ class mip_csr_t: public mip_or_mie_csr_t {
   mip_csr_t(processor_t* const proc, const reg_t addr);
   virtual reg_t read() const noexcept override final;
 
+  void write_with_mask(const reg_t mask, const reg_t val) noexcept override;
+
   // Does not log. Used by external things (clint) that wiggle bits in mip.
   void backdoor_write_with_mask(const reg_t mask, const reg_t val) noexcept;
  private:
@@ -406,6 +414,7 @@ class generic_int_accessor_t {
   void ip_write(const reg_t val) noexcept;
   reg_t ie_read() const noexcept;
   void ie_write(const reg_t val) noexcept;
+  reg_t get_ip_write_mask() const { return ip_write_mask; } // read-only accessor; const so it is callable on const objects
  private:
   state_t* const state;
   const reg_t read_mask;
@@ -423,10 +432,10 @@ typedef std::shared_ptr<generic_int_accessor_t> generic_int_accessor_t_p;
 class mip_proxy_csr_t: public csr_t {
  public:
   mip_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+  virtual void verify_permissions(insn_t insn, bool write) const override;
   virtual reg_t read() const noexcept override;
  protected:
   virtual bool unlogged_write(const reg_t val) noexcept override;
- private:
   generic_int_accessor_t_p accr;
 };
 
@@ -434,6 +443,7 @@ class mip_proxy_csr_t: public csr_t {
 class mie_proxy_csr_t: public csr_t {
  public:
   mie_proxy_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+  virtual void verify_permissions(insn_t insn, bool write) const override;
   virtual reg_t read() const noexcept override;
  protected:
   virtual bool unlogged_write(const reg_t val) noexcept override;
@@ -460,6 +470,24 @@ class medeleg_csr_t: public basic_csr_t {
   const reg_t hypervisor_exceptions;
 };
 
+class sip_csr_t: public mip_proxy_csr_t { // mip_proxy_csr_t with both the read and the write path overridden
+ public:
+  sip_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+  virtual reg_t read() const noexcept override;
+ protected:
+  virtual bool unlogged_write(const reg_t val) noexcept override;
+};
+
+class sie_csr_t: public mie_proxy_csr_t { // mie_proxy_csr_t with both the read and the write path overridden
+ public:
+  sie_csr_t(processor_t* const proc, const reg_t addr, generic_int_accessor_t_p accr);
+  virtual reg_t read() const noexcept override;
+ protected:
+  virtual bool unlogged_write(const reg_t val) noexcept override;
+ private:
+  reg_t val; // value held by this object itself; NOTE(review): which bits it backs is not visible here -- confirm
+};
+
 // For CSRs with certain bits hardwired
 class masked_csr_t: public basic_csr_t {
  public:
@@ -805,9 +833,9 @@ class stimecmp_csr_t: public basic_csr_t {
   reg_t intr_mask;
 };
 
-class virtualized_stimecmp_csr_t: public virtualized_csr_t {
+class virtualized_with_special_permission_csr_t: public virtualized_csr_t {
  public:
-  virtualized_stimecmp_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt);
+  virtualized_with_special_permission_csr_t(processor_t* const proc, csr_t_p orig, csr_t_p virt);
   virtual void verify_permissions(insn_t insn, bool write) const override;
 };
 
@@ -909,4 +937,63 @@ class scntinhibit_csr_t: public basic_csr_t {
   virtual bool unlogged_write(const reg_t val) noexcept override;
 };
 
+class mtopi_csr_t: public csr_t { // derives directly from csr_t; declares no data members
+ public:
+  mtopi_csr_t(processor_t* const proc, const reg_t addr);
+  virtual reg_t read() const noexcept override; // defined outside this header
+ protected:
+  bool unlogged_write(const reg_t val) noexcept override;
+};
+
+class mvip_csr_t : public basic_csr_t { // basic_csr_t subclass overriding read() and unlogged_write()
+ public:
+  mvip_csr_t(processor_t* const proc, const reg_t addr, const reg_t init);
+  reg_t read() const noexcept override;
+
+  void write_with_mask(const reg_t mask, const reg_t val) noexcept; // non-virtual; takes an explicit mask next to val
+
+ protected:
+  virtual bool unlogged_write(const reg_t val) noexcept override;
+};
+
+typedef std::shared_ptr<mvip_csr_t> mvip_csr_t_p;
+
+class nonvirtual_stopi_csr_t: public csr_t { // derives directly from csr_t; declares no data members
+ public:
+  nonvirtual_stopi_csr_t(processor_t* const proc, const reg_t addr);
+  virtual void verify_permissions(insn_t insn, bool write) const override; // custom access check
+  virtual reg_t read() const noexcept override; // defined outside this header
+ protected:
+  bool unlogged_write(const reg_t val) noexcept override;
+};
+
+class inaccessible_csr_t: public csr_t { // CSR object that holds no value
+ public:
+  inaccessible_csr_t(processor_t* const proc, const reg_t addr);
+  virtual void verify_permissions(insn_t insn, bool write) const override; // defined outside this header
+  reg_t read() const noexcept override { return 0; } // always reads as zero
+ protected:
+  bool unlogged_write(const reg_t UNUSED val) noexcept override { return false; } // ignores val; always false
+};
+
+class vstopi_csr_t: public csr_t { // derives directly from csr_t; declares no data members
+ public:
+  vstopi_csr_t(processor_t* const proc, const reg_t addr);
+  virtual void verify_permissions(insn_t insn, bool write) const override; // custom access check
+  virtual reg_t read() const noexcept override; // defined outside this header
+ protected:
+  bool unlogged_write(const reg_t val) noexcept override;
+};
+
+class siselect_csr_t: public basic_csr_t { // basic_csr_t that differs only in its permission check
+ public:
+  siselect_csr_t(processor_t* const proc, const reg_t addr, const reg_t init);
+  virtual void verify_permissions(insn_t insn, bool write) const override; // the only overridden member
+};
+
+class aia_csr_t: public masked_csr_t { // masked_csr_t that differs only in its permission check
+ public:
+  aia_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask, const reg_t init);
+  virtual void verify_permissions(insn_t insn, bool write) const override; // the only overridden member
+};
 #endif
diff --git a/riscv/decode.h b/riscv/decode.h
index d17cb6b..51ecbeb 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -79,6 +79,10 @@ public:
   insn_t(insn_bits_t bits) : b(bits) {}
   insn_bits_t bits() { return b; }
   int length() { return insn_length(b); }
+  [[maybe_unused]] int64_t opcode() { return x(0, 7); } // x(0, 7); presumably bits [6:0] -- confirm against x()
+  [[maybe_unused]] int64_t funct7() { return x(25, 7); } // x(25, 7); presumably bits [31:25] -- confirm against x()
+  [[maybe_unused]] int64_t funct3() { return x(12, 3); } // x(12, 3); presumably bits [14:12] -- confirm against x()
+  [[maybe_unused]] int64_t funct2() { return x(25, 2); } // x(25, 2); presumably bits [26:25] -- confirm against x()
   int64_t i_imm() { return xs(20, 12); }
   int64_t shamt() { return x(20, 6); }
   int64_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); }
@@ -95,6 +99,7 @@ public:
   uint64_t bs() { return x(30, 2); } // Crypto ISE - SM4/AES32 byte select.
   uint64_t rcon() { return x(20, 4); } // Crypto ISE - AES64 round const.
 
+  [[maybe_unused]] int64_t rvc_opcode() { return x(0, 2); }
   int64_t rvc_imm() { return x(2, 5) + (xs(12, 1) << 5); }
   int64_t rvc_zimm() { return x(2, 5) + (x(12, 1) << 5); }
   int64_t rvc_addi4spn_imm() { return (x(6, 1) << 2) + (x(5, 1) << 3) + (x(11, 2) << 4) + (x(7, 4) << 6); }
diff --git a/riscv/devices.cc b/riscv/devices.cc
index fb5bb5a..b816ca1 100644
--- a/riscv/devices.cc
+++ b/riscv/devices.cc
@@ -156,21 +156,21 @@ void mem_t::dump(std::ostream& o) {
   }
 }
 
-external_sim_device_t::external_sim_device_t(void* sim) 
+external_sim_device_t::external_sim_device_t(abstract_sim_if_t* sim) 
   : external_simulator(sim) {}
 
-void external_sim_device_t::set_simulator(void* sim) {
+void external_sim_device_t::set_simulator(abstract_sim_if_t* sim) {
   external_simulator = sim;
 }
 
 bool external_sim_device_t::load(reg_t addr, size_t len, uint8_t* bytes) {
   if (unlikely(external_simulator == nullptr)) return false;
-  return static_cast<abstract_sim_if_t*>(external_simulator)->load(addr, len, bytes);
+  return external_simulator->load(addr, len, bytes);
 }
 
 bool external_sim_device_t::store(reg_t addr, size_t len, const uint8_t* bytes) {
   if (unlikely(external_simulator == nullptr)) return false;
-  return static_cast<abstract_sim_if_t*>(external_simulator)->store(addr, len, bytes);
+  return external_simulator->store(addr, len, bytes);
 }
 
 reg_t external_sim_device_t::size() {
diff --git a/riscv/devices.h b/riscv/devices.h
index e7b80ad..ccb5c9b 100644
--- a/riscv/devices.h
+++ b/riscv/devices.h
@@ -80,14 +80,14 @@ public:
 
 class external_sim_device_t : public abstract_device_t {
 public:
-  external_sim_device_t(void* sim);
-  void set_simulator(void* sim);
+  external_sim_device_t(abstract_sim_if_t* sim);
+  void set_simulator(abstract_sim_if_t* sim);
   bool load(reg_t addr, size_t len, uint8_t* bytes) override;
   bool store(reg_t addr, size_t len, const uint8_t* bytes) override;
   reg_t size() override;
 
 private:
-  void* external_simulator;
+  abstract_sim_if_t* external_simulator;
 };
 
 class clint_t : public abstract_device_t {
diff --git a/riscv/execute.cc b/riscv/execute.cc
index 39d5ca4..1b572a7 100644
--- a/riscv/execute.cc
+++ b/riscv/execute.cc
@@ -201,7 +201,7 @@ static inline reg_t execute_insn_logged(processor_t* p, reg_t pc, insn_fetch_t f
   return npc;
 }
 
-bool processor_t::slow_path()
+bool processor_t::slow_path() const
 {
   return debug || state.single_step != state.STEP_NONE || state.debug_mode ||
          log_commits_enabled || histogram_enabled || in_wfi || check_triggers_icount;
@@ -210,6 +210,8 @@ bool processor_t::slow_path()
 // fetch/decode/execute loop
 void processor_t::step(size_t n)
 {
+  mmu_t* _mmu = mmu;
+
   if (!state.debug_mode) {
     if (halt_request == HR_REGULAR) {
       enter_debug_mode(DCSR_CAUSE_DEBUGINT, 0);
@@ -221,10 +223,18 @@ void processor_t::step(size_t n)
     }
   }
 
+  if (extension_enabled(EXT_ZICCID)) {
+    // Ziccid requires stores eventually become visible to instruction fetch,
+    // so periodically flush the I$
+    if (ziccid_flush_count-- == 0) {
+      ziccid_flush_count += ZICCID_FLUSH_PERIOD;
+      _mmu->flush_icache();
+    }
+  }
+
   while (n > 0) {
     size_t instret = 0;
     reg_t pc = state.pc;
-    mmu_t* _mmu = mmu;
     state.prv_changed = false;
     state.v_changed = false;
 
diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h
index bcbfe74..728678c 100644
--- a/riscv/insns/vghsh_vv.h
+++ b/riscv/insns/vghsh_vv.h
@@ -2,9 +2,13 @@
 
 #include "zvk_ext_macros.h"
 
+const uint32_t EGS = 4;
+
 require_zvkg;
 require(P.VU.vsew == 32);
 require_egw_fits(128);
+require(P.VU.vl->read() % EGS == 0);
+VI_CHECK_SSS(true)
 
 VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h
index 820b396..0d223e8 100644
--- a/riscv/insns/vgmul_vv.h
+++ b/riscv/insns/vgmul_vv.h
@@ -2,9 +2,13 @@
 
 #include "zvk_ext_macros.h"
 
+const uint32_t EGS = 4;
+
 require_zvkg;
 require(P.VU.vsew == 32);
 require_egw_fits(128);
+require(P.VU.vl->read() % EGS == 0);
+VI_CHECK_SSS(false)
 
 VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index 49c804c..00155db 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -21,23 +21,22 @@ for (reg_t i = 0; i < vl; ++i) {
     }
   }
 
-  bool use_ori = (insn.v_vm() == 0) && !do_mask;
+  // Bypass masked-off elements
+  if ((insn.v_vm() == 0) && !do_mask)
+    continue;
+
   switch (sew) {
   case e8:
-    P.VU.elt<uint8_t>(rd_num, i, true) = use_ori ?
-                                   P.VU.elt<uint8_t>(rd_num, i) : cnt;
+    P.VU.elt<uint8_t>(rd_num, i, true) = cnt;
     break;
   case e16:
-    P.VU.elt<uint16_t>(rd_num, i, true) = use_ori ?
-                                    P.VU.elt<uint16_t>(rd_num, i) : cnt;
+    P.VU.elt<uint16_t>(rd_num, i, true) = cnt;
     break;
   case e32:
-    P.VU.elt<uint32_t>(rd_num, i, true) = use_ori ?
-                                    P.VU.elt<uint32_t>(rd_num, i) : cnt;
+    P.VU.elt<uint32_t>(rd_num, i, true) = cnt;
     break;
   default:
-    P.VU.elt<uint64_t>(rd_num, i, true) = use_ori ?
-                                    P.VU.elt<uint64_t>(rd_num, i) : cnt;
+    P.VU.elt<uint64_t>(rd_num, i, true) = cnt;
     break;
   }
 
diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h
index e9a87cf..49129f7 100644
--- a/riscv/insns/vmandn_mm.h
+++ b/riscv/insns/vmandn_mm.h
@@ -1,2 +1,2 @@
 // vmandn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 & ~vs1);
+VI_LOOP_MASK(vs2 & !vs1);
diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h
index 5a3ab09..4659e2f 100644
--- a/riscv/insns/vmnand_mm.h
+++ b/riscv/insns/vmnand_mm.h
@@ -1,2 +1,2 @@
 // vmnand.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 & vs1));
+VI_LOOP_MASK(!(vs2 & vs1));
diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h
index ab93378..37327c0 100644
--- a/riscv/insns/vmnor_mm.h
+++ b/riscv/insns/vmnor_mm.h
@@ -1,2 +1,2 @@
 // vmnor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 | vs1));
+VI_LOOP_MASK(!(vs2 | vs1));
diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h
index 23026f5..71acc05 100644
--- a/riscv/insns/vmorn_mm.h
+++ b/riscv/insns/vmorn_mm.h
@@ -1,2 +1,2 @@
 // vmorn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 | ~vs1);
+VI_LOOP_MASK(vs2 | !vs1);
diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h
index 0736d5b..8db61c2 100644
--- a/riscv/insns/vmxnor_mm.h
+++ b/riscv/insns/vmxnor_mm.h
@@ -1,2 +1,2 @@
 // vmnxor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 ^ vs1));
+VI_LOOP_MASK(!(vs2 ^ vs1));
diff --git a/riscv/insns/vsm3c_vi.h b/riscv/insns/vsm3c_vi.h
index b3e8121..f9375a5 100644
--- a/riscv/insns/vsm3c_vi.h
+++ b/riscv/insns/vsm3c_vi.h
@@ -3,6 +3,7 @@
 #include "zvksh_ext_macros.h"
 
 require_vsm3_constraints;
+VI_CHECK_SSS(false)
 
 VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm3me_vv.h b/riscv/insns/vsm3me_vv.h
index dd6cb52..388b79f 100644
--- a/riscv/insns/vsm3me_vv.h
+++ b/riscv/insns/vsm3me_vv.h
@@ -13,6 +13,7 @@
   (ZVKSH_P1((M16) ^  (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))
 
 require_vsm3_constraints;
+VI_CHECK_SSS(true)
 
 VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm4k_vi.h b/riscv/insns/vsm4k_vi.h
index 8f52e68..dd6f67d 100644
--- a/riscv/insns/vsm4k_vi.h
+++ b/riscv/insns/vsm4k_vi.h
@@ -15,6 +15,7 @@ static constexpr uint32_t zvksed_ck[32] = {
 };
 
 require_vsm4_constraints;
+VI_CHECK_SSS(false)
 
 VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h
index 44011eb..8db1050 100644
--- a/riscv/insns/vsm4r_vs.h
+++ b/riscv/insns/vsm4r_vs.h
@@ -3,8 +3,10 @@
 #include "zvksed_ext_macros.h"
 
 require_vsm4_constraints;
+require_align(insn.rd(), P.VU.vflmul);
+require_vs2_align_eglmul(128);
 // No overlap of vd and vs2.
-require(insn.rd() != insn.rs2());
+require_noover_eglmul(insn.rd(), insn.rs2());
 
 VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsm4r_vv.h b/riscv/insns/vsm4r_vv.h
index 9a18cec..18afee6 100644
--- a/riscv/insns/vsm4r_vv.h
+++ b/riscv/insns/vsm4r_vv.h
@@ -2,7 +2,9 @@
 
 #include "zvksed_ext_macros.h"
 
+
 require_vsm4_constraints;
+VI_CHECK_SSS(false)
 
 VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
   {},
diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h
index 5c58927..4cf616d 100644
--- a/riscv/insns/vsra_vi.h
+++ b/riscv/insns/vsra_vi.h
@@ -1,5 +1,5 @@
 // vsra.vi vd, vs2, zimm5
 VI_VI_LOOP
 ({
-  vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+  vd = vs2 >> (insn.v_zimm5() & (sew - 1));
 })
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index cbdf47a..12f1240 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -1,8 +1,8 @@
-// vssra.vi vd, vs2, simm5
+// vssra.vi vd, vs2, zimm5
 VI_VI_LOOP
 ({
   VRM xrm = P.VU.get_vround_mode();
-  int sh = simm5 & (sew - 1);
+  int sh = insn.v_zimm5() & (sew - 1);
   int128_t val = vs2;
 
   INT_ROUNDING(val, xrm, sh);
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 74fa37c..a2de49e 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -1,4 +1,4 @@
-// vssra.vi vd, vs2, simm5
+// vssrl.vi vd, vs2, zimm5
 VI_VI_ULOOP
 ({
   VRM xrm = P.VU.get_vround_mode();
diff --git a/riscv/insns/vwsll_vi.h b/riscv/insns/vwsll_vi.h
index 13b5eb4..866cd78 100644
--- a/riscv/insns/vwsll_vi.h
+++ b/riscv/insns/vwsll_vi.h
@@ -3,6 +3,7 @@
 #include "zvk_ext_macros.h"
 
 require_zvbb;
+VI_CHECK_DSS(false);
 
 VI_ZVK_VI_WIDENING_ULOOP({
   const reg_t shift = zimm5 & ((2 * sew) - 1);
diff --git a/riscv/insns/vwsll_vv.h b/riscv/insns/vwsll_vv.h
index 5a64c6c..180fe97 100644
--- a/riscv/insns/vwsll_vv.h
+++ b/riscv/insns/vwsll_vv.h
@@ -3,6 +3,7 @@
 #include "zvk_ext_macros.h"
 
 require_zvbb;
+VI_CHECK_DSS(true);
 
 VI_ZVK_VV_WIDENING_ULOOP({
   const reg_t shift = (vs1 & ((2 * sew) - 1));
diff --git a/riscv/insns/vwsll_vx.h b/riscv/insns/vwsll_vx.h
index 5264e80..4137d39 100644
--- a/riscv/insns/vwsll_vx.h
+++ b/riscv/insns/vwsll_vx.h
@@ -3,6 +3,7 @@
 #include "zvk_ext_macros.h"
 
 require_zvbb;
+VI_CHECK_DSS(false);
 
 VI_ZVK_VX_WIDENING_ULOOP({
   const reg_t shift = (rs1 & ((2 * sew) - 1));
diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h
index ea64660..e99f720 100644
--- a/riscv/isa_parser.h
+++ b/riscv/isa_parser.h
@@ -50,6 +50,7 @@ typedef enum {
   EXT_ZFINX,
   EXT_ZHINX,
   EXT_ZHINXMIN,
+  EXT_ZICCID,
   EXT_ZICBOM,
   EXT_ZICBOZ,
   EXT_ZICNTR,
@@ -90,6 +91,8 @@ typedef enum {
   EXT_SMMPM,
   EXT_SMNPM,
   EXT_SSNPM,
+  EXT_SMAIA,
+  EXT_SSAIA,
   NUM_ISA_EXTENSIONS
 } isa_extension_t;
 
diff --git a/riscv/mmu.cc b/riscv/mmu.cc
index 01017f6..30fc47a 100644
--- a/riscv/mmu.cc
+++ b/riscv/mmu.cc
@@ -7,8 +7,8 @@
 #include "processor.h"
 #include "decode_macros.h"
 
-mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc)
- : sim(sim), proc(proc),
+mmu_t::mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz)
+ : sim(sim), proc(proc), blocksz(cache_blocksz),
 #ifdef RISCV_ENABLE_DUAL_ENDIAN
   target_big_endian(endianness == endianness_big),
 #endif
@@ -369,7 +369,7 @@ void mmu_t::store_slow_path(reg_t original_addr, reg_t len, const uint8_t* bytes
     store_slow_path_intrapage(len, bytes, access_info, actually_store);
   }
 
-  if (proc && unlikely(proc->get_log_commits_enabled()))
+  if (actually_store && proc && unlikely(proc->get_log_commits_enabled()))
     proc->state.log_mem_write.push_back(std::make_tuple(original_addr, reg_from_bytes(len, bytes), len));
 }
 
diff --git a/riscv/mmu.h b/riscv/mmu.h
index 86f06ab..305d502 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -89,7 +89,7 @@ private:
   mem_access_info_t generate_access_info(reg_t addr, access_type type, xlate_flags_t xlate_flags);
 
 public:
-  mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc);
+  mmu_t(simif_t* sim, endianness_t endianness, processor_t* proc, reg_t cache_blocksz);
   ~mmu_t();
 
   template<typename T>
@@ -303,7 +303,7 @@ public:
   T ALWAYS_INLINE fetch_jump_table(reg_t addr) {
     T res = 0;
     for (size_t i = 0; i < sizeof(T) / sizeof(insn_parcel_t); i++)
-      res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t));
+      res |= (T)fetch_insn_parcel(addr + i * sizeof(insn_parcel_t)) << (i * sizeof(insn_parcel_t) * 8);
 
     // table accesses use data endianness, not instruction (little) endianness
     return target_big_endian ? to_be(res) : res;
@@ -397,11 +397,6 @@ public:
     return target_big_endian? target_endian<T>::to_be(n) : target_endian<T>::to_le(n);
   }
 
-  void set_cache_blocksz(reg_t size)
-  {
-    blocksz = size;
-  }
-
 private:
   simif_t* sim;
   processor_t* proc;
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 7f2603a..6fe64ab 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -34,7 +34,8 @@ processor_t::processor_t(const char* isa_str, const char* priv_str,
                          const cfg_t *cfg,
                          simif_t* sim, uint32_t id, bool halt_on_reset,
                          FILE* log_file, std::ostream& sout_)
-: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg), sim(sim), id(id), xlen(0),
+: debug(false), halt_request(HR_NONE), isa(isa_str, priv_str), cfg(cfg),
+  sim(sim), id(id), xlen(isa.get_max_xlen()),
   histogram_enabled(false), log_commits_enabled(false),
   log_file(log_file), sout_(sout_.rdbuf()), halt_on_reset(halt_on_reset),
   in_wfi(false), check_triggers_icount(false),
@@ -62,7 +63,7 @@ processor_t::processor_t(const char* isa_str, const char* priv_str,
   VU.vstart_alu = 0;
 
   register_base_instructions();
-  mmu = new mmu_t(sim, cfg->endianness, this);
+  mmu = new mmu_t(sim, cfg->endianness, this, cfg->cache_blocksz);
 
   disassembler = new disassembler_t(&isa);
   for (auto e : isa.get_extensions())
@@ -241,10 +242,79 @@ void processor_t::set_mmu_capability(int cap)
   }
 }
 
+reg_t processor_t::select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const // reduces the mask by fixed priority
+{
+  // nonstandard interrupts have highest priority
+  if (enabled_interrupts >> (IRQ_LCOF + 1)) // any bit above IRQ_LCOF set
+    enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1); // clear bits 0..IRQ_LCOF; several high bits may remain
+  // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI, LCOFI, VSEI, VSSI, VSTI
+  else if (enabled_interrupts & MIP_MEIP)
+    enabled_interrupts = MIP_MEIP;
+  else if (enabled_interrupts & MIP_MSIP)
+    enabled_interrupts = MIP_MSIP;
+  else if (enabled_interrupts & MIP_MTIP)
+    enabled_interrupts = MIP_MTIP;
+  else if (enabled_interrupts & MIP_SEIP)
+    enabled_interrupts = MIP_SEIP;
+  else if (enabled_interrupts & MIP_SSIP)
+    enabled_interrupts = MIP_SSIP;
+  else if (enabled_interrupts & MIP_STIP)
+    enabled_interrupts = MIP_STIP;
+  else if (enabled_interrupts & MIP_LCOFIP)
+    enabled_interrupts = MIP_LCOFIP;
+  else if (enabled_interrupts & MIP_VSEIP)
+    enabled_interrupts = MIP_VSEIP;
+  else if (enabled_interrupts & MIP_VSSIP)
+    enabled_interrupts = MIP_VSSIP;
+  else if (enabled_interrupts & MIP_VSTIP)
+    enabled_interrupts = MIP_VSTIP;
+
+  return enabled_interrupts; // returned unchanged when no branch above matched (including an all-zero mask)
+}
+
+bool processor_t::is_handled_in_vs() // true only when the M and HS stages are empty and a VS interrupt is enabled
+{
+  reg_t pending_interrupts = state.mip->read() & state.mie->read(); // bits set in both mip and mie
+
+  const reg_t s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read();
+  const reg_t vstopi = state.vstopi->read();
+  const reg_t vs_pending_interrupt = vstopi ? (reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0; // one-hot bit for vstopi's IID field, 0 if vstopi reads 0
+
+  // M-ints have higher priority over HS-ints and VS-ints
+  const reg_t mie = get_field(state.mstatus->read(), MSTATUS_MIE);
+  const reg_t m_enabled = state.prv < PRV_M || (state.prv == PRV_M && mie); // 0 or 1
+  reg_t enabled_interrupts = pending_interrupts & ~state.mideleg->read() & -m_enabled; // -m_enabled is an all-zeros or all-ones mask
+  if (enabled_interrupts == 0) {
+    // HS-ints have higher priority over VS-ints
+    const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read(); // in mideleg but not in hideleg
+    const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE);
+    const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie);
+    enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled;
+    if (state.v && enabled_interrupts == 0) {
+      // VS-ints have least priority and can only be taken with virt enabled
+      const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie);
+      enabled_interrupts = vs_pending_interrupt & -vs_enabled;
+      if (enabled_interrupts)
+        return true; // the only path that reports true
+    }
+  }
+  return false; // an M or HS interrupt is enabled, state.v is clear, or no VS interrupt is enabled
+}
+
 void processor_t::take_interrupt(reg_t pending_interrupts)
 {
+  reg_t s_pending_interrupts = 0;
+  reg_t vstopi = 0;
+  reg_t vs_pending_interrupt = 0;
+
+  if (extension_enable_table[EXT_SSAIA]) {
+    s_pending_interrupts = state.nonvirtual_sip->read() & state.nonvirtual_sie->read();
+    vstopi = state.vstopi->read();
+    vs_pending_interrupt = vstopi ? (reg_t(1) << get_field(vstopi, MTOPI_IID)) : 0;
+  }
+
   // Do nothing if no pending interrupts
-  if (!pending_interrupts) {
+  if (!pending_interrupts && !s_pending_interrupts && !vs_pending_interrupt) {
     return;
   }
 
@@ -260,46 +330,20 @@ void processor_t::take_interrupt(reg_t pending_interrupts)
     const reg_t deleg_to_hs = state.mideleg->read() & ~state.hideleg->read();
     const reg_t sie = get_field(state.sstatus->read(), MSTATUS_SIE);
     const reg_t hs_enabled = state.v || state.prv < PRV_S || (state.prv == PRV_S && sie);
-    enabled_interrupts = pending_interrupts & deleg_to_hs & -hs_enabled;
+    enabled_interrupts = ((pending_interrupts & deleg_to_hs) | (s_pending_interrupts & ~state.hideleg->read())) & -hs_enabled;
     if (state.v && enabled_interrupts == 0) {
       // VS-ints have least priority and can only be taken with virt enabled
-      const reg_t deleg_to_vs = state.hideleg->read();
       const reg_t vs_enabled = state.prv < PRV_S || (state.prv == PRV_S && sie);
-      enabled_interrupts = pending_interrupts & deleg_to_vs & -vs_enabled;
+      enabled_interrupts = vs_pending_interrupt & -vs_enabled;
     }
   }
 
   const bool nmie = !(state.mnstatus && !get_field(state.mnstatus->read(), MNSTATUS_NMIE));
   if (!state.debug_mode && nmie && enabled_interrupts) {
-    // nonstandard interrupts have highest priority
-    if (enabled_interrupts >> (IRQ_LCOF + 1))
-      enabled_interrupts = enabled_interrupts >> (IRQ_LCOF + 1) << (IRQ_LCOF + 1);
-    // standard interrupt priority is MEI, MSI, MTI, SEI, SSI, STI
-    else if (enabled_interrupts & MIP_MEIP)
-      enabled_interrupts = MIP_MEIP;
-    else if (enabled_interrupts & MIP_MSIP)
-      enabled_interrupts = MIP_MSIP;
-    else if (enabled_interrupts & MIP_MTIP)
-      enabled_interrupts = MIP_MTIP;
-    else if (enabled_interrupts & MIP_SEIP)
-      enabled_interrupts = MIP_SEIP;
-    else if (enabled_interrupts & MIP_SSIP)
-      enabled_interrupts = MIP_SSIP;
-    else if (enabled_interrupts & MIP_STIP)
-      enabled_interrupts = MIP_STIP;
-    else if (enabled_interrupts & MIP_LCOFIP)
-      enabled_interrupts = MIP_LCOFIP;
-    else if (enabled_interrupts & MIP_VSEIP)
-      enabled_interrupts = MIP_VSEIP;
-    else if (enabled_interrupts & MIP_VSSIP)
-      enabled_interrupts = MIP_VSSIP;
-    else if (enabled_interrupts & MIP_VSTIP)
-      enabled_interrupts = MIP_VSTIP;
-    else
-      abort();
+    reg_t selected_interrupt = select_an_interrupt_with_default_priority(enabled_interrupts);
 
     if (check_triggers_icount) TM.detect_icount_match();
-    throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(enabled_interrupts));
+    throw trap_t(((reg_t)1 << (isa.get_max_xlen() - 1)) | ctz(selected_interrupt));
   }
 }
 
@@ -327,7 +371,7 @@ void processor_t::set_privilege(reg_t prv, bool virt)
   state.v_changed = state.v != state.prev_v;
 }
 
-const char* processor_t::get_privilege_string()
+const char* processor_t::get_privilege_string() const
 {
   if (state.debug_mode)
     return "D";
@@ -403,7 +447,8 @@ void processor_t::take_trap(trap_t& t, reg_t epc)
   bool supv_double_trap = false;
   if (interrupt) {
     vsdeleg = (curr_virt && state.prv <= PRV_S) ? state.hideleg->read() : 0;
-    hsdeleg = (state.prv <= PRV_S) ? state.mideleg->read() : 0;
+    vsdeleg >>= 1;
+    hsdeleg = (state.prv <= PRV_S) ? (state.mideleg->read() | state.nonvirtual_sip->read()) : 0;
     bit &= ~((reg_t)1 << (max_xlen - 1));
   } else {
     vsdeleg = (curr_virt && state.prv <= PRV_S) ? (state.medeleg->read() & state.hedeleg->read()) : 0;
@@ -420,9 +465,9 @@ void processor_t::take_trap(trap_t& t, reg_t epc)
     if (supv_double_trap)
       vsdeleg = hsdeleg = 0;
   }
-  if (state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) {
+  if ((state.prv <= PRV_S && bit < max_xlen && ((vsdeleg >> bit) & 1)) || (state.v && interrupt && is_handled_in_vs())) {
     // Handle the trap in VS-mode
-    const reg_t adjusted_cause = interrupt ? bit - 1 : bit;  // VSSIP -> SSIP, etc
+    const reg_t adjusted_cause = bit;
     reg_t vector = (state.vstvec->read() & 1) && interrupt ? 4 * adjusted_cause : 0;
     state.pc = (state.vstvec->read() & ~(reg_t)1) + vector;
     state.vscause->write(adjusted_cause | (interrupt ? interrupt_bit : 0));
diff --git a/riscv/processor.h b/riscv/processor.h
index 6b611d7..a6e9eeb 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -70,6 +70,7 @@ typedef std::vector<std::tuple<reg_t, uint64_t, uint8_t>> commit_log_mem_t;
 // architectural state of a RISC-V hart
 struct state_t
 {
+  void add_ireg_proxy(processor_t* const proc, sscsrind_reg_csr_t::sscsrind_reg_csr_t_p ireg);
   void reset(processor_t* const proc, reg_t max_isa);
   void add_csr(reg_t addr, const csr_t_p& csr);
 
@@ -96,6 +97,8 @@ struct state_t
   wide_counter_csr_t_p mcycle;
   mie_csr_t_p mie;
   mip_csr_t_p mip;
+  csr_t_p nonvirtual_sip;
+  csr_t_p nonvirtual_sie;
   csr_t_p medeleg;
   csr_t_p mideleg;
   csr_t_p mcounteren;
@@ -173,6 +176,11 @@ struct state_t
 
   csr_t_p ssp;
 
+  csr_t_p mvien;
+  mvip_csr_t_p mvip;
+  csr_t_p hvictl;
+  csr_t_p vstopi;
+
   bool serialized; // whether timer CSRs are in a well-defined state
 
   // When true, execute a single instruction and then enter debug mode.  This
@@ -249,8 +257,8 @@ public:
               FILE *log_file, std::ostream& sout_); // because of command line option --log and -s we need both
   ~processor_t();
 
-  const isa_parser_t &get_isa() { return isa; }
-  const cfg_t &get_cfg() { return *cfg; }
+  const isa_parser_t &get_isa() const & { return isa; }
+  const cfg_t &get_cfg() const & { return *cfg; }
 
   void set_debug(bool value);
   void set_histogram(bool value);
@@ -327,7 +335,7 @@ public:
   }
   reg_t legalize_privilege(reg_t);
   void set_privilege(reg_t, bool);
-  const char* get_privilege_string();
+  const char* get_privilege_string() const;
   void update_histogram(reg_t pc);
   const disassembler_t* get_disassembler() { return disassembler; }
 
@@ -349,8 +357,8 @@ public:
   // When true, display disassembly of each instruction that's executed.
   bool debug;
   // When true, take the slow simulation path.
-  bool slow_path();
-  bool halted() { return state.debug_mode; }
+  bool slow_path() const;
+  bool halted() const { return state.debug_mode; }
   enum {
     HR_NONE,    /* Halt request is inactive. */
     HR_REGULAR, /* Regular halt request/debug interrupt. */
@@ -370,6 +378,8 @@ public:
 
   void check_if_lpad_required();
 
+  reg_t select_an_interrupt_with_default_priority(reg_t enabled_interrupts) const;
+
 private:
   const isa_parser_t isa;
   const cfg_t * const cfg;
@@ -402,6 +412,10 @@ private:
   static const size_t OPCODE_CACHE_SIZE = 4095;
   opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE];
 
+  unsigned ziccid_flush_count = 0;
+  static const unsigned ZICCID_FLUSH_PERIOD = 10;
+
+  bool is_handled_in_vs();
   void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); }
   void take_interrupt(reg_t mask); // take first enabled interrupt in mask
   void take_trap(trap_t& t, reg_t epc); // take an exception
diff --git a/riscv/sim.cc b/riscv/sim.cc
index fd1c6fb..388d729 100644
--- a/riscv/sim.cc
+++ b/riscv/sim.cc
@@ -96,7 +96,7 @@ sim_t::sim_t(const cfg_t *cfg, bool halted,
   }
 #endif
 
-  debug_mmu = new mmu_t(this, cfg->endianness, NULL);
+  debug_mmu = new mmu_t(this, cfg->endianness, NULL, cfg->cache_blocksz);
 
   // When running without using a dtb, skip the fdt-based configuration steps
   if (!dtb_enabled) {
diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h
index b6a4b92..1e33232 100644
--- a/riscv/v_ext_macros.h
+++ b/riscv/v_ext_macros.h
@@ -200,7 +200,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
   require_vector(true); \
   reg_t vl = P.VU.vl->read(); \
   reg_t UNUSED sew = P.VU.vsew; \
-  reg_t rd_num = insn.rd(); \
+  reg_t UNUSED rd_num = insn.rd(); \
   reg_t UNUSED rs1_num = insn.rs1(); \
   reg_t rs2_num = insn.rs2(); \
   for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
@@ -336,7 +336,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
 
 #define VI_PARAMS(x) \
   type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
-  type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
+  type_sew_t<x>::type UNUSED simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
   type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
 
 #define XV_PARAMS(x) \
diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h
index f094629..702ad91 100644
--- a/riscv/zvk_ext_macros.h
+++ b/riscv/zvk_ext_macros.h
@@ -86,6 +86,32 @@
 //    (LMUL * VLEN) <= EGW
 #define require_egw_fits(EGW)  require((EGW) <= (P.VU.VLEN * P.VU.vflmul))
 
+// Requires that register index VREG_NUM is aligned to the element-group
+// EMUL, evaluated as EGW / VLEN. Both macro arguments are parenthesized so
+// that compound expressions expand safely. The check only applies when the
+// quotient exceeds one (fractional EMUL needs no index alignment).
+#define require_vreg_align_eglmul(EGW, VREG_NUM) \
+  do { \
+    float vfeglmul = (EGW) / P.VU.VLEN; \
+    if (vfeglmul > 1) { \
+      require_align((VREG_NUM), vfeglmul); \
+    } \
+  } while (0)
+
+#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2())
+
+// Ensures that rs2 and rd do not overlap, assuming rd encodes an LMUL-wide
+// vector register group and rs2 encodes a vs2_EMUL=ceil(EGW / VLEN) vector
+// register group: both indices are reduced to their LMUL-sized group number.
+// Assumption: LMUL >= vs2_EMUL which is enforced independently through require_egw_fits.
+#define require_noover_eglmul(vd, vs2) \
+  do { \
+    int vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \
+    int aligned_vd = (vd) / vd_emul; \
+    int aligned_vs2 = (vs2) / vd_emul; \
+    require(aligned_vd != aligned_vs2); \
+  } while (0)
+
 // Checks that the vector unit state (vtype and vl) can be interpreted
 // as element groups with EEW=32, EGS=4 (four 32-bits elements per group),
 // for an effective element group width of EGW=128 bits.
diff --git a/riscv/zvkned_ext_macros.h b/riscv/zvkned_ext_macros.h
index db705c7..d94ddc2 100644
--- a/riscv/zvkned_ext_macros.h
+++ b/riscv/zvkned_ext_macros.h
@@ -2,6 +2,7 @@
 // the RISC-V Zvkned extension (vector AES single round).
 
 #include "insns/aes_common.h"
+#include "zvk_ext_macros.h"
 
 #ifndef RISCV_ZVKNED_EXT_MACROS_H_
 #define RISCV_ZVKNED_EXT_MACROS_H_
@@ -9,16 +10,22 @@
 // vaes*.vs instruction constraints:
 //  - Zvkned is enabled
 //  - EGW (128) <= LMUL * VLEN
+//  - vd is LMUL aligned
+//  - vs2 is ceil(EGW / VLEN) aligned
 //  - vd and vs2 cannot overlap
 //
 // The constraint that vstart and vl are both EGS (4) aligned
 // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
 #define require_vaes_vs_constraints \
   do { \
+    const uint32_t EGS = 4; \
     require_zvkned; \
+    require(P.VU.vl->read() % EGS == 0); \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
-    require(insn.rd() != insn.rs2()); \
+    require_align(insn.rd(), P.VU.vflmul); \
+    require_vs2_align_eglmul(128); \
+    require_noover_eglmul(insn.rd(), insn.rs2()); \
   } while (false)
 
 // vaes*.vv instruction constraints. Those are the same as the .vs ones,
@@ -30,17 +37,24 @@
 // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
 #define require_vaes_vv_constraints \
   do { \
+    const uint32_t EGS = 4; \
     require_zvkned; \
+    require(P.VU.vl->read() % EGS == 0); \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
+    VI_CHECK_SSS(false) \
   } while (false)
 
 // vaeskf*.vi instruction constraints. Those are the same as the .vv ones.
 #define require_vaeskf_vi_constraints \
   do { \
+    const uint32_t EGS = 4; \
     require_zvkned; \
+    require(P.VU.vstart->read() % EGS == 0); \
+    require(P.VU.vl->read() % EGS == 0); \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
+    VI_CHECK_SSS(false) \
   } while (false)
 
 #define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0))
diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h
index b50818b..98236b0 100644
--- a/riscv/zvknh_ext_macros.h
+++ b/riscv/zvknh_ext_macros.h
@@ -15,6 +15,7 @@
 // macros.
 #define require_vsha2_common_constraints \
   do { \
+    VI_CHECK_SSS(true) \
     require(P.VU.vsew == 32 || P.VU.vsew == 64); \
     require(insn.rd() != insn.rs1()); \
     require(insn.rd() != insn.rs2()); \
diff --git a/riscv/zvksed_ext_macros.h b/riscv/zvksed_ext_macros.h
index 46e399b..3ffa272 100644
--- a/riscv/zvksed_ext_macros.h
+++ b/riscv/zvksed_ext_macros.h
@@ -16,9 +16,12 @@
 // is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
 #define require_vsm4_constraints \
   do { \
+    const uint32_t EGS = 4; \
     require_zvksed; \
     require(P.VU.vsew == 32); \
     require_egw_fits(128); \
+    require(P.VU.vstart->read() % EGS == 0); \
+    require(P.VU.vl->read() % EGS == 0); \
   } while (false)
 
 // Returns a uint32_t value constructed from the 4 bytes (uint8_t)
diff --git a/riscv/zvksh_ext_macros.h b/riscv/zvksh_ext_macros.h
index 71c5a09..c4549da 100644
--- a/riscv/zvksh_ext_macros.h
+++ b/riscv/zvksh_ext_macros.h
@@ -16,9 +16,12 @@
 // is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
 #define require_vsm3_constraints \
   do { \
+    const uint32_t EGS = 8; \
     require_zvksh; \
     require(P.VU.vsew == 32); \
     require_egw_fits(256); \
+    require(P.VU.vstart->read() % EGS == 0); \
+    require(P.VU.vl->read() % EGS == 0); \
     require(insn.rd() != insn.rs2()); \
   } while (false)
 
diff --git a/spike_main/spike.cc b/spike_main/spike.cc
index b8a1b5c..3b0e004 100644
--- a/spike_main/spike.cc
+++ b/spike_main/spike.cc
@@ -451,6 +451,7 @@ int main(int argc, char** argv)
         min_blocksz, max_blocksz);
       exit(-1);
     }
+    cfg.cache_blocksz = blocksz;
   });
   parser.option(0, "instructions", 1, [&](const char* s){
     instructions = strtoull(s, 0, 0);
@@ -541,7 +542,6 @@ int main(int argc, char** argv)
     if (dc) s.get_core(i)->get_mmu()->register_memtracer(&*dc);
     for (auto e : extensions)
       s.get_core(i)->register_extension(e());
-    s.get_core(i)->get_mmu()->set_cache_blocksz(blocksz);
   }
 
   s.set_debug(debug);